From ae3fd4252a66bf54fbefd76be54a53a65b995efc Mon Sep 17 00:00:00 2001
From: David Garcia Quintas <dgq@google.com>
Date: Wed, 30 Nov 2016 14:31:57 -0800
Subject: [PATCH] Deflaked lb_policies_test

---
 test/core/client_channel/lb_policies_test.c | 116 +++++++-------------
 1 file changed, 38 insertions(+), 78 deletions(-)

diff --git a/test/core/client_channel/lb_policies_test.c b/test/core/client_channel/lb_policies_test.c
index 95595d7c51..47836bd32c 100644
--- a/test/core/client_channel/lb_policies_test.c
+++ b/test/core/client_channel/lb_policies_test.c
@@ -64,9 +64,11 @@ typedef struct servers_fixture {
 } servers_fixture;
 
 typedef struct request_sequences {
-  size_t n;
-  int *connections;
-  int *connectivity_states;
+  size_t n;         /* number of iterations */
+  int *connections; /* indexed by the interation number, value is the index of
+                       the server it connected to or -1 if none */
+  int *connectivity_states; /* indexed by the interation number, value is the
+                               client connectivity state */
 } request_sequences;
 
 typedef void (*verifier_fn)(const servers_fixture *, grpc_channel *,
@@ -780,15 +782,18 @@ static void verify_total_carnage_round_robin(const servers_fixture *f,
     }
   }
 
-  /* no server is ever available. The persistent state is TRANSIENT_FAILURE */
+  /* no server is ever available. The persistent state is TRANSIENT_FAILURE. May
+   * also be CONNECTING if, under load, this check took too long to run and some
+   * subchannel already transitioned to retrying. */
   for (size_t i = 0; i < sequences->n; i++) {
     const grpc_connectivity_state actual = sequences->connectivity_states[i];
-    const grpc_connectivity_state expected = GRPC_CHANNEL_TRANSIENT_FAILURE;
-    if (actual != expected) {
+    const uint32_t expected_bitset =
+        GRPC_CHANNEL_TRANSIENT_FAILURE | GRPC_CHANNEL_CONNECTING;
+    if (((uint32_t)actual & expected_bitset) == 0) {
       gpr_log(GPR_ERROR,
-              "CONNECTIVITY STATUS SEQUENCE FAILURE: expected '%s', got '%s' "
-              "at iteration #%d",
-              grpc_connectivity_state_name(expected),
+              "CONNECTIVITY STATUS SEQUENCE FAILURE: expected "
+              "GRPC_CHANNEL_TRANSIENT_FAILURE or GRPC_CHANNEL_CONNECTING, got "
+              "'%s' at iteration #%d",
               grpc_connectivity_state_name(actual), (int)i);
       abort();
     }
@@ -825,8 +830,7 @@ static void verify_partial_carnage_round_robin(
   }
 
   /* We can assert that the first client channel state should be READY, when all
-   * servers were available; and that the last one should be TRANSIENT_FAILURE,
-   * after all servers are gone. */
+   * servers were available */
   grpc_connectivity_state actual = sequences->connectivity_states[0];
   grpc_connectivity_state expected = GRPC_CHANNEL_READY;
   if (actual != expected) {
@@ -838,15 +842,21 @@ static void verify_partial_carnage_round_robin(
     abort();
   }
 
+  /* ... and that the last one should be TRANSIENT_FAILURE, after all servers
+   * are gone. May also be CONNECTING if, under load, this check took too long
+   * to run and the subchannel already transitioned to retrying. */
   actual = sequences->connectivity_states[num_iters - 1];
-  expected = GRPC_CHANNEL_TRANSIENT_FAILURE;
-  if (actual != expected) {
-    gpr_log(GPR_ERROR,
-            "CONNECTIVITY STATUS SEQUENCE FAILURE: expected '%s', got '%s' "
-            "at iteration #%d",
-            grpc_connectivity_state_name(expected),
-            grpc_connectivity_state_name(actual), (int)num_iters - 1);
-    abort();
+  const uint32_t expected_bitset =
+      GRPC_CHANNEL_TRANSIENT_FAILURE | GRPC_CHANNEL_CONNECTING;
+  for (i = 0; i < sequences->n; i++) {
+    if (((uint32_t)actual & expected_bitset) == 0) {
+      gpr_log(GPR_ERROR,
+              "CONNECTIVITY STATUS SEQUENCE FAILURE: expected "
+              "GRPC_CHANNEL_TRANSIENT_FAILURE or GRPC_CHANNEL_CONNECTING, got "
+              "'%s' at iteration #%d",
+              grpc_connectivity_state_name(actual), (int)i);
+      abort();
+    }
   }
 
   gpr_free(expected_connection_sequence);
@@ -873,68 +883,21 @@ static void verify_rebirth_round_robin(const servers_fixture *f,
                                        grpc_channel *client,
                                        const request_sequences *sequences,
                                        const size_t num_iters) {
-  int *expected_connection_sequence;
-  size_t i, j, unique_seq_last_idx, unique_seq_first_idx;
-  const size_t expected_seq_length = f->num_servers;
-  int *seen_elements;
-
   dump_array("actual_connection_sequence", sequences->connections, num_iters);
 
-  /* verify conn. seq. expectation */
-  /* get the first unique run of length "num_servers". */
-  expected_connection_sequence = gpr_malloc(sizeof(int) * expected_seq_length);
-  seen_elements = gpr_malloc(sizeof(int) * expected_seq_length);
-
-  unique_seq_last_idx = ~(size_t)0;
-
-  memset(seen_elements, 0, sizeof(int) * expected_seq_length);
-  for (i = 0; i < num_iters; i++) {
-    if (sequences->connections[i] < 0 ||
-        seen_elements[sequences->connections[i]] != 0) {
-      /* if anything breaks the uniqueness of the run, back to square zero */
-      memset(seen_elements, 0, sizeof(int) * expected_seq_length);
-      continue;
-    }
-    seen_elements[sequences->connections[i]] = 1;
-    for (j = 0; j < expected_seq_length; j++) {
-      if (seen_elements[j] == 0) break;
-    }
-    if (j == expected_seq_length) { /* seen all the elements */
-      unique_seq_last_idx = i;
-      break;
-    }
-  }
-  /* make sure we found a valid run */
-  dump_array("seen_elements", seen_elements, expected_seq_length);
-  for (j = 0; j < expected_seq_length; j++) {
-    GPR_ASSERT(seen_elements[j] != 0);
-  }
-
-  GPR_ASSERT(unique_seq_last_idx != ~(size_t)0);
-
-  unique_seq_first_idx = (unique_seq_last_idx - expected_seq_length + 1);
-  memcpy(expected_connection_sequence,
-         sequences->connections + unique_seq_first_idx,
-         sizeof(int) * expected_seq_length);
-
   /* first iteration succeeds */
   GPR_ASSERT(sequences->connections[0] != -1);
   /* then we fail for a while... */
   GPR_ASSERT(sequences->connections[1] == -1);
-  /* ... but should be up at "unique_seq_first_idx" */
-  GPR_ASSERT(sequences->connections[unique_seq_first_idx] != -1);
-
-  for (j = 0, i = unique_seq_first_idx; i < num_iters; i++) {
-    const int actual = sequences->connections[i];
-    const int expected =
-        expected_connection_sequence[j++ % expected_seq_length];
-    if (actual != expected) {
-      print_failed_expectations(expected_connection_sequence,
-                                sequences->connections, expected_seq_length,
-                                num_iters);
-      abort();
+  /* ... but should be up eventually */
+  size_t first_iter_back_up = ~0ul;
+  for (size_t i = 2; i < sequences->n; ++i) {
+    if (sequences->connections[i] != -1) {
+      first_iter_back_up = i;
+      break;
     }
   }
+  GPR_ASSERT(first_iter_back_up != ~0ul);
 
   /* We can assert that the first client channel state should be READY, when all
    * servers were available; same thing for the last one. In the middle
@@ -962,7 +925,7 @@ static void verify_rebirth_round_robin(const servers_fixture *f,
   }
 
   bool found_failure_status = false;
-  for (i = 1; i < sequences->n - 1; i++) {
+  for (size_t i = 1; i < sequences->n - 1; i++) {
     if (sequences->connectivity_states[i] == GRPC_CHANNEL_TRANSIENT_FAILURE) {
       found_failure_status = true;
       break;
@@ -974,14 +937,11 @@ static void verify_rebirth_round_robin(const servers_fixture *f,
         "CONNECTIVITY STATUS SEQUENCE FAILURE: "
         "GRPC_CHANNEL_TRANSIENT_FAILURE status not found. Got the following "
         "instead:");
-    for (i = 0; i < num_iters; i++) {
+    for (size_t i = 0; i < num_iters; i++) {
       gpr_log(GPR_ERROR, "[%d]: %s", (int)i,
               grpc_connectivity_state_name(sequences->connectivity_states[i]));
     }
   }
-
-  gpr_free(expected_connection_sequence);
-  gpr_free(seen_elements);
 }
 
 int main(int argc, char **argv) {
-- 
GitLab