diff --git a/test/cpp/qps/client.h b/test/cpp/qps/client.h
index 4045e13460f56bb517c2995565093ad7598fb368..fada4ba767965b300b59a852eedda42b9e90f43c 100644
--- a/test/cpp/qps/client.h
+++ b/test/cpp/qps/client.h
@@ -169,6 +169,7 @@ class Client {
   // Must call AwaitThreadsCompletion before destructor to avoid a race
   // between destructor and invocation of virtual ThreadFunc
   void AwaitThreadsCompletion() {
+    gpr_atm_rel_store(&thread_pool_done_, static_cast<gpr_atm>(true));
     DestroyMultithreading();
     std::unique_lock<std::mutex> g(thread_completion_mu_);
     while (threads_remaining_ != 0) {
@@ -178,8 +179,10 @@ class Client {
 
  protected:
   bool closed_loop_;
+  gpr_atm thread_pool_done_;
 
   void StartThreads(size_t num_threads) {
+    gpr_atm_rel_store(&thread_pool_done_, static_cast<gpr_atm>(false));
     threads_remaining_ = num_threads;
     for (size_t i = 0; i < num_threads; i++) {
       threads_.emplace_back(new Thread(this, i));
@@ -241,18 +244,9 @@ class Client {
   class Thread {
    public:
     Thread(Client* client, size_t idx)
-        : done_(false),
-          client_(client),
-          idx_(idx),
-          impl_(&Thread::ThreadFunc, this) {}
+        : client_(client), idx_(idx), impl_(&Thread::ThreadFunc, this) {}
 
-    ~Thread() {
-      {
-        std::lock_guard<std::mutex> g(mu_);
-        done_ = true;
-      }
-      impl_.join();
-    }
+    ~Thread() { impl_.join(); }
 
     void BeginSwap(Histogram* n) {
       std::lock_guard<std::mutex> g(mu_);
@@ -282,9 +276,9 @@ class Client {
         }
         if (!thread_still_ok) {
           gpr_log(GPR_ERROR, "Finishing client thread due to RPC error");
-          done_ = true;
         }
-        if (done_) {
+        if (!thread_still_ok ||
+            static_cast<bool>(gpr_atm_acq_load(&client_->thread_pool_done_))) {
           client_->CompleteThread();
           return;
         }
@@ -292,7 +286,6 @@ class Client {
     }
 
     std::mutex mu_;
-    bool done_;
     Histogram histogram_;
     Client* client_;
     const size_t idx_;
diff --git a/test/cpp/qps/client_sync.cc b/test/cpp/qps/client_sync.cc
index 25c78235532625280fb18fc51980b7a4d4c7fc38..8062424a1fbaf6c92ffe402e1135dcb9377ee2f0 100644
--- a/test/cpp/qps/client_sync.cc
+++ b/test/cpp/qps/client_sync.cc
@@ -79,10 +79,29 @@ class SynchronousClient
   virtual ~SynchronousClient(){};
 
  protected:
-  void WaitToIssue(int thread_idx) {
+  // WaitToIssue returns false if we realize that we need to break out
+  bool WaitToIssue(int thread_idx) {
     if (!closed_loop_) {
-      gpr_sleep_until(NextIssueTime(thread_idx));
+      const gpr_timespec next_issue_time = NextIssueTime(thread_idx);
+      // Avoid sleeping for too long continuously because we might
+      // need to terminate before then. This is an issue since
+      // exponential distribution can occasionally produce bad outliers
+      while (true) {
+        const gpr_timespec one_sec_delay =
+            gpr_time_add(gpr_now(GPR_CLOCK_MONOTONIC),
+                         gpr_time_from_seconds(1, GPR_TIMESPAN));
+        if (gpr_time_cmp(next_issue_time, one_sec_delay) <= 0) {
+          gpr_sleep_until(next_issue_time);
+          return true;
+        } else {
+          gpr_sleep_until(one_sec_delay);
+          if (gpr_atm_acq_load(&thread_pool_done_) != static_cast<gpr_atm>(0)) {
+            return false;
+          }
+        }
+      }
     }
+    return true;
   }
 
   size_t num_threads_;
@@ -101,7 +120,9 @@ class SynchronousUnaryClient GRPC_FINAL : public SynchronousClient {
   ~SynchronousUnaryClient() {}
 
   bool ThreadFunc(HistogramEntry* entry, size_t thread_idx) GRPC_OVERRIDE {
-    WaitToIssue(thread_idx);
+    if (!WaitToIssue(thread_idx)) {
+      return true;
+    }
     auto* stub = channels_[thread_idx % channels_.size()].get_stub();
     double start = UsageTimer::Now();
     GPR_TIMER_SCOPE("SynchronousUnaryClient::ThreadFunc", 0);
@@ -144,7 +165,9 @@ class SynchronousStreamingClient GRPC_FINAL : public SynchronousClient {
   }
 
   bool ThreadFunc(HistogramEntry* entry, size_t thread_idx) GRPC_OVERRIDE {
-    WaitToIssue(thread_idx);
+    if (!WaitToIssue(thread_idx)) {
+      return true;
+    }
     GPR_TIMER_SCOPE("SynchronousStreamingClient::ThreadFunc", 0);
     double start = UsageTimer::Now();
     if (stream_[thread_idx]->Write(request_) &&