From c15d32bbe89a2bf950992ded06d1b3da7f1f39a6 Mon Sep 17 00:00:00 2001
From: Matt Kwong <mattkwong@google.com>
Date: Fri, 30 Jun 2017 11:30:21 -0700
Subject: [PATCH] Don't clear alarms in jobset when running performance tests

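On non-Windows platforms, jobset cancels any pending alarm with
signal.alarm(0) once a run finishes, to avoid a race condition in which
a stale SIGALRM causes job failures. When running multi-VM performance
tests, however, clearing the alarms stalls the test. Add a clear_alarms
parameter to jobset.run(), defaulting to True so existing callers are
unchanged, and pass clear_alarms=False from every jobset.run() call site
in run_performance_tests.py.

For context, a minimal sketch of the signal.alarm() semantics involved
(illustrative only; the handler below is hypothetical and not part of
this patch):

    import signal

    def _on_alarm(signum, frame):
        # A stale alarm firing after its jobset has finished is the
        # kind of race this patch guards against.
        raise RuntimeError('SIGALRM fired')

    signal.signal(signal.SIGALRM, _on_alarm)
    signal.alarm(5)    # schedule a SIGALRM five seconds from now
    # ... run jobs ...
    signal.alarm(0)    # cancel the pending alarm (clear_alarms=True)

signal.alarm() is only available on Unix, which is why the cancellation
stays guarded by platform_string() != 'windows'.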
---
 tools/run_tests/python_utils/jobset.py   | 13 +++++++++----
 tools/run_tests/run_performance_tests.py | 10 +++++-----
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/tools/run_tests/python_utils/jobset.py b/tools/run_tests/python_utils/jobset.py
index 044c6f3aa4..08d652ae3f 100755
--- a/tools/run_tests/python_utils/jobset.py
+++ b/tools/run_tests/python_utils/jobset.py
@@ -367,9 +367,10 @@ class Jobset(object):
   """Manages one run of jobs."""
 
   def __init__(self, check_cancelled, maxjobs, newline_on_success, travis,
-               stop_on_failure, add_env, quiet_success, max_time):
+               stop_on_failure, add_env, quiet_success, max_time, clear_alarms):
     self._running = set()
     self._check_cancelled = check_cancelled
+    self._clear_alarms = clear_alarms
     self._cancelled = False
     self._failures = 0
     self._completed = 0
@@ -473,7 +474,10 @@ class Jobset(object):
     while self._running:
       if self.cancelled(): pass  # poll cancellation
       self.reap()
-    if platform_string() != 'windows':
+    # Clear any pending alarms when finished to avoid a race condition
+    # that causes job failures. Skip this when running multi-VM tests,
+    # where clearing the alarms causes the test to stall.
+    if platform_string() != 'windows' and self._clear_alarms:
       signal.alarm(0)
     return not self.cancelled() and self._failures == 0
 
@@ -503,7 +507,8 @@ def run(cmdlines,
         add_env={},
         skip_jobs=False,
         quiet_success=False,
-        max_time=-1):
+        max_time=-1,
+        clear_alarms=True):
   if skip_jobs:
     resultset = {}
     skipped_job_result = JobResult()
@@ -515,7 +520,7 @@ def run(cmdlines,
   js = Jobset(check_cancelled,
               maxjobs if maxjobs is not None else _DEFAULT_MAX_JOBS,
               newline_on_success, travis, stop_on_failure, add_env,
-              quiet_success, max_time)
+              quiet_success, max_time, clear_alarms)
   for cmdline, remaining in tag_remaining(cmdlines):
     if not js.start(cmdline):
       break
diff --git a/tools/run_tests/run_performance_tests.py b/tools/run_tests/run_performance_tests.py
index ad1fb05481..78d1079c15 100755
--- a/tools/run_tests/run_performance_tests.py
+++ b/tools/run_tests/run_performance_tests.py
@@ -183,7 +183,7 @@ def archive_repo(languages):
 
   jobset.message('START', 'Archiving local repository.', do_newline=True)
   num_failures, _ = jobset.run(
-      [archive_job], newline_on_success=True, maxjobs=1)
+      [archive_job], newline_on_success=True, maxjobs=1, clear_alarms=False)
   if num_failures == 0:
     jobset.message('SUCCESS',
                    'Archive with local repository created successfully.',
@@ -215,7 +215,7 @@ def prepare_remote_hosts(hosts, prepare_local=False):
             timeout_seconds=prepare_timeout))
   jobset.message('START', 'Preparing hosts.', do_newline=True)
   num_failures, _ = jobset.run(
-      prepare_jobs, newline_on_success=True, maxjobs=10)
+      prepare_jobs, newline_on_success=True, maxjobs=10, clear_alarms=False)
   if num_failures == 0:
     jobset.message('SUCCESS',
                    'Prepare step completed successfully.',
@@ -248,7 +248,7 @@ def build_on_remote_hosts(hosts, languages=scenario_config.LANGUAGES.keys(), bui
             timeout_seconds=build_timeout))
   jobset.message('START', 'Building.', do_newline=True)
   num_failures, _ = jobset.run(
-      build_jobs, newline_on_success=True, maxjobs=10)
+      build_jobs, newline_on_success=True, maxjobs=10, clear_alarms=False)
   if num_failures == 0:
     jobset.message('SUCCESS',
                    'Built successfully.',
@@ -414,7 +414,7 @@ def run_collect_perf_profile_jobs(hosts_and_base_names, scenario_name):
     perf_report_jobs.append(perf_report_processor_job(host, perf_base_name, output_filename))
 
   jobset.message('START', 'Collecting perf reports from qps workers', do_newline=True)
-  failures, _ = jobset.run(perf_report_jobs, newline_on_success=True, maxjobs=1)
+  failures, _ = jobset.run(perf_report_jobs, newline_on_success=True, maxjobs=1, clear_alarms=False)
   jobset.message('END', 'Collecting perf reports from qps workers', do_newline=True)
   return failures
 
@@ -556,7 +556,7 @@ for scenario in scenarios:
       jobs = [scenario.jobspec]
       if scenario.workers:
         jobs.append(create_quit_jobspec(scenario.workers, remote_host=args.remote_driver_host))
-      scenario_failures, resultset = jobset.run(jobs, newline_on_success=True, maxjobs=1)
+      scenario_failures, resultset = jobset.run(jobs, newline_on_success=True, maxjobs=1, clear_alarms=False)
       total_scenario_failures += scenario_failures
       merged_resultset = dict(itertools.chain(six.iteritems(merged_resultset),
                                               six.iteritems(resultset)))
-- 
GitLab