diff --git a/tools/run_tests/jobset.py b/tools/run_tests/jobset.py index a9c96a6c95a309b892fe399f19c989464766cc6d..8343441a189e09ccc4933b9ddcac2a2ff4b997af 100755 --- a/tools/run_tests/jobset.py +++ b/tools/run_tests/jobset.py @@ -272,6 +272,8 @@ class Job(object): message('TIMEOUT_FLAKE', self._spec.shortname, stdout, do_newline=True) self._timeout_retries += 1 self.result.retries = self._timeout_retries + self._retries + if self._spec.kill_handler: + self._spec.kill_handler(self) self._process.terminate() self.start() else: diff --git a/tools/run_tests/run_interop_tests.py b/tools/run_tests/run_interop_tests.py index c2705c8cab26273e5316e18bda208a86adde915e..0f3b824996ad76b7386d538ae10207664a886f43 100755 --- a/tools/run_tests/run_interop_tests.py +++ b/tools/run_tests/run_interop_tests.py @@ -344,6 +344,11 @@ def add_auth_options(language, test_case, cmdline, env): def _job_kill_handler(job): if job._spec.container_name: dockerjob.docker_kill(job._spec.container_name) + # When the job times out and we decide to kill it, + # we need to wait a bit before restarting the job + # to prevent "container name already in use" error. + # TODO(jtattermusch): figure out a cleaner way to do this. + time.sleep(2) def cloud_to_prod_jobspec(language, test_case, docker_image=None, auth=False):