diff --git a/tools/run_tests/jobset.py b/tools/run_tests/jobset.py
index c0b9c02b3e6103f75a2c362b938f720c93e11dd1..6ddc4d29bc5da03e7de203c79155df24c1b40577 100755
--- a/tools/run_tests/jobset.py
+++ b/tools/run_tests/jobset.py
@@ -72,6 +72,7 @@ _COLORS = {
     'yellow': [ 33, 0 ],
     'lightgray': [ 37, 0],
     'gray': [ 30, 1 ],
+    'purple': [ 35, 0 ],
     }
 
 
@@ -81,6 +82,7 @@ _CLEAR_LINE = '\x1b[2K'
 
 _TAG_COLOR = {
     'FAILED': 'red',
+    'FLAKE': 'purple',
     'WARNING': 'yellow',
     'TIMEOUT': 'red',
     'PASSED': 'green',
@@ -131,7 +133,7 @@ class JobSpec(object):
   """Specifies what to run for a job."""
 
   def __init__(self, cmdline, shortname=None, environ=None, hash_targets=None,
-               cwd=None, shell=False, timeout_seconds=5*60):
+               cwd=None, shell=False, timeout_seconds=5*60, flake_retries=5):
     """
     Arguments:
       cmdline: a list of arguments to pass as the command line
@@ -150,6 +152,7 @@ class JobSpec(object):
     self.cwd = cwd
     self.shell = shell
     self.timeout_seconds = timeout_seconds
+    self.flake_retries = flake_retries
 
   def identity(self):
     return '%r %r %r' % (self.cmdline, self.environ, self.hash_targets)
@@ -167,25 +170,28 @@ class Job(object):
   def __init__(self, spec, bin_hash, newline_on_success, travis, add_env, xml_report):
     self._spec = spec
     self._bin_hash = bin_hash
+    self._newline_on_success = newline_on_success
+    self._travis = travis
+    self._add_env = add_env.copy()
+    self._xml_test = ET.SubElement(xml_report, 'testcase',
+                                   name=self._spec.shortname) if xml_report is not None else None
+    self._retries = 0
+    message('START', spec.shortname, do_newline=self._travis)
+    self.start()
+
+  def start(self):
     self._tempfile = tempfile.TemporaryFile()
-    env = os.environ.copy()
-    for k, v in spec.environ.iteritems():
-      env[k] = v
-    for k, v in add_env.iteritems():
-      env[k] = v
+    env = dict(os.environ)
+    env.update(self._spec.environ)
+    env.update(self._add_env)
     self._start = time.time()
-    message('START', spec.shortname, do_newline=travis)
-    self._process = subprocess.Popen(args=spec.cmdline,
+    self._process = subprocess.Popen(args=self._spec.cmdline,
                                      stderr=subprocess.STDOUT,
                                      stdout=self._tempfile,
-                                     cwd=spec.cwd,
-                                     shell=spec.shell,
+                                     cwd=self._spec.cwd,
+                                     shell=self._spec.shell,
                                      env=env)
     self._state = _RUNNING
-    self._newline_on_success = newline_on_success
-    self._travis = travis
-    self._xml_test = ET.SubElement(xml_report, 'testcase',
-                                   name=self._spec.shortname) if xml_report is not None else None
 
   def state(self, update_cache):
     """Poll current state of the job. Prints messages at completion."""
@@ -202,15 +208,28 @@ class Job(object):
         self._xml_test.set('time', str(elapsed))
         ET.SubElement(self._xml_test, 'system-out').text = filtered_stdout
       if self._process.returncode != 0:
-        self._state = _FAILURE
-        message('FAILED', '%s [ret=%d, pid=%d]' % (
-            self._spec.shortname, self._process.returncode, self._process.pid),
-            stdout, do_newline=True)
-        if self._xml_test is not None:
-          ET.SubElement(self._xml_test, 'failure', message='Failure').text
+        # FLAKE and FAILED describe the finished attempt the same way; build the
+        # summary once, before start() replaces self._process.
+        failure_info = '%s [ret=%d, pid=%d]' % (
+            self._spec.shortname, self._process.returncode, self._process.pid)
+        if self._retries < self._spec.flake_retries:
+          # Retries remain: report the attempt as a flake and launch a fresh
+          # process; start() sets the state to _RUNNING again.
+          # NOTE(review): a 'system-out' element was already added above for
+          # this attempt, so a retried job presumably ends up with one per
+          # attempt in the XML report -- confirm report consumers accept that.
+          message('FLAKE', failure_info, stdout, do_newline=True)
+          self._retries += 1
+          self.start()
+        else:
+          # Retry budget exhausted: record a real failure.
+          self._state = _FAILURE
+          message('FAILED', failure_info, stdout, do_newline=True)
+          if self._xml_test is not None:
+            ET.SubElement(self._xml_test, 'failure', message='Failure')
       else:
         self._state = _SUCCESS
-        message('PASSED', '%s [time=%.1fsec]' % (self._spec.shortname, elapsed),
+        message('PASSED', '%s [time=%.1fsec; retries=%d]' % (
+            self._spec.shortname, elapsed, self._retries),
                 do_newline=self._newline_on_success or self._travis)
         if self._bin_hash:
           update_cache.finished(self._spec.identity(), self._bin_hash)