Skip to content
Snippets Groups Projects
Commit 4c3c397b authored by Jan Tattermusch
Browse files

Merge pull request #4783 from ctiller/cpu-cost

Use CPU cost modelling to better utilize test resources
parents 6990ce22 0eef9eef
Branches
Tags
No related merge requests found
......@@ -922,6 +922,7 @@ targets:
- gpr_test_util
- gpr
- name: dualstack_socket_test
cpu_cost: 0.1
build: test
language: c
src:
......@@ -996,6 +997,7 @@ targets:
- gpr_test_util
- gpr
- name: fling_stream_test
cpu_cost: 2
build: test
language: c
src:
......@@ -1010,6 +1012,7 @@ targets:
- linux
- posix
- name: fling_test
cpu_cost: 2
build: test
language: c
src:
......@@ -1118,6 +1121,7 @@ targets:
- gpr_test_util
- gpr
- name: gpr_stack_lockfree_test
cpu_cost: 10
build: test
language: c
src:
......@@ -1134,6 +1138,7 @@ targets:
- gpr_test_util
- gpr
- name: gpr_sync_test
cpu_cost: 10
build: test
language: c
src:
......@@ -1142,6 +1147,7 @@ targets:
- gpr_test_util
- gpr
- name: gpr_thd_test
cpu_cost: 10
build: test
language: c
src:
......@@ -1368,6 +1374,7 @@ targets:
- gpr_test_util
- gpr
- name: httpcli_test
cpu_cost: 0.5
build: test
language: c
src:
......@@ -1382,6 +1389,7 @@ targets:
- linux
- posix
- name: httpscli_test
cpu_cost: 0.5
build: test
language: c
src:
......@@ -1463,6 +1471,7 @@ targets:
- gpr_test_util
- gpr
- name: lb_policies_test
cpu_cost: 0.1
build: test
language: c
src:
......@@ -1515,6 +1524,7 @@ targets:
- gpr_test_util
- gpr
- name: no_server_test
cpu_cost: 0.1
build: test
language: c
src:
......@@ -1575,6 +1585,7 @@ targets:
- gpr_test_util
- gpr
- name: set_initial_connect_string_test
cpu_cost: 0.1
build: test
language: c
src:
......@@ -1620,6 +1631,7 @@ targets:
- linux
- posix
- name: tcp_client_posix_test
cpu_cost: 0.5
build: test
language: c
src:
......@@ -1634,6 +1646,7 @@ targets:
- linux
- posix
- name: tcp_posix_test
cpu_cost: 0.5
build: test
language: c
src:
......@@ -1863,6 +1876,7 @@ targets:
- gpr_test_util
- gpr
- name: client_crash_test
cpu_cost: 0.1
build: test
language: c++
src:
......@@ -1941,6 +1955,7 @@ targets:
- gpr_test_util
- gpr
- name: end2end_test
cpu_cost: 0.5
build: test
language: c++
src:
......@@ -2084,6 +2099,7 @@ targets:
- linux
- posix
- name: interop_test
cpu_cost: 0.1
build: test
language: c++
src:
......@@ -2175,6 +2191,7 @@ targets:
- linux
- posix
- name: qps_test
cpu_cost: 10
build: test
language: c++
src:
......@@ -2277,6 +2294,7 @@ targets:
- linux
- posix
- name: server_crash_test
cpu_cost: 0.1
build: test
language: c++
src:
......@@ -2405,6 +2423,7 @@ targets:
- linux
- posix
- name: thread_stress_test
cpu_cost: 100
build: test
language: c++
src:
......
......@@ -137,7 +137,8 @@ class Grpc(object):
'platforms': ['linux', 'mac', 'posix', 'windows'],
'flaky': False,
'language': 'c++',
'boringssl': True
'boringssl': True,
'cpu_cost': 1.0
}
for test in files['tests']
]
......
......@@ -10,7 +10,8 @@
"ci_platforms": tgt.ci_platforms,
"exclude_configs": tgt.get("exclude_configs", []),
"args": [],
"flaky": tgt.flaky}
"flaky": tgt.flaky,
"cpu_cost": tgt.get("cpu_cost", 1.0)}
for tgt in targets
if tgt.get('run', True) and tgt.build == 'test'] +
tests,
......
#!/usr/bin/env python2.7
# Copyright 2015, Google Inc.
# Copyright 2015-2016, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
......@@ -35,15 +35,15 @@
import collections
import yaml
TestOptions = collections.namedtuple('TestOptions', 'flaky')
default_test_options = TestOptions(False)
TestOptions = collections.namedtuple('TestOptions', 'flaky cpu_cost')
default_test_options = TestOptions(False, 1.0)
# maps test names to options
BAD_CLIENT_TESTS = {
'badreq': default_test_options,
'connection_prefix': default_test_options,
'headers': default_test_options,
'initial_settings_frame': default_test_options,
'connection_prefix': default_test_options._replace(cpu_cost=0.2),
'headers': default_test_options._replace(cpu_cost=0.2),
'initial_settings_frame': default_test_options._replace(cpu_cost=0.2),
'server_registered_method': default_test_options,
'simple_request': default_test_options,
'window_overflow': default_test_options,
......@@ -75,6 +75,7 @@ def main():
'targets': [
{
'name': '%s_bad_client_test' % t,
'cpu_cost': BAD_CLIENT_TESTS[t].cpu_cost,
'build': 'test',
'language': 'c',
'secure': 'no',
......
#!/usr/bin/env python2.7
# Copyright 2015, Google Inc.
# Copyright 2015-2016, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
......@@ -35,13 +35,13 @@
import collections
import yaml
TestOptions = collections.namedtuple('TestOptions', 'flaky')
default_test_options = TestOptions(False)
TestOptions = collections.namedtuple('TestOptions', 'flaky cpu_cost')
default_test_options = TestOptions(False, 1.0)
# maps test names to options
BAD_CLIENT_TESTS = {
'cert': default_test_options,
'alpn': default_test_options,
'cert': default_test_options._replace(cpu_cost=0.1),
'alpn': default_test_options._replace(cpu_cost=0.1),
}
def main():
......@@ -84,6 +84,7 @@ def main():
for t in sorted(BAD_CLIENT_TESTS.keys())] + [
{
'name': 'bad_ssl_%s_test' % t,
'cpu_cost': BAD_CLIENT_TESTS[t].cpu_cost,
'build': 'test',
'language': 'c',
'src': ['test/core/bad_ssl/bad_ssl_test.c'],
......
......@@ -77,40 +77,42 @@ END2END_FIXTURES = {
}
TestOptions = collections.namedtuple(
'TestOptions', 'needs_fullstack needs_dns proxyable secure traceable')
default_test_options = TestOptions(False, False, True, False, True)
'TestOptions', 'needs_fullstack needs_dns proxyable secure traceable cpu_cost')
default_test_options = TestOptions(False, False, True, False, True, 1.0)
connectivity_test_options = default_test_options._replace(needs_fullstack=True)
LOWCPU = 0.1
# maps test names to options
END2END_TESTS = {
'bad_hostname': default_test_options,
'binary_metadata': default_test_options,
'call_creds': default_test_options._replace(secure=True),
'cancel_after_accept': default_test_options,
'cancel_after_client_done': default_test_options,
'cancel_after_invoke': default_test_options,
'cancel_before_invoke': default_test_options,
'cancel_in_a_vacuum': default_test_options,
'cancel_with_status': default_test_options,
'channel_connectivity': connectivity_test_options._replace(proxyable=False),
'cancel_after_accept': default_test_options._replace(cpu_cost=LOWCPU),
'cancel_after_client_done': default_test_options._replace(cpu_cost=LOWCPU),
'cancel_after_invoke': default_test_options._replace(cpu_cost=LOWCPU),
'cancel_before_invoke': default_test_options._replace(cpu_cost=LOWCPU),
'cancel_in_a_vacuum': default_test_options._replace(cpu_cost=LOWCPU),
'cancel_with_status': default_test_options._replace(cpu_cost=LOWCPU),
'channel_connectivity': connectivity_test_options._replace(proxyable=False, cpu_cost=LOWCPU),
'channel_ping': connectivity_test_options._replace(proxyable=False),
'compressed_payload': default_test_options._replace(proxyable=False),
'compressed_payload': default_test_options._replace(proxyable=False, cpu_cost=LOWCPU),
'default_host': default_test_options._replace(needs_fullstack=True,
needs_dns=True),
'disappearing_server': connectivity_test_options,
'empty_batch': default_test_options,
'graceful_server_shutdown': default_test_options,
'graceful_server_shutdown': default_test_options._replace(cpu_cost=LOWCPU),
'hpack_size': default_test_options._replace(proxyable=False,
traceable=False),
'high_initial_seqno': default_test_options,
'invoke_large_request': default_test_options,
'large_metadata': default_test_options,
'max_concurrent_streams': default_test_options._replace(proxyable=False),
'max_message_length': default_test_options,
'max_message_length': default_test_options._replace(cpu_cost=LOWCPU),
'metadata': default_test_options,
'negative_deadline': default_test_options,
'no_op': default_test_options,
'payload': default_test_options,
'payload': default_test_options._replace(cpu_cost=LOWCPU),
'ping_pong_streaming': default_test_options,
'registered_call': default_test_options,
'request_with_flags': default_test_options._replace(proxyable=False),
......@@ -118,7 +120,7 @@ END2END_TESTS = {
'server_finishes_request': default_test_options,
'shutdown_finishes_calls': default_test_options,
'shutdown_finishes_tags': default_test_options,
'simple_delayed_request': connectivity_test_options,
'simple_delayed_request': connectivity_test_options._replace(cpu_cost=LOWCPU),
'simple_request': default_test_options,
'trailing_metadata': default_test_options,
}
......@@ -252,6 +254,7 @@ def main():
END2END_FIXTURES[f].platforms, 'mac')),
'flaky': False,
'language': 'c',
'cpu_cost': END2END_TESTS[t].cpu_cost,
}
for f in sorted(END2END_FIXTURES.keys())
for t in sorted(END2END_TESTS.keys()) if compatible(f, t)
......@@ -266,6 +269,7 @@ def main():
END2END_FIXTURES[f].platforms, 'mac')),
'flaky': False,
'language': 'c',
'cpu_cost': END2END_TESTS[t].cpu_cost,
}
for f in sorted(END2END_FIXTURES.keys())
if not END2END_FIXTURES[f].secure
......
#!/usr/bin/env python2.7
# Copyright 2015, Google Inc.
# Copyright 2015-2016, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
......@@ -41,6 +41,7 @@ _TOP_LEVEL_KEYS = ['settings', 'proto_deps', 'filegroups', 'libs', 'targets', 'v
_VERSION_KEYS = ['major', 'minor', 'micro', 'build']
_ELEM_KEYS = [
'name',
'cpu_cost',
'flaky',
'build',
'run',
......
......@@ -33,6 +33,7 @@ import hashlib
import multiprocessing
import os
import platform
import re
import signal
import subprocess
import sys
......@@ -40,6 +41,10 @@ import tempfile
import time
# cpu cost measurement
measure_cpu_costs = False
_DEFAULT_MAX_JOBS = 16 * multiprocessing.cpu_count()
_MAX_RESULT_SIZE = 8192
......@@ -146,7 +151,7 @@ class JobSpec(object):
def __init__(self, cmdline, shortname=None, environ=None, hash_targets=None,
cwd=None, shell=False, timeout_seconds=5*60, flake_retries=0,
timeout_retries=0, kill_handler=None):
timeout_retries=0, kill_handler=None, cpu_cost=1.0):
"""
Arguments:
cmdline: a list of arguments to pass as the command line
......@@ -154,6 +159,7 @@ class JobSpec(object):
hash_targets: which files to include in the hash representing the jobs version
(or empty, indicating the job should not be hashed)
kill_handler: a handler that will be called whenever job.kill() is invoked
cpu_cost: estimated number of CPU cores this job keeps busy while it runs
"""
if environ is None:
environ = {}
......@@ -169,6 +175,7 @@ class JobSpec(object):
self.flake_retries = flake_retries
self.timeout_retries = timeout_retries
self.kill_handler = kill_handler
self.cpu_cost = cpu_cost
def identity(self):
return '%r %r %r' % (self.cmdline, self.environ, self.hash_targets)
......@@ -218,7 +225,10 @@ class Job(object):
env.update(self._spec.environ)
env.update(self._add_env)
self._start = time.time()
try_start = lambda: subprocess.Popen(args=self._spec.cmdline,
cmdline = self._spec.cmdline
if measure_cpu_costs:
cmdline = ['time', '--portability'] + cmdline
try_start = lambda: subprocess.Popen(args=cmdline,
stderr=subprocess.STDOUT,
stdout=self._tempfile,
cwd=self._spec.cwd,
......@@ -267,8 +277,17 @@ class Job(object):
self.result.returncode = self._process.returncode
else:
self._state = _SUCCESS
message('PASSED', '%s [time=%.1fsec; retries=%d;%d]' % (
self._spec.shortname, elapsed, self._retries, self._timeout_retries),
measurement = ''
if measure_cpu_costs:
m = re.search(r'real ([0-9.]+)\nuser ([0-9.]+)\nsys ([0-9.]+)', stdout())
real = float(m.group(1))
user = float(m.group(2))
sys = float(m.group(3))
if real > 0.5:
cores = (user + sys) / real
measurement = '; cpu_cost=%.01f; estimated=%.01f' % (cores, self._spec.cpu_cost)
message('PASSED', '%s [time=%.1fsec; retries=%d:%d%s]' % (
self._spec.shortname, elapsed, self._retries, self._timeout_retries, measurement),
do_newline=self._newline_on_success or self._travis)
self.result.state = 'PASSED'
if self._bin_hash:
......@@ -329,10 +348,19 @@ class Jobset(object):
def get_num_failures(self):
return self._failures
def cpu_cost(self):
    """Return the combined cpu_cost of all jobs currently in self._running.

    Each running job contributes the cpu_cost recorded on its spec; the
    result is 0 when nothing is running.
    """
    # Collect the per-job estimates first, then add them up in iteration order.
    running_costs = [running_job._spec.cpu_cost for running_job in self._running]
    total = 0
    for cost in running_costs:
        total = total + cost
    return total
def start(self, spec):
"""Start a job. Return True on success, False on failure."""
while len(self._running) >= self._maxjobs:
while True:
if self.cancelled(): return False
current_cpu_cost = self.cpu_cost()
if current_cpu_cost == 0: break
if current_cpu_cost + spec.cpu_cost < self._maxjobs: break
self.reap()
if self.cancelled(): return False
if spec.hash_targets:
......
......@@ -78,7 +78,7 @@ class SimpleConfig(object):
self.timeout_multiplier = timeout_multiplier
def job_spec(self, cmdline, hash_targets, timeout_seconds=5*60,
shortname=None, environ={}):
shortname=None, environ={}, cpu_cost=1.0):
"""Construct a jobset.JobSpec for a test under this config
Args:
......@@ -96,6 +96,7 @@ class SimpleConfig(object):
return jobset.JobSpec(cmdline=cmdline,
shortname=shortname,
environ=actual_environ,
cpu_cost=cpu_cost,
timeout_seconds=self.timeout_multiplier * timeout_seconds,
hash_targets=hash_targets
if self.allow_hashing else None,
......@@ -114,11 +115,12 @@ class ValgrindConfig(object):
self.args = args
self.allow_hashing = False
def job_spec(self, cmdline, hash_targets):
def job_spec(self, cmdline, hash_targets, cpu_cost=1.0):
return jobset.JobSpec(cmdline=['valgrind', '--tool=%s' % self.tool] +
self.args + cmdline,
shortname='valgrind %s' % cmdline[0],
hash_targets=None,
cpu_cost=cpu_cost,
flake_retries=5 if args.allow_flakes else 0,
timeout_retries=3 if args.allow_flakes else 0)
......@@ -157,6 +159,7 @@ class CLanguage(object):
cmdline = [binary] + target['args']
out.append(config.job_spec(cmdline, [binary],
shortname=' '.join(cmdline),
cpu_cost=target['cpu_cost'],
environ={'GRPC_DEFAULT_SSL_ROOTS_FILE_PATH':
os.path.abspath(os.path.dirname(
sys.argv[0]) + '/../../src/core/tsi/test_creds/ca.pem')}))
......@@ -600,7 +603,7 @@ argp.add_argument('-n', '--runs_per_test', default=1, type=runs_per_test_type,
help='A positive integer or "inf". If "inf", all tests will run in an '
'infinite loop. Especially useful in combination with "-f"')
argp.add_argument('-r', '--regex', default='.*', type=str)
argp.add_argument('-j', '--jobs', default=2 * multiprocessing.cpu_count(), type=int)
argp.add_argument('-j', '--jobs', default=multiprocessing.cpu_count(), type=int)
argp.add_argument('-s', '--slowdown', default=1.0, type=float)
argp.add_argument('-f', '--forever',
default=False,
......@@ -647,6 +650,8 @@ argp.add_argument('--build_only',
action='store_const',
const=True,
help='Perform all the build steps but dont run any tests.')
argp.add_argument('--measure_cpu_costs', default=False, action='store_const', const=True,
help='Measure the cpu costs of tests')
argp.add_argument('--update_submodules', default=[], nargs='*',
help='Update some submodules before building. If any are updated, also run generate_projects. ' +
'Submodules are specified as SUBMODULE_NAME:BRANCH; if BRANCH is omitted, master is assumed.')
......@@ -655,6 +660,8 @@ argp.add_argument('-x', '--xml_report', default=None, type=str,
help='Generates a JUnit-compatible XML report')
args = argp.parse_args()
jobset.measure_cpu_costs = args.measure_cpu_costs
if args.use_docker:
if not args.travis:
print 'Seen --use_docker flag, will run tests under docker.'
......
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment