From e1f5302bc5a00d8fb93943f9b958b10d6cd8578d Mon Sep 17 00:00:00 2001 From: Craig Tiller <ctiller@google.com> Date: Thu, 4 May 2017 19:53:19 +0000 Subject: [PATCH] Port server fixes - correct a couple of race conditions that could result in duplicated port assignments to different processes - enhance detection code for 'is this port in use' to be much more robust --- tools/run_tests/python_utils/port_server.py | 50 +++++++++++++++------ 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/tools/run_tests/python_utils/port_server.py b/tools/run_tests/python_utils/port_server.py index e96ee0b08c..079ed70fbf 100755 --- a/tools/run_tests/python_utils/port_server.py +++ b/tools/run_tests/python_utils/port_server.py @@ -39,6 +39,7 @@ import os import socket import sys import time +import random from SocketServer import ThreadingMixIn import threading @@ -46,7 +47,7 @@ import threading # increment this number whenever making a change to ensure that # the changes are picked up by running CI servers # note that all changes must be backwards compatible -_MY_VERSION = 14 +_MY_VERSION = 19 if len(sys.argv) == 2 and sys.argv[1] == 'dump_version': @@ -72,10 +73,33 @@ pool = [] in_use = {} mu = threading.Lock() +def can_connect(port): + s = socket.socket() + try: + s.connect(('localhost', port)) + return True + except socket.error, e: + return False + finally: + s.close() + +def can_bind(port, proto): + s = socket.socket(proto, socket.SOCK_STREAM) + s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + try: + s.bind(('localhost', port)) + return True + except socket.error, e: + return False + finally: + s.close() + def refill_pool(max_timeout, req): """Scan for ports not marked for being in use""" - for i in range(1025, 32766): + chk = list(range(1025, 32766)) + random.shuffle(chk) + for i in chk: if len(pool) > 100: break if i in in_use: age = time.time() - in_use[i] @@ -83,16 +107,9 @@ def refill_pool(max_timeout, req): continue req.log_message("kill old request %d" % i) del in_use[i] - s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - try: - s.bind(('localhost', i)) + if can_bind(i, socket.AF_INET) and can_bind(i, socket.AF_INET6) and not can_connect(i): req.log_message("found available port %d" % i) pool.append(i) - except: - pass # we really don't care about failures - finally: - s.close() def allocate_port(req): @@ -128,6 +145,7 @@ class Handler(BaseHTTPRequestHandler): def do_GET(self): global keep_running + global mu if self.path == '/get': # allocate a new port, it will stay bound for ten minutes and until # it's unused @@ -142,12 +160,15 @@ class Handler(BaseHTTPRequestHandler): self.send_header('Content-Type', 'text/plain') self.end_headers() p = int(self.path[6:]) + mu.acquire() if p in in_use: del in_use[p] pool.append(p) - self.log_message('drop known port %d' % p) + k = 'known' else: - self.log_message('drop unknown port %d' % p) + k = 'unknown' + mu.release() + self.log_message('drop %s port %d' % (k, p)) elif self.path == '/version_number': # fetch a version string and the current process pid self.send_response(200) @@ -161,8 +182,11 @@ class Handler(BaseHTTPRequestHandler): self.send_response(200) self.send_header('Content-Type', 'text/plain') self.end_headers() + mu.acquire() now = time.time() - self.wfile.write(yaml.dump({'pool': pool, 'in_use': dict((k, now - v) for k, v in in_use.items())})) + out = yaml.dump({'pool': pool, 'in_use': dict((k, now - v) for k, v in in_use.items())}) + mu.release() + self.wfile.write(out) elif self.path == '/quitquitquit': self.send_response(200) self.end_headers() -- GitLab