From e1f5302bc5a00d8fb93943f9b958b10d6cd8578d Mon Sep 17 00:00:00 2001
From: Craig Tiller <ctiller@google.com>
Date: Thu, 4 May 2017 19:53:19 +0000
Subject: [PATCH] Port server fixes

- correct a couple of race conditions that could result in duplicated port assignments to different processes
- enhance detection code for 'is this port in use' to be much more robust
---
 tools/run_tests/python_utils/port_server.py | 50 +++++++++++++++------
 1 file changed, 37 insertions(+), 13 deletions(-)

diff --git a/tools/run_tests/python_utils/port_server.py b/tools/run_tests/python_utils/port_server.py
index e96ee0b08c..079ed70fbf 100755
--- a/tools/run_tests/python_utils/port_server.py
+++ b/tools/run_tests/python_utils/port_server.py
@@ -39,6 +39,7 @@ import os
 import socket
 import sys
 import time
+import random
 from SocketServer import ThreadingMixIn
 import threading
 
@@ -46,7 +47,7 @@ import threading
 # increment this number whenever making a change to ensure that
 # the changes are picked up by running CI servers
 # note that all changes must be backwards compatible
-_MY_VERSION = 14
+_MY_VERSION = 19
 
 
 if len(sys.argv) == 2 and sys.argv[1] == 'dump_version':
@@ -72,10 +73,33 @@ pool = []
 in_use = {}
 mu = threading.Lock()
 
+def can_connect(port):
+  s = socket.socket()
+  try:
+    s.connect(('localhost', port))
+    return True
+  except socket.error, e:
+    return False
+  finally:
+    s.close()
+
+def can_bind(port, proto):
+  s = socket.socket(proto, socket.SOCK_STREAM)
+  s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+  try:
+    s.bind(('localhost', port))
+    return True
+  except socket.error, e:
+    return False
+  finally:
+    s.close()
+
 
 def refill_pool(max_timeout, req):
   """Scan for ports not marked for being in use"""
-  for i in range(1025, 32766):
+  chk = list(range(1025, 32766))
+  random.shuffle(chk)
+  for i in chk:
     if len(pool) > 100: break
     if i in in_use:
       age = time.time() - in_use[i]
@@ -83,16 +107,9 @@ def refill_pool(max_timeout, req):
         continue
       req.log_message("kill old request %d" % i)
       del in_use[i]
-    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-    s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
-    try:
-      s.bind(('localhost', i))
+    if can_bind(i, socket.AF_INET) and can_bind(i, socket.AF_INET6) and not can_connect(i):
       req.log_message("found available port %d" % i)
       pool.append(i)
-    except:
-      pass # we really don't care about failures
-    finally:
-      s.close()
 
 
 def allocate_port(req):
@@ -128,6 +145,7 @@ class Handler(BaseHTTPRequestHandler):
 
   def do_GET(self):
     global keep_running
+    global mu
     if self.path == '/get':
       # allocate a new port, it will stay bound for ten minutes and until
       # it's unused
@@ -142,12 +160,15 @@ class Handler(BaseHTTPRequestHandler):
       self.send_header('Content-Type', 'text/plain')
       self.end_headers()
       p = int(self.path[6:])
+      mu.acquire()
       if p in in_use:
         del in_use[p]
         pool.append(p)
-        self.log_message('drop known port %d' % p)
+        k = 'known'
       else:
-        self.log_message('drop unknown port %d' % p)
+        k = 'unknown'
+      mu.release()
+      self.log_message('drop %s port %d' % (k, p))
     elif self.path == '/version_number':
       # fetch a version string and the current process pid
       self.send_response(200)
@@ -161,8 +182,11 @@ class Handler(BaseHTTPRequestHandler):
       self.send_response(200)
       self.send_header('Content-Type', 'text/plain')
       self.end_headers()
+      mu.acquire()
       now = time.time()
-      self.wfile.write(yaml.dump({'pool': pool, 'in_use': dict((k, now - v) for k, v in in_use.items())}))
+      out = yaml.dump({'pool': pool, 'in_use': dict((k, now - v) for k, v in in_use.items())})
+      mu.release()
+      self.wfile.write(out)
     elif self.path == '/quitquitquit':
       self.send_response(200)
       self.end_headers()
-- 
GitLab