From bf3b769bd263047cc31ee9546ecc85396fca04a4 Mon Sep 17 00:00:00 2001
From: Adele Zhou <adelez@adelez2.mtv.corp.google.com>
Date: Thu, 11 Aug 2016 18:45:18 -0700
Subject: [PATCH] Tool for collecting build statistics.

---
 tools/run_tests/build_stats_schema.json       |  56 +++++
 .../build_stats_schema_no_matrix.json         |  44 ++++
 tools/run_tests/run_build_statistics.py       | 204 ++++++++++++++++++
 3 files changed, 304 insertions(+)
 create mode 100644 tools/run_tests/build_stats_schema.json
 create mode 100644 tools/run_tests/build_stats_schema_no_matrix.json
 create mode 100755 tools/run_tests/run_build_statistics.py

diff --git a/tools/run_tests/build_stats_schema.json b/tools/run_tests/build_stats_schema.json
new file mode 100644
index 0000000000..021a349545
--- /dev/null
+++ b/tools/run_tests/build_stats_schema.json
@@ -0,0 +1,56 @@
+[
+  {
+    "name": "build_number",
+    "type": "INTEGER",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "timestamp",
+    "type": "TIMESTAMP",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "matrix",
+    "type": "RECORD",
+    "mode": "REPEATED",
+    "fields": [
+      {
+        "name": "name",
+        "type": "STRING",
+        "mode": "NULLABLE"
+      },
+      {
+        "name": "duration",
+        "type": "FLOAT",
+        "mode": "NULLABLE"
+      },
+      {
+        "name": "pass_count",
+        "type": "INTEGER",
+        "mode": "NULLABLE"
+      },
+      {
+        "name": "failure_count",
+        "type": "INTEGER",
+        "mode": "NULLABLE"
+      },
+      {
+        "name": "error",
+        "type": "RECORD",
+        "mode": "REPEATED",
+        "fields": [
+          {
+            "name": "description",
+            "type": "STRING",
+            "mode": "NULLABLE"
+          },
+          {
+            "name": "count",
+            "type": "INTEGER",
+            "mode": "NULLABLE"
+          }
+        ]
+      }
+    ]
+  }
+]  
diff --git a/tools/run_tests/build_stats_schema_no_matrix.json b/tools/run_tests/build_stats_schema_no_matrix.json
new file mode 100644
index 0000000000..42650e3024
--- /dev/null
+++ b/tools/run_tests/build_stats_schema_no_matrix.json
@@ -0,0 +1,44 @@
+[
+  {
+    "name": "build_number",
+    "type": "INTEGER",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "timestamp",
+    "type": "TIMESTAMP",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "duration",
+    "type": "FLOAT",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "pass_count",
+    "type": "INTEGER",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "failure_count",
+    "type": "INTEGER",
+    "mode": "NULLABLE"
+  },
+  {
+    "name": "error",
+    "type": "RECORD",
+    "mode": "REPEATED",
+    "fields": [
+      {
+        "name": "description",
+        "type": "STRING",
+        "mode": "NULLABLE"
+      },
+      {
+        "name": "count",
+        "type": "INTEGER",
+        "mode": "NULLABLE"
+      }
+    ]
+  }
+]  
diff --git a/tools/run_tests/run_build_statistics.py b/tools/run_tests/run_build_statistics.py
new file mode 100755
index 0000000000..92c53782a8
--- /dev/null
+++ b/tools/run_tests/run_build_statistics.py
@@ -0,0 +1,204 @@
+#!/usr/bin/env python2.7
+# Copyright 2016, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Tool to get build statistics from Jenkins and upload to BigQuery."""
+
+import argparse
+import jenkinsapi
+from jenkinsapi.custom_exceptions import JenkinsAPIException
+from jenkinsapi.jenkins import Jenkins
+import json
+import os
+import re
+import sys
+import urllib
+
+
+gcp_utils_dir = os.path.abspath(os.path.join(
+    os.path.dirname(__file__), '../gcp/utils'))
+sys.path.append(gcp_utils_dir)
+import big_query_utils
+
+
+_PROJECT_ID = 'grpc-testing'
+_HAS_MATRIX = True
+# Jenkins jobs to scan, mapped to whether the job is a matrix build.
+_BUILDS = {'gRPC_master': _HAS_MATRIX,
+           'gRPC_interop_master': not _HAS_MATRIX,
+           'gRPC_pull_requests': _HAS_MATRIX,
+           'gRPC_interop_pull_requests': not _HAS_MATRIX}
+_URL_BASE = 'https://grpc-testing.appspot.com/job'
+# Messages looked for in build output to classify failures.
+_KNOWN_ERRORS = [
+    'Failed to build workspace Tests with scheme AllTests',
+    'Build timed out',
+    'FATAL: Unable to produce a script file',
+    'FAILED: Failed to build interop docker images',
+    'LLVM ERROR: IO failure on output stream.',
+    'MSBUILD : error MSB1009: Project file does not exist.',
+]
+_UNKNOWN_ERROR = 'Unknown error'
+_DATASET_ID = 'build_statistics'
+
+
+def _scrape_for_known_errors(html):
+  """Returns (list of {'description', 'count'} dicts, total count) for html."""
+  error_list = []
+  known_error_count = 0
+  for known_error in _KNOWN_ERRORS:
+    # Literal substring count: as a regex, '.' would match any character.
+    count = html.count(known_error)
+    if count > 0:
+      known_error_count += count
+      error_list.append({'description': known_error, 'count': count})
+      print('====> %d failures due to %s' % (count, known_error))
+  return error_list, known_error_count
+
+
+def _get_last_processed_buildnumber(build_name):
+  """Returns the largest build_number stored for build_name, or 0 if none."""
+  query = 'SELECT max(build_number) FROM [%s:%s.%s];' % (
+      _PROJECT_ID, _DATASET_ID, build_name)
+  job_ref = big_query_utils.sync_query_job(
+      bq, _PROJECT_ID, query)['jobReference']
+  results = bq.jobs().getQueryResults(pageToken=None, **job_ref)
+  # Single aggregate cell; fall back to 0 when its value is falsy.
+  newest = results.execute(num_retries=3)['rows'][0]['f'][0]['v']
+  return int(newest) if newest else 0
+
+
+def _process_matrix(build, url_base):
+  """Returns a list with one stats dict per matrix run of the given build."""
+  results = []
+  for run in build.get_matrix_runs():
+    # Capture the comma-separated text between '\xc2\xbb ' and ' #' in the name.
+    found = re.match('.*\\xc2\\xbb ((?:[^,]+,?)+) #.*', run.name)
+    cell_name = found.group(1)
+    cell = cell_name.split(',')
+    url_args = (url_base, cell[0], cell[1], cell[2])
+    json_url = (
+        '%s/config=%s,language=%s,platform=%s/testReport/api/json' % url_args)
+    console_url = '%s/config=%s,language=%s,platform=%s/consoleFull' % url_args
+    stats = {'name': cell_name, 'duration': run.get_duration().total_seconds()}
+    stats.update(_process_build(json_url, console_url))
+    results.append(stats)
+  return results
+
+
+def _process_build(json_url, console_url):
+  """Returns pass/failure counts (and an error breakdown) for one build.
+
+  Reads the test report JSON at json_url; if that raises any Exception, the
+  build is recorded as a single failure classified from the console log.
+  """
+  build_result = {}
+  error_list = []
+  try:
+    html = urllib.urlopen(json_url).read()
+    test_result = json.loads(html)
+    print('====> Parsing result from %s' % json_url)
+    failure_count = test_result['failCount']
+    build_result['pass_count'] = test_result['passCount']
+    build_result['failure_count'] = failure_count
+    if failure_count > 0:
+      error_list, known_error_count = _scrape_for_known_errors(html)
+      if failure_count < known_error_count:
+        # The same error can show up several times within a single test.
+        print('====> Some errors are duplicates.')
+      # Failures not explained by a known error, never less than zero.
+      error_list.append({'description': _UNKNOWN_ERROR,
+                         'count': max(failure_count - known_error_count, 0)})
+  except Exception as e:
+    print('====> Got exception for %s: %s.' % (json_url, str(e)))
+    print('====> Parsing errors from %s.' % console_url)
+    html = urllib.urlopen(console_url).read()
+    build_result.update({'pass_count': 0, 'failure_count': 1})
+    error_list, _ = _scrape_for_known_errors(html)
+    # The one failure counts as unknown only when no known error was found.
+    error_list.append({'description': _UNKNOWN_ERROR,
+                       'count': 0 if error_list else 1})
+  if error_list:
+    build_result['error'] = error_list
+  return build_result
+
+
+# Parse command line: Jenkins user name and the jobs whose builds to collect.
+argp = argparse.ArgumentParser(description='Get build statistics.')
+argp.add_argument('-u', '--username', default='jenkins')
+argp.add_argument('-b', '--builds',
+                  choices=['all'] + sorted(_BUILDS.keys()),
+                  nargs='+',
+                  default=['all'])
+args = argp.parse_args()
+
+J = Jenkins('https://grpc-testing.appspot.com', args.username, 'apiToken')
+bq = big_query_utils.create_big_query()
+
+for build_name in _BUILDS.keys() if 'all' in args.builds else args.builds:
+  print('====> Build: %s' % build_name)
+  job = None
+  try:
+    job = J[build_name]
+  except Exception as e:
+    print('====> Failed to get build %s: %s.' % (build_name, str(e)))
+    continue
+  last_processed_build_number = _get_last_processed_buildnumber(build_name)
+  last_complete_build_number = job.get_last_completed_buildnumber()
+  # To avoid processing all builds for a project never looked at. In this case,
+  # only examine 10 latest builds.
+  starting_build_number = max(last_processed_build_number + 1,
+                              last_complete_build_number - 9)
+  for build_number in xrange(starting_build_number,
+                             last_complete_build_number + 1):
+    print('====> Processing %s build %d.' % (build_name, build_number))
+    build = None
+    # Since get_last_completed_build() always fails due to malformatted string
+    # error, we use get_build_metadata() instead.
+    try:
+      build = job.get_build_metadata(build_number)
+    except KeyError:
+      print('====> Build %s is missing. Skip.' % build_number)
+      continue
+    build_result = {'build_number': build_number,
+                    'timestamp': str(build.get_timestamp())}
+    url_base = '%s/%s/%d' % (_URL_BASE, build_name, build_number)
+    if _BUILDS[build_name]:  # The build has matrix, such as gRPC_master.
+      build_result['matrix'] = _process_matrix(build, url_base)
+    else:
+      json_url = '%s/testReport/api/json' % url_base
+      console_url = '%s/consoleFull' % url_base
+      build_result['duration'] = build.get_duration().total_seconds()
+      build_result.update(_process_build(json_url, console_url))
+    rows = [big_query_utils.make_row(build_number, build_result)]
+    if not big_query_utils.insert_rows(bq, _PROJECT_ID, _DATASET_ID, build_name,
+                                       rows):
+      print('====> Error uploading result to bigquery.')
+      sys.exit(1)
+
-- 
GitLab