From bbfb25bf708688748fd9bf37c03ea7cfc5bcc410 Mon Sep 17 00:00:00 2001
From: Craig Tiller <ctiller@google.com>
Date: Fri, 10 Mar 2017 17:24:45 -0800
Subject: [PATCH] Sanitize before bigquery upload

---
 tools/profiling/microbenchmarks/bm2bq.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/tools/profiling/microbenchmarks/bm2bq.py b/tools/profiling/microbenchmarks/bm2bq.py
index ae59332f1b..ffb11f57d8 100755
--- a/tools/profiling/microbenchmarks/bm2bq.py
+++ b/tools/profiling/microbenchmarks/bm2bq.py
@@ -73,6 +73,14 @@ columns = [
   ('framing_bytes_per_iteration', 'float'),
 ]
 
+SANITIZE = {
+  'integer': int,
+  'float': float,
+  'boolean': bool,
+  'string': str,
+  'timestamp': str,
+}
+
 if sys.argv[1] == '--schema':
   print ',\n'.join('%s:%s' % (k, t.upper()) for k, t in columns)
   sys.exit(0)
@@ -89,7 +97,10 @@ else:
 writer = csv.DictWriter(sys.stdout, [c for c,t in columns])
 
 for row in bm_json.expand_json(js, js2):
-  if 'label' in row:
-    del row['label']
-  del row['cpp_name']
-  writer.writerow(row)
+  sane_row = {}
+  for name, sql_type in columns:
+    if name in row:
+      if row[name] == '': continue
+      sane_row[name] = SANITIZE[sql_type](row[name])
+  writer.writerow(sane_row)
+
-- 
GitLab