diff --git a/tools/gcp/utils/big_query_utils.py b/tools/gcp/utils/big_query_utils.py
index 9dbc69c5d669a70293125cf7aa223031de3464f4..8efca4b710a55d90a5a6a5d6144d26298c6e0a94 100755
--- a/tools/gcp/utils/big_query_utils.py
+++ b/tools/gcp/utils/big_query_utils.py
@@ -37,6 +37,8 @@ from apiclient import discovery
 from apiclient.errors import HttpError
 from oauth2client.client import GoogleCredentials
 
+# 30 days in milliseconds
+_EXPIRATION_MS = 30 * 24 * 60 * 60 * 1000
 NUM_RETRIES = 3
 
 
@@ -79,8 +81,21 @@ def create_table(big_query, project_id, dataset_id, table_id, table_schema,
                        fields, description)
 
 
+def create_partitioned_table(big_query, project_id, dataset_id, table_id, table_schema,
+                             description, partition_type='DAY', expiration_ms=_EXPIRATION_MS):
+  """Creates a partitioned table. By default, a date-partitioned table is created with
+  each partition expiring 30 days after it was last modified.
+  """
+  fields = [{'name': field_name,
+             'type': field_type,
+             'description': field_description
+             } for (field_name, field_type, field_description) in table_schema]
+  return create_table2(big_query, project_id, dataset_id, table_id,
+                       fields, description, partition_type, expiration_ms)
+
+
 def create_table2(big_query, project_id, dataset_id, table_id, fields_schema,
-                  description):
+                  description, partition_type=None, expiration_ms=None):
   is_success = True
 
   body = {
@@ -95,6 +110,12 @@ def create_table2(big_query, project_id, dataset_id, table_id, fields_schema,
       }
   }
 
+  if partition_type and expiration_ms:
+    body["timePartitioning"] = {
+        "type": partition_type,
+        "expirationMs": expiration_ms
+    }
+
   try:
     table_req = big_query.tables().insert(projectId=project_id,
                                           datasetId=dataset_id,
diff --git a/tools/run_tests/python_utils/upload_test_results.py b/tools/run_tests/python_utils/upload_test_results.py
index d076d1e5a2a1683ac0d613f3271cd7b83a01a795..276fd0e083921346d8dc7549536d250b79c98b7f 100644
--- a/tools/run_tests/python_utils/upload_test_results.py
+++ b/tools/run_tests/python_utils/upload_test_results.py
@@ -45,6 +45,9 @@ import big_query_utils
 
 _DATASET_ID = 'jenkins_test_results'
 _DESCRIPTION = 'Test results from master job run on Jenkins'
+# 90 days in milliseconds
+_EXPIRATION_MS = 90 * 24 * 60 * 60 * 1000
+_PARTITION_TYPE = 'DAY'
 _PROJECT_ID = 'grpc-testing'
 _RESULTS_SCHEMA = [
     ('job_name', 'STRING', 'Name of Jenkins job'),
@@ -87,7 +90,8 @@ def upload_results_to_bq(resultset, bq_table, args, platform):
     platform: string name of platform tests were run on
   """
   bq = big_query_utils.create_big_query()
-  big_query_utils.create_table(bq, _PROJECT_ID, _DATASET_ID, bq_table, _RESULTS_SCHEMA, _DESCRIPTION)
+  big_query_utils.create_partitioned_table(bq, _PROJECT_ID, _DATASET_ID, bq_table, _RESULTS_SCHEMA, _DESCRIPTION,
+                                           partition_type=_PARTITION_TYPE, expiration_ms=_EXPIRATION_MS)
 
   for shortname, results in six.iteritems(resultset):
     for result in results:
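
For reference, a minimal sketch of how the new helper would be called outside the Jenkins uploader, assuming application-default credentials are available to `create_big_query()`. The table name and schema below are invented for illustration; only `create_big_query` and `create_partitioned_table` (and the `grpc-testing` / `jenkins_test_results` identifiers) come from this diff:

```python
import big_query_utils

# Hypothetical schema in the (name, type, description) tuple format that
# create_partitioned_table() converts into BigQuery field dicts.
_EXAMPLE_SCHEMA = [
    ('build_id', 'INTEGER', 'Build number of the job'),
    ('result', 'STRING', 'Test result, e.g. PASSED or FAILED'),
]

bq = big_query_utils.create_big_query()
# Creates a day-partitioned table; each partition expires 30 days after its
# last modification (the default _EXPIRATION_MS in big_query_utils).
big_query_utils.create_partitioned_table(
    bq,
    'grpc-testing',           # project_id, as used by upload_test_results.py
    'jenkins_test_results',   # dataset_id
    'example_results',        # hypothetical table name
    _EXAMPLE_SCHEMA,
    'Example of a date-partitioned results table')
```

The uploader in upload_test_results.py overrides the 30-day default with its own 90-day `_EXPIRATION_MS`, which is why `create_table2` takes `partition_type` and `expiration_ms` as optional parameters rather than hard-coding them.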