diff --git a/tools/jenkins/run_line_count.sh b/tools/jenkins/run_line_count.sh new file mode 100644 index 0000000000000000000000000000000000000000..a2bde534130b88ad5687c44ec7165310649735b0 --- /dev/null +++ b/tools/jenkins/run_line_count.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# Copyright 2017, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# This script is invoked by Jenkins and counts the number of lines in the +# project. +set -ex + +cd $(dirname $0)/../.. +tools/line_count/collect-now.sh diff --git a/tools/line_count/collect-history.py b/tools/line_count/collect-history.py new file mode 100755 index 0000000000000000000000000000000000000000..4c1bf73b1e15843e12176d31eb5a6f0925332d6d --- /dev/null +++ b/tools/line_count/collect-history.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python +# Copyright 2017, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import subprocess +import datetime + +# this script is only of historical interest: it's the script that was used to +# bootstrap the dataset + +def daterange(start, end): + for n in range(int((end - start).days)): + yield start + datetime.timedelta(n) + +start_date = datetime.date(2017, 3, 26) +end_date = datetime.date(2017, 3, 29) + +for dt in daterange(start_date, end_date): + dmy = dt.strftime('%Y-%m-%d') + sha1 = subprocess.check_output(['git', 'rev-list', '-n', '1', + '--before=%s' % dmy, + 'master']).strip() + subprocess.check_call(['git', 'checkout', sha1]) + subprocess.check_call(['git', 'submodule', 'update']) + subprocess.check_call(['git', 'clean', '-f', '-x', '-d']) + subprocess.check_call(['cloc', '--vcs=git', '--by-file', '--yaml', '--out=../count/%s.yaml' % dmy, '.']) + diff --git a/tools/line_count/collect-now.sh b/tools/line_count/collect-now.sh new file mode 100644 index 0000000000000000000000000000000000000000..44f4b4ed3107160ed4abf57b68f593dabd0494c9 --- /dev/null +++ b/tools/line_count/collect-now.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# Copyright 2017, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +set -ex + +cloc --vcs=git --by-file --yaml --out=cloc.yaml . +tools/line_count/yaml2csv.py -i cloc.yaml -d `date +%Y-%m-%d` -o cloc.csv +bq load line_counts.grpc cloc.csv + diff --git a/tools/line_count/summarize-history.py b/tools/line_count/summarize-history.py new file mode 100755 index 0000000000000000000000000000000000000000..cb6d570f66543f8d3e960493ec8decaf2f0c327d --- /dev/null +++ b/tools/line_count/summarize-history.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python +# Copyright 2017, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +import subprocess +import datetime + +# this script is only of historical interest: it's the script that was used to +# bootstrap the dataset + +def daterange(start, end): + for n in range(int((end - start).days)): + yield start + datetime.timedelta(n) + +start_date = datetime.date(2017, 3, 26) +end_date = datetime.date(2017, 3, 29) + +for dt in daterange(start_date, end_date): + dmy = dt.strftime('%Y-%m-%d') + print dmy + subprocess.check_call(['tools/line_count/yaml2csv.py', '-i', '../count/%s.yaml' % dmy, '-d', dmy, '-o', '../count/%s.csv' % dmy]) + diff --git a/tools/line_count/yaml2csv.py b/tools/line_count/yaml2csv.py new file mode 100755 index 0000000000000000000000000000000000000000..9bda09fd087c5087a0810416a851ff90b9f574c9 --- /dev/null +++ b/tools/line_count/yaml2csv.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python +# Copyright 2017, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +import yaml +import argparse +import datetime +import csv + +argp = argparse.ArgumentParser(description='Convert cloc yaml to bigquery csv') +argp.add_argument('-i', '--input', type=str) +argp.add_argument('-d', '--date', type=str, default=datetime.date.today().strftime('%Y-%m-%d')) +argp.add_argument('-o', '--output', type=str, default='out.csv') +args = argp.parse_args() + +data = yaml.load(open(args.input).read()) +with open(args.output, 'w') as outf: + writer = csv.DictWriter(outf, ['date', 'name', 'language', 'code', 'comment', 'blank']) + for key, value in data.iteritems(): + if key == 'header': continue + if key == 'SUM': continue + if key.startswith('third_party/'): continue + row = {'name': key, 'date': args.date} + row.update(value) + writer.writerow(row) +