diff --git a/tools/line_count/collect-history.py b/tools/line_count/collect-history.py
index 858a6e1d30c7c46730d57d6030ef0af26782923a..4c1bf73b1e15843e12176d31eb5a6f0925332d6d 100755
--- a/tools/line_count/collect-history.py
+++ b/tools/line_count/collect-history.py
@@ -31,6 +31,9 @@
 import subprocess
 import datetime
 
+# This script is of historical interest only: it was used to bootstrap the
+# dataset.
+
 def daterange(start, end):
   for n in range(int((end - start).days)):
     yield start + datetime.timedelta(n)
diff --git a/tools/line_count/collect-now.sh b/tools/line_count/collect-now.sh
index 2db26298dc43b213cc6b618ff5407527697dd26a..44f4b4ed3107160ed4abf57b68f593dabd0494c9 100644
--- a/tools/line_count/collect-now.sh
+++ b/tools/line_count/collect-now.sh
@@ -31,6 +31,6 @@
 set -ex
 
 cloc --vcs=git --by-file --yaml --out=cloc.yaml .
-tools/line_count/yaml2csv -i cloc.yaml -d `date +%Y-%m-%d` -o cloc.csv
+tools/line_count/yaml2csv.py -i cloc.yaml -d `date +%Y-%m-%d` -o cloc.csv
 bq load line_counts.grpc cloc.csv
 
diff --git a/tools/line_count/summarize-history.py b/tools/line_count/summarize-history.py
index 756dc79a4dc06ac9c2772ce4c4cf48281137a82d..cb6d570f66543f8d3e960493ec8decaf2f0c327d 100755
--- a/tools/line_count/summarize-history.py
+++ b/tools/line_count/summarize-history.py
@@ -32,6 +32,9 @@
 import subprocess
 import datetime
 
+# This script is of historical interest only: it was used to bootstrap the
+# dataset.
+
 def daterange(start, end):
   for n in range(int((end - start).days)):
     yield start + datetime.timedelta(n)