Skip to content

Commit ba064c5

Browse files
author
Rohan Yadav
committed
scripts: add script that aggregates separate CSV's into single CSV
1 parent ccc4237 commit ba064c5

File tree

1 file changed

+56
-0
lines changed

1 file changed

+56
-0
lines changed

scripts/taco_bench_aggregator.py

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import csv
2+
import os
3+
import glob
4+
import sys
5+
import tqdm
6+
import argparse
7+
8+
def aggregateTacoBenches(folder, outfile, labelSet=None, headerLines=10):
    """Merge every benchmark CSV found in ``folder`` into a single CSV.

    Each ``*.csv`` file directly inside ``folder`` is expected to start with
    ``headerLines`` lines of google-benchmark preamble, followed by a CSV
    header row containing a ``label`` column, followed by data rows. Rows
    whose label contains the text ``SKIPPED`` are dropped; all other rows are
    appended to ``outfile``. The CSV header is written once, taken from the
    first input file that has one (later files are assumed to share the same
    columns -- this is not checked).

    Args:
        folder: Directory to search for ``*.csv`` input files.
        outfile: Path of the aggregated CSV to create (overwritten if present).
        labelSet: Optional path. When given, every distinct label that was
            kept is written to this file, one per line, in sorted order.
        headerLines: Number of leading preamble lines to discard from each
            input file before CSV parsing starts.

    Raises:
        ValueError: If an input file's CSV header has no ``label`` column.
        OSError: If an input or output file cannot be opened.
    """
    validLabels = set()
    wroteHeader = False
    # The output may live inside `folder`; remember where it is so that we
    # never read our own partially written output back in as an input.
    outPath = os.path.abspath(outfile)

    # newline='' is what the csv module asks for on both readers and writers,
    # so that it alone controls line endings and embedded newlines.
    with open(outfile, 'w', newline='') as outputFile:
        writer = csv.writer(outputFile, delimiter=',')
        # Loop through all files with a csv extension.
        for fname in tqdm.tqdm(glob.glob(os.path.join(folder, "*.csv"))):
            if os.path.abspath(fname) == outPath:
                continue
            with open(fname, 'r', newline='') as f:
                # Discard the google-benchmark generated preamble.
                for _ in range(headerLines):
                    f.readline()
                # Parse the rest of the file as CSV.
                reader = csv.reader(f)
                # If there is no header row to read, the benchmark probably
                # failed part-way through; move on to the next file.
                try:
                    header = next(reader)
                except (StopIteration, csv.Error):
                    continue
                # Column holding the label, used to filter skipped entries.
                labelIdx = header.index("label")
                if not wroteHeader:
                    writer.writerow(header)
                    wroteHeader = True
                for row in reader:
                    # Blank lines parse as [] and truncated rows may stop
                    # before the label column; neither can be classified.
                    if len(row) <= labelIdx:
                        continue
                    label = row[labelIdx]
                    if "SKIPPED" not in label:
                        validLabels.add(label)
                        writer.writerow(row)

    # Write out the set of valid labels, sorted so the file is reproducible.
    if labelSet is not None:
        with open(labelSet, 'w') as validSet:
            validSet.writelines(label + "\n" for label in sorted(validLabels))
50+
51+
# Only parse the command line and run the aggregation when this file is
# executed as a script, so that importing it has no side effects.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # Positional: where the per-benchmark CSV files live.
    parser.add_argument('target_directory', type=str, help="Directory containing CSV's to aggregate")
    # Positional: path of the combined CSV to write.
    parser.add_argument('output_csv_name', type=str, help="Name of the CSV to generate")
    # Optional: also dump the labels that were kept to this file.
    parser.add_argument('--label_set_file', type=str, default=None, help='Set to output all valid labels seen to a file')
    args = parser.parse_args()
    aggregateTacoBenches(args.target_directory, args.output_csv_name, labelSet=args.label_set_file)

0 commit comments

Comments
 (0)