Skip to content

Commit 0423123

Browse files
committed
Add SuiteSparse mxv benchmark
1 parent d696599 commit 0423123

File tree

4 files changed

+199
-59
lines changed

4 files changed

+199
-59
lines changed

Makefile

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,25 @@ NUMPY_JSON := $(NUMPY_JSON)
1515
# Taco Specific Flags
1616
TACO_OUT = results/taco/$(benches_name)benches_$(shell date +%Y_%m_%d_%H%M%S).csv
1717

18+
# Set GRAPHBLAS=ON if compiling GraphBLAS benchmarks.
19+
ifeq ($(GRAPHBLAS),)
1820
GRAPHBLAS := "OFF"
21+
endif
22+
# Set OPENMP=ON if compiling TACO with OpenMP support.
23+
ifeq ($(OPENMP),)
1924
OPENMP := "OFF"
25+
endif
2026
# Set LANKA=ON if compiling on the MIT Lanka cluster.
2127
ifeq ($(LANKA),)
2228
LANKA := "OFF"
2329
endif
2430

31+
# CMD is the command prefix shared by the benchmark targets below.
# With LANKA=ON the benchmark runs under numactl, bound to the listed CPUs
# (-C) with memory allocated from node 0 (-m 0), and with OMP_PROC_BIND=true
# set in its environment. Otherwise the benchmark binary is invoked directly.
# Both forms prepend taco/build/lib/ to LD_LIBRARY_PATH and pass $(BENCHFLAGS).
ifeq ("$(LANKA)","ON")
32+
CMD := OMP_PROC_BIND=true LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) numactl -C 0,2,4,6,8,10,24,26,28,30,32,34 -m 0 taco/build/taco-bench $(BENCHFLAGS)
33+
else
34+
CMD := LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS)
35+
endif
36+
2537
export TACO_TENSOR_PATH = data/
2638

2739
# To group benchmark output by benchmark, use BENCHFLAGS=--benchmark-group-by=func.
@@ -42,18 +54,17 @@ convert-csv-all:
4254

4355
taco-bench: taco/build/taco-bench
4456
ifeq ($(BENCHES),"")
45-
LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_out_format="csv" --benchmark_out="$(TACO_OUT)" --benchmark_repetitions=10 --benchmark_counters_tabular=true
46-
57+
$(CMD) --benchmark_out_format="csv" --benchmark_out="$(TACO_OUT)" --benchmark_repetitions=10 --benchmark_counters_tabular=true
4758
else
48-
LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_filter="$(BENCHES)" --benchmark_out_format="csv" --benchmark_out="$(TACO_OUT)" --benchmark_repetitions=10 --benchmark_counters_tabular=true
59+
$(CMD) --benchmark_filter="$(BENCHES)" --benchmark_out_format="csv" --benchmark_out="$(TACO_OUT)" --benchmark_repetitions=10 --benchmark_counters_tabular=true
4960
endif
5061

5162
# Separate target to run the TACO benchmarks with numpy-taco cross validation logic.
5263
validate-taco-bench: taco/build/taco-bench validation-path
5364
ifeq ($(BENCHES),"")
54-
LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_repetitions=1
65+
$(CMD) --benchmark_repetitions=1
5566
else
56-
LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_filter="$(BENCHES)" --benchmark_repetitions=1
67+
$(CMD) --benchmark_filter="$(BENCHES)" --benchmark_repetitions=1
5768
endif
5869

5970
.PHONY: validation-path

taco/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ include_directories(taco taco/include benchmark/include suitesparse/Include)
2020
file(GLOB TEST_SOURCES *.cpp *.h)
2121

2222
set(CMAKE_CXX_FLAGS "${C_CXX_FLAGS} -std=c++14")
23+
if (GRAPHBLAS)
24+
# Append to the flags already in CMAKE_CXX_FLAGS instead of rebuilding them
# from C_CXX_FLAGS, so the -std=c++14 added earlier is not discarded.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fpermissive")
25+
endif(GRAPHBLAS)
2326

2427
add_executable(taco-bench ${TEST_SOURCES} bench.h)
2528
target_link_libraries(taco-bench benchmark::benchmark)

taco/bench.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,14 @@
1717
->ReportAggregatesOnly(true) \
1818
->UseRealTime()
1919

20+
// GRAPHBLAS_BENCH registers `bench` as a benchmark that reports in
// milliseconds, runs `times` repetitions of exactly one iteration each,
// reports every repetition rather than only the aggregates, and measures
// real (wall-clock) time.
#define GRAPHBLAS_BENCH(bench,times) \
21+
BENCHMARK(bench) \
22+
->Unit(benchmark::kMillisecond) \
23+
->Repetitions(times) \
24+
->Iterations(1) \
25+
->ReportAggregatesOnly(false) \
26+
->UseRealTime()
27+
2028
// TACO_BENCH_ARG is similar to TACO_BENCH but allows for passing
2129
// of an arbitrarily typed argument to the benchmark function.
2230
// TODO (rohany): Make this take in only 1 argument.

taco/graphblas.cpp

Lines changed: 172 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -17,89 +17,207 @@ extern "C" {
1717

1818
using namespace taco;
1919

20-
struct AddImpl {
21-
ir::Expr operator()(const std::vector<ir::Expr>& v) {
22-
return ir::Add::make(v[0], v[1]);
23-
}
24-
};
25-
Func AddOp("add", AddImpl(), {Annihilator(std::numeric_limits<double>::infinity()), Identity(0), Commutative(), Associative()});
20+
// Lowering for the "add" operator: builds an IR addition node from the
// first two operands in `v`.
ir::Expr addImpl(const std::vector<ir::Expr>& v) {
  const ir::Expr& lhs = v[0];
  const ir::Expr& rhs = v[1];
  return ir::Add::make(lhs, rhs);
}
23+
Func AddOp("add", addImpl, {Annihilator(std::numeric_limits<double>::infinity()), Identity(0), Commutative(), Associative()});
2624

27-
struct MinImpl{
28-
ir::Expr operator()(const std::vector<ir::Expr>& v) {
29-
return ir::Min::make(v[0], v[1]);
30-
}
31-
};
32-
Func MinOp("min", MinImpl(), {Identity(std::numeric_limits<double>::infinity()), Commutative(), Associative()});
25+
// Lowering for the "min" operator: builds an IR minimum node from the
// first two operands in `v`.
ir::Expr minImpl(const std::vector<ir::Expr>& v) {
  const ir::Expr& lhs = v[0];
  const ir::Expr& rhs = v[1];
  return ir::Min::make(lhs, rhs);
}
28+
Func MinOp("min", minImpl, {Identity(std::numeric_limits<double>::infinity()), Commutative(), Associative()});
3329

34-
struct MaskImpl {
35-
ir::Expr operator()(const std::vector<ir::Expr>& v) {
36-
return v[0];
37-
}
38-
};
30+
// Lowering for the "mask" operator: the result is the first operand,
// passed through unchanged; any further operands are ignored here.
ir::Expr maskImpl(const std::vector<ir::Expr>& v) {
  const ir::Expr& passedThrough = v[0];
  return passedThrough;
}
3933
struct MaskAlgebra {
4034
IterationAlgebra operator()(const std::vector<IndexExpr>& r) {
4135
return Intersect(r[0], Complement(r[1]));
4236
}
4337
};
44-
Func MaskOp("mask", MaskImpl(), MaskAlgebra());
38+
Func MaskOp("mask", maskImpl, MaskAlgebra());
39+
40+
//static void bench_mxv_taco(benchmark::State& state) {
41+
// Format dv({Dense});
42+
//
43+
// Tensor<double> T = read("/data/scratch/s3chou/formats-bench/data/webbase_1M.mtx", CSR);
44+
// Tensor<double> A(T.getDimensions(), CSR, std::numeric_limits<double>::infinity());
45+
// for (const auto& c : T) {
46+
// A.insert(c.first.toVector(), c.second);
47+
// }
48+
// A.pack();
49+
//
50+
// // TODO: Only run for square matrices
51+
//
52+
// Tensor<double> x({A.getDimension(1)}, dv, std::numeric_limits<double>::infinity());
53+
// x.insert({0}, 0.0);
54+
// x.pack();
55+
//
56+
// IndexVar i, j;
57+
//
58+
// taco_set_num_threads(12);
59+
// for (auto _ : state) {
60+
// state.PauseTiming();
61+
//
62+
// Tensor<double> y({A.getDimension(0)}, dv, std::numeric_limits<double>::infinity());
63+
// y(i) = Reduction(MinOp(), j, AddOp(A(i,j), x(j)));
64+
// //y(i) = MinOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i));
65+
// //y(i) = MaskOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i));
66+
// //y(i) = MinOp(MaskOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i)), x(i));
67+
// //y(i) = MaskOp(MinOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i)), x(i));
68+
// //y(i) = MinOp(FilterOp(x(i)) * Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i));
69+
//
70+
// y.compile();
71+
// y.assemble();
72+
//
73+
// state.ResumeTiming();
74+
//
75+
// y.compute();
76+
// }
77+
// taco_set_num_threads(1);
78+
//}
79+
//TACO_BENCH(bench_mxv_taco);
80+
81+
//static void bench_mxv_suitesparse(benchmark::State& state) {
82+
// GrB_init(GrB_BLOCKING);
83+
// GxB_Global_Option_set(GxB_HYPER_SWITCH, GxB_NEVER_HYPER);
84+
// GxB_Global_Option_set(GxB_FORMAT, GxB_BY_ROW);
85+
//
86+
// int nthreads_max = 12;
87+
// GxB_Global_Option_set(GxB_NTHREADS, nthreads_max);
88+
//
89+
// Tensor<double> T = read("/data/scratch/s3chou/formats-bench/data/webbase_1M.mtx", CSR);
90+
// GrB_Index M = T.getDimension(0);
91+
// GrB_Index N = T.getDimension(1);
92+
// GrB_Matrix A;
93+
// GrB_Matrix_new(&A, GrB_FP64, M, N);
94+
// std::vector<GrB_Index> I, J;
95+
// std::vector<double> V;
96+
// for (const auto& c : T) {
97+
// I.push_back(c.first[0]);
98+
// J.push_back(c.first[1]);
99+
// V.push_back(c.second);
100+
// }
101+
// GrB_Matrix_build_FP64(A, I.data(), J.data(), V.data(), V.size(), GrB_PLUS_FP64);
102+
// //GrB_Index nnz;
103+
// //GrB_Matrix_nvals(&nnz, A);
104+
//
105+
// GrB_Vector x;
106+
// GrB_Vector_new(&x, GrB_FP64, N);
107+
// GrB_Vector_assign_FP64(x, NULL, NULL, 1, GrB_ALL, N, NULL);
108+
// //GrB_Vector_setElement_FP64(
109+
//
110+
// GrB_Vector y;
111+
// GrB_Vector_new(&y, GrB_FP64, M);
112+
// //GrB_Vector_assign_FP64(y, NULL, NULL, 0, GrB_ALL, M, NULL);
113+
//
114+
// GrB_Descriptor desc;
115+
// GrB_Descriptor_set (desc, GrB_OUTP, GrB_REPLACE);
116+
//
117+
// for (auto _ : state) {
118+
// GrB_mxv(y, NULL, NULL, GrB_MIN_PLUS_SEMIRING_FP64, A, x, desc);
119+
// //GrB_vxm(x, NULL, NULL, GrB_MIN_PLUS_SEMIRING_FP64, x, A, desc);
120+
// }
121+
//}
122+
123+
Format dv({Dense});
124+
int nthreads = 4;
125+
126+
// Shared input data for the mxv benchmarks. The constructor reads one matrix
// from disk and stores it twice: as a GraphBLAS matrix (A_trop_gb) and as a
// TACO CSR tensor whose fill value is +infinity (A_trop_taco). It also
// creates one input vector for each library.
struct GraphBLASFixture {
127+
GraphBLASFixture() {
128+
// Hard-coded absolute path to the input matrix.
const auto path = "/data/scratch/s3chou/formats-bench/data/webbase_1M.mtx";
129+
Tensor<double> T = read(path, CSR);
130+
131+
// TODO: Only run for square matrices
132+
133+
// Same dimensions as T, CSR format, with +infinity as the fill value.
A_trop_taco = Tensor<double>(T.getDimensions(), CSR, std::numeric_limits<double>::infinity());
134+
135+
// Start GraphBLAS in blocking mode and apply the global options used by
// the benchmarks; the thread count comes from the file-level `nthreads`.
GrB_init(GrB_BLOCKING);
136+
GxB_Global_Option_set(GxB_HYPER_SWITCH, GxB_NEVER_HYPER);
137+
GxB_Global_Option_set(GxB_FORMAT, GxB_BY_ROW);
138+
GxB_Global_Option_set(GxB_NTHREADS, nthreads);
139+
140+
GrB_Index M = T.getDimension(0);
141+
GrB_Index N = T.getDimension(1);
142+
GrB_Matrix_new(&A_trop_gb, GrB_FP64, M, N);
143+
144+
// Collect the entries of T as (row, column, value) triples for
// GrB_Matrix_build, inserting each entry into the TACO copy in the same pass.
std::vector<GrB_Index> I, J;
145+
std::vector<double> V;
146+
for (const auto& c : T) {
147+
I.push_back(c.first[0]);
148+
J.push_back(c.first[1]);
149+
V.push_back(c.second);
150+
A_trop_taco.insert(c.first.toVector(), c.second);
151+
}
152+
// GrB_PLUS_FP64 is passed as the operator for combining duplicate entries.
GrB_Matrix_build_FP64(A_trop_gb, I.data(), J.data(), V.data(), V.size(), GrB_PLUS_FP64);
153+
A_trop_taco.pack();
154+
155+
// GraphBLAS input vector of length N with the value 1 assigned to all indices.
GrB_Vector_new(&x_trop_gb, GrB_FP64, N);
156+
GrB_Vector_assign_FP64(x_trop_gb, NULL, NULL, 1, GrB_ALL, N, NULL);
157+
158+
// TACO input vector: dense, fill value +infinity, with 0.0 stored at index 0.
// NOTE(review): this differs from the GraphBLAS vector above, which holds 1
// everywhere - confirm the two benchmarks are meant to use different inputs.
x_trop_taco = Tensor<double>({T.getDimension(1)}, dv, std::numeric_limits<double>::infinity());
159+
x_trop_taco.insert({0}, 0.0);
160+
x_trop_taco.pack();
161+
}
45162

46-
static void bench_mxv_taco(benchmark::State& state) {
47-
Format dv({Dense});
163+
// GraphBLAS copies of the matrix and the input vector.
GrB_Matrix A_trop_gb;
164+
GrB_Vector x_trop_gb;
165+
// TACO copies of the matrix and the input vector.
Tensor<double> A_trop_taco;
166+
Tensor<double> x_trop_taco;
167+
};
48168

49-
Tensor<double> T = read("/data/scratch/s3chou/formats-bench/data/webbase_1M.mtx", CSR);
50-
Tensor<double> A(T.getDimensions(), CSR, std::numeric_limits<double>::infinity());
51-
for (const auto& c : T) {
52-
A.insert(c.first.toVector(), c.second);
53-
}
54-
A.pack();
169+
GraphBLASFixture fixture;
55170

56-
// TODO: Only run for square matrices
171+
// Benchmarks a min-plus matrix-vector product (GrB_mxv) in SuiteSparse:GraphBLAS
// on the matrix and vector held by the global `fixture`.
//
// Each iteration times the allocation of the output vector plus the GrB_mxv
// call; freeing the previous iteration's output is excluded from the timing.
static void bench_mxv_suitesparse(benchmark::State& state) {
  // GrB_init is already called once by GraphBLASFixture's constructor and must
  // not be called again, so only the global options are re-applied here.
  GxB_Global_Option_set(GxB_HYPER_SWITCH, GxB_NEVER_HYPER);
  GxB_Global_Option_set(GxB_FORMAT, GxB_BY_ROW);
  GxB_Global_Option_set(GxB_NTHREADS, nthreads);

  // Create the descriptor before configuring it: GrB_Descriptor_set needs a
  // valid handle, not an uninitialized one.
  GrB_Descriptor desc = NULL;
  GrB_Descriptor_new(&desc);
  GrB_Descriptor_set(desc, GrB_OUTP, GrB_REPLACE);

  GrB_Vector y = NULL;
  for (auto _ : state) {
    state.PauseTiming();
    // Release the output of the previous iteration outside the timed region.
    GrB_Vector_free(&y);
    state.ResumeTiming();

    GrB_Vector_new(&y, GrB_FP64, fixture.A_trop_taco.getDimension(0));
    GrB_mxv(y, NULL, NULL, GrB_MIN_PLUS_SEMIRING_FP64, fixture.A_trop_gb, fixture.x_trop_gb, desc);
    //GrB_vxm(x, NULL, NULL, GrB_MIN_PLUS_SEMIRING_FP64, x, A, desc);
  }
  GrB_Vector_free(&y);
  GrB_Descriptor_free(&desc);
}
194+
195+
// Benchmarks the TACO counterpart of the mxv kernel on the tensors held by the
// global `fixture`. Building the output tensor, defining the index expression
// and compiling it happen while timing is paused; assemble() and compute() are
// timed. The TACO thread count is set to `nthreads` for the run and reset to 1
// afterwards.
static void bench_mxv_taco(benchmark::State& state) {
196+
taco_set_num_threads(nthreads);
65197
for (auto _ : state) {
66198
state.PauseTiming();
67199

68-
Tensor<double> y({A.getDimension(0)}, dv, std::numeric_limits<double>::infinity());
69-
y(i) = MinOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i));
200+
IndexVar i, j;
201+
// Fresh dense output vector each iteration, with +infinity as the fill value.
Tensor<double> y({fixture.A_trop_taco.getDimension(0)}, dv, std::numeric_limits<double>::infinity());
202+
//y(i) = Reduction(MinOp(), j, AddOp(fixture.A_trop_taco(i,j), fixture.x_trop_taco(j)));
203+
// Active expression: a MinOp reduction over j of AddOp(A(i,j), x(j)), passed
// through MaskOp with x(i) as the second operand.
y(i) = MaskOp(Reduction(MinOp(), j, AddOp(fixture.A_trop_taco(i,j), fixture.x_trop_taco(j))), fixture.x_trop_taco(i));
204+
//y(i) = MinOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i));
70205
//y(i) = MaskOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i));
71206
//y(i) = MinOp(MaskOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i)), x(i));
72207
//y(i) = MaskOp(MinOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i)), x(i));
73208
//y(i) = MinOp(FilterOp(x(i)) * Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i));
74209

75210
// Compilation is done while timing is paused.
y.compile();
76-
y.assemble();
77211

78212
state.ResumeTiming();
79213

214+
// Timed region: assemble the output, then compute it.
y.assemble();
80215
y.compute();
81216
}
82217
taco_set_num_threads(1);
83218
}
84219

85-
static void bench_mxv_suitesparse(benchmark::State& state) {
86-
GrB_init(GrB_BLOCKING);
87-
88-
Tensor<double> T = read("/data/scratch/s3chou/formats-bench/data/webbase_1M.mtx", CSR);
89-
90-
for (const auto& c : T) {
91-
//A.insert(c.first.toVector(), c.second);
92-
}
93-
94-
GrB_Vector x = nullptr;
95-
GrB_Index n;
96-
GrB_Vector_new(&x, GrB_FP64, n);
97-
98-
for (auto _ : state) {
99-
}
100-
}
101-
102-
TACO_BENCH(bench_mxv_taco);
103-
TACO_BENCH(bench_mxv_suitesparse);
220+
GRAPHBLAS_BENCH(bench_mxv_suitesparse, 1000);
221+
GRAPHBLAS_BENCH(bench_mxv_taco, 1000);
104222

105223
#endif

0 commit comments

Comments
 (0)