Add SuiteSparse mxv benchmark

stephenchouca · stephenchouca · commit 0423123f3fee · 2021-03-16T21:56:00.000-04:00
diff --git a/Makefile b/Makefile
@@ -15,13 +15,25 @@ NUMPY_JSON := $(NUMPY_JSON)
 # Taco Specific Flags
 TACO_OUT = results/taco/$(benches_name)benches_$(shell date +%Y_%m_%d_%H%M%S).csv
 
+# Set GRAPHBLAS=ON if compiling GraphBLAS benchmarks.
+ifeq ($(GRAPHBLAS),)
 GRAPHBLAS := "OFF"
+endif
+# Set OPENMP=ON if compiling TACO with OpenMP support.
+ifeq ($(OPENMP),)
 OPENMP := "OFF"
+endif
 # Set LANKA=ON if compiling on the MIT Lanka cluster.
 ifeq ($(LANKA),)
 LANKA := "OFF"
 endif
 
+ifeq ("$(LANKA)","ON") # Lanka runs go through numactl (-C core list, -m 0) with OMP_PROC_BIND=true.
+CMD := OMP_PROC_BIND=true LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) numactl -C 0,2,4,6,8,10,24,26,28,30,32,34 -m 0 taco/build/taco-bench $(BENCHFLAGS)
+else
+CMD := LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS)
+endif
+
 export TACO_TENSOR_PATH = data/
 
 # To group benchmark output by benchmark, use BENCHFLAGS=--benchmark-group-by=func.
@@ -42,18 +54,17 @@ convert-csv-all:
 
 taco-bench: taco/build/taco-bench
 ifeq ($(BENCHES),"")
-	LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_out_format="csv" --benchmark_out="$(TACO_OUT)" --benchmark_repetitions=10 --benchmark_counters_tabular=true
-
+	$(CMD) --benchmark_out_format="csv" --benchmark_out="$(TACO_OUT)" --benchmark_repetitions=10 --benchmark_counters_tabular=true
 else
-	LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_filter="$(BENCHES)" --benchmark_out_format="csv" --benchmark_out="$(TACO_OUT)" --benchmark_repetitions=10 --benchmark_counters_tabular=true
+	$(CMD) --benchmark_filter="$(BENCHES)" --benchmark_out_format="csv" --benchmark_out="$(TACO_OUT)" --benchmark_repetitions=10 --benchmark_counters_tabular=true
 endif
 
 # Separate target to run the TACO benchmarks with numpy-taco cross validation logic.
 validate-taco-bench: taco/build/taco-bench validation-path
 ifeq ($(BENCHES),"")
-	LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_repetitions=1
+	$(CMD) --benchmark_repetitions=1
 else
-	LD_LIBRARY_PATH=taco/build/lib/:$(LD_LIBRARY_PATH) taco/build/taco-bench $(BENCHFLAGS) --benchmark_filter="$(BENCHES)" --benchmark_repetitions=1
+	$(CMD) --benchmark_filter="$(BENCHES)" --benchmark_repetitions=1
 endif
 
 .PHONY: validation-path
diff --git a/taco/CMakeLists.txt b/taco/CMakeLists.txt
@@ -20,6 +20,9 @@ include_directories(taco taco/include benchmark/include suitesparse/Include)
 file(GLOB TEST_SOURCES *.cpp *.h)
 
 set(CMAKE_CXX_FLAGS "${C_CXX_FLAGS} -std=c++14")
+if (GRAPHBLAS)  # GraphBLAS builds additionally pass -fpermissive
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fpermissive")  # append, so the -std=c++14 set above is kept
+endif(GRAPHBLAS)
 
 add_executable(taco-bench ${TEST_SOURCES} bench.h)
 target_link_libraries(taco-bench benchmark::benchmark)
diff --git a/taco/bench.h b/taco/bench.h
@@ -17,6 +17,14 @@
   ->ReportAggregatesOnly(true)    \
   ->UseRealTime()
 
+#define GRAPHBLAS_BENCH(bench,times)   /* registers `bench` with the settings below */ \
+  BENCHMARK(bench)                \
+  ->Unit(benchmark::kMillisecond) /* results are reported in milliseconds */ \
+  ->Repetitions(times)               /* `times` repetitions ... */ \
+  ->Iterations(1)                 /* ... of exactly one iteration each */ \
+  ->ReportAggregatesOnly(false)    /* keep per-repetition rows, not only aggregates */ \
+  ->UseRealTime()
+
 // TACO_BENCH_ARG is similar to TACO_BENCH but allows for passing
 // of an arbitrarily typed argument to the benchmark function.
 // TODO (rohany): Make this take in only 1 argument.
diff --git a/taco/graphblas.cpp b/taco/graphblas.cpp
@@ -17,89 +17,207 @@ extern "C" {
 
 using namespace taco;
 
-struct AddImpl {
-  ir::Expr operator()(const std::vector<ir::Expr>& v) {
-    return ir::Add::make(v[0], v[1]);
-  }
-};
-Func AddOp("add", AddImpl(), {Annihilator(std::numeric_limits<double>::infinity()), Identity(0), Commutative(), Associative()});
+ir::Expr addImpl(const std::vector<ir::Expr>& v) {  // lowering passed to the "add" Func
+  return ir::Add::make(v[0], v[1]);  // reads only the first two operands
+}
+Func AddOp("add", addImpl, {Annihilator(std::numeric_limits<double>::infinity()), Identity(0), Commutative(), Associative()});
 
-struct MinImpl{
-  ir::Expr operator()(const std::vector<ir::Expr>& v) {
-    return ir::Min::make(v[0], v[1]);
-  }
-};
-Func MinOp("min", MinImpl(), {Identity(std::numeric_limits<double>::infinity()), Commutative(), Associative()});
+ir::Expr minImpl(const std::vector<ir::Expr>& v) {  // lowering passed to the "min" Func
+  return ir::Min::make(v[0], v[1]);  // reads only the first two operands
+}
+Func MinOp("min", minImpl, {Identity(std::numeric_limits<double>::infinity()), Commutative(), Associative()});
 
-struct MaskImpl {
-  ir::Expr operator()(const std::vector<ir::Expr>& v) {
-    return v[0];
-  }
-};
+ir::Expr maskImpl(const std::vector<ir::Expr>& v) {  // lowering passed to the "mask" Func
+  return v[0];  // the first operand is returned unchanged
+}
 struct MaskAlgebra {
   IterationAlgebra operator()(const std::vector<IndexExpr>& r) {
     return Intersect(r[0], Complement(r[1]));
   }
 };
-Func MaskOp("mask", MaskImpl(), MaskAlgebra());
+Func MaskOp("mask", maskImpl, MaskAlgebra());
+
+//static void bench_mxv_taco(benchmark::State& state) {
+//  Format dv({Dense});
+//
+//  Tensor<double> T = read("/data/scratch/s3chou/formats-bench/data/webbase_1M.mtx", CSR);
+//  Tensor<double> A(T.getDimensions(), CSR, std::numeric_limits<double>::infinity());
+//  for (const auto& c : T) {
+//    A.insert(c.first.toVector(), c.second);
+//  }
+//  A.pack();
+//
+//  // TODO: Only run for square matrices
+//
+//  Tensor<double> x({A.getDimension(1)}, dv, std::numeric_limits<double>::infinity());
+//  x.insert({0}, 0.0);
+//  x.pack();
+//
+//  IndexVar i, j;
+//
+//  taco_set_num_threads(12);
+//  for (auto _ : state) {
+//    state.PauseTiming();
+//
+//    Tensor<double> y({A.getDimension(0)}, dv, std::numeric_limits<double>::infinity());
+//    y(i) = Reduction(MinOp(), j, AddOp(A(i,j), x(j)));
+//    //y(i) = MinOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i));
+//    //y(i) = MaskOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i));
+//    //y(i) = MinOp(MaskOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i)), x(i));
+//    //y(i) = MaskOp(MinOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i)), x(i));
+//    //y(i) = MinOp(FilterOp(x(i)) * Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i));
+//
+//    y.compile();
+//    y.assemble();
+//
+//    state.ResumeTiming();
+//
+//    y.compute();
+//  }
+//  taco_set_num_threads(1);
+//}
+//TACO_BENCH(bench_mxv_taco);
+
+//static void bench_mxv_suitesparse(benchmark::State& state) {
+//  GrB_init(GrB_BLOCKING);
+//  GxB_Global_Option_set(GxB_HYPER_SWITCH, GxB_NEVER_HYPER);
+//  GxB_Global_Option_set(GxB_FORMAT, GxB_BY_ROW);
+//
+//  int nthreads_max = 12;
+//  GxB_Global_Option_set(GxB_NTHREADS, nthreads_max);
+//
+//  Tensor<double> T = read("/data/scratch/s3chou/formats-bench/data/webbase_1M.mtx", CSR);
+//  GrB_Index M = T.getDimension(0);
+//  GrB_Index N = T.getDimension(1);
+//  GrB_Matrix A;
+//  GrB_Matrix_new(&A, GrB_FP64, M, N);
+//  std::vector<GrB_Index> I, J;
+//  std::vector<double> V;
+//  for (const auto& c : T) {
+//    I.push_back(c.first[0]);
+//    J.push_back(c.first[1]);
+//    V.push_back(c.second);
+//  }
+//  GrB_Matrix_build_FP64(A, I.data(), J.data(), V.data(), V.size(), GrB_PLUS_FP64);
+//  //GrB_Index nnz;
+//  //GrB_Matrix_nvals(&nnz, A);
+//  
+//  GrB_Vector x;
+//  GrB_Vector_new(&x, GrB_FP64, N);
+//  GrB_Vector_assign_FP64(x, NULL, NULL, 1, GrB_ALL, N, NULL);
+//  //GrB_Vector_setElement_FP64(
+//
+//  GrB_Vector y;
+//  GrB_Vector_new(&y, GrB_FP64, M);
+//  //GrB_Vector_assign_FP64(y, NULL, NULL, 0, GrB_ALL, M, NULL);
+//
+//  GrB_Descriptor desc;
+//  GrB_Descriptor_set (desc, GrB_OUTP, GrB_REPLACE);
+//
+//  for (auto _ : state) {
+//    GrB_mxv(y, NULL, NULL, GrB_MIN_PLUS_SEMIRING_FP64, A, x, desc);
+//    //GrB_vxm(x, NULL, NULL, GrB_MIN_PLUS_SEMIRING_FP64, x, A, desc);
+//  }
+//}
+
+Format dv({Dense});
+int nthreads = 4;
+
+struct GraphBLASFixture {  // operands shared by the GraphBLAS and TACO mxv benchmarks
+  GraphBLASFixture() {
+    const auto path = "/data/scratch/s3chou/formats-bench/data/webbase_1M.mtx";
+    Tensor<double> T = read(path, CSR);
+    // NOTE(review): the input path is hard-coded, and this constructor runs for the global `fixture`.
+    // TODO: Only run for square matrices
+
+    A_trop_taco = Tensor<double>(T.getDimensions(), CSR, std::numeric_limits<double>::infinity());
+    // Initialize GraphBLAS and set its global options before any GraphBLAS object is created.
+    GrB_init(GrB_BLOCKING);
+    GxB_Global_Option_set(GxB_HYPER_SWITCH, GxB_NEVER_HYPER);
+    GxB_Global_Option_set(GxB_FORMAT, GxB_BY_ROW);
+    GxB_Global_Option_set(GxB_NTHREADS, nthreads);
+    // The GraphBLAS matrix takes its dimensions from the tensor that was read.
+    GrB_Index M = T.getDimension(0);
+    GrB_Index N = T.getDimension(1);
+    GrB_Matrix_new(&A_trop_gb, GrB_FP64, M, N);
+    // Copy each stored entry of T into the GraphBLAS triplet arrays and into A_trop_taco.
+    std::vector<GrB_Index> I, J;
+    std::vector<double> V;
+    for (const auto& c : T) {
+      I.push_back(c.first[0]);
+      J.push_back(c.first[1]);
+      V.push_back(c.second);
+      A_trop_taco.insert(c.first.toVector(), c.second);
+    }
+    GrB_Matrix_build_FP64(A_trop_gb, I.data(), J.data(), V.data(), V.size(), GrB_PLUS_FP64);
+    A_trop_taco.pack();
+    // GraphBLAS input vector of length N; the scalar 1 is assigned over GrB_ALL.
+    GrB_Vector_new(&x_trop_gb, GrB_FP64, N);
+    GrB_Vector_assign_FP64(x_trop_gb, NULL, NULL, 1, GrB_ALL, N, NULL);
+    // TACO input vector: fill value infinity, entry 0 set to 0.0. NOTE(review): values differ from x_trop_gb; confirm intended.
+    x_trop_taco = Tensor<double>({T.getDimension(1)}, dv, std::numeric_limits<double>::infinity());
+    x_trop_taco.insert({0}, 0.0);
+    x_trop_taco.pack();
+  }
 
-static void bench_mxv_taco(benchmark::State& state) {
-  Format dv({Dense});
+  GrB_Matrix A_trop_gb;  // GraphBLAS matrix built from the entries of the input file
+  GrB_Vector x_trop_gb;  // GraphBLAS input vector
+  Tensor<double> A_trop_taco;  // TACO CSR tensor holding the same entries, fill value infinity
+  Tensor<double> x_trop_taco;  // TACO dense input vector
+};
 
-  Tensor<double> T = read("/data/scratch/s3chou/formats-bench/data/webbase_1M.mtx", CSR);
-  Tensor<double> A(T.getDimensions(), CSR, std::numeric_limits<double>::infinity());
-  for (const auto& c : T) {
-    A.insert(c.first.toVector(), c.second);
-  }
-  A.pack();
+GraphBLASFixture fixture;
 
-  // TODO: Only run for square matrices
+static void bench_mxv_suitesparse(benchmark::State& state) {
+  // Times GrB_mxv over the min-plus semiring on the fixture's GraphBLAS operands.
+  // GrB_init is not repeated here; the global GraphBLASFixture constructor already calls it.
+  GxB_Global_Option_set(GxB_HYPER_SWITCH, GxB_NEVER_HYPER);
+  GxB_Global_Option_set(GxB_FORMAT, GxB_BY_ROW);
+  GxB_Global_Option_set(GxB_NTHREADS, nthreads);
+  GrB_Descriptor desc = NULL;
+  GrB_Descriptor_new(&desc);  // the handle must be created before GrB_Descriptor_set can use it
+  GrB_Descriptor_set(desc, GrB_OUTP, GrB_REPLACE);
+  GrB_Vector y = NULL;
+  for (auto _ : state) {
+    state.PauseTiming();
+    // Release the previous iteration's output while the timer is paused.
-  Tensor<double> x({A.getDimension(1)}, dv, std::numeric_limits<double>::infinity());
-  x.insert({0}, 0.0);
-  x.pack();
+    GrB_Vector_free(&y);
 
-  IndexVar i, j;
+    state.ResumeTiming();
 
-  taco_set_num_threads(12);
+    GrB_Vector_new(&y, GrB_FP64, fixture.A_trop_taco.getDimension(0));
+    GrB_mxv(y, NULL, NULL, GrB_MIN_PLUS_SEMIRING_FP64, fixture.A_trop_gb, fixture.x_trop_gb, desc);
+  }
+  GrB_Vector_free(&y);
+  GrB_Descriptor_free(&desc);
+}
+
+static void bench_mxv_taco(benchmark::State& state) {  // TACO min-plus mxv on the fixture's TACO operands
+  taco_set_num_threads(nthreads);  // set back to 1 after the loop
   for (auto _ : state) {
     state.PauseTiming();
 
-    Tensor<double> y({A.getDimension(0)}, dv, std::numeric_limits<double>::infinity());
-    y(i) = MinOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i));
+    IndexVar i, j;
+    Tensor<double> y({fixture.A_trop_taco.getDimension(0)}, dv, std::numeric_limits<double>::infinity());
+    //y(i) = Reduction(MinOp(), j, AddOp(fixture.A_trop_taco(i,j), fixture.x_trop_taco(j)));
+    y(i) = MaskOp(Reduction(MinOp(), j, AddOp(fixture.A_trop_taco(i,j), fixture.x_trop_taco(j))), fixture.x_trop_taco(i));  // NOTE(review): masked by x, while bench_mxv_suitesparse passes a NULL mask; confirm intended
+    //y(i) = MinOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i));
     //y(i) = MaskOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i));
     //y(i) = MinOp(MaskOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i)), x(i));
     //y(i) = MaskOp(MinOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i)), x(i));
     //y(i) = MinOp(FilterOp(x(i)) * Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i));
 
     y.compile();
-    y.assemble();
 
     state.ResumeTiming();
 
+    y.assemble();  // runs after ResumeTiming, so it is timed together with compute()
     y.compute();
   }
   taco_set_num_threads(1);
 }
 
-static void bench_mxv_suitesparse(benchmark::State& state) {
-  GrB_init(GrB_BLOCKING);
-
-  Tensor<double> T = read("/data/scratch/s3chou/formats-bench/data/webbase_1M.mtx", CSR);
-
-  for (const auto& c : T) {
-    //A.insert(c.first.toVector(), c.second);
-  }
-
-  GrB_Vector x = nullptr;
-  GrB_Index n;
-  GrB_Vector_new(&x, GrB_FP64, n);
-
-  for (auto _ : state) {
-  }
-}
-
-TACO_BENCH(bench_mxv_taco);
-TACO_BENCH(bench_mxv_suitesparse);
+GRAPHBLAS_BENCH(bench_mxv_suitesparse, 1000);
+GRAPHBLAS_BENCH(bench_mxv_taco, 1000);
 
 #endif