
Commit 2d8046f

*: several updates
* Point `taco` submodule at the `array_algebra` branch on TACO
* Clean up more targets in the makefile
* Add a file with some common benchmark utilities
* Add a simple benchmark to find the cutoff points between TACO and numpy
1 parent 0348754 commit 2d8046f

8 files changed (+122 -22 lines)

Makefile (+2 -6)

@@ -14,12 +14,8 @@ else
 	taco/build/taco-bench $(BENCHFLAGS) --benchmark_filter="$(BENCHES)"
 endif
 
-taco/build/taco-bench: check-and-reinit-submodules taco/build/Makefile
-	$(MAKE) -C taco/build taco-bench
-
-taco/build/Makefile: taco/benchmark/googletest
-	mkdir -p taco/build
-	cd taco/build/ && cmake ../
+taco/build/taco-bench: check-and-reinit-submodules taco/benchmark/googletest
+	cd taco/build/ && cmake ../ && make taco-bench
 
 taco/benchmark/googletest: check-and-reinit-submodules
 	if [ ! -d "taco/benchmark/googletest" ] ; then git clone https://github.com/google/googletest taco/benchmark/googletest; fi

numpy/conftest.py (+3 -1)

@@ -3,5 +3,7 @@
 def tacoBench(benchmark):
     def f(func):
         # Take statistics based on 10 rounds.
-        benchmark.pedantic(func, rounds=10, iterations=1)
+        benchmark.pedantic(func, rounds=10, iterations=5)
+        # TODO: how do I tell plain benchmark(func) to use 10 rounds?
+        # benchmark(func)
     return f

numpy/threshold.py (+11)

@@ -0,0 +1,11 @@
+import numpy
+from scipy.sparse import random
+import pytest
+
+@pytest.mark.parametrize("dim", [250, 500, 750, 1000, 2500, 5000, 7500, 8000])
+def bench_add_dense_threshold(tacoBench, dim):
+    matrix1 = random(dim, dim, format="csr").todense()
+    matrix2 = random(dim, dim, format="csr").todense()
+    def bench():
+        res = matrix1 + matrix2
+    tacoBench(bench)

taco/CMakeLists.txt (+2 -2)

@@ -7,11 +7,11 @@ add_subdirectory(benchmark)
 
 include_directories(taco taco/include benchmark/include)
 
-file(GLOB TEST_SOURCES *.cpp)
+file(GLOB TEST_SOURCES *.cpp *.h)
 
 set(CMAKE_CXX_FLAGS "${C_CXX_FLAGS} -std=c++14")
 
-add_executable(taco-bench ${TEST_SOURCES})
+add_executable(taco-bench ${TEST_SOURCES} bench.h)
 target_link_libraries(taco-bench benchmark::benchmark)
 target_link_libraries(taco-bench taco)

taco/bench.h (+31)

@@ -0,0 +1,31 @@
+#ifndef TACO_BENCH_BENCH_H
+#define TACO_BENCH_BENCH_H
+
+#include "benchmark/benchmark.h"
+
+// Register a benchmark with the following options:
+// * Millisecond output display
+// * 10 data points
+// * Reporting of avg/stddev/median
+// * Wall-clock time, rather than CPU time.
+#define TACO_BENCH(bench)                 \
+  BENCHMARK(bench)                        \
+    ->Unit(benchmark::kMillisecond)       \
+    ->Repetitions(10)                     \
+    ->Iterations(5)                       \
+    ->ReportAggregatesOnly(true)          \
+    ->UseRealTime()
+
+// TACO_BENCH_ARG is similar to TACO_BENCH but allows for passing
+// of an arbitrarily typed argument to the benchmark function.
+// TODO (rohany): Make this take in only 1 argument.
+// TODO (rohany): Don't specify the time here, but do it at the command line.
+#define TACO_BENCH_ARG(bench, name, arg)  \
+  BENCHMARK_CAPTURE(bench, name, arg)     \
+    ->Unit(benchmark::kMicrosecond)       \
+    ->Repetitions(10)                     \
+    ->Iterations(5)                       \
+    ->ReportAggregatesOnly(true)          \
+    ->UseRealTime()
+
+#endif // TACO_BENCH_BENCH_H
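
For orientation, here is a minimal sketch of how a benchmark could be registered with the TACO_BENCH macro above. The function bench_dummy and its body are hypothetical and not part of this commit; the taco-bench target may already provide its own main, in which case BENCHMARK_MAIN() is unnecessary.

#include "bench.h"
#include "benchmark/benchmark.h"

// Hypothetical example, not part of this commit: a trivial benchmark body.
// TACO_BENCH(bench_dummy) expands to BENCHMARK(bench_dummy) configured for
// millisecond units, 10 repetitions of 5 iterations each, aggregate-only
// reporting, and wall-clock time.
static void bench_dummy(benchmark::State& state) {
  int x = 0;
  for (auto _ : state) {
    x += 1;
    benchmark::DoNotOptimize(x);
  }
}
TACO_BENCH(bench_dummy);

// Needed only when building this sketch standalone.
BENCHMARK_MAIN();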

taco/taco

Submodule taco updated 249 files

taco/threshold.cpp (+50)

@@ -0,0 +1,50 @@
+#include "bench.h"
+#include "benchmark/benchmark.h"
+
+#include "taco/tensor.h"
+#include "taco/format.h"
+#include "taco/index_notation/index_notation.h"
+
+using namespace taco;
+
+// bench_add_sparse_threshold adds two tensors of a given dimension and sparsity.
+// It sweeps a range of dimensions on the TACO side to see when TACO becomes faster than NumPy.
+static void bench_add_sparse_threshold(benchmark::State& state, float sparsity) {
+  int dim = state.range(0);
+  Tensor<float> matrix1("A", {dim, dim}, CSR);
+  Tensor<float> matrix2("B", {dim, dim}, CSR);
+  Tensor<float> result("C", {dim, dim}, CSR);
+
+  srand(4357);
+  // TODO (rohany): Move this into a helper method.
+  for (int i = 0; i < dim; i++) {
+    for (int j = 0; j < dim; j++) {
+      float rand_float = (float)rand()/(float)(RAND_MAX);
+      if (rand_float < sparsity) {
+        matrix1.insert({i, j}, (float) ((int) (rand_float*3/sparsity)));
+      }
+
+      rand_float = (float)rand()/(float)(RAND_MAX);
+      if (rand_float < sparsity) {
+        matrix2.insert({i, j}, (float) ((int) (rand_float*3/sparsity)));
+      }
+    }
+  }
+  matrix1.pack(); matrix2.pack();
+
+  IndexVar i, j;
+  result(i, j) = matrix1(i, j) + matrix2(i, j);
+  result.compile();
+  result.assemble();
+
+  for (auto _ : state) {
+    result.compute();
+  }
+}
+static void applyBenchSizes(benchmark::internal::Benchmark* b) {
+  b->ArgsProduct({{250, 500, 750, 1000, 2500, 5000, 7500, 8000}});
+}
+// TACO_BENCH_ARG(bench_add_sparse_threshold, 0.001, 0.001)->Apply(applyBenchSizes);
+// TACO_BENCH_ARG(bench_add_sparse_threshold, 0.01, 0.01)->Apply(applyBenchSizes);
+// TACO_BENCH_ARG(bench_add_sparse_threshold, 0.05, 0.05)->Apply(applyBenchSizes);
+TACO_BENCH_ARG(bench_add_sparse_threshold, 0.25, 0.25)->Apply(applyBenchSizes);

taco/windowing.cpp (+22 -12)

@@ -1,3 +1,4 @@
+#include "bench.h"
 #include "benchmark/benchmark.h"
 
 #include "taco/tensor.h"
@@ -6,12 +7,18 @@
 
 using namespace taco;
 
-static void BM_basic_windowing(benchmark::State& state) {
-  auto dim = 10000;
+// applyBenchSizes is used to configure the benchmarks to run with the
+// input arguments.
+static void applyBenchSizes(benchmark::internal::Benchmark* b) {
+  // Currently considering square tensors of these sizes.
+  b->ArgsProduct({{5000, 10000, 20000}});
+}
+
+static void bench_add_sparse_window(benchmark::State& state, const Format& f) {
+  int dim = state.range(0);
   auto sparsity = 0.01;
-  // CSR format.
-  Tensor<float> matrix("A", {dim, dim}, {Dense, Sparse});
-  Tensor<float> result("B", {dim-2, dim-2}, {Dense, Sparse});
+  Tensor<float> matrix("A", {dim, dim}, f);
+  Tensor<float> result("B", {dim-2, dim-2}, f);
 
   srand(4357);
   for (int i = 0; i < dim; i++) {
@@ -24,16 +31,19 @@ static void BM_basic_windowing(benchmark::State& state) {
   }
   matrix.pack();
 
-  IndexVar i, j;
-  result(i, j) = matrix(i(1, dim-1), j(1, dim-1)) + matrix(i(1, dim-1), j(1, dim-1));
-  result.compile();
-  result.assemble();
 
   for (auto _ : state) {
-    // This code gets timed. Setup goes outside the loop.
+    state.PauseTiming();
+    IndexVar i, j;
+    result(i, j) = matrix(i(1, dim-1), j(1, dim-1)) + matrix(i(1, dim-1), j(1, dim-1));
+    result.compile();
+    result.assemble();
+    state.ResumeTiming();
     result.compute();
   }
 }
-// Have benchmarking report milliseconds and run for 10 iterations.
-BENCHMARK(BM_basic_windowing)->Unit(benchmark::kMillisecond)->Iterations(10);
 
+// Have benchmarking report aggregates over 10 repetitions of 5 iterations each (see TACO_BENCH_ARG).
+// Run an instance with both CSR and CSC formats.
+TACO_BENCH_ARG(bench_add_sparse_window, csr, CSR)->Apply(applyBenchSizes);
+TACO_BENCH_ARG(bench_add_sparse_window, csc, CSC)->Apply(applyBenchSizes);
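
The restructured loop above moves the index-notation setup (expression assignment, compile, assemble) inside the timed loop but excludes it from the measurement with PauseTiming/ResumeTiming, so only result.compute() is measured. As a standalone illustration of that Google Benchmark pattern (the vector workload below is made up for illustration, not taken from this commit):

#include <vector>
#include "benchmark/benchmark.h"

// Illustrative sketch only: untimed per-iteration setup between PauseTiming()
// and ResumeTiming(), then timed work, mirroring how windowing.cpp times only
// result.compute().
static void bench_pause_resume_sketch(benchmark::State& state) {
  for (auto _ : state) {
    state.PauseTiming();
    std::vector<double> a(state.range(0), 1.0);  // setup, not timed
    std::vector<double> b(state.range(0), 2.0);
    state.ResumeTiming();
    for (size_t i = 0; i < a.size(); i++) {      // timed work
      a[i] += b[i];
    }
    benchmark::DoNotOptimize(a);
  }
}
BENCHMARK(bench_pause_resume_sketch)->Arg(1 << 16)->Unit(benchmark::kMicrosecond);

// main for a standalone build of this sketch.
BENCHMARK_MAIN();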
