
Commit 2d8046f

*: several updates
* Point `taco` submodule at the `array_algebra` branch on TACO
* Clean up more targets in the makefile
* Add a file with some common benchmark utilities
* Add a simple benchmark to find the cutoff points between TACO and numpy
1 parent 0348754 commit 2d8046f

8 files changed (+122 -22 lines)

Makefile (+2 -6)

@@ -14,12 +14,8 @@ else
 	taco/build/taco-bench $(BENCHFLAGS) --benchmark_filter="$(BENCHES)"
 endif
 
-taco/build/taco-bench: check-and-reinit-submodules taco/build/Makefile
-	$(MAKE) -C taco/build taco-bench
-
-taco/build/Makefile: taco/benchmark/googletest
-	mkdir -p taco/build
-	cd taco/build/ && cmake ../
+taco/build/taco-bench: check-and-reinit-submodules taco/benchmark/googletest
+	cd taco/build/ && cmake ../ && make taco-bench
 
 taco/benchmark/googletest: check-and-reinit-submodules
 	if [ ! -d "taco/benchmark/googletest" ] ; then git clone https://github.com/google/googletest taco/benchmark/googletest; fi

numpy/conftest.py (+3 -1)

@@ -3,5 +3,7 @@
 def tacoBench(benchmark):
     def f(func):
         # Take statistics based on 10 rounds.
-        benchmark.pedantic(func, rounds=10, iterations=1)
+        benchmark.pedantic(func, rounds=10, iterations=5)
+        # TODO: how do I tell plain benchmark(func) to use 10 rounds?
+        # benchmark(func)
     return f

numpy/threshold.py (+11)

@@ -0,0 +1,11 @@
+import numpy
+from scipy.sparse import random
+import pytest
+
+@pytest.mark.parametrize("dim", [250, 500, 750, 1000, 2500, 5000, 7500, 8000])
+def bench_add_dense_threshold(tacoBench, dim):
+    matrix1 = random(dim, dim, format="csr").todense()
+    matrix2 = random(dim, dim, format="csr").todense()
+    def bench():
+        res = matrix1 + matrix2
+    tacoBench(bench)

taco/CMakeLists.txt (+2 -2)

@@ -7,11 +7,11 @@ add_subdirectory(benchmark)
 
 include_directories(taco taco/include benchmark/include)
 
-file(GLOB TEST_SOURCES *.cpp)
+file(GLOB TEST_SOURCES *.cpp *.h)
 
 set(CMAKE_CXX_FLAGS "${C_CXX_FLAGS} -std=c++14")
 
-add_executable(taco-bench ${TEST_SOURCES})
+add_executable(taco-bench ${TEST_SOURCES} bench.h)
 target_link_libraries(taco-bench benchmark::benchmark)
 target_link_libraries(taco-bench taco)

taco/bench.h (+31)

@@ -0,0 +1,31 @@
+#ifndef TACO_BENCH_BENCH_H
+#define TACO_BENCH_BENCH_H
+
+#include "benchmark/benchmark.h"
+
+// Register a benchmark with the following options:
+// * Millisecond output display
+// * 10 data points
+// * Reporting of avg/stddev/median
+// * Wall-clock time, rather than CPU time.
+#define TACO_BENCH(bench)                 \
+  BENCHMARK(bench)                        \
+    ->Unit(benchmark::kMillisecond)       \
+    ->Repetitions(10)                     \
+    ->Iterations(5)                       \
+    ->ReportAggregatesOnly(true)          \
+    ->UseRealTime()
+
+// TACO_BENCH_ARG is similar to TACO_BENCH but allows for passing
+// of an arbitrarily typed argument to the benchmark function.
+// TODO (rohany): Make this take in only 1 argument.
+// TODO (rohany): Don't specify the time here, but do it at the command line.
+#define TACO_BENCH_ARG(bench, name, arg)  \
+  BENCHMARK_CAPTURE(bench, name, arg)     \
+    ->Unit(benchmark::kMicrosecond)       \
+    ->Repetitions(10)                     \
+    ->Iterations(5)                       \
+    ->ReportAggregatesOnly(true)          \
+    ->UseRealTime()
+
+#endif // TACO_BENCH_BENCH_H
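
For orientation, here is a minimal sketch of how a benchmark could be registered with the TACO_BENCH macro above. The function bench_dummy and its body are hypothetical and not part of this commit; the taco-bench target may already provide its own main, in which case BENCHMARK_MAIN() is unnecessary.

#include "bench.h"
#include "benchmark/benchmark.h"

// Hypothetical example, not part of this commit: a trivial benchmark body.
// TACO_BENCH(bench_dummy) expands to BENCHMARK(bench_dummy) configured for
// millisecond units, 10 repetitions of 5 iterations each, aggregate-only
// reporting, and wall-clock time.
static void bench_dummy(benchmark::State& state) {
  int x = 0;
  for (auto _ : state) {
    x += 1;
    benchmark::DoNotOptimize(x);
  }
}
TACO_BENCH(bench_dummy);

// Needed only when building this sketch standalone.
BENCHMARK_MAIN();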

taco/taco

Submodule taco updated 249 files

taco/threshold.cpp (+50)

@@ -0,0 +1,50 @@
+#include "bench.h"
+#include "benchmark/benchmark.h"
+
+#include "taco/tensor.h"
+#include "taco/format.h"
+#include "taco/index_notation/index_notation.h"
+
+using namespace taco;
+
+// bench_add_sparse_threshold adds two tensors of a given dimension and sparsity.
+// It sweeps a range of dimensions on the TACO side to see when TACO becomes faster than NumPy.
+static void bench_add_sparse_threshold(benchmark::State& state, float sparsity) {
+  int dim = state.range(0);
+  Tensor<float> matrix1("A", {dim, dim}, CSR);
+  Tensor<float> matrix2("B", {dim, dim}, CSR);
+  Tensor<float> result("C", {dim, dim}, CSR);
+
+  srand(4357);
+  // TODO (rohany): Move this into a helper method.
+  for (int i = 0; i < dim; i++) {
+    for (int j = 0; j < dim; j++) {
+      float rand_float = (float)rand()/(float)(RAND_MAX);
+      if (rand_float < sparsity) {
+        matrix1.insert({i, j}, (float) ((int) (rand_float*3/sparsity)));
+      }
+
+      rand_float = (float)rand()/(float)(RAND_MAX);
+      if (rand_float < sparsity) {
+        matrix2.insert({i, j}, (float) ((int) (rand_float*3/sparsity)));
+      }
+    }
+  }
+  matrix1.pack(); matrix2.pack();
+
+  IndexVar i, j;
+  result(i, j) = matrix1(i, j) + matrix2(i, j);
+  result.compile();
+  result.assemble();
+
+  for (auto _ : state) {
+    result.compute();
+  }
+}
+static void applyBenchSizes(benchmark::internal::Benchmark* b) {
+  b->ArgsProduct({{250, 500, 750, 1000, 2500, 5000, 7500, 8000}});
+}
+// TACO_BENCH_ARG(bench_add_sparse_threshold, 0.001, 0.001)->Apply(applyBenchSizes);
+// TACO_BENCH_ARG(bench_add_sparse_threshold, 0.01, 0.01)->Apply(applyBenchSizes);
+// TACO_BENCH_ARG(bench_add_sparse_threshold, 0.05, 0.05)->Apply(applyBenchSizes);
+TACO_BENCH_ARG(bench_add_sparse_threshold, 0.25, 0.25)->Apply(applyBenchSizes);

taco/windowing.cpp (+22 -12)

@@ -1,3 +1,4 @@
+#include "bench.h"
 #include "benchmark/benchmark.h"
 
 #include "taco/tensor.h"
@@ -6,12 +7,18 @@
 
 using namespace taco;
 
-static void BM_basic_windowing(benchmark::State& state) {
-  auto dim = 10000;
+// applyBenchSizes is used to configure the benchmarks to run with the
+// input arguments.
+static void applyBenchSizes(benchmark::internal::Benchmark* b) {
+  // Currently considering square tensors of these sizes.
+  b->ArgsProduct({{5000, 10000, 20000}});
+}
+
+static void bench_add_sparse_window(benchmark::State& state, const Format& f) {
+  int dim = state.range(0);
   auto sparsity = 0.01;
-  // CSR format.
-  Tensor<float> matrix("A", {dim, dim}, {Dense, Sparse});
-  Tensor<float> result("B", {dim-2, dim-2}, {Dense, Sparse});
+  Tensor<float> matrix("A", {dim, dim}, f);
+  Tensor<float> result("B", {dim-2, dim-2}, f);
 
   srand(4357);
   for (int i = 0; i < dim; i++) {
@@ -24,16 +31,19 @@ static void BM_basic_windowing(benchmark::State& state) {
   }
   matrix.pack();
 
-  IndexVar i, j;
-  result(i, j) = matrix(i(1, dim-1), j(1, dim-1)) + matrix(i(1, dim-1), j(1, dim-1));
-  result.compile();
-  result.assemble();
 
   for (auto _ : state) {
-    // This code gets timed. Setup goes outside the loop.
+    state.PauseTiming();
+    IndexVar i, j;
+    result(i, j) = matrix(i(1, dim-1), j(1, dim-1)) + matrix(i(1, dim-1), j(1, dim-1));
+    result.compile();
+    result.assemble();
+    state.ResumeTiming();
     result.compute();
   }
 }
-// Have benchmarking report milliseconds and run for 10 iterations.
-BENCHMARK(BM_basic_windowing)->Unit(benchmark::kMillisecond)->Iterations(10);
 
+// Have benchmarking report aggregates over 10 repetitions of 5 iterations each (see TACO_BENCH_ARG).
+// Run an instance with both CSR and CSC formats.
+TACO_BENCH_ARG(bench_add_sparse_window, csr, CSR)->Apply(applyBenchSizes);
+TACO_BENCH_ARG(bench_add_sparse_window, csc, CSC)->Apply(applyBenchSizes);
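
The restructured loop above moves the index-notation setup (expression assignment, compile, assemble) inside the timed loop but excludes it from the measurement with PauseTiming/ResumeTiming, so only result.compute() is measured. As a standalone illustration of that Google Benchmark pattern (the vector workload below is made up for illustration, not taken from this commit):

#include <vector>
#include "benchmark/benchmark.h"

// Illustrative sketch only: untimed per-iteration setup between PauseTiming()
// and ResumeTiming(), then timed work, mirroring how windowing.cpp times only
// result.compute().
static void bench_pause_resume_sketch(benchmark::State& state) {
  for (auto _ : state) {
    state.PauseTiming();
    std::vector<double> a(state.range(0), 1.0);  // setup, not timed
    std::vector<double> b(state.range(0), 2.0);
    state.ResumeTiming();
    for (size_t i = 0; i < a.size(); i++) {      // timed work
      a[i] += b[i];
    }
    benchmark::DoNotOptimize(a);
  }
}
BENCHMARK(bench_pause_resume_sketch)->Arg(1 << 16)->Unit(benchmark::kMicrosecond);

// main for a standalone build of this sketch.
BENCHMARK_MAIN();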
