@@ -14,7 +14,60 @@ static void applyBenchSizes(benchmark::internal::Benchmark* b) {
14
14
b->ArgsProduct ({{5000 , 10000 , 20000 }});
15
15
}
16
16
17
- static void bench_add_sparse_window (benchmark::State& state, const Format& f) {
17
// WindowConfig selects which window of the input matrix the standard
// windowing benchmarks operate on. Keep this list in sync with the
// definition in numpy/windowing.py.
enum WindowConfig {
  // Fixed 500x500 window covering [250, 750) in both dimensions.
  Constant,
  // Window of side dim/4 starting at offset dim/4 in both dimensions.
  ConstantFraction,
  // Everything except the first and last index of each dimension: [1, dim-1).
  AlmostWhole,
  // Window spanning the full extent [0, dim) of both dimensions.
  Whole,
  // Plain access with no window applied.
  NoWindow,
};
27
+
28
// FOREACH_WINDOW_CONFIG expands to `apply(name, value)` once for every
// WindowConfig enumerator, in declaration order. `apply` must be a
// function-like macro taking two arguments: the first is used as the
// configuration's display name and the second as the WindowConfig value.
// The parameter is deliberately not spelled with a double underscore, since
// such identifiers (and `__func__` in particular) are reserved.
#define FOREACH_WINDOW_CONFIG(apply) \
  apply(Constant, Constant) \
  apply(ConstantFraction, ConstantFraction) \
  apply(AlmostWhole, AlmostWhole) \
  apply(Whole, Whole) \
  apply(NoWindow, NoWindow)
34
+
35
+ Tensor<double> windowedTensorOp(Tensor<double > input, int dim, WindowConfig config) {
36
+ IndexVar i, j;
37
+ switch (config) {
38
+ case Constant: {
39
+ Tensor<double > result (" B" , {500 , 500 }, input.getFormat ());
40
+ result (i, j) = input (i (250 , 750 ), j (250 , 750 )) + input (i (250 , 750 ), j (250 , 750 ));
41
+ return result;
42
+ }
43
+ case ConstantFraction: {
44
+ int size = dim / 4 ;
45
+ int start = dim / 4 ;
46
+ Tensor<double > result (" B" , {size, size}, input.getFormat ());
47
+ result (i, j) = input (i (start, start + size), j (start, start + size)) + input (i (start, start + size), j (start, start + size));
48
+ return result;
49
+ }
50
+ case AlmostWhole: {
51
+ Tensor<double > result (" B" , {dim-2 , dim-2 }, input.getFormat ());
52
+ result (i, j) = input (i (1 , dim-1 ), j (1 , dim-1 )) + input (i (1 , dim-1 ), j (1 , dim-1 ));
53
+ return result;
54
+ }
55
+ case Whole: {
56
+ Tensor<double > result (" B" , {dim, dim}, input.getFormat ());
57
+ result (i, j) = input (i (0 , dim), j (0 , dim)) + input (i (0 , dim), j (0 , dim));
58
+ return result;
59
+ }
60
+ case NoWindow: {
61
+ Tensor<double > result (" B" , {dim, dim}, input.getFormat ());
62
+ result (i, j) = input (i, j) + input (i, j);
63
+ return result;
64
+ }
65
+ default :
66
+ assert (false );
67
+ }
68
+ }
69
+
70
+ static void bench_add_sparse_window (benchmark::State& state, const Format& f, WindowConfig config) {
18
71
int dim = state.range (0 );
19
72
auto sparsity = 0.01 ;
20
73
Tensor<double > matrix = loadRandomTensor (" A" , {dim, dim}, sparsity, f);
@@ -23,9 +76,7 @@ static void bench_add_sparse_window(benchmark::State& state, const Format& f) {
23
76
for (auto _ : state) {
24
77
// Setup.
25
78
state.PauseTiming ();
26
- Tensor<double > result (" B" , {dim-2 , dim-2 }, f);
27
- IndexVar i, j;
28
- result (i, j) = matrix (i (1 , dim-1 ), j (1 , dim-1 )) + matrix (i (1 , dim-1 ), j (1 , dim-1 ));
79
+ auto result = windowedTensorOp (matrix, dim, config);
29
80
result.compile ();
30
81
result.assemble ();
31
82
state.ResumeTiming ();
@@ -34,10 +85,11 @@ static void bench_add_sparse_window(benchmark::State& state, const Format& f) {
34
85
}
35
86
}
36
87
37
- // Have benchmarking report milliseconds and run for 10 iterations.
38
- // Run an instance with both CSR and CSC formats.
39
- TACO_BENCH_ARG (bench_add_sparse_window, csr, CSR)->Apply(applyBenchSizes);
40
- TACO_BENCH_ARG (bench_add_sparse_window, csc, CSC)->Apply(applyBenchSizes);
88
+ #define DECLARE_ADD_SPARSE_WINDOW_BENCH (configName, config ) \
89
+ TACO_BENCH_ARGS (bench_add_sparse_window, csr/configName, CSR, config)->Apply(applyBenchSizes); \
90
+ TACO_BENCH_ARGS (bench_add_sparse_window, csc/configName, CSC, config)->Apply(applyBenchSizes);
91
+
92
+ FOREACH_WINDOW_CONFIG (DECLARE_ADD_SPARSE_WINDOW_BENCH)
41
93
42
94
static void bench_add_sparse_strided_window(benchmark::State& state, const Format& f) {
43
95
int dim = state.range (0 );
0 commit comments