@@ -55,22 +55,31 @@ struct xorAndAlgebra {
55
55
}
56
56
};
57
57
58
+ struct testConstructionAlgebra {
59
+ IterationAlgebra operator ()(const std::vector<IndexExpr>& regions) {
60
+ auto m1 = Union (Complement (regions[0 ]), Complement (regions[2 ]));
61
+ auto m2 = Union (Complement (regions[1 ]), Complement (regions[2 ]));
62
+ return Intersect (m1, m2);
63
+ }
64
+ };
65
+
58
66
// Func objects used by the benchmarks in this file. Each one is constructed
// from a name string, Boolean(), and an instance of an iteration-algebra
// functor.
Func xorOp1 (" logical_xor" , Boolean (), xorAlgebra());
59
67
Func andOp1 (" logical_and" , Boolean (), andAlgebra());
60
68
Func xorAndOp (" fused_xor_and" , Boolean (), xorAndAlgebra());
69
// testConstructionAlgebra indexes regions[0], regions[1] and regions[2];
// NOTE(review): presumably testOp must therefore be applied to three
// operands -- confirm at every call site.
+ Func testOp (" test" , Boolean (), testConstructionAlgebra());
61
70
static void bench_image_xor (benchmark::State& state, const Format& f) {
62
71
int num = state.range (0 );
63
72
auto t1 = 0.5 ;
64
73
auto t2 = 0.55 ;
65
- Tensor<int64_t > matrix1 = castToType <int64_t >(" A" , loadImageTensor (" A" , num, f, t1, 1 /* variant */ ));
66
- Tensor<int64_t > matrix2 = castToType <int64_t >(" B" , loadImageTensor (" B" , num, f, t2, 2 /* variant */ ));
74
+ Tensor<int64_t > matrix1 = castToTypeZero <int64_t >(" A" , loadImageTensor (" A" , num, f, t1, 1 /* variant */ ));
75
+ Tensor<int64_t > matrix2 = castToTypeZero <int64_t >(" B" , loadImageTensor (" B" , num, f, t2, 2 /* variant */ ));
67
76
auto dims = matrix1.getDimensions ();
68
77
69
78
for (auto _ : state) {
70
79
state.PauseTiming ();
71
80
Tensor<int64_t > result (" result" , dims, f, 1 );
72
81
IndexVar i (" i" ), j (" j" );
73
- result (i, j) = xorOp1 (matrix1 (i, j), matrix2 (i, j));
82
+ result (i, j) = testOp (matrix1 (i, j), matrix2 (i, j));
74
83
result.setAssembleWhileCompute (true );
75
84
result.compile ();
76
85
state.ResumeTiming ();
@@ -86,7 +95,7 @@ static void bench_image_xor(benchmark::State& state, const Format& f) {
86
95
}
87
96
}
88
97
static void CustomArguments (benchmark::internal::Benchmark* b) {
89
- for (int i = 1 ; i <= 1 ; ++i)
98
+ for (int i = 1 ; i <= 11 ; ++i)
90
99
b->Args ({i});
91
100
}
92
101
TACO_BENCH_ARGS (bench_image_xor, csr, CSR)->Apply(CustomArguments);
@@ -95,11 +104,14 @@ static void bench_image_fused(benchmark::State& state, const Format& f) {
95
104
int num = state.range (0 );
96
105
auto t1 = 0.5 ;
97
106
auto t2 = 0.55 ;
98
- Tensor<int64_t > matrix1 = castToType <int64_t >(" A" , loadImageTensor (" A" , num, f, t1, 1 /* variant */ ));
99
- Tensor<int64_t > matrix2 = castToType <int64_t >(" B" , loadImageTensor (" B" , num, f, t2, 2 /* variant */ ));
100
- Tensor<int64_t > matrix3 = castToType <int64_t >(" C" , loadImageTensor (" C" , num, f, 3 /* variant */ ));
107
+ Tensor<int64_t > matrix1 = castToTypeZero <int64_t >(" A" , loadImageTensor (" A" , num, f, t1, 1 /* variant */ ));
108
+ Tensor<int64_t > matrix2 = castToTypeZero <int64_t >(" B" , loadImageTensor (" B" , num, f, t2, 2 /* variant */ ));
109
+ Tensor<int64_t > matrix3 = castToTypeZero <int64_t >(" C" , loadImageTensor (" C" , num, f, 3 /* variant */ ));
101
110
auto dims = matrix1.getDimensions ();
102
111
112
+ // write("temp/taco-mat1-" + std::to_string(num) + ".tns", matrix1);
113
+ // write("temp/taco-mat2-" + std::to_string(num) + ".tns", matrix2);
114
+ // write("temp/taco-mat3-" + std::to_string(num) + ".tns", matrix3);
103
115
int nnz = 0 ;
104
116
for (auto & it : iterate<int64_t >(matrix1)) {
105
117
nnz++;
@@ -115,35 +127,31 @@ static void bench_image_fused(benchmark::State& state, const Format& f) {
115
127
nnz++;
116
128
}
117
129
std::cout << " Matrix3 NNZ = " << nnz << std::endl;
130
+
118
131
for (auto _ : state) {
119
132
state.PauseTiming ();
120
133
Tensor<int64_t > result (" result" , dims, f, 0 );
121
- Tensor<int64_t > temp1 (" t1" , dims, f, 0 );
122
- Tensor<int64_t > temp2 (" t2" , dims, f, 0 );
134
+
123
135
IndexVar i (" i" ), j (" j" );
124
- // temp1(i,j) = andOp1(matrix1(i, j), matrix3(i, j));
125
- // temp2(i,j) = andOp1(matrix2(i, j), matrix3(i, j));
126
- // result(i, j) = xorOp1(temp1(i,j), temp2(i,j));
127
- // result(i, j) = xorOp1(andOp1(matrix1(i, j), matrix3(i, j)), andOp1(matrix2(i, j), matrix3(i, j)));
128
- result (i, j) = xorAndOp (matrix1 (i, j), matrix2 (i, j), matrix3 (i, j));
136
+ result (i, j) = testOp (matrix1 (i, j), matrix2 (i, j), matrix3 (i, j));
129
137
IndexStmt stmt = result.getAssignment ().concretize ();
130
138
result.setAssembleWhileCompute (true );
131
139
result.compile ();
132
140
state.ResumeTiming ();
133
141
result.compute ();
134
- temp1 = temp1.removeExplicitZeros (temp1.getFormat ());
135
- temp2 = temp2.removeExplicitZeros (temp2.getFormat ());
136
142
result = result.removeExplicitZeros (result.getFormat ());
137
143
int nnz = 0 ;
138
144
for (auto & it : iterate<int64_t >(result)) {
139
145
nnz++;
140
146
}
141
147
142
148
std::cout << " Result NNZ = " << nnz << std::endl;
143
- std::shared_ptr<ir::CodeGen> codegen = ir::CodeGen::init_default (std::cout, ir::CodeGen::ImplementationGen);
144
- ir::Stmt compute = lower (stmt, " compute" , false , true );
145
- codegen->compile (compute, true );
146
- // std::cout << result << std::endl;
149
+ // write("temp/taco-result" + std::to_string(num) + ".tns", result);
150
+ // Used to print out generated TACO code
151
+ // std::shared_ptr<ir::CodeGen> codegen = ir::CodeGen::init_default(std::cout, ir::CodeGen::ImplementationGen);
152
+ // ir::Stmt compute = lower(stmt, "compute", false, true);
153
+ // codegen->compile(compute, true);
147
154
}
148
155
}
156
+ TACO_BENCH_ARGS (bench_image_fused, csr, CSR)->Apply(CustomArguments);
149
157
TACO_BENCH_ARGS (bench_image_fused, csr, CSR)->Apply(CustomArguments);
0 commit comments