|
#include <iostream>
#include <vector>

#include "bench.h"
#include "benchmark/benchmark.h"

#include "taco/tensor.h"
#include "taco/format.h"
#include "taco/index_notation/index_notation.h"
#include "taco/index_notation/tensor_operator.h"
| 9 | +using namespace taco; |
| 10 | + |
| 11 | +// XOR Op and Algebra |
| 12 | +struct GeneralAdd { |
| 13 | + ir::Expr operator()(const std::vector<ir::Expr> &v) { |
| 14 | + taco_iassert(v.size() >= 1) << "Add operator needs at least one operand"; |
| 15 | + if (v.size() == 1) |
| 16 | + return ir::Add::make(v[0], ir::Literal::zero(v[0].type())); |
| 17 | + ir::Expr add = ir::Add::make(v[0], v[1]); |
| 18 | + for (size_t idx = 2; idx < v.size(); ++idx) { |
| 19 | + add = ir::Add::make(add, v[idx]); |
| 20 | + } |
| 21 | + return add; |
| 22 | + } |
| 23 | +}; |
| 24 | + |
| 25 | +struct xorAlgebra { |
| 26 | + IterationAlgebra operator()(const std::vector<IndexExpr>& regions) { |
| 27 | + IterationAlgebra noIntersect = Complement(Intersect(regions[0], regions[1])); |
| 28 | + return Intersect(noIntersect, Union(regions[0], regions[1])); |
| 29 | + } |
| 30 | +}; |
| 31 | + |
| 32 | +struct andAlgebra { |
| 33 | + IterationAlgebra operator()(const std::vector<IndexExpr>& regions) { |
| 34 | + return Intersect(regions[0], regions[1]); |
| 35 | + } |
| 36 | +}; |
| 37 | + |
// Custom tensor operators used by the benchmarks below: both lower to
// addition, but xorOp1 iterates the symmetric difference of its operands
// while andOp1 iterates only their intersection.
Func xorOp1("logical_xor", GeneralAdd(), xorAlgebra());
Func andOp1("logical_and", GeneralAdd(), andAlgebra());
| 40 | + |
| 41 | +static void bench_imaging_xor(benchmark::State& state, const Format& f) { |
| 42 | + int dim = state.range(0); |
| 43 | + auto sparsity = 0.01; |
| 44 | + Tensor<int64_t> matrix = castToType<int64_t>("A", loadImageTensor("A", 0, f)); |
| 45 | + Tensor<int64_t> matrix1 = castToType<int64_t>("B", loadImageTensor("B", 0, f, 1 /* variant */)); |
| 46 | + |
| 47 | + for (auto _ : state) { |
| 48 | + state.PauseTiming(); |
| 49 | + Tensor<int64_t> result("result", {dim, dim}, f, 1); |
| 50 | + IndexVar i("i"), j("j"); |
| 51 | + result(i, j) = xorOp1(matrix(i, j), matrix1(i, j)); |
| 52 | + result.setAssembleWhileCompute(true); |
| 53 | + result.compile(); |
| 54 | + state.ResumeTiming(); |
| 55 | + result.compute(); |
| 56 | + result = result.removeExplicitZeros(result.getFormat()); |
| 57 | + |
| 58 | + int nnz = 0; |
| 59 | + for (auto& it : iterate<int64_t>(result)) { |
| 60 | + nnz++; |
| 61 | + } |
| 62 | + std::cout << "Result NNZ = " << nnz << std::endl; |
| 63 | + |
| 64 | + } |
| 65 | +} |
// Register the XOR benchmark for CSR over square dims 5000/10000/20000
// (state.range(0) supplies dim).
TACO_BENCH_ARGS(bench_imaging_xor, csr, CSR)
  ->ArgsProduct({{5000, 10000, 20000}});
| 68 | + |
| 69 | +static void bench_imaging_fused(benchmark::State& state, const Format& f) { |
| 70 | + int dim = state.range(0); |
| 71 | + auto sparsity = 0.01; |
| 72 | + Tensor<int64_t> matrix = castToType<int64_t>("A", loadImageTensor("A", 0, f)); |
| 73 | + Tensor<int64_t> matrix1 = castToType<int64_t>("B", loadImageTensor("B", 0, f, 1 /* variant */)); |
| 74 | + Tensor<int64_t> matrix2 = castToType<int64_t>("C", loadImageTensor("C", 0, f, 2 /* variant */)); |
| 75 | + |
| 76 | + for (auto _ : state) { |
| 77 | + state.PauseTiming(); |
| 78 | + Tensor<int64_t> result("result", {dim, dim}, f, 1); |
| 79 | + IndexVar i("i"), j("j"); |
| 80 | + result(i, j) = xorOp1(andOp1(matrix(i, j), matrix2(i, j)), andOp1(matrix1(i, j), matrix2(i, j))); |
| 81 | + result.setAssembleWhileCompute(false); |
| 82 | + result.compile(); |
| 83 | + state.ResumeTiming(); |
| 84 | + result.assemble(); |
| 85 | + result.compute(); |
| 86 | + result = result.removeExplicitZeros(result.getFormat()); |
| 87 | + |
| 88 | + int nnz = 0; |
| 89 | + for (auto& it : iterate<int64_t>(result)) { |
| 90 | + nnz++; |
| 91 | + } |
| 92 | + std::cout << "Result NNZ = " << nnz << std::endl; |
| 93 | + } |
| 94 | +} |
// Register the fused benchmark for CSR over square dims 5000/10000/20000
// (state.range(0) supplies dim).
TACO_BENCH_ARGS(bench_imaging_fused, csr, CSR)
  ->ArgsProduct({{5000, 10000, 20000}});
0 commit comments