Skip to content

Commit 4bb5f25

Browse files
committed
Add in nestedXorOp to fused ufunc benchmark
1 parent 5a4d5fc commit 4bb5f25

File tree

2 files changed

+42
-15
lines changed

2 files changed

+42
-15
lines changed

numpy/ufuncs.py

+5-1
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,8 @@ def bench_pydata_ufunc_fused(tacoBench, dim):
4141
matrix1 = safeCastPydataTensorToInts(loader.random((dim, dim), 0.01, variant=1))
4242
matrix2 = safeCastPydataTensorToInts(loader.random((dim, dim), 0.01, variant=2))
4343
def bench():
44-
result = numpy.logical_and(numpy.logical_xor(matrix, matrix1), matrix2)
44+
result = numpy.logical_xor(numpy.logical_xor(matrix, matrix1), matrix2)
45+
print("nnz = ", result.nnz)
4546
return result
4647
tacoBench(bench)
4748

@@ -148,10 +149,12 @@ def bench():
148149
fusedFuncs = [
149150
lambda a, b, c: numpy.logical_and(numpy.logical_xor(a, b), c),
150151
lambda a, b, c: numpy.logical_or(numpy.logical_xor(a, b), c),
152+
lambda a, b, c: numpy.logical_xor(numpy.logical_xor(a, b), c),
151153
]
152154
fusedFuncNames = [
153155
"xorAndFused",
154156
"xorOrFused",
157+
#"xorXorFused",
155158
]
156159
fusedFuncs = zip(fusedFuncs, fusedFuncNames)
157160
@pytest.mark.parametrize("tensor", FROSTTTensors.getTensors())
@@ -161,6 +164,7 @@ def bench_pydata_frostt_fused_ufunc_sparse(tacoBench, tensor, func):
161164
third = PydataTensorShifter().shiftLastMode(other)
162165
def bench():
163166
c = func[0](frTensor, other, third)
167+
print("nnz = ", c.nnz)
164168
return c
165169
extra_info = dict()
166170
extra_info['tensor_str'] = str(tensor)

taco/ufuncs.cpp

+37-14
Original file line number | Diff line number | Diff line change
@@ -106,19 +106,21 @@ struct UnionLeftCompAlgebra {
106106
}
107107
};
108108

109-
// Logical Not Op and Algebra
110-
struct Power {
111-
ir::Expr operator()(const std::vector<ir::Expr> &v) {
112-
return ir::Literal(1, v.get)
113-
}
114-
};
115109

116110
struct CompAlgebra {
117111
IterationAlgebra operator()(const std::vector<IndexExpr>& regions) {
118112
return Complement(regions[0]);
119113
}
120114
};
121115

116+
struct NestedXorAlgebra {
117+
IterationAlgebra operator()(const std::vector<IndexExpr> & regions) {
118+
IterationAlgebra intersect2 = Union(Intersect(regions[2], Union(regions[0], regions[1])), Intersect(regions[0], Union(regions[2], regions[1])));
119+
IterationAlgebra intersect3 = Intersect(Intersect(regions[0], regions[1]), regions[2]);
120+
IterationAlgebra unionComplement = Complement(Union(Union(regions[0], regions[1]), regions[2]));
121+
return Union(Complement(Union(intersect2, unionComplement)), intersect3);
122+
}
123+
};
122124

123125
template <int I, class...Ts>
124126
decltype(auto) get(Ts&&... ts) {
@@ -212,28 +214,34 @@ Func rightShift("right_shift", RightShift(), leftIncAlgebra());
212214
Func xorOp("logical_xor", GeneralAdd(), xorAlgebra());
213215
Func andOp("logical_and", GeneralAdd(), andAlgebra());
214216
Func orOp("logical_or", GeneralAdd(), orAlgebra());
215-
217+
Func nestedXorOp("fused_xor", GeneralAdd(), NestedXorAlgebra());
216218
static void bench_ufunc_fused(benchmark::State& state, const Format& f) {
217219
int dim = state.range(0);
218220
auto sparsity = 0.01;
219-
Tensor<double> matrix = loadRandomTensor("A", {dim, dim}, sparsity, f);
220-
Tensor<double> matrix1 = loadRandomTensor("B", {dim, dim}, sparsity, f, 1 /* variant */);
221-
Tensor<double> matrix2 = loadRandomTensor("C", {dim, dim}, sparsity, f, 2 /* variant */);
221+
Tensor<int64_t> matrix = castToType<int64_t>("A", loadRandomTensor("A", {dim, dim}, sparsity, f));
222+
Tensor<int64_t> matrix1 = castToType<int64_t>("B", loadRandomTensor("B", {dim, dim}, sparsity, f, 1 /* variant */));
223+
Tensor<int64_t> matrix2 = castToType<int64_t>("C", loadRandomTensor("C", {dim, dim}, sparsity, f, 2 /* variant */));
222224

223225
for (auto _ : state) {
224226
state.PauseTiming();
225-
Tensor<double> result("result", {dim, dim}, f);
227+
Tensor<int64_t> result("result", {dim, dim}, f);
226228
IndexVar i("i"), j("j");
227-
result(i, j) = andOp(xorOp(matrix(i, j), matrix1(i, j)), matrix2(i, j));
229+
result(i, j) = nestedXorOp(matrix(i, j), matrix1(i, j), matrix2(i, j));
228230
result.setAssembleWhileCompute(true);
229231
result.compile();
230232
state.ResumeTiming();
231233

232234
result.compute();
235+
result = result.removeExplicitZeros(result.getFormat());
236+
int nnz = 0;
237+
for (auto& it : iterate<int64_t>(result)) {
238+
nnz++;
239+
}
240+
std::cout << "Result NNZ = " << nnz << std::endl;
233241
}
234242
}
235-
// TACO_BENCH_ARGS(bench_ufunc_fused, csr, CSR)
236-
// ->ArgsProduct({{5000, 10000, 20000}});
243+
TACO_BENCH_ARGS(bench_ufunc_fused, csr, CSR)
244+
->ArgsProduct({{5000, 10000, 20000}});
237245

238246
// UfuncInputCache is a cache for the input to ufunc benchmarks. These benchmarks
239247
// operate on a tensor loaded from disk and the same tensor shifted slightly. Since
@@ -362,6 +370,7 @@ FOREACH_FROSTT_TENSOR(DECLARE_FROSTT_UFUNC_BENCH)
362370
enum FusedUfuncOp {
363371
XOR_AND = 1,
364372
XOR_OR = 2,
373+
XOR_XOR = 3,
365374
};
366375

367376
static void bench_frostt_ufunc_fused(benchmark::State& state, std::string tnsPath, FusedUfuncOp op) {
@@ -397,6 +406,10 @@ static void bench_frostt_ufunc_fused(benchmark::State& state, std::string tnsPat
397406
result(i, j, k) = orOp(xorOp(frosttTensor(i, j, k), other(i, j, k)), third(i, j, k));
398407
break;
399408
}
409+
case XOR_XOR: {
410+
result(i, j, k) = nestedXorOp(frosttTensor(i, j, k), other(i, j, k), third(i, j, k));
411+
break;
412+
}
400413
default:
401414
state.SkipWithError("invalid fused op");
402415
return;
@@ -414,6 +427,10 @@ static void bench_frostt_ufunc_fused(benchmark::State& state, std::string tnsPat
414427
result(i, j, k, l) = orOp(xorOp(frosttTensor(i, j, k, l), other(i, j, k, l)), third(i, j, k, l));
415428
break;
416429
}
430+
case XOR_XOR: {
431+
result(i, j, k, l) = nestedXorOp(frosttTensor(i, j, k, l), other(i, j, k, l), third(i, j, k, l));
432+
break;
433+
}
417434
default:
418435
state.SkipWithError("invalid fused op");
419436
return;
@@ -431,6 +448,10 @@ static void bench_frostt_ufunc_fused(benchmark::State& state, std::string tnsPat
431448
result(i, j, k, l, m) = orOp(xorOp(frosttTensor(i, j, k, l, m), other(i, j, k, l, m)), third(i, j, k, l, m));
432449
break;
433450
}
451+
case XOR_XOR: {
452+
result(i, j, k, l, m) = nestedXorOp(frosttTensor(i, j, k, l, m), other(i, j, k, l, m), third(i, j, k, l, m));
453+
break;
454+
}
434455
default:
435456
state.SkipWithError("invalid fused op");
436457
return;
@@ -445,12 +466,14 @@ static void bench_frostt_ufunc_fused(benchmark::State& state, std::string tnsPat
445466
state.ResumeTiming();
446467

447468
result.compute();
469+
448470
}
449471
}
450472

451473
#define DECLARE_FROSTT_FUSED_UFUNC_BENCH(name, path) \
452474
TACO_BENCH_ARGS(bench_frostt_ufunc_fused, name/xorAndFused, path, XOR_AND); \
453475
TACO_BENCH_ARGS(bench_frostt_ufunc_fused, name/xorOrFused, path, XOR_OR); \
476+
// TACO_BENCH_ARGS(bench_frostt_ufunc_fused, name/xorXorFused, path, XOR_XOR); \
454477
455478
FOREACH_FROSTT_TENSOR(DECLARE_FROSTT_FUSED_UFUNC_BENCH)
456479

0 commit comments

Comments
 (0)