Skip to content

Commit 95f0282

Browse files
committed
taco: fused ufunc benchmark on FROSTT tensors
1 parent 8f89f26 commit 95f0282

File tree

1 file changed

+83
-2
lines changed

1 file changed

+83
-2
lines changed

taco/ufuncs.cpp

+83-2
Original file line number | Diff line number | Diff line change
@@ -185,8 +185,8 @@ static void bench_ufunc_fused(benchmark::State& state, const Format& f) {
185185
result.compute();
186186
}
187187
}
188-
TACO_BENCH_ARGS(bench_ufunc_fused, csr, CSR)
189-
->ArgsProduct({{5000, 10000, 20000}});
188+
// TACO_BENCH_ARGS(bench_ufunc_fused, csr, CSR)
189+
// ->ArgsProduct({{5000, 10000, 20000}});
190190

191191
// UfuncInputCache is a cache for the input to ufunc benchmarks. These benchmarks
192192
// operate on a tensor loaded from disk and the same tensor shifted slightly. Since
@@ -308,6 +308,87 @@ static void bench_frostt_ufunc(benchmark::State& state, std::string tnsPath, Fun
308308

309309
FOREACH_FROSTT_TENSOR(DECLARE_FROSTT_UFUNC_BENCH)
310310

311+
// FusedUfuncOp selects which fused ufunc expression bench_frostt_ufunc_fused builds.
enum FusedUfuncOp {
312+
// XOR_AND builds andOp(xorOp(A, B), C) as one index expression.
XOR_AND = 1,
313+
};
314+
315+
// bench_frostt_ufunc_fused benchmarks a fused ufunc expression over a FROSTT tensor.
//
// state   - Google Benchmark state driving the iteration loop.
// tnsPath - path of the tensor file, appended to getTacoTensorPath() + "FROSTT/".
// op      - which fused expression to build (see FusedUfuncOp).
//
// The last "/"-separated component of tnsPath, cut at its first "."-separated piece,
// is used as the benchmark label. Tensors of order 3, 4 and 5 are handled; any other
// order, or an unrecognized op, is reported through state.SkipWithError and the
// function returns. In each iteration the result tensor is constructed and compiled
// with timing paused, so only result.compute() is timed.
static void bench_frostt_ufunc_fused(benchmark::State& state, std::string tnsPath, FusedUfuncOp op) {
316+
auto frosttTensorPath = getTacoTensorPath();
317+
frosttTensorPath += "FROSTT/";
318+
frosttTensorPath += tnsPath;
319+
320+
// Derive the label from the file name portion of tnsPath.
auto pathSplit = taco::util::split(tnsPath, "/");
321+
auto filename = pathSplit[pathSplit.size() - 1];
322+
auto tensorName = taco::util::split(filename, ".")[0];
323+
state.SetLabel(tensorName);
324+
325+
// The first two operands come from the shared input cache.
// NOTE(review): presumably `other` is the loaded tensor shifted slightly, per the
// UfuncInputCache comment earlier in the file -- confirm.
Tensor<int64_t> frosttTensor, other;
326+
std::tie(frosttTensor, other) = inputCache.getUfuncInput(frosttTensorPath, Sparse);
327+
// The third operand is derived from `other` via shiftLastMode.
Tensor<int64_t> third = shiftLastMode<int64_t, int64_t>("C", other);
328+
329+
for (auto _ : state) {
330+
// Setup (result construction, expression definition, compile) is excluded from timing.
state.PauseTiming();
331+
Tensor<int64_t> result("result", frosttTensor.getDimensions(), frosttTensor.getFormat());
332+
result.setAssembleWhileCompute(true);
333+
// Unfortunately, we have to perform this double nesting (order, then op) because, for some
334+
// reason, I get a TACO generated-code compilation error when trying to lift the ufunc
335+
// operation into a lambda.
336+
switch (frosttTensor.getOrder()) {
337+
case 3: {
338+
IndexVar i, j, k;
339+
switch (op) {
340+
case XOR_AND: {
341+
result(i, j, k) = andOp(xorOp(frosttTensor(i, j, k), other(i, j, k)), third(i, j, k));
342+
break;
343+
}
344+
default:
345+
state.SkipWithError("invalid fused op");
346+
return;
347+
}
348+
break;
349+
}
350+
case 4: {
351+
IndexVar i, j, k, l;
352+
switch (op) {
353+
case XOR_AND: {
354+
result(i, j, k, l) = andOp(xorOp(frosttTensor(i, j, k, l), other(i, j, k, l)), third(i, j, k, l));
355+
break;
356+
}
357+
default:
358+
state.SkipWithError("invalid fused op");
359+
return;
360+
}
361+
break;
362+
}
363+
case 5: {
364+
IndexVar i, j, k, l, m;
365+
switch (op) {
366+
case XOR_AND: {
367+
result(i, j, k, l, m) = andOp(xorOp(frosttTensor(i, j, k, l, m), other(i, j, k, l, m)), third(i, j, k, l, m));
368+
break;
369+
}
370+
default:
371+
state.SkipWithError("invalid fused op");
372+
return;
373+
}
374+
break;
375+
}
376+
default:
377+
// Only tensors of order 3, 4 and 5 are supported by this benchmark.
state.SkipWithError("invalid tensor dimension");
378+
return;
379+
}
380+
result.compile();
381+
state.ResumeTiming();
382+
383+
// The only timed statement in the loop.
result.compute();
384+
}
385+
}
386+
387+
// DECLARE_FROSTT_FUSED_UFUNC_BENCH(name, path) expands to a TACO_BENCH_ARGS use of
// bench_frostt_ufunc_fused labelled name/xorAndFused, passing `path` and XOR_AND.
// It is applied below through FOREACH_FROSTT_TENSOR.
// NOTE(review): presumably this registers one benchmark per FROSTT tensor; both
// TACO_BENCH_ARGS and FOREACH_FROSTT_TENSOR are defined outside this view -- confirm.
#define DECLARE_FROSTT_FUSED_UFUNC_BENCH(name, path) \
388+
TACO_BENCH_ARGS(bench_frostt_ufunc_fused, name/xorAndFused, path, XOR_AND); \
389+
390+
FOREACH_FROSTT_TENSOR(DECLARE_FROSTT_FUSED_UFUNC_BENCH)
391+
311392
struct SuiteSparseTensors {
312393
SuiteSparseTensors() {
313394
auto ssTensorPath = getTacoTensorPath();

0 commit comments

Comments
 (0)