@@ -185,8 +185,8 @@ static void bench_ufunc_fused(benchmark::State& state, const Format& f) {
185
185
result.compute ();
186
186
}
187
187
}
188
- TACO_BENCH_ARGS (bench_ufunc_fused, csr, CSR)
189
- ->ArgsProduct({{5000 , 10000 , 20000 }});
188
+ // TACO_BENCH_ARGS(bench_ufunc_fused, csr, CSR)
189
+ // ->ArgsProduct({{5000, 10000, 20000}});
190
190
191
191
// UfuncInputCache is a cache for the input to ufunc benchmarks. These benchmarks
192
192
// operate on a tensor loaded from disk and the same tensor shifted slightly. Since
@@ -308,6 +308,87 @@ static void bench_frostt_ufunc(benchmark::State& state, std::string tnsPath, Fun
308
308
309
309
// NOTE(review): presumably expands DECLARE_FROSTT_UFUNC_BENCH once per FROSTT
// tensor; both macros are defined outside this view -- confirm.
FOREACH_FROSTT_TENSOR (DECLARE_FROSTT_UFUNC_BENCH)
310
310
311
// Identifies which fused ufunc expression bench_frostt_ufunc_fused evaluates.
// Kept as an unscoped enum so enumerators can be named without qualification.
enum FusedUfuncOp {
  // result = andOp(xorOp(a, b), c)
  XOR_AND = 1,
};
314
+
315
+ static void bench_frostt_ufunc_fused (benchmark::State& state, std::string tnsPath, FusedUfuncOp op) {
316
+ auto frosttTensorPath = getTacoTensorPath ();
317
+ frosttTensorPath += " FROSTT/" ;
318
+ frosttTensorPath += tnsPath;
319
+
320
+ auto pathSplit = taco::util::split (tnsPath, " /" );
321
+ auto filename = pathSplit[pathSplit.size () - 1 ];
322
+ auto tensorName = taco::util::split (filename, " ." )[0 ];
323
+ state.SetLabel (tensorName);
324
+
325
+ Tensor<int64_t > frosttTensor, other;
326
+ std::tie (frosttTensor, other) = inputCache.getUfuncInput (frosttTensorPath, Sparse);
327
+ Tensor<int64_t > third = shiftLastMode<int64_t , int64_t >(" C" , other);
328
+
329
+ for (auto _ : state) {
330
+ state.PauseTiming ();
331
+ Tensor<int64_t > result (" result" , frosttTensor.getDimensions (), frosttTensor.getFormat ());
332
+ result.setAssembleWhileCompute (true );
333
+ // We have to unfortunately perform this double nesting because for some reason
334
+ // I get a TACO generated code compilation error trying to lift the ufunc operation
335
+ // into lambda.
336
+ switch (frosttTensor.getOrder ()) {
337
+ case 3 : {
338
+ IndexVar i, j, k;
339
+ switch (op) {
340
+ case XOR_AND: {
341
+ result (i, j, k) = andOp (xorOp (frosttTensor (i, j, k), other (i, j, k)), third (i, j, k));
342
+ break ;
343
+ }
344
+ default :
345
+ state.SkipWithError (" invalid fused op" );
346
+ return ;
347
+ }
348
+ break ;
349
+ }
350
+ case 4 : {
351
+ IndexVar i, j, k, l;
352
+ switch (op) {
353
+ case XOR_AND: {
354
+ result (i, j, k, l) = andOp (xorOp (frosttTensor (i, j, k, l), other (i, j, k, l)), third (i, j, k, l));
355
+ break ;
356
+ }
357
+ default :
358
+ state.SkipWithError (" invalid fused op" );
359
+ return ;
360
+ }
361
+ break ;
362
+ }
363
+ case 5 : {
364
+ IndexVar i, j, k, l, m;
365
+ switch (op) {
366
+ case XOR_AND: {
367
+ result (i, j, k, l, m) = andOp (xorOp (frosttTensor (i, j, k, l, m), other (i, j, k, l, m)), third (i, j, k, l, m));
368
+ break ;
369
+ }
370
+ default :
371
+ state.SkipWithError (" invalid fused op" );
372
+ return ;
373
+ }
374
+ break ;
375
+ }
376
+ default :
377
+ state.SkipWithError (" invalid tensor dimension" );
378
+ return ;
379
+ }
380
+ result.compile ();
381
+ state.ResumeTiming ();
382
+
383
+ result.compute ();
384
+ }
385
+ }
386
+
387
+ #define DECLARE_FROSTT_FUSED_UFUNC_BENCH (name, path ) \
388
+ TACO_BENCH_ARGS (bench_frostt_ufunc_fused, name/xorAndFused, path, XOR_AND); \
389
+
390
+ FOREACH_FROSTT_TENSOR (DECLARE_FROSTT_FUSED_UFUNC_BENCH)
391
+
311
392
struct SuiteSparseTensors {
312
393
SuiteSparseTensors () {
313
394
auto ssTensorPath = getTacoTensorPath ();
0 commit comments