Merge branch 'main' of https://github.com/tensor-compiler/array-programming-benchmarks into main

stephenchouca · stephenchouca · commit 6298b390ca2b · 2021-03-10T12:56:44.000-05:00
diff --git a/numpy/ufuncs.py b/numpy/ufuncs.py
@@ -2,7 +2,7 @@
 from scipy.sparse import random, csr_matrix
 import sparse
 import pytest
-from util import TensorCollectionFROSTT, PydataTensorShifter
+from util import TensorCollectionFROSTT, PydataTensorShifter, TensorCollectionSuiteSparse, ScipyTensorShifter, PydataMatrixMarketTensorLoader, ScipyMatrixMarketTensorLoader
 
 # TODO (rohany): Ask hameer about this. pydata/sparse isn't happy when
 #  given this ufunc to evaluate.
@@ -90,12 +90,46 @@ def bench():
 # Run benchmarks against the FROSTT collection.
 FROSTTTensors = TensorCollectionFROSTT()
+# Binary ufuncs exercised by every real-world tensor benchmark below. Kept in
+# one place so the FROSTT and SuiteSparse benchmarks cannot drift apart.
+UFUNCS = [numpy.logical_xor, numpy.ldexp, numpy.right_shift]
 @pytest.mark.parametrize("tensor", FROSTTTensors.getTensors(), ids=FROSTTTensors.getTensorNames())
-def bench_pydata_frostt_ufunc_sparse(tacoBench, tensor):
-    frTensor = tensor.load()
+@pytest.mark.parametrize("ufunc", UFUNCS)
+def bench_pydata_frostt_ufunc_sparse(tacoBench, tensor, ufunc):
+    """Benchmark ufunc(tensor, shifted tensor) on a FROSTT tensor, both operands cast to int64."""
+    frTensor = tensor.load().astype('int64')
     shifter = PydataTensorShifter()
     other = shifter.shiftLastMode(frTensor).astype('int64')
     def bench():
-        # TODO (rohany): Expand this test beyond ldexp.
-        c = numpy.ldexp(frTensor, other)
+        c = ufunc(frTensor, other)
+        return c
+    tacoBench(bench)
+
+# Run benchmarks against the SuiteSparse collection.
+SuiteSparseTensors = TensorCollectionSuiteSparse()
+@pytest.mark.parametrize("tensor", SuiteSparseTensors.getTensors(), ids=SuiteSparseTensors.getTensorNames())
+@pytest.mark.parametrize("ufunc", UFUNCS)
+def bench_pydata_suitesparse_ufunc_sparse(tacoBench, tensor, ufunc):
+    """Benchmark ufunc(matrix, shifted matrix) on a SuiteSparse matrix loaded as a pydata/sparse tensor."""
+    ssTensor = tensor.load(PydataMatrixMarketTensorLoader()).astype('int64')
+    shifter = PydataTensorShifter()
+    other = shifter.shiftLastMode(ssTensor).astype('int64')
+    def bench():
+        c = ufunc(ssTensor, other)
+        return c
+    tacoBench(bench)
+
+# TODO (rohany): scipy doesn't support these, I forgot. If that's the case,
+#  do we really need to compare against suitesparse?
+@pytest.mark.skip(reason="scipy doesn't support this actually")
+@pytest.mark.parametrize("tensor", SuiteSparseTensors.getTensors(), ids=SuiteSparseTensors.getTensorNames())
+@pytest.mark.parametrize("ufunc", UFUNCS)
+@pytest.mark.parametrize("format", ["csr", "csc"])
+def bench_scipy_suitesparse_ufunc_sparse(tacoBench, tensor, ufunc, format):
+    """Same benchmark on scipy.sparse csr/csc matrices; currently skipped (see the TODO above)."""
+    ssTensor = tensor.load(ScipyMatrixMarketTensorLoader(format)).astype('int64')
+    shifter = ScipyTensorShifter(format)
+    other = shifter.shiftLastMode(ssTensor).astype('int64')
+    def bench():
+        c = ufunc(ssTensor, other)
         return c
     tacoBench(bench)
diff --git a/numpy/util.py b/numpy/util.py
@@ -1,4 +1,5 @@
 import scipy.sparse
+import scipy.io
 import sparse
 import os
 import glob
@@ -167,7 +168,9 @@ def shiftLastMode(self, tensor):
         for i in range(len(data)):
             for j in range(len(tensor.shape)):
                 resultCoords[j][i] = coords[j][i]
-            resultValues[i] = data[i]
+            # resultValues[i] = data[i]
+            # TODO (rohany): Temporarily use a constant as the value.
+            resultValues[i] = 2
             resultCoords[-1][i] = (resultCoords[-1][i] + 1) % tensor.shape[-1]
         return sparse.COO(resultCoords, resultValues, tensor.shape)
 
@@ -183,10 +186,76 @@ def shiftLastMode(self, tensor):
         for coord, val in dok.items():
             newCoord = list(coord[:])
             newCoord[-1] = (newCoord[-1] + 1) % tensor.shape[-1]
-            result[tuple(newCoord)] = val
+            # result[tuple(newCoord)] = val
+            # TODO (rohany): Temporarily use a constant as the value.
+            result[tuple(newCoord)] = 2
         if self.format == "csr":
             return scipy.sparse.csr_matrix(result)
         elif self.format == "csc":
             return scipy.sparse.csc_matrix(result)
         else:
             assert(False)
+
+class ScipyMatrixMarketTensorLoader:
+    """Loads Matrix Market files as scipy.sparse matrices in a fixed format."""
+
+    def __init__(self, format):
+        # Target storage format, "csr" or "csc"; checked in load().
+        self.format = format
+
+    def load(self, path):
+        """Read the Matrix Market file at `path` and convert it to `self.format`.
+
+        Raises ValueError if `self.format` is neither "csr" nor "csc".
+        """
+        coo = scipy.io.mmread(path)
+        if self.format == "csr":
+            return scipy.sparse.csr_matrix(coo)
+        if self.format == "csc":
+            return scipy.sparse.csc_matrix(coo)
+        # Raise rather than `assert(False)`: asserts are stripped under
+        # `python -O`, which would make this silently return None.
+        raise ValueError("unsupported scipy sparse format: %r" % (self.format,))
+
+class PydataMatrixMarketTensorLoader:
+    """Loads Matrix Market files as pydata/sparse COO tensors."""
+
+    def load(self, path):
+        """Read the Matrix Market file at `path` and return a sparse.COO."""
+        coo = scipy.io.mmread(path)
+        return sparse.COO.from_scipy_sparse(coo)
+
+class SuiteSparseTensor:
+    """A SuiteSparse matrix on disk, identified by the path to its .mtx file."""
+
+    def __init__(self, path):
+        self.path = path
+
+    def __str__(self):
+        """Return the file name without its trailing ".mtx" extension."""
+        name = os.path.basename(self.path)
+        # Strip only a trailing extension; str.replace would also delete
+        # ".mtx" occurring in the middle of the name.
+        return name[:-len(".mtx")] if name.endswith(".mtx") else name
+
+    def load(self, loader):
+        """Load this tensor using `loader`, an object with a load(path) method."""
+        return loader.load(self.path)
+
+class TensorCollectionSuiteSparse:
+    """The set of .mtx files found under TENSOR_PATH/suitesparse."""
+
+    def __init__(self):
+        data = os.path.join(TENSOR_PATH, "suitesparse")
+        # glob order is filesystem-dependent; sort so that pytest
+        # parametrization order is the same on every run and machine.
+        sstensors = sorted(glob.glob(os.path.join(data, "*.mtx")))
+        self.tensors = [SuiteSparseTensor(t) for t in sstensors]
+
+    def getTensors(self):
+        """Return the list of SuiteSparseTensor objects in this collection."""
+        return self.tensors
+
+    def getTensorNames(self):
+        """Return the display name of every tensor, in getTensors() order."""
+        return [str(tensor) for tensor in self.getTensors()]
diff --git a/taco/bench.h b/taco/bench.h
@@ -41,17 +41,33 @@
 std::string getTacoTensorPath();
 taco::TensorBase loadRandomTensor(std::string name, std::vector<int> dims, float sparsity, taco::Format format);
 
-template<typename T>
-taco::Tensor<T> shiftLastMode(std::string name, taco::Tensor<T> original) {
+template<typename T, typename T2>
+taco::Tensor<T> shiftLastMode(std::string name, taco::Tensor<T2> original) {
   taco::Tensor<T> result(name, original.getDimensions(), original.getFormat());
   std::vector<int> coords(original.getOrder());
-  for (auto& value : taco::iterate<T>(original)) {
+  for (auto& value : taco::iterate<T2>(original)) {
     for (int i = 0; i < original.getOrder(); i++) {
       coords[i] = value.first[i];
     }
     int lastMode = original.getOrder() - 1;
     coords[lastMode] = (coords[lastMode] + 1) % original.getDimension(lastMode);
-    result.insert(coords, value.second);
+    // TODO (rohany): Temporarily use a constant value here.
+    result.insert(coords, T(2));
+  }
+  result.pack();
+  return result;
+}
+
+template<typename T>
+taco::Tensor<T> readIntoType(std::string name, std::string path, taco::ModeFormat format) {
+  auto tensor = taco::read(path, format);
+  taco::Tensor<T> result(name, tensor.getDimensions(), tensor.getFormat());
+  std::vector<int> coords(tensor.getOrder());
+  for (auto& value : taco::iterate<double>(tensor)) {
+    for (int i = 0; i < tensor.getOrder(); i++) {
+      coords[i] = value.first[i];
+    }
+    result.insert(coords, T(value.second));
   }
   result.pack();
   return result;
diff --git a/taco/taco b/taco/taco
@@ -1 +1 @@
-Subproject commit befdc4613b876b142c366709d8408cd0414c30de
+Subproject commit 36bbe9934edb9fd4ed773f144ff98ecbaec25921
diff --git a/taco/ufuncs.cpp b/taco/ufuncs.cpp
@@ -151,7 +151,7 @@ static void applyBenchSizes(benchmark::internal::Benchmark* b) {
 TACO_BENCH_ARGS(bench_ufunc_sparse, xor_0.01, 0.01, "xor")->Apply(applyBenchSizes);
 TACO_BENCH_ARGS(bench_ufunc_sparse, rightShift_0.01, 0.01, ">>")->Apply(applyBenchSizes);
 
-static void bench_frostt_ufunc(benchmark::State& state, std::string tnsPath) {
+static void bench_frostt_ufunc(benchmark::State& state, std::string tnsPath, Func op) {
   auto path = getTacoTensorPath();
   auto frosttTensorPath = path;
   if (frosttTensorPath[frosttTensorPath.size() - 1] != '/') {
@@ -161,50 +161,48 @@ static void bench_frostt_ufunc(benchmark::State& state, std::string tnsPath) {
   frosttTensorPath += tnsPath;
 
   // TODO (rohany): What format do we want to do here?
-  auto frosttTensor = read(frosttTensorPath, Sparse);
-  Tensor<double> other = shiftLastMode<double>("other", frosttTensor);
-
-  // TODO (rohany): Parametrize by the ufunc as well.
-  // TODO (rohany): Certain ufuncs need for operands to be of a certain type.
-  //  ldexp for example requires the right hand side to be an integer (for python
-  //  at least). Not sure how we'll handle that for this.
-  Func ldExp("2^", Ldexp(), leftIncAlgebra());
+  auto frosttTensor = readIntoType<int64_t>("frostt", frosttTensorPath, Sparse);
+  Tensor<int64_t> other = shiftLastMode<int64_t, int64_t>("other", frosttTensor);
 
   for (auto _ : state) {
     state.PauseTiming();
-    Tensor<double> result("result", frosttTensor.getDimensions(), frosttTensor.getFormat());
+    Tensor<int64_t> result("result", frosttTensor.getDimensions(), frosttTensor.getFormat());
+    result.setAssembleWhileCompute(true);
     switch (frosttTensor.getOrder()) {
       case 4: {
         IndexVar i, j, k, l;
-        result(i, j, k, l) = ldExp(frosttTensor(i, j, k, l), other(i, j, k, l));
+        result(i, j, k, l) = op(frosttTensor(i, j, k, l), other(i, j, k, l));
         break;
       }
       case 5: {
         IndexVar i, j, k, l, m;
-        result(i, j, k, l, m) = ldExp(frosttTensor(i, j, k, l, m), other(i, j, k, l, m));
+        result(i, j, k, l, m) = op(frosttTensor(i, j, k, l, m), other(i, j, k, l, m));
         break;
       }
       default:
         state.SkipWithError("invalid tensor dimension");
         return;
     }
     result.compile();
-    result.assemble();
     state.ResumeTiming();
 
     result.compute();
   }
 }
 
-// TODO (rohany): We can define another macro to "nest" defining benchmarks
-//  for each of the ufuncs that we want to operate on.
+Func ldExp("ldexp", Ldexp(), leftIncAlgebra());
+Func rightShift("rightShift", RightShift(), leftIncAlgebra());
+Func xorOp("xor", GeneralAdd(), xorAlgebra());
+
 #define FOREACH_FROSTT_TENSOR(__func__) \
   __func__(nips, "nips.tns") \
   __func__(uber_pickups, "uber-pickups.tns") \
-  __func__(chicaco_crime, "chicago-crime.tns") \
+  __func__(chicago_crime, "chicago-crime.tns") \
   __func__(lbnl_network, "lbnl-network.tns")
 
 #define DECLARE_FROSTT_UFUNC_BENCH(name, path) \
-  TACO_BENCH_ARGS(bench_frostt_ufunc, name, path);
+   TACO_BENCH_ARGS(bench_frostt_ufunc, name/xor, path, xorOp); \
+   TACO_BENCH_ARGS(bench_frostt_ufunc, name/ldExp, path, ldExp); \
+   TACO_BENCH_ARGS(bench_frostt_ufunc, name/rightShift, path, rightShift); /* last line of the macro: no continuation */
 
-FOREACH_FROSTT_TENSOR(DECLARE_FROSTT_UFUNC_BENCH)
+FOREACH_FROSTT_TENSOR(DECLARE_FROSTT_UFUNC_BENCH)
diff --git a/unpack_suitesparse.sh b/unpack_suitesparse.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Unpack every .tar.gz archive in data/suitesparse/ in place, stripping the
+# first path component of each archive member. The path is relative to the cwd.
+
+# Abort if the data directory is missing; otherwise the loop below would
+# extract archives into whatever directory the script was started from.
+cd data/suitesparse/ || exit 1
+
+# Without nullglob an unmatched pattern is passed to tar literally.
+shopt -s nullglob
+
+for f in *.tar.gz; do
+    # --strip-components spelled out in full instead of relying on the
+    # abbreviated --strip being resolved by tar's option parser.
+    tar -xvf "$f" --strip-components=1
+done