
Commit a60d288

Add in minmax.py input tensor generation
1 parent 90ad764 commit a60d288

2 files changed: +80 -8 lines changed


numpy/minmax.py (+4, -8)

@@ -3,27 +3,23 @@
 import sparse
 import pytest
 import os
-from util import TensorCollectionFROSTT, PydataTensorShifter, TensorCollectionSuiteSparse, ScipyTensorShifter, PydataMatrixMarketTensorLoader, ScipyMatrixMarketTensorLoader, VALIDATION_OUTPUT_PATH, PydataSparseTensorDumper, SuiteSparseTensor, safeCastPydataTensorToInts, RandomPydataSparseTensorLoader
+from util import MinMaxPydataSparseTensorLoader
 
-@pytest.mark.parametrize("dims", [3, 5, 7])
+@pytest.mark.parametrize("dims", [1, 3, 5, 7])
 def bench_pydata_minmax(tacoBench, dims):
-    loader = RandomPydataSparseTensorLoader()
+    loader = MinMaxPydataSparseTensorLoader()
     dims_list = [20] + [20] + [43 for ele in range(dims)]
     #FIXME: loader.random is always between 0 and 1, need to be larger. multiply by some value and then store to tns file
     #TODO: matrix shouldn't be completely random. it should have blocks of dense values (to simulate pruning)
     # and not just sparse uniform sampling
 
-    matrix = safeCastPydataTensorToInts(20*loader.random(dims_list, 0.10))
-    print(matrix)
+    matrix = loader.tensor(dims_list)
     def bench():
         reduced = matrix
         for m in range(len(dims_list)):
             if m % 2 == 0:
                 reduced = np.max(reduced, -1)
             else:
                 reduced = np.min(reduced, -1)
-        print(reduced)
-        print(np.max(reduced))
-        print(reduced)
         return reduced
     tacoBench(bench)
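For context, the benchmark's reduction loop alternates max and min over the innermost remaining axis until the tensor is collapsed to a scalar. A minimal standalone sketch of that pattern on a small dense NumPy array (the shape and values here are made up for illustration and are not the benchmark's inputs):

    import numpy as np

    dims_list = [2, 2, 3]                        # hypothetical small shape
    reduced = np.arange(12).reshape(dims_list)   # stand-in for loader.tensor(dims_list)
    for m in range(len(dims_list)):
        # Each pass collapses one trailing axis: max on even passes, min on odd passes.
        if m % 2 == 0:
            reduced = np.max(reduced, -1)
        else:
            reduced = np.min(reduced, -1)
    print(reduced)  # a single scalar after len(dims_list) reductions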

numpy/util.py (+76)

@@ -434,3 +434,79 @@ def dense_window(self, num):
 #        f.tight_layout()
 #        plt.show()
 
+# construct_minmax_tensor_key constructs a unique key that represents
+# a minmax input tensor parameterized by the tensor order.
+# The key itself is formatted by the string 'minmax', followed by the
+# tensor order. For example, a 3-dimensional tensor
+# would have a key of minmax-3.tns.
+def construct_minmax_tensor_key(dims, variant=None):
+    path = TENSOR_PATH
+    name = "minmax"
+    if variant is None:
+        key = "{}-{}.tns".format(name, len(dims))
+    else:
+        key = "{}-{}-{}.tns".format(name, len(dims), variant)
+    return os.path.join(path, name, key)
+
+def generate_crds_helper(shape, level, crds):
+    sampling = 0.1
+    num = 3
+    std = 2
+    last_layer_sampling = 0.4
+
+    if level == len(shape) - 1:
+        return crds
+    else:
+        result = []
+        d = shape[level]
+        for c in crds:
+            # Get the number of locations
+            num_locs = int(sampling*d)
+            # Choose locations uniformly at random to sample around
+            locs = numpy.random.rand(num_locs)*d
+
+            # Sample around each location using a normal distribution with a std of 2
+            for loc in locs:
+                points = std * numpy.random.randn(num) + loc
+                points = points.astype('int')
+                points = numpy.clip(points, 0, d - 1)
+                for p in points:
+                    result.append(c + [p])
+
+        return generate_crds_helper(shape, level + 1, result)
+
+# MinMaxPydataSparseTensorLoader should be used to generate
+# random pydata.sparse input tensors for the minmax benchmark. It caches
+# the loaded tensors in the file system so that TACO benchmarks using
+# tensors with the same parameters can use the exact same tensors.
+class MinMaxPydataSparseTensorLoader:
+    def __init__(self):
+        self.loader = PydataSparseTensorLoader()
+
+    def tensor(self, shape, variant=None):
+        key = construct_minmax_tensor_key(shape)
+        # If a tensor with these properties exists already, then load it.
+        if os.path.exists(key):
+            return self.loader.load(key)
+        else:
+            # Otherwise, we must create a random tensor with the desired properties,
+            # dump it to the output file, then return it.
+            crds = self.generate_crds(shape)
+            values = dict()
+            for c in crds:
+                ind_list = numpy.random.rand(2)*shape[-1]
+                ind_list = ind_list.astype('int')
+                start = numpy.min(ind_list)
+                stop = numpy.max(ind_list)
+                for i in range(start, stop):
+                    temp = tuple(c[1:] + [i])
+                    values[temp] = int(20*numpy.random.rand())
+
+            dok = sparse.DOK(shape, values)
+            TnsFileDumper().dump_dict_to_file(shape, dok.data, key)
+            result = dok.asformat('coo')
+            return result
+
+    def generate_crds(self, shape):
+        return generate_crds_helper(shape, 0, [[0]])
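A rough usage sketch of the new loader, assuming it is run from the numpy/ benchmark directory where util.py (with TENSOR_PATH, PydataSparseTensorLoader, and TnsFileDumper) lives; the shape below just mirrors what bench_pydata_minmax builds for dims=1:

    from util import MinMaxPydataSparseTensorLoader

    loader = MinMaxPydataSparseTensorLoader()
    dims_list = [20, 20, 43]           # same as [20] + [20] + [43 for ele in range(1)]
    matrix = loader.tensor(dims_list)  # first call generates and dumps minmax-3.tns, later calls reload it
    print(matrix.shape)                # (20, 20, 43); a pydata/sparse COO tensor

The coordinate generation samples roughly 10% of each outer dimension as anchor points, spreads a few normally distributed points around each anchor, and then fills short random runs along the last dimension, so the generated tensor has small dense blocks rather than uniformly scattered nonzeros.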
