
Commit a60d288

Add in minmax.py input tensor generation
1 parent 90ad764 commit a60d288

2 files changed: +80 -8 lines changed


numpy/minmax.py (+4, -8)

@@ -3,27 +3,23 @@
 import sparse
 import pytest
 import os
-from util import TensorCollectionFROSTT, PydataTensorShifter, TensorCollectionSuiteSparse, ScipyTensorShifter, PydataMatrixMarketTensorLoader, ScipyMatrixMarketTensorLoader, VALIDATION_OUTPUT_PATH, PydataSparseTensorDumper, SuiteSparseTensor, safeCastPydataTensorToInts, RandomPydataSparseTensorLoader
+from util import MinMaxPydataSparseTensorLoader
 
-@pytest.mark.parametrize("dims", [3, 5, 7])
+@pytest.mark.parametrize("dims", [1, 3, 5, 7])
 def bench_pydata_minmax(tacoBench, dims):
-    loader = RandomPydataSparseTensorLoader()
+    loader = MinMaxPydataSparseTensorLoader()
     dims_list = [20] + [20] + [43 for ele in range(dims)]
     #FIXME: loader.random is always between 0 and 1, need to be larger. multiply by some value and then store to tns file
     #TODO: matrix shouldn't be completely random. it should have blocks of dense values (to simulate pruning)
     # and not just sparse uniform sampling
 
-    matrix = safeCastPydataTensorToInts(20*loader.random(dims_list, 0.10))
-    print(matrix)
+    matrix = loader.tensor(dims_list)
     def bench():
         reduced = matrix
         for m in range(len(dims_list)):
             if m % 2 == 0:
                 reduced = np.max(reduced, -1)
             else:
                 reduced = np.min(reduced, -1)
-        print(reduced)
-        print(np.max(reduced))
-        print(reduced)
         return reduced
     tacoBench(bench)
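For context, the benchmark's reduction loop alternates max and min over the innermost remaining axis until the tensor is collapsed to a scalar. A minimal standalone sketch of that pattern on a small dense NumPy array (the shape and values here are made up for illustration and are not the benchmark's inputs):

    import numpy as np

    dims_list = [2, 2, 3]                        # hypothetical small shape
    reduced = np.arange(12).reshape(dims_list)   # stand-in for loader.tensor(dims_list)
    for m in range(len(dims_list)):
        # Each pass collapses one trailing axis: max on even passes, min on odd passes.
        if m % 2 == 0:
            reduced = np.max(reduced, -1)
        else:
            reduced = np.min(reduced, -1)
    print(reduced)  # a single scalar after len(dims_list) reductions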

numpy/util.py (+76)

@@ -434,3 +434,79 @@ def dense_window(self, num):
 #        f.tight_layout()
 #        plt.show()
 
+# construct_minmax_tensor_key constructs a unique key that represents
+# a minmax input tensor parameterized by the tensor order.
+# The key itself is formatted by the string 'minmax', followed by the
+# tensor order. For example, a 3-dimensional tensor
+# would have a key of minmax-3.tns.
+def construct_minmax_tensor_key(dims, variant=None):
+    path = TENSOR_PATH
+    name = "minmax"
+    if variant is None:
+        key = "{}-{}.tns".format(name, len(dims))
+    else:
+        key = "{}-{}-{}.tns".format(name, len(dims), variant)
+    return os.path.join(path, name, key)
+
+def generate_crds_helper(shape, level, crds):
+    sampling = 0.1
+    num = 3
+    std = 2
+    last_layer_sampling = 0.4
+
+    if level == len(shape) - 1:
+        return crds
+    else:
+        result = []
+        d = shape[level]
+        for c in crds:
+            # Get the number of locations
+            num_locs = int(sampling*d)
+            # Choose locations uniformly at random to sample around
+            locs = numpy.random.rand(num_locs)*d
+
+            # Sample around each location using a normal distribution with a std of 2
+            for loc in locs:
+                points = std * numpy.random.randn(num) + loc
+                points = points.astype('int')
+                points = numpy.clip(points, 0, d - 1)
+                for p in points:
+                    result.append(c + [p])
+
+        return generate_crds_helper(shape, level + 1, result)
+
+# MinMaxPydataSparseTensorLoader should be used to generate
+# random pydata.sparse input tensors for the minmax benchmark. It caches
+# the loaded tensors in the file system so that TACO benchmarks using
+# tensors with the same parameters can use the exact same tensors.
+class MinMaxPydataSparseTensorLoader:
+    def __init__(self):
+        self.loader = PydataSparseTensorLoader()
+
+    def tensor(self, shape, variant=None):
+        key = construct_minmax_tensor_key(shape)
+        # If a tensor with these properties exists already, then load it.
+        if os.path.exists(key):
+            return self.loader.load(key)
+        else:
+            # Otherwise, we must create a random tensor with the desired properties,
+            # dump it to the output file, then return it.
+            crds = self.generate_crds(shape)
+            values = dict()
+            for c in crds:
+                ind_list = numpy.random.rand(2)*shape[-1]
+                ind_list = ind_list.astype('int')
+                start = numpy.min(ind_list)
+                stop = numpy.max(ind_list)
+                for i in range(start, stop):
+                    temp = tuple(c[1:] + [i])
+                    values[temp] = int(20*numpy.random.rand())
+
+            dok = sparse.DOK(shape, values)
+            TnsFileDumper().dump_dict_to_file(shape, dok.data, key)
+            result = dok.asformat('coo')
+            return result
+
+    def generate_crds(self, shape):
+        return generate_crds_helper(shape, 0, [[0]])
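A rough usage sketch of the new loader, assuming it is run from the numpy/ benchmark directory where util.py (with TENSOR_PATH, PydataSparseTensorLoader, and TnsFileDumper) lives; the shape below just mirrors what bench_pydata_minmax builds for dims=1:

    from util import MinMaxPydataSparseTensorLoader

    loader = MinMaxPydataSparseTensorLoader()
    dims_list = [20, 20, 43]           # same as [20] + [20] + [43 for ele in range(1)]
    matrix = loader.tensor(dims_list)  # first call generates and dumps minmax-3.tns, later calls reload it
    print(matrix.shape)                # (20, 20, 43); a pydata/sparse COO tensor

The coordinate generation samples roughly 10% of each outer dimension as anchor points, spreads a few normally distributed points around each anchor, and then fills short random runs along the last dimension, so the generated tensor has small dense blocks rather than uniformly scattered nonzeros.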
