Skip to content

Commit 6e341fb

Browse files
committed
Add in changes for imaging benchmark to test fused iteration lattice construction
1 parent 499d347 commit 6e341fb

File tree

4 files changed

+101
-27
lines changed

4 files changed

+101
-27
lines changed

numpy/image.py

+46-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import pytest
55
import matplotlib.pyplot as plt
66
import sparse
7-
from util import ImagePydataSparseTensorLoader, safeCastPydataTensorToInts
7+
from util import ImagePydataSparseTensorLoader, safeCastPydataTensorToInts, TnsFileDumper
88

99

1010
# plot_image plots the given original, binned, xor, and sparse xor images
@@ -215,6 +215,51 @@ def dense_bench():
215215
return xor_img
216216

217217
tacoBench(dense_bench)
218+
219+
# USED FOR TESTING ITERATION LATTICE CONSTRUCTION TACO CODE ONLY
220+
def testOp(a, b, c):
221+
return np.logical_and(np.logical_not(np.logical_and(a, c).astype('int')).astype('int'), np.logical_not(np.logical_and(b, c).astype('int')).astype('int')).astype('int')
222+
@pytest.mark.skip(reason="Used for verification only")
@pytest.mark.parametrize("num", list(range(1, 11)))
@pytest.mark.parametrize("pt1", [0.5])
def bench_test_fused_pydata(tacoBench, num, pt1):
    """Verification-only run of testOp on image `num`, sparse versus dense.

    Loads two thresholded images (at `pt1` and `pt1 + 0.05`) and a window in
    both sparse and dense form, runs testOp on each set, writes the dense
    result to a .tns file under "temp", prints diagnostic counts, and asserts
    that the sparse result's nnz equals the number of dense entries != 1.
    """
    pt2 = pt1 + 0.05
    loader = ImagePydataSparseTensorLoader()

    # Loader calls are kept in the original order: sparse images, sparse
    # window, then the dense counterparts.
    sparse_img_a = safeCastPydataTensorToInts(loader.sparse_image(num, pt1, 1))
    sparse_img_b = safeCastPydataTensorToInts(loader.sparse_image(num, pt2, 2))
    sparse_win = loader.sparse_window(num, 3)
    dense_img_a = loader.dense_image(num, pt1, 1)
    dense_img_b = loader.dense_image(num, pt2, 2)
    dense_win = loader.dense_window(num)

    def sparse_bench():
        return testOp(sparse_img_a, sparse_img_b, sparse_win).astype('int')

    def dense_bench():
        return testOp(dense_img_a, dense_img_b, dense_win).astype('int')

    tacoBench(sparse_bench)
    sparse_result = sparse_bench()
    dense_result = dense_bench()

    # Write result to TNS file to see what's different
    out_path = os.path.join("temp", "numpy-result-{}.tns".format(num))
    dense_as_coo = sparse.COO.from_numpy(dense_result, fill_value=0)
    dense_as_dok = sparse.DOK(dense_as_coo)
    TnsFileDumper().dump_dict_to_file(dense_result.shape, dense_as_dok.data, out_path)

    total_elements = float(np.prod(dense_img_a.shape))
    fill = sparse_result.fill_value
    print("shape1", sparse_img_a.shape)
    print("shape2", sparse_img_b.shape)
    print("sparse img1 nnz =", sparse_img_a.nnz, " ", np.sum(dense_img_a != 0))
    print("sparse img2 nnz =", sparse_img_b.nnz, " ", np.sum(dense_img_b != 0))
    print("sparse win nnz =", sparse_win.nnz, " ", np.sum(dense_win != 0))
    print("Total num elements", total_elements)
    print("Fill value", fill)
    print("Sparse xor NNF = ", sparse_result.nnz, "\t", "Dense xor NNF = ", np.sum(dense_result != int(fill)))
    print("Dense xor NNZ = ", np.sum(dense_result != 0))
    assert sparse_result.nnz == np.sum(dense_result != 1)
218263

219264
if __name__=="__main__":
220265
main()

numpy/util.py

+9-6
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class TnsFileDumper:
4444
def __init__(self):
4545
pass
4646

47-
def dump_dict_to_file(self, shape, data, path):
47+
def dump_dict_to_file(self, shape, data, path, write_shape = False):
4848
# Sort the data so that the output is deterministic.
4949
sorted_data = sorted([list(coords) + [value] for coords, value in data.items()])
5050
with open(path, 'w+') as f:
@@ -53,9 +53,10 @@ def dump_dict_to_file(self, shape, data, path):
5353
strings = coords + [str(line[-1])]
5454
f.write(" ".join(strings))
5555
f.write("\n")
56-
shape_strings = [str(elem) for elem in shape] + ['0']
57-
f.write(" ".join(shape_strings))
58-
f.write("\n")
56+
if write_shape:
57+
shape_strings = [str(elem) for elem in shape] + ['0']
58+
f.write(" ".join(shape_strings))
59+
f.write("\n")
5960

6061
# ScipySparseTensorLoader loads a sparse tensor from a file into a
6162
# scipy.sparse CSR matrix.
@@ -372,7 +373,8 @@ def sparse_image(self, num, pt, variant=None, path='no'):
372373
bin_img = self.dense_image(num, pt, variant, path)
373374
result = sparse.COO.from_numpy(bin_img)
374375
dok = sparse.DOK(result)
375-
TnsFileDumper().dump_dict_to_file(self.shape[num], dok.data, key)
376+
write_shape = bin_img.flat[-1] == 0
377+
TnsFileDumper().dump_dict_to_file(self.shape[num], dok.data, key, write_shape)
376378
return result
377379

378380
# sparse_window and dense_window must be called after the image calls
@@ -389,7 +391,8 @@ def sparse_window(self, num, variant=3):
389391
result_np = self.dense_window(num)
390392
result = sparse.COO.from_numpy(result_np)
391393
dok = sparse.DOK(result)
392-
TnsFileDumper().dump_dict_to_file(shape, dok.data, key)
394+
write_shape = result_np.flat[-1] == 0
395+
TnsFileDumper().dump_dict_to_file(shape, dok.data, key, write_shape)
393396
return result
394397

395398
def dense_window(self, num):

taco/bench.h

+18
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,24 @@ taco::Tensor<T> castToType(std::string name, taco::Tensor<double> tensor) {
7474
return result;
7575
}
7676

77+
template<typename T>
78+
taco::Tensor<T> castToTypeZero(std::string name, taco::Tensor<double> tensor) {
79+
taco::Tensor<T> result(name, tensor.getDimensions(), tensor.getFormat());
80+
std::vector<int> coords(tensor.getOrder());
81+
for (auto& value : taco::iterate<double>(tensor)) {
82+
for (int i = 0; i < tensor.getOrder(); i++) {
83+
coords[i] = value.first[i];
84+
}
85+
// Attempt to cast the value to an integer. However, if the cast causes
86+
// the value to equal 0, then this will ruin the sparsity pattern of the
87+
// tensor, as the 0 values will get compressed out. So, if a cast would
88+
// equal 0, insert 1 instead to preserve the sparsity pattern of the tensor.
89+
result.insert(coords, static_cast<T>(value.second));
90+
}
91+
result.pack();
92+
return result;
93+
}
94+
7795
template<typename T, typename T2>
7896
taco::Tensor<T> shiftLastMode(std::string name, taco::Tensor<T2> original) {
7997
taco::Tensor<T> result(name, original.getDimensions(), original.getFormat());

taco/image.cpp

+28-20
Original file line numberDiff line numberDiff line change
@@ -55,22 +55,31 @@ struct xorAndAlgebra {
5555
}
5656
};
5757

58+
struct testConstructionAlgebra {
59+
IterationAlgebra operator()(const std::vector<IndexExpr>& regions) {
60+
auto m1 = Union(Complement(regions[0]), Complement(regions[2]));
61+
auto m2 = Union(Complement(regions[1]), Complement(regions[2]));
62+
return Intersect(m1, m2);
63+
}
64+
};
65+
5866
Func xorOp1("logical_xor", Boolean(), xorAlgebra());
5967
Func andOp1("logical_and", Boolean(), andAlgebra());
6068
Func xorAndOp("fused_xor_and", Boolean(), xorAndAlgebra());
69+
Func testOp("test", Boolean(), testConstructionAlgebra());
6170
static void bench_image_xor(benchmark::State& state, const Format& f) {
6271
int num = state.range(0);
6372
auto t1 = 0.5;
6473
auto t2 = 0.55;
65-
Tensor<int64_t> matrix1 = castToType<int64_t>("A", loadImageTensor("A", num, f, t1, 1 /* variant */));
66-
Tensor<int64_t> matrix2 = castToType<int64_t>("B", loadImageTensor("B", num, f, t2, 2 /* variant */));
74+
Tensor<int64_t> matrix1 = castToTypeZero<int64_t>("A", loadImageTensor("A", num, f, t1, 1 /* variant */));
75+
Tensor<int64_t> matrix2 = castToTypeZero<int64_t>("B", loadImageTensor("B", num, f, t2, 2 /* variant */));
6776
auto dims = matrix1.getDimensions();
6877

6978
for (auto _ : state) {
7079
state.PauseTiming();
7180
Tensor<int64_t> result("result", dims, f, 1);
7281
IndexVar i("i"), j("j");
73-
result(i, j) = xorOp1(matrix1(i, j), matrix2(i, j));
82+
result(i, j) = testOp(matrix1(i, j), matrix2(i, j));
7483
result.setAssembleWhileCompute(true);
7584
result.compile();
7685
state.ResumeTiming();
@@ -86,7 +95,7 @@ static void bench_image_xor(benchmark::State& state, const Format& f) {
8695
}
8796
}
8897
// Registers one argument set per value 1 through 11 (inclusive) on the given
// benchmark; the benchmarks in this file read it back via state.range(0).
static void CustomArguments(benchmark::internal::Benchmark* b) {
  const int first = 1;
  const int last = 11;
  for (int num = first; num <= last; ++num) {
    b->Args({num});
  }
}
92101
TACO_BENCH_ARGS(bench_image_xor, csr, CSR)->Apply(CustomArguments);
@@ -95,11 +104,14 @@ static void bench_image_fused(benchmark::State& state, const Format& f) {
95104
int num = state.range(0);
96105
auto t1 = 0.5;
97106
auto t2 = 0.55;
98-
Tensor<int64_t> matrix1 = castToType<int64_t>("A", loadImageTensor("A", num, f, t1, 1 /* variant */));
99-
Tensor<int64_t> matrix2 = castToType<int64_t>("B", loadImageTensor("B", num, f, t2, 2 /* variant */));
100-
Tensor<int64_t> matrix3 = castToType<int64_t>("C", loadImageTensor("C", num, f, 3 /* variant */));
107+
Tensor<int64_t> matrix1 = castToTypeZero<int64_t>("A", loadImageTensor("A", num, f, t1, 1 /* variant */));
108+
Tensor<int64_t> matrix2 = castToTypeZero<int64_t>("B", loadImageTensor("B", num, f, t2, 2 /* variant */));
109+
Tensor<int64_t> matrix3 = castToTypeZero<int64_t>("C", loadImageTensor("C", num, f, 3 /* variant */));
101110
auto dims = matrix1.getDimensions();
102111

112+
// write("temp/taco-mat1-" + std::to_string(num) + ".tns", matrix1);
113+
// write("temp/taco-mat2-" + std::to_string(num) + ".tns", matrix2);
114+
// write("temp/taco-mat3-" + std::to_string(num) + ".tns", matrix3);
103115
int nnz = 0;
104116
for (auto& it : iterate<int64_t>(matrix1)) {
105117
nnz++;
@@ -115,35 +127,31 @@ static void bench_image_fused(benchmark::State& state, const Format& f) {
115127
nnz++;
116128
}
117129
std::cout << "Matrix3 NNZ = " << nnz << std::endl;
130+
118131
for (auto _ : state) {
119132
state.PauseTiming();
120133
Tensor<int64_t> result("result", dims, f, 0);
121-
Tensor<int64_t> temp1("t1", dims, f, 0);
122-
Tensor<int64_t> temp2("t2", dims, f, 0);
134+
123135
IndexVar i("i"), j("j");
124-
// temp1(i,j) = andOp1(matrix1(i, j), matrix3(i, j));
125-
// temp2(i,j) = andOp1(matrix2(i, j), matrix3(i, j));
126-
// result(i, j) = xorOp1(temp1(i,j), temp2(i,j));
127-
// result(i, j) = xorOp1(andOp1(matrix1(i, j), matrix3(i, j)), andOp1(matrix2(i, j), matrix3(i, j)));
128-
result(i, j) = xorAndOp(matrix1(i, j), matrix2(i, j), matrix3(i, j));
136+
result(i, j) = testOp(matrix1(i, j), matrix2(i, j), matrix3(i, j));
129137
IndexStmt stmt = result.getAssignment().concretize();
130138
result.setAssembleWhileCompute(true);
131139
result.compile();
132140
state.ResumeTiming();
133141
result.compute();
134-
temp1 = temp1.removeExplicitZeros(temp1.getFormat());
135-
temp2 = temp2.removeExplicitZeros(temp2.getFormat());
136142
result = result.removeExplicitZeros(result.getFormat());
137143
int nnz = 0;
138144
for (auto& it : iterate<int64_t>(result)) {
139145
nnz++;
140146
}
141147

142148
std::cout << "Result NNZ = " << nnz << std::endl;
143-
std::shared_ptr<ir::CodeGen> codegen = ir::CodeGen::init_default(std::cout, ir::CodeGen::ImplementationGen);
144-
ir::Stmt compute = lower(stmt, "compute", false, true);
145-
codegen->compile(compute, true);
146-
// std::cout << result << std::endl;
149+
// write("temp/taco-result" + std::to_string(num) + ".tns", result);
150+
// Used to print out generated TACO code
151+
// std::shared_ptr<ir::CodeGen> codegen = ir::CodeGen::init_default(std::cout, ir::CodeGen::ImplementationGen);
152+
// ir::Stmt compute = lower(stmt, "compute", false, true);
153+
// codegen->compile(compute, true);
147154
}
148155
}
156+
TACO_BENCH_ARGS(bench_image_fused, csr, CSR)->Apply(CustomArguments);
149157
TACO_BENCH_ARGS(bench_image_fused, csr, CSR)->Apply(CustomArguments);

0 commit comments

Comments
 (0)