Change some parameters to improve performance

weiya711 · weiya711 · commit 90ad76406dbc · 2021-04-08T16:53:02.000-07:00
diff --git a/numpy/image.py b/numpy/image.py
@@ -3,12 +3,12 @@
 import os
 import pytest
 import sparse
-from util import ImagePydataSparseTensorLoader, safeCastPydataTensorToInts, TnsFileDumper #plot_image 
+from util import ImagePydataSparseTensorLoader, safeCastPydataTensorToInts, TnsFileDumper#, plot_image 
 
 # import matplotlib.pyplot as plt 
 
 @pytest.mark.parametrize("num", list(range(1, 99))) 
-@pytest.mark.parametrize("pt1", [0.5])
+@pytest.mark.parametrize("pt1", [0.75])
 def bench_edge_detection_pydata(tacoBench, num, pt1, plot):
         loader = ImagePydataSparseTensorLoader()
         sparse_bin_img1 = safeCastPydataTensorToInts(loader.sparse_image(num, pt1, 1))
@@ -44,7 +44,7 @@ def dense_bench():
             #plot_image(loader.img[num], bin_img1, bin_img2, xor_img, sparse_xor_img, t1, t2)
 
 @pytest.mark.parametrize("num", list(range(1, 99))) 
-@pytest.mark.parametrize("pt1", [0.5])
+@pytest.mark.parametrize("pt1", [0.75])
 def bench_edge_detection_dense(tacoBench, num, pt1):
         loader = ImagePydataSparseTensorLoader()
         bin_img1 = loader.dense_image(num, pt1, 1) 
@@ -56,7 +56,7 @@ def dense_bench():
         tacoBench(dense_bench)
 
 @pytest.mark.parametrize("num", list(range(1, 99))) 
-@pytest.mark.parametrize("pt1", [0.5])
+@pytest.mark.parametrize("pt1", [0.75])
 def bench_edge_detection_fused_pydata(tacoBench, num, pt1, plot):
         loader = ImagePydataSparseTensorLoader()
         sparse_bin_img1 = safeCastPydataTensorToInts(loader.sparse_image(num, pt1, 1))
@@ -94,12 +94,12 @@ def dense_bench():
             sparse_xor_img = sparse_xor_img.todense()
             t1 = round(loader.max[num]*pt1, 2)
             t2 = round(loader.max[num]*(pt1 + 0.05), 2)
-            #plot_image(loader.img[num], bin_img1, bin_img2, xor_img, sparse_xor_img, t1, t2, bin_window)
+            plot_image(loader.img[num], bin_img1, bin_img2, xor_img, sparse_xor_img, t1, t2, bin_window)
 
         assert(sparse_xor_img.nnz == np.sum(xor_img != 0))
 
 @pytest.mark.parametrize("num", list(range(1, 99))) 
-@pytest.mark.parametrize("pt1", [0.5])
+@pytest.mark.parametrize("pt1", [0.75])
 def bench_edge_detection_fused_dense(tacoBench, num, pt1):
         loader = ImagePydataSparseTensorLoader()
         bin_img1 = loader.dense_image(num, pt1, 1) 
@@ -115,7 +115,7 @@ def dense_bench():
 
 #TODO: Add in a benchmark that uses windowing for medical imaging as well. 
 @pytest.mark.parametrize("num", list(range(1, 99))) 
-@pytest.mark.parametrize("pt1", [0.5])
+@pytest.mark.parametrize("pt1", [0.75])
 @pytest.mark.parametrize("window_size", [0.25, 0.2, 0.15, 0.1])
 def bench_edge_detection_window_pydata(tacoBench, num, pt1, window_size, plot):
         loader = ImagePydataSparseTensorLoader()
@@ -168,7 +168,7 @@ def dense_bench():
         assert(sparse_xor_img.nnz == np.sum(xor_img != 0))
 
 @pytest.mark.parametrize("num", list(range(1, 99))) 
-@pytest.mark.parametrize("pt1", [0.5])
+@pytest.mark.parametrize("pt1", [0.75])
 @pytest.mark.parametrize("window_size", [0.25, 0.2, 0.15, 0.1])
 def bench_edge_detection_window_dense(tacoBench, num, pt1, window_size):
         loader = ImagePydataSparseTensorLoader()
@@ -192,6 +192,7 @@ def dense_bench():
 # USED FOR TESTING ITTERATION LATTICE CONSTRUCTION TACO CODE ONLY
 def testOp(a, b, c):
     return np.logical_and(np.logical_not(np.logical_and(a, c).astype('int')).astype('int'), np.logical_not(np.logical_and(b, c).astype('int')).astype('int')).astype('int')
+
 @pytest.mark.skip(reason="Used for verification only")
 @pytest.mark.parametrize("num", list(range(1, 11))) 
 @pytest.mark.parametrize("pt1", [0.5])
@@ -233,3 +234,74 @@ def dense_bench():
         print("Sparse xor NNF = ", sparse_xor_img.nnz, "\t", "Dense xor NNF = ", np.sum(xor_img != int(f)))
         print("Dense xor NNZ = ", np.sum(xor_img != 0))
         assert(sparse_xor_img.nnz == np.sum(xor_img != 1))
+
+@pytest.mark.skip(reason="for getting the input matrices statistics only")
+@pytest.mark.parametrize("num", list(range(1, 99)))
+@pytest.mark.parametrize("pt1", [0.75])
+def bench_edge_detection_statistics(tacoBench, num, pt1):
+        """Run the windowed sparse XOR through tacoBench, passing nnz/shape stats of its inputs."""
+        loader = ImagePydataSparseTensorLoader()
+        img_a = safeCastPydataTensorToInts(loader.sparse_image(num, pt1, 1))
+        img_b = safeCastPydataTensorToInts(loader.sparse_image(num, pt1+0.05, 2))
+        window = loader.sparse_window(num, 3)
+
+        for img in (img_a, img_b):
+            print(img.shape)
+
+        # nnz of each operand, then the two dimensions of the window tensor.
+        stats = {
+            'nnz1': img_a.nnz, 'nnz2': img_b.nnz, 'nnz3': window.nnz,
+            'dimx': window.shape[0], 'dimy': window.shape[1],
+        }
+
+        def sparse_bench():
+            masked_a = np.logical_and(img_a, window)
+            masked_b = np.logical_and(img_b, window)
+            return np.logical_xor(masked_a, masked_b).astype('int')
+
+        tacoBench(sparse_bench, stats)
+
+@pytest.mark.skip(reason="For image generation only")
+@pytest.mark.parametrize("num", [42, 44, 50, 63, 92])
+@pytest.mark.parametrize("pt1", [0.75])
+def bench_edge_detection_fused_pydata(tacoBench, num, pt1, plot):
+        """Run the sparse windowed XOR on five selected images and check its nnz against the dense result."""
+        # NOTE(review): an earlier benchmark in this module has this same name; this definition replaces it.
+        loader = ImagePydataSparseTensorLoader()
+        sparse_bin_img1 = safeCastPydataTensorToInts(loader.sparse_image(num, pt1, 1))
+        sparse_bin_img2 = safeCastPydataTensorToInts(loader.sparse_image(num, pt1+0.05, 2))
+        sparse_bin_window = loader.sparse_window(num, 3)
+        bin_img1 = loader.dense_image(num, pt1, 1)
+        bin_img2 = loader.dense_image(num, pt1 + 0.05, 2)
+        bin_window = loader.dense_window(num)
+
+        if plot:
+            print(sparse_bin_img1.shape)
+            print(sparse_bin_img2.shape)
+
+        def sparse_bench():
+            sbi1 = np.logical_and(sparse_bin_img1, sparse_bin_window)
+            sbi2 = np.logical_and(sparse_bin_img2, sparse_bin_window)
+            sparse_xor_img = np.logical_xor(sbi1, sbi2).astype('int')
+            return sparse_xor_img
+
+        def dense_bench():
+            bi1 = np.logical_and(bin_img1, bin_window).astype('int')
+            bi2 = np.logical_and(bin_img2, bin_window).astype('int')
+            xor_img = np.logical_xor(bi1, bi2).astype('int')
+            return xor_img
+        tacoBench(sparse_bench)
+        sparse_xor_img = sparse_bench()
+        xor_img = dense_bench()
+        if plot:
+            num_elements = float(np.prod(bin_img1.shape))
+            print("Sparse xor NNZ = ", sparse_xor_img.nnz, "\t", "Dense xor NNZ = ", np.sum(xor_img != 0))
+            print("Sparsity img 1 ", np.sum(bin_img1 != 0) / num_elements)
+            print("Sparsity img 2 ", np.sum(bin_img2 != 0) / num_elements)
+            print("Sparsity xor ", np.sum(xor_img != 0) / num_elements)
+            dense_sparse_xor_img = sparse_xor_img.todense()  # separate name: the assert below still needs .nnz
+            t1 = round(loader.max[num]*pt1, 2)
+            t2 = round(loader.max[num]*(pt1 + 0.05), 2)
+            #plot_image(loader.img[num], bin_img1, bin_img2, xor_img, dense_sparse_xor_img, t1, t2, bin_window)
+
+        assert(sparse_xor_img.nnz == np.sum(xor_img != 0))
diff --git a/numpy/util.py b/numpy/util.py
@@ -400,8 +400,8 @@ def dense_window(self, num):
         result_np = numpy.zeros(shape)
         m0 = int(shape[0] / 2)
         m1 = int(shape[1] / 2)
-        dm0 = int(0.2*m0)
-        dm1 = int(0.2*m1)
+        dm0 = int(0.1*m0)
+        dm1 = int(0.1*m1)
         result_np[m0+dm0:m0+3*dm0, m1+dm1:m1+3*dm1] = 1
         result_np[m0-3*dm0:m0-dm0, m1+dm1:m1+3*dm1] = 1
         result_np[m0-3*dm0:m0-dm0, m1-3*dm1:m1-dm1] = 1
diff --git a/taco/image.cpp b/taco/image.cpp
@@ -68,8 +68,8 @@ Func andOp1("logical_and", Boolean(), andAlgebra());
 Func xorAndOp("fused_xor_and", Boolean(), xorAndAlgebra());
 Func testOp("test", Boolean(), testConstructionAlgebra());
 static void bench_image_xor(benchmark::State& state, const Format& f) {
-  auto t1 = 0.5;
-  auto t2 = 0.55;
+  auto t1 = 0.75;
+  auto t2 = 0.80;
 
   auto num_str = getEnvVar("IMAGE_NUM");
   if (num_str == "") {
@@ -78,7 +78,7 @@ static void bench_image_xor(benchmark::State& state, const Format& f) {
   }
 
   int num = std::stoi(num_str);
-
+//  int num = state.range(0);
   taco::Tensor<int64_t> matrix1, matrix2;
   try {
     matrix1 = castToTypeZero<int64_t>("A", loadImageTensor("A", num, f, t1, 1 /* variant */));
@@ -101,12 +101,6 @@ static void bench_image_xor(benchmark::State& state, const Format& f) {
     result.compile();
     state.ResumeTiming();
     result.compute();
-    result = result.removeExplicitZeros(result.getFormat());
-
-    int nnz = 0;
-    for (auto& it : iterate<int64_t>(result)) {
-      nnz++;
-    }
 //    std::cout << "Result NNZ = " << nnz << std::endl;
 //    std::cout << result << std::endl;
   }
@@ -115,11 +109,11 @@ static void CustomArguments(benchmark::internal::Benchmark* b) {
   for (int i = 1; i <= 98; ++i)
       b->Args({i});
 }
-TACO_BENCH_ARGS(bench_image_xor, csr, CSR);
+TACO_BENCH_ARGS(bench_image_xor, csr, CSR);//->Apply(CustomArguments);
 
 static void bench_image_fused(benchmark::State& state, const Format& f) {
 //  int num = state.range(0);
-  auto t1 = 0.5;
+  auto t1 = 0.75;
   auto t2 = 0.55;
 
   auto num_str = getEnvVar("IMAGE_NUM");
@@ -174,7 +168,7 @@ static void bench_image_fused(benchmark::State& state, const Format& f) {
     result.compile();
     state.ResumeTiming();
     result.compute();
-    result = result.removeExplicitZeros(result.getFormat());
+//    result = result.removeExplicitZeros(result.getFormat());
 
 //    int nnz = 0;
 //    for (auto& it : iterate<int64_t>(result)) {
@@ -192,8 +186,8 @@ TACO_BENCH_ARGS(bench_image_fused, csr, CSR);
 
 static void bench_image_window(benchmark::State& state, const Format& f, double window_size) {
 //  int num = state.range(0);
-  auto t1 = 0.5;
-  auto t2 = 0.55;
+  auto t1 = 0.75;
+  auto t2 = 0.80;
 
   auto num_str = getEnvVar("IMAGE_NUM");
   if (num_str == "") {
@@ -233,7 +227,7 @@ static void bench_image_window(benchmark::State& state, const Format& f, double
     result.compile();
     state.ResumeTiming();
     result.compute();
-    result = result.removeExplicitZeros(result.getFormat());
+//    result = result.removeExplicitZeros(result.getFormat());
 
 //        int nnz = 0;
 //    for (auto& it : iterate<int64_t>(result)) {
@@ -251,4 +245,4 @@ static void bench_image_window(benchmark::State& state, const Format& f, double
 TACO_BENCH_ARGS(bench_image_window, csr/0.25, CSR, 0.25);
 TACO_BENCH_ARGS(bench_image_window, csr/0.2, CSR, 0.2);
 TACO_BENCH_ARGS(bench_image_window, csr/0.15, CSR, 0.15);
-TACO_BENCH_ARGS(bench_image_window, csr/0.1, CSR, 0.1);
+TACO_BENCH_ARGS(bench_image_window, csr/0.1, CSR, 0.1);