Add GPTJ linear shapes to the list of shapes that fall back from TPP to oneDNN (#3368)

blzheng · web-flow · commit e6bab05e7eb7 · 2024-11-13T22:25:29.000+08:00
diff --git a/intel_extension_for_pytorch/nn/utils/_weight_prepack.py b/intel_extension_for_pytorch/nn/utils/_weight_prepack.py
@@ -59,8 +59,8 @@ def TPPLinear_weight_prepack(m, bk=None, bc=None, layer_dtype=torch.float32):
 #
 # For long term, mark as TODO, we will tune TPP block layout/loop order to make it on par with oneDNN.
 
-fallback_ic_shape_list = [13824, 11008]
-fallback_oc_shape_list = [4096, 5120]
+fallback_ic_shape_list = [13824, 11008, 16384, 4096]
+fallback_oc_shape_list = [4096, 5120, 16384, 12288]
 
 
 def Apply_TPPLinear_weight_prepack(m, dtype, device="cpu"):
diff --git a/tests/cpu/test_tpp_linear.py b/tests/cpu/test_tpp_linear.py
@@ -146,6 +146,7 @@ def test_tpp_linear_fallback_env_set(self):
                             assert model.mlp.use_tpp is True
                         self.assertEqual(out, ref_out)
                         _disable_tpp()
+            os.environ["BF16_OPTIMIZED_THROUGHPUT"] = "0"
 
     def test_tpp_linear_fallback_flag(self):
         x1 = torch.rand(1, 1, 4097)

Original file line number	Diff line number	Diff line change
`@@ -59,8 +59,8 @@ def TPPLinear_weight_prepack(m, bk=None, bc=None, layer_dtype=torch.float32):`
`59`	`59`	`#`
`60`	`60`	`# For long term, mark as TODO, we will tune TPP block layout/loop order to make it on par with oneDNN.`
`61`	`61`
`62`		`-fallback_ic_shape_list = [13824, 11008]`
`63`		`-fallback_oc_shape_list = [4096, 5120]`
	`62`	`+fallback_ic_shape_list = [13824, 11008, 16384, 4096]`
	`63`	`+fallback_oc_shape_list = [4096, 5120, 16384, 12288]`
`64`	`64`
`65`	`65`
`66`	`66`	`def Apply_TPPLinear_weight_prepack(m, dtype, device="cpu"):`