16
16
17
17
// ================================================================================
18
18
// this file has been auto-generated, do not modify its contents!
19
- // date: 2024-11-26 13:52:06.286983
20
- // git hash: c4c6ac09808d14b5407afb06ecdecd235cd50ed3
19
+ // date: 2024-11-26 14:20:49.081641
20
+ // git hash: 76c695a4cc5b13b3d5841ac5085574a5b47a299c
21
21
// ================================================================================
22
22
23
23
#ifndef KERNEL_FLOAT_MACROS_H
@@ -824,6 +824,13 @@ using default_policy = KERNEL_FLOAT_POLICY;
824
824
825
825
namespace detail {
826
826
827
// invoke_impl: helper struct exposing a single static member function `call`.
//   F       - type of the callable `fun`; it is received by value.
//   Output  - declared return type of `call`.
//   Args... - parameter types of `call`; the arguments are passed on to `fun` as `args...`.
// `call` evaluates `fun(args...)` and returns the result as `Output`.
+ template <typename F, typename Output, typename ... Args>
828
+ struct invoke_impl {
829
+ KERNEL_FLOAT_INLINE static Output call (F fun, Args... args) {
830
+ return fun (args...);
831
+ }
832
+ };
833
+
827
834
//
828
835
template <typename Policy, typename F, size_t N, typename Output, typename ... Args>
829
836
struct apply_fallback_impl {
@@ -853,13 +860,6 @@ template<int Level, typename F, size_t N, typename Output, typename... Args>
853
860
struct apply_fallback_impl <approx_level_policy<Level>, F, N, Output, Args...>:
854
861
apply_impl<approx_policy, F, N, Output, Args...> {};
855
862
856
// invoke_impl: helper struct exposing a single static member function `call`.
//   F       - type of the callable `fun`; it is received by value.
//   Output  - declared return type of `call`.
//   Args... - parameter types of `call`; the arguments are passed on to `fun` as `args...`.
// `call` evaluates `fun(args...)` and returns the result as `Output`.
- template <typename F, typename Output, typename ... Args>
857
- struct invoke_impl {
858
- KERNEL_FLOAT_INLINE static Output call (F fun, Args... args) {
859
- return fun (args...);
860
- }
861
- };
862
-
863
863
// `apply_impl` is implemented only for `accurate_policy`; the other policies fall back to `apply_base_impl`.
864
864
template <typename F, size_t N, typename Output, typename ... Args>
865
865
struct apply_impl <accurate_policy, F, N, Output, Args...> {
@@ -1416,7 +1416,7 @@ KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST(rsqrt)
1416
1416
}
1417
1417
1418
1418
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN (float , exp, __expf(input))
1419
- KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN(float , exp2, __exp2f(input))
1419
+ // KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN(float, exp2, __exp2f(input)) // Disabled: the `__exp2f` intrinsic seems to be missing; see the PTX `ex2.approx.f32` definition of exp2 further down.
1420
1420
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN(float , exp10, __exp10f(input))
1421
1421
1422
1422
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN(float , log, __logf(input))
@@ -1442,19 +1442,21 @@ KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN(float, tan, __tanf(input))
1442
1442
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX (double , rcp, " rcp.approx.ftz.f64" , " d" )
1443
1443
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(double , rsqrt, " rsqrt.approx.f64" , " d" )
1444
1444
1445
+ KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float , exp2, " ex2.approx.f32" , " f" )
1445
1446
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float , sqrt, " sqrt.approx.f32" , " f" )
1446
1447
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float , rcp, " rcp.approx.f32" , " f" )
1447
1448
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float , rsqrt, " rsqrt.approx.f32" , " f" )
1448
1449
KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float , tanh, " tanh.approx.f32" , " f" )
1450
+
1451
+ // The PTX definitions below are no longer necessary: sin, cos, and log2 are covered by the KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN definitions above.
1452
+ // KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, sin, "sin.approx.f32", "f")
1453
+ // KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, cos, "cos.approx.f32", "f")
1454
+ // KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, log2, "lg2.approx.f32", "f")
1449
1455
#endif
1450
1456
1451
1457
#define KERNEL_FLOAT_FAST_F32_MAP (F ) \
1452
1458
F (exp) F(exp2) F(exp10) F(log) F(log2) F(log10) F(sin) F(cos) F(tan) F(rcp) F(rsqrt) F(sqrt)
1453
1459
1454
- // KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, sin, "sin.approx.f32", "f")
1455
- // KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, cos, "cos.approx.f32", "f")
1456
- // KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, exp2, "ex2.approx.f32", "f")
1457
- // KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, log2, "lg2.approx.f32", "f")
1458
1460
#else
1459
1461
#define KERNEL_FLOAT_FAST_F32_MAP (F )
1460
1462
#endif
0 commit comments