aboutsummaryrefslogtreecommitdiff
path: root/src/gpu/cl/kernels/gemm
diff options
context:
space:
mode:
Diffstat (limited to 'src/gpu/cl/kernels/gemm')
-rw-r--r--src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp19
-rw-r--r--src/gpu/cl/kernels/gemm/ClGemmHelpers.h17
-rw-r--r--src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp42
-rw-r--r--src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.h4
4 files changed, 78 insertions, 4 deletions
diff --git a/src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp b/src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp
index 1bf27ba277..67da06102d 100644
--- a/src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp
+++ b/src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -110,6 +110,23 @@ Status validate_image2d_support_on_rhs(const ITensorInfo &tensor_reshaped_info,
return Status{};
}
+
+bool is_mmul_kernel_preferred(const unsigned int m, const unsigned int n, const unsigned int k, const unsigned int b,
+ const DataType data_type, unsigned int &best_m0, unsigned int &best_n0)
+{
+ ARM_COMPUTE_UNUSED(n, k, b, data_type);
+
+ const unsigned int mmul_k0 = 4;
+ best_m0 = 4;
+ best_n0 = 4;
+
+ const unsigned int ceil_to_multiple_m_m0 = ceil_to_multiple(m, best_m0);
+ const unsigned int m_div_m0 = ceil_to_multiple_m_m0 / best_m0;
+ const unsigned int ceil_to_multiple_m_div_m0_mmul_k0 = ceil_to_multiple(m_div_m0, mmul_k0);
+ const unsigned int gws_y = ceil_to_multiple_m_div_m0_mmul_k0 / mmul_k0;
+
+ return ((k % mmul_k0) == 0) && (gws_y > 4);
+}
} // namespace gemm
} // namespace kernels
} // namespace opencl
diff --git a/src/gpu/cl/kernels/gemm/ClGemmHelpers.h b/src/gpu/cl/kernels/gemm/ClGemmHelpers.h
index 3fce8c9173..bf1e8fce82 100644
--- a/src/gpu/cl/kernels/gemm/ClGemmHelpers.h
+++ b/src/gpu/cl/kernels/gemm/ClGemmHelpers.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -88,6 +88,21 @@ void update_padding_for_cl_image(ITensorInfo *tensor);
* @return Status reporting if we can use the image2d OpenCL object on the RHS reshaped matrix
*/
Status validate_image2d_support_on_rhs(const ITensorInfo &tensor_reshaped_info, const GEMMRHSMatrixInfo &rhs_info);
+
+/** Determine if the MMUL kernels should be preferred
+ *
+ * @param[in] m Number of rows of the LHS matrix
+ * @param[in] n Number of columns of the RHS matrix
+ * @param[in] k Number of columns of the LHS matrix, rows of the RHS matrix
+ * @param[in] b Batch size
+ * @param[in] data_type Data type FP32/FP16
+ * @param[in, out] best_m0 Suggested M0 (number of rows of the output block) for the kernel
+ * @param[in, out] best_n0 Suggested N0 (number of columns of the output block) for the kernel
+ *
+ * @return true if MMUL kernel is preferred over kernels w/o MMUL, false otherwise
+ */
+bool is_mmul_kernel_preferred(const unsigned int m, const unsigned int n, const unsigned int k, const unsigned int b,
+ const DataType data_type, unsigned int &best_m0, unsigned int &best_n0);
} // namespace gemm
} // namespace kernels
} // namespace opencl
diff --git a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp
index a82084a8df..97762980be 100644
--- a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp
+++ b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,7 +29,9 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
+#include "src/runtime/CL/gemm/CLGEMMDefaultTypeValhall.h"
#include <utility>
@@ -61,6 +63,10 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
&ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f16,
&ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G715(&ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G715_f32,
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G715_f16,
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8);
+
ConfigurationFunctionExecutorPtr func = nullptr;
switch(_target)
@@ -68,6 +74,10 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
case GPUTarget::G78:
func = configs_G78.get_function(data_type);
break;
+ case GPUTarget::G715:
+ case GPUTarget::G615:
+ func = configs_G715.get_function(data_type);
+ break;
case GPUTarget::G77:
default:
func = configs_G77.get_function(data_type);
@@ -564,6 +574,36 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
}
}
+
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G715_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+{
+ unsigned int best_m0;
+ unsigned int best_n0;
+
+ if(is_mmul_kernel_preferred(m, n, k, b, DataType::F32, best_m0, best_n0))
+ {
+ return configure_lhs_rhs_info(m, n, best_m0, best_n0, 1, 1, 4, false, true, false, false, true);
+ }
+ else
+ {
+ return configure_G77_f32(m, n, k, b);
+ }
+}
+
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G715_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+{
+ unsigned int best_m0;
+ unsigned int best_n0;
+
+ if(is_mmul_kernel_preferred(m, n, k, b, DataType::F16, best_m0, best_n0))
+ {
+ return configure_lhs_rhs_info(m, n, best_m0, best_n0, 1, 1, 4, false, true, false, false, true);
+ }
+ else
+ {
+ return configure_G78_f16(m, n, k, b);
+ }
+}
} // namespace gemm
} // namespace kernels
} // namespace opencl
diff --git a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.h b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.h
index c5e80a7ddc..0ec068fffd 100644
--- a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.h
+++ b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -53,6 +53,8 @@ private:
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G78_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G78_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G715_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G715_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
};
} // namespace gemm
} // namespace kernels