aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGian Marco Iodice <gianmarco.iodice@arm.com>2019-10-03 15:12:09 +0100
committerGian Marco Iodice <gianmarco.iodice@arm.com>2019-10-04 14:03:07 +0000
commit0d548048a60035349b90b903924fbf38c74796b9 (patch)
tree7fac74cce8a88f1c5e444744bcdb3415b04b4780
parentbd18811ab83bf8d1542ae0238bf9fd4c05e255df (diff)
downloadComputeLibrary-0d548048a60035349b90b903924fbf38c74796b9.tar.gz
COMPMID-2572: Update the heuristic in CLGEMM for FP16
Change-Id: Ia7f248d72bf2690a8a4dae3f2e2afc983109bd6e Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Reviewed-on: https://review.mlplatform.org/c/2035 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
-rw-r--r--arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h3
-rw-r--r--arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h4
-rw-r--r--src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp112
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp2
-rw-r--r--src/runtime/CL/functions/CLGEMM.cpp2
5 files changed, 102 insertions, 21 deletions
diff --git a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h b/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h
index 4e6112e26..5eb4aadb0 100644
--- a/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h
+++ b/arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.h
@@ -55,6 +55,9 @@ private:
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G51_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G51_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G51_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h
index e52d3ca09..b9a1ba0bf 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h
@@ -51,7 +51,7 @@ public:
CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &operator=(CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &&) = default;
/** Initialise the kernel's input and output.
*
- * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less or equal than 4.
+ * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less or equal than 4.
* @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3.
* @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0.
* @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
@@ -70,7 +70,7 @@ public:
const GEMMKernelInfo &gemm_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel
*
- * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less or equal than 4.
+ * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less or equal than 4.
* @param[in] input1 Input tensor info for the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3.
* @param[in] input2 Input tensor info containing the bias matrix. Data type supported: same as @p input0.
* @param[in] output Output tensor info. Data type supported: same as @p input0
diff --git a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp
index 9f3fc3aae..5955bac38 100644
--- a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp
+++ b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp
@@ -42,9 +42,6 @@ CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::CLGEMMReshapedOnlyRHSKernelConf
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
{
- ARM_COMPUTE_ERROR_ON(data_type != DataType::F32 && data_type != DataType::QASYMM8);
- ARM_COMPUTE_UNUSED(data_type);
-
using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::*)(unsigned int m, unsigned int n, unsigned int k,
unsigned int b);
@@ -52,6 +49,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfi
static std::map<DataType, ConfigurationFunctionExecutorPtr> gemm_configs_G51 =
{
{ DataType::F32, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G51_f32 },
+ { DataType::F16, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G51_f16 },
{ DataType::QASYMM8, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G51_u8 }
};
@@ -59,6 +57,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfi
static std::map<DataType, ConfigurationFunctionExecutorPtr> gemm_configs_G76 =
{
{ DataType::F32, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G76_f32 },
+ { DataType::F16, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G76_f16 },
{ DataType::QASYMM8, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G76_u8 }
};
@@ -66,17 +65,39 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfi
static std::map<DataType, ConfigurationFunctionExecutorPtr> gemm_configs_G7x =
{
{ DataType::F32, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_f32 },
+ { DataType::F16, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_f16 },
{ DataType::QASYMM8, &CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_u8 }
};
switch(_target)
{
case GPUTarget::G76:
- return (this->*gemm_configs_G76[data_type])(m, n, k, b);
+ if (gemm_configs_G76.find(data_type) != gemm_configs_G76.end())
+ {
+ return (this->*gemm_configs_G76[data_type])(m, n, k, b);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Not supported data type");
+ }
case GPUTarget::G51:
- return (this->*gemm_configs_G51[data_type])(m, n, k, b);
+ if (gemm_configs_G51.find(data_type) != gemm_configs_G51.end())
+ {
+ return (this->*gemm_configs_G51[data_type])(m, n, k, b);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Not supported data type");
+ }
default:
- return (this->*gemm_configs_G7x[data_type])(m, n, k, b);
+ if (gemm_configs_G7x.find(data_type) != gemm_configs_G7x.end())
+ {
+ return (this->*gemm_configs_G7x[data_type])(m, n, k, b);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Not supported data type");
+ }
}
}
@@ -89,12 +110,12 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfi
{
if(n > 2048)
{
- const unsigned int h0 = std::max(n / 4, static_cast<unsigned int>(1));
+ const unsigned int h0 = std::max(n / 4, 1U);
return configure_lhs_rhs_info(m, n, 1, 4, 4, 1, h0, false, true, false, true);
}
else
{
- const unsigned int h0 = std::max(n / 2, static_cast<unsigned int>(1));
+ const unsigned int h0 = std::max(n / 2, 1U);
return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, h0, false, true, false, true);
}
}
@@ -111,7 +132,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfi
if(m == 1)
{
- const unsigned int h0 = std::max(n / 2, static_cast<unsigned int>(1));
+ const unsigned int h0 = std::max(n / 2, 1U);
return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, h0, false, true, false, true);
}
else
@@ -128,7 +149,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfi
if(m == 1)
{
const unsigned int n0 = n < 1280? 2 : 4;
- const unsigned int h0 = std::max(n / n0, static_cast<unsigned int>(1));
+ const unsigned int h0 = std::max(n / n0, 1U);
return configure_lhs_rhs_info(m, n, 1, n0, 4, 1, h0, false, true, false, true);
}
else
@@ -137,6 +158,63 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfi
}
}
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+{
+ ARM_COMPUTE_UNUSED(k);
+ ARM_COMPUTE_UNUSED(b);
+
+ if(m == 1)
+ {
+ if(n > 2048)
+ {
+ const unsigned int h0 = std::max(n / 4, 1U);
+ return configure_lhs_rhs_info(m, n, 1, 4, 4, 1, h0, false, true, false, true);
+ }
+ else
+ {
+ const unsigned int h0 = std::max(n / 2, 1U);
+ return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, h0, false, true, false, true);
+ }
+ }
+ else
+ {
+ return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 4, false, true, false, true);
+ }
+}
+
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+{
+ ARM_COMPUTE_UNUSED(k);
+ ARM_COMPUTE_UNUSED(b);
+
+ if(m == 1)
+ {
+ const unsigned int h0 = std::max(n / 2, 1U);
+ return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, h0, false, true, false, true);
+ }
+ else
+ {
+ return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, true, false, true);
+ }
+}
+
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G51_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+{
+ ARM_COMPUTE_UNUSED(k);
+ ARM_COMPUTE_UNUSED(b);
+
+ if(m == 1)
+ {
+ const unsigned int n0 = n < 1280? 2 : 4;
+ const unsigned int h0 = std::max(n / n0, 1U);
+ return configure_lhs_rhs_info(m, n, 1, n0, 8, 1, h0, false, true, false, true);
+ }
+ else
+ {
+ return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, true, false, true);
+ }
+}
+
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
@@ -146,12 +224,12 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfi
{
if(m == 1)
{
- const unsigned int h0 = std::max(n / 2, static_cast<unsigned int>(1));
+ const unsigned int h0 = std::max(n / 2, 1U);
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, false, true, false, true);
}
else
{
- const unsigned int h0 = std::max(n / 4, static_cast<unsigned int>(1));
+ const unsigned int h0 = std::max(n / 4, 1U);
return configure_lhs_rhs_info(m, n, 4, 4, 16, 1, h0, false, true, false, true);
}
}
@@ -159,12 +237,12 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfi
{
if(m == 1)
{
- const unsigned int h0 = std::max(n / 2, static_cast<unsigned int>(1));
+ const unsigned int h0 = std::max(n / 2, 1U);
return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, h0, false, true, false, true);
}
else
{
- const unsigned int h0 = std::max(n / 4, static_cast<unsigned int>(1));
+ const unsigned int h0 = std::max(n / 4, 1U);
return configure_lhs_rhs_info(m, n, 2, 2, 16, 1, h0, false, true, false, true);
}
}
@@ -177,7 +255,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfi
if(m == 1)
{
- const unsigned int h0 = std::max(n / 2, static_cast<unsigned int>(1));
+ const unsigned int h0 = std::max(n / 2, 1U);
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, false, true, false, true);
}
else
@@ -193,12 +271,12 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfi
if(m == 1)
{
- const unsigned int h0 = std::max(n / 2, static_cast<unsigned int>(1));
+ const unsigned int h0 = std::max(n / 2, 1U);
return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, h0, false, true, false, true);
}
else
{
- const unsigned int h0 = std::max(n / 2, static_cast<unsigned int>(1));
+ const unsigned int h0 = std::max(n / 2, 1U);
return configure_lhs_rhs_info(m, n, 4, 2, 16, 1, h0, false, true, false, true);
}
}
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp
index 3d5e1486a..24b230128 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp
@@ -57,7 +57,7 @@ Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1,
{
ARM_COMPUTE_UNUSED(alpha);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input0, input1, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(input0->num_dimensions() > 4, "The number of dimensions for the LHS matrix must be <= 4");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->num_dimensions() > 3, "The number of dimensions for the RHS matrix must be <= 3");
diff --git a/src/runtime/CL/functions/CLGEMM.cpp b/src/runtime/CL/functions/CLGEMM.cpp
index 2a027d872..8d460142e 100644
--- a/src/runtime/CL/functions/CLGEMM.cpp
+++ b/src/runtime/CL/functions/CLGEMM.cpp
@@ -109,7 +109,7 @@ CLGEMM::GEMMType CLGEMM::select_gemm_type(unsigned int m, unsigned int n, unsign
{
if((m == 1) || (!reshape_b_only_on_first_run))
{
- gemm_type = GEMMType::NATIVE;
+ gemm_type = GEMMType::RESHAPED_ONLY_RHS;
}
else
{