author     Gian Marco Iodice <gianmarco.iodice@arm.com>    2020-07-20 13:31:05 +0100
committer  Gian Marco Iodice <gianmarco.iodice@arm.com>    2020-07-21 13:12:22 +0000
commit     c6eaec3610fa27651582f6c1acad35afffe360f6 (patch)
tree       8664d8e54be6ad1c14cbde61f6b5ac3478d266ab
parent     a449a3638aaaa32414384219a0041b86591a7f41 (diff)
download   ComputeLibrary-c6eaec3610fa27651582f6c1acad35afffe360f6.tar.gz
COMPMID-3326: Update heuristic for GEMMReshaped and GEMMReshapedOnlyRHS
- Update the heuristic for Arm Mali-G77 (F32) in order to use the OpenCL image2d object on GEMM

Change-Id: Ife6736a22ec2a114368bb338908f0c5f239dfad6
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3593
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
-rw-r--r--  src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.cpp  59
-rw-r--r--  src/runtime/CL/functions/CLGEMM.cpp  10
2 files changed, 58 insertions, 11 deletions
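
The selection logic introduced in configure_G77_f32() below can be condensed into a small, self-contained sketch. Config, image2d_supported() and select_config() are illustrative stand-ins, not the library API, for GEMMLHSMatrixInfo/GEMMRHSMatrixInfo, validate_image2d_support_on_rhs() and the heuristic itself; the block sizes and the 4096/1024 thresholds are taken from the diff, while the image2d capability check is deliberately simplified.

// Candidate GEMM configuration: block sizes plus the buffer-vs-image2d choice.
struct Config
{
    unsigned int m0;                 // rows processed per work-item (LHS block size)
    unsigned int n0;                 // columns processed per work-item (RHS block size)
    bool         export_to_cl_image; // true when the reshaped RHS is read through a cl_image
};

// Hypothetical stand-in for validate_image2d_support_on_rhs(): real support depends on the
// cl_khr_image2d_from_buffer extension and the device image limits; reduced here to a fixed
// width bound on the reshaped RHS purely for illustration.
bool image2d_supported(const Config &rhs, unsigned int n, unsigned int k)
{
    return (rhs.n0 % 4 == 0) && ((n / rhs.n0) * k <= 16384u);
}

// Condensed version of configure_G77_f32() below: build one buffer and one image2d
// candidate, then keep the image2d one only for large enough, supported workloads.
Config select_config(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
    const Config buf = (m == 1) ? Config{1, 4, false}
                                : (m > 256) ? Config{4, 4, false} : Config{2, 4, false};
    const Config img = (m == 1) ? Config{1, 4, true}
                                : Config{((m / 4) * (n / 4) > 4096) ? 4u : 2u, 4, true};

    // Small workloads stay on the OpenCL buffer: the image2d path only pays off once
    // (m / m0) * (n / n0) * b reaches the 1024 threshold used in the patch.
    const bool large_enough = ((m / img.m0) * (n / img.n0)) * b >= 1024;

    return (large_enough && image2d_supported(img, n, k)) ? img : buf;
}
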
diff --git a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.cpp b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.cpp
index 11cb90ba19..9f3461e912 100644
--- a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.cpp
+++ b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.cpp
@@ -27,6 +27,9 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h"
#include "arm_compute/core/GPUTarget.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include <map>
#include <utility>
@@ -35,6 +38,8 @@ namespace arm_compute
{
namespace cl_gemm
{
+using namespace arm_compute::misc::shape_calculator;
+
CLGEMMReshapedOnlyRHSKernelConfigurationValhall::CLGEMMReshapedOnlyRHSKernelConfigurationValhall(GPUTarget gpu)
: ICLGEMMKernelConfiguration(gpu)
{
@@ -74,32 +79,66 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfi
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationValhall::configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
- ARM_COMPUTE_UNUSED(b);
+ GEMMLHSMatrixInfo lhs_info_buf;
+ GEMMRHSMatrixInfo rhs_info_buf;
+ GEMMLHSMatrixInfo lhs_info_img;
+ GEMMRHSMatrixInfo rhs_info_img;
+
+ // Get lhs_info/rhs_info in case of OpenCL buffer
if(m == 1)
{
- if(n > 2048)
+ const unsigned int h0 = std::max(n / 4, 1U);
+ std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 1, 4, 4, 1, h0, false, true, false, true);
+ }
+ else
+ {
+ if(m > 256)
{
- return configure_lhs_rhs_info(m, n, 1, 8, 2, 1, 256, false, true, false, true);
+ const int v0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(8)), static_cast<int>(1));
+ std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, v0, false, true, false, true);
}
else
{
- return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 256, false, true, false, true);
+ const int v0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(8)), static_cast<int>(1));
+ std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 2, 4, 4, 1, v0, false, true, false, true);
}
}
+
+ // Get lhs_info/rhs_info in case of OpenCL image
+ if(m == 1)
+ {
+ std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 1, 4, 4, 1, 8, true, true, false, false, true);
+ }
else
{
- if(m > 300)
+ if((m / 4) * (n / 4) > 4096)
{
- const int v0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(256)), static_cast<int>(1));
- return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, v0, false, true, false, true);
+ const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(8)), static_cast<int>(1));
+ std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, h0, false, true, false, false, true);
}
else
{
- const int v0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(256)), static_cast<int>(1));
- return configure_lhs_rhs_info(m, n, 2, 4, 4, 1, v0, false, true, false, true);
+ const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(8)), static_cast<int>(1));
+ std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 2, 4, 4, 1, h0, false, true, false, false, true);
}
}
+
+ const TensorInfo tensor_rhs_info(TensorShape(n, k, b), 1, DataType::F32);
+ const TensorShape shape = compute_rhs_reshaped_shape(tensor_rhs_info, rhs_info_img);
+ const TensorInfo tensor_reshaped_info(shape, 1, DataType::F32);
+
+ // In case of small workloads, we use the OpenCL buffer rather than the OpenCL image2d
+ const bool use_cl_image2d = ((m / lhs_info_img.m0) * (n / rhs_info_img.n0)) * b < 1024 ? false : true;
+
+ if(bool(validate_image2d_support_on_rhs(tensor_reshaped_info, rhs_info_img)) && use_cl_image2d)
+ {
+ return std::make_pair(lhs_info_img, rhs_info_img);
+ }
+ else
+ {
+ return std::make_pair(lhs_info_buf, rhs_info_buf);
+ }
}
std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfigurationValhall::configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
@@ -120,7 +159,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> CLGEMMReshapedOnlyRHSKernelConfi
return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, h0, false, true, false, true);
}
}
- else if (m < 128)
+ else if(m < 128)
{
const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(256)), static_cast<int>(1));
if(k >= 512)
diff --git a/src/runtime/CL/functions/CLGEMM.cpp b/src/runtime/CL/functions/CLGEMM.cpp
index 43eb736d40..4a74630036 100644
--- a/src/runtime/CL/functions/CLGEMM.cpp
+++ b/src/runtime/CL/functions/CLGEMM.cpp
@@ -291,8 +291,16 @@ void CLGEMM::configure_reshaped_only_rhs(const CLCompileContext &compile_context
std::unique_ptr<ICLGEMMKernelConfiguration> gemm_config = CLGEMMReshapedOnlyRHSKernelConfigurationFactory::create(gpu_target);
ARM_COMPUTE_ERROR_ON_NULLPTR(gemm_config.get());
+ unsigned int m_internal = m;
+ unsigned int b_internal = batch_size;
+ if(reinterpret_input_as_3d)
+ {
+ m_internal = a->info()->dimension(1);
+ b_internal = a->info()->dimension(2);
+ }
+
// Configure lhs_info and rhs_info
- std::tie(lhs_info, rhs_info) = gemm_config->configure(m, n, k, batch_size, data_type);
+ std::tie(lhs_info, rhs_info) = gemm_config->configure(m_internal, n, k, b_internal, data_type);
ICLTensor *reshaped_rhs = &_tmp_b;
if(_weights_manager && _weights_manager->are_weights_managed(b))
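
The CLGEMM.cpp hunk above queries the heuristic with the kernel's internal view of the problem when the input is reinterpreted as 3D: M becomes dimension(1) and the batch size dimension(2) of the LHS, presumably so that the new image2d workload threshold sees the sizes the kernel actually iterates over rather than the collapsed 2D values. A minimal sketch of that adjustment follows; LhsDims and internal_problem_size() are hypothetical names, not part of the library.

#include <utility>

// Hypothetical stand-in for the two ITensorInfo dimensions used by the patch.
struct LhsDims
{
    unsigned int dim1; // a->info()->dimension(1)
    unsigned int dim2; // a->info()->dimension(2)
};

// Mirrors the adjustment in CLGEMM::configure_reshaped_only_rhs(): when the LHS is
// reinterpreted as 3D, the heuristic is queried with the per-slice M and the depth as
// the batch size, otherwise the original m/batch_size are passed through unchanged.
std::pair<unsigned int, unsigned int> internal_problem_size(const LhsDims &lhs,
                                                            bool           reinterpret_input_as_3d,
                                                            unsigned int   m,
                                                            unsigned int   batch_size)
{
    if(reinterpret_input_as_3d)
    {
        return std::make_pair(lhs.dim1, lhs.dim2);
    }
    return std::make_pair(m, batch_size);
}
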