From ed5fe69b6612a5cf0dd52340f6781885d77afbc9 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Thu, 9 Jul 2020 08:41:10 +0100 Subject: COMPMID-3326: Update heuristic for GEMMReshaped and GEMMReshapedOnlyRHS - Update the heuristic for Arm Mali-G76 (F32) in order to use the OpenCL image2d object on GEMM - Create utility function to validate the support for image2d Change-Id: I0913ac5f27fd07992b0ac188af753a2abeb034ca Signed-off-by: Gian Marco Iodice Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3559 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins --- ...MMReshapedOnlyRHSKernelConfigurationBifrost.cpp | 42 ++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) (limited to 'src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp') diff --git a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp index f662089c77..581c2d2199 100644 --- a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp +++ b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp @@ -27,6 +27,9 @@ #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/gemm/CLGEMMHelpers.h" #include "arm_compute/core/GPUTarget.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include #include @@ -35,6 +38,8 @@ namespace arm_compute { namespace cl_gemm { +using namespace arm_compute::misc::shape_calculator; + CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::CLGEMMReshapedOnlyRHSKernelConfigurationBifrost(GPUTarget gpu) : ICLGEMMKernelConfiguration(gpu) { @@ -139,14 +144,47 @@ std::pair CLGEMMReshapedOnlyRHSKernelConfi ARM_COMPUTE_UNUSED(k); ARM_COMPUTE_UNUSED(b); + GEMMLHSMatrixInfo lhs_info_buf; + 
GEMMRHSMatrixInfo rhs_info_buf; + GEMMLHSMatrixInfo lhs_info_img; + GEMMRHSMatrixInfo rhs_info_img; + + // Get lhs_info/rhs_info in case of OpenCL buffer if(m == 1) { const unsigned int h0 = std::max(n / 2, 1U); - return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, h0, false, true, false, true); + std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 1, 2, 8, 1, h0, false, true, false, true); } else { - return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, true, false, true); + std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, true, false, true); + } + + // Get lhs_info/rhs_info in case of OpenCL image + if(m == 1) + { + std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 1, 4, 4, 1, 4, false, true, false, false, true); + } + else + { + const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(16)), static_cast<int>(1)); + std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, h0, false, true, false, false, true); + } + + const TensorInfo tensor_rhs_info(TensorShape(n, k, b), 1, DataType::F32); + const TensorShape shape = compute_rhs_reshaped_shape(tensor_rhs_info, rhs_info_img); + const TensorInfo tensor_reshaped_info(shape, 1, DataType::F32); + + // In case of vector by matrix with few work-items, we use the OpenCL buffer rather than the OpenCL image2d + const bool use_cl_image2d = (m == 1 && n <= 4096) ? false : true; + + if(bool(validate_image2d_support_on_rhs(tensor_reshaped_info, rhs_info_img)) && use_cl_image2d) + { + return std::make_pair(lhs_info_img, rhs_info_img); + } + else + { + return std::make_pair(lhs_info_buf, rhs_info_buf); } } -- cgit v1.2.1