Diffstat (limited to 'src/gpu/cl/kernels/gemm')
-rw-r--r--  src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp | 79
-rw-r--r--  src/gpu/cl/kernels/gemm/ClGemmHelpers.h | 36
-rw-r--r--  src/gpu/cl/kernels/gemm/IClGemmKernelConfig.h | 12
-rw-r--r--  src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp | 90
-rw-r--r--  src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.h | 21
-rw-r--r--  src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.cpp | 19
-rw-r--r--  src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.h | 6
-rw-r--r--  src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.cpp | 54
-rw-r--r--  src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.h | 12
-rw-r--r--  src/gpu/cl/kernels/gemm/native/ClGemmNativeKernelConfig.h | 2
-rw-r--r--  src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.cpp | 163
-rw-r--r--  src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.h | 27
-rw-r--r--  src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.cpp | 168
-rw-r--r--  src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.h | 18
-rw-r--r--  src/gpu/cl/kernels/gemm/reshaped/ClGemmReshapedKernelConfig.h | 2
-rw-r--r--  src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.cpp | 242
-rw-r--r--  src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.h | 39
-rw-r--r--  src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp | 550
-rw-r--r--  src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.h | 27
-rw-r--r--  src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmReshapedOnlyRhsKernelConfig.h | 2
20 files changed, 813 insertions, 756 deletions
diff --git a/src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp b/src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp
index 9350bf74bb..b5ebac3b49 100644
--- a/src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp
+++ b/src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp
@@ -39,14 +39,24 @@ namespace kernels
{
namespace gemm
{
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_lhs_rhs_info(unsigned int m, unsigned int n, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0,
- bool lhs_interleave, bool rhs_interleave, bool lhs_transpose, bool rhs_transpose, bool export_to_cl_image)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_lhs_rhs_info(unsigned int m,
+ unsigned int n,
+ unsigned int m0,
+ unsigned int n0,
+ unsigned int k0,
+ unsigned int v0,
+ unsigned int h0,
+ bool lhs_interleave,
+ bool rhs_interleave,
+ bool lhs_transpose,
+ bool rhs_transpose,
+ bool export_to_cl_image)
{
ARM_COMPUTE_ERROR_ON(m0 == 0 || n0 == 0);
ARM_COMPUTE_ERROR_ON(v0 == 0);
v0 = std::max(std::min(static_cast<int>(m / m0), static_cast<int>(v0)), static_cast<int>(1));
- if(h0 == 0)
+ if (h0 == 0)
{
// When h0 is 0, we should take the maximum H0 possible
h0 = std::max(n / n0, 1U);
@@ -62,17 +72,22 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_lhs_rhs_info(unsigned
return std::make_pair(lhs_info, rhs_info);
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> select_lhs_rhs_info(std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> info_img,
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> info_buf,
- unsigned int n, unsigned int k, unsigned int b, DataType data_type)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+select_lhs_rhs_info(std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> info_img,
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> info_buf,
+ unsigned int n,
+ unsigned int k,
+ unsigned int b,
+ DataType data_type)
{
- ARM_COMPUTE_ERROR_ON_MSG(info_buf.second.export_to_cl_image == true, "The fallback GeMM configuration cannot have export_to_cl_image = true");
+ ARM_COMPUTE_ERROR_ON_MSG(info_buf.second.export_to_cl_image == true,
+ "The fallback GeMM configuration cannot have export_to_cl_image = true");
const TensorInfo tensor_rhs_info(TensorShape(n, k, b), 1, data_type);
const TensorShape shape = misc::shape_calculator::compute_rhs_reshaped_shape(tensor_rhs_info, info_img.second);
const TensorInfo tensor_reshaped_info(shape, 1, data_type);
- if(bool(validate_image2d_support_on_rhs(tensor_reshaped_info, info_img.second)))
+ if (bool(validate_image2d_support_on_rhs(tensor_reshaped_info, info_img.second)))
{
return info_img;
}
@@ -90,42 +105,56 @@ void update_padding_for_cl_image(ITensorInfo *tensor)
const unsigned int pixel_alignment = get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device());
ARM_COMPUTE_ERROR_ON_MSG(pixel_alignment == 0, "Cannot retrieve cl_image pitch alignment");
- if(pixel_alignment == 0)
+ if (pixel_alignment == 0)
{
return;
}
const unsigned int row_pitch_alignment = pixel_alignment * num_floats_per_pixel;
- const unsigned int round_up_width = ((stride_y_in_elements + row_pitch_alignment - 1) / row_pitch_alignment) * row_pitch_alignment;
- const unsigned int padding = round_up_width - stride_y_in_elements;
+ const unsigned int round_up_width =
+ ((stride_y_in_elements + row_pitch_alignment - 1) / row_pitch_alignment) * row_pitch_alignment;
+ const unsigned int padding = round_up_width - stride_y_in_elements;
tensor->extend_padding(PaddingSize(0, tensor->padding().right + padding, 0, 0));
}
Status validate_image2d_support_on_rhs(const ITensorInfo &tensor_reshaped_info, const GEMMRHSMatrixInfo &rhs_info)
{
- if(rhs_info.export_to_cl_image)
+ if (rhs_info.export_to_cl_image)
{
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(((rhs_info.n0 == 2) || (rhs_info.n0 == 3)) && rhs_info.transpose == false, "Export to cl_image only supported with n0 = 4, 8 or 16");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(((rhs_info.k0 == 2) || (rhs_info.k0 == 3)) && rhs_info.transpose == true, "Export to cl_image only supported with k0 = 4, 8 or 16");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(((rhs_info.n0 == 2) || (rhs_info.n0 == 3)) && rhs_info.transpose == false,
+ "Export to cl_image only supported with n0 = 4, 8 or 16");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(((rhs_info.k0 == 2) || (rhs_info.k0 == 3)) && rhs_info.transpose == true,
+ "Export to cl_image only supported with k0 = 4, 8 or 16");
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(&tensor_reshaped_info, DataType::F32, DataType::F16);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(!image2d_from_buffer_supported(CLKernelLibrary::get().get_device()), "The extension cl_khr_image2d_from_buffer is not supported on the target platform");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) == 0, "Impossible to retrieve the cl_image pitch alignment");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(
+ !image2d_from_buffer_supported(CLKernelLibrary::get().get_device()),
+ "The extension cl_khr_image2d_from_buffer is not supported on the target platform");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) == 0,
+ "Impossible to retrieve the cl_image pitch alignment");
// Check the width and height of the output tensor.
// Since we cannot create a 3d image from a buffer, the third dimension is collapsed on the second dimension
const size_t max_image_w = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>();
const size_t max_image_h = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>();
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(tensor_reshaped_info.tensor_shape()[0] > max_image_w * 4, "Not supported width for cl_image");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(tensor_reshaped_info.tensor_shape()[1] * tensor_reshaped_info.tensor_shape()[2] > max_image_h, "Not supported height for cl_image");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(tensor_reshaped_info.tensor_shape()[0] > max_image_w * 4,
+ "Not supported width for cl_image");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(
+ tensor_reshaped_info.tensor_shape()[1] * tensor_reshaped_info.tensor_shape()[2] > max_image_h,
+ "Not supported height for cl_image");
}
return Status{};
}
-bool is_mmul_kernel_preferred(const unsigned int m, const unsigned int n, const unsigned int k, const unsigned int b,
- const DataType data_type, unsigned int &best_m0, unsigned int &best_n0)
+bool is_mmul_kernel_preferred(const unsigned int m,
+ const unsigned int n,
+ const unsigned int k,
+ const unsigned int b,
+ const DataType data_type,
+ unsigned int &best_m0,
+ unsigned int &best_n0)
{
ARM_COMPUTE_UNUSED(n, k, b, data_type);
@@ -141,7 +170,8 @@ bool is_mmul_kernel_preferred(const unsigned int m, const unsigned int n, const
return ((k % mmul_k0) == 0) && (gws_y > 4);
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> find_lhs_rhs_info(const GeMMConfigsMatrix &configs, unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+find_lhs_rhs_info(const GeMMConfigsMatrix &configs, unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
size_t min_acc = std::numeric_limits<size_t>::max();
size_t min_idx = 0;
@@ -150,12 +180,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> find_lhs_rhs_info(const GeMMConf
const size_t num_rows = configs.size();
const size_t num_cols = configs[0].size();
- ARM_COMPUTE_ERROR_ON_MSG(num_cols != 14U, "The entry should have 14 integer values representing: M, N, K, B, M0, N0. K0, V0, H0, INT_LHS, INT_RHS, TRA_LHS, TRA_RHS, IMG_RHS");
+ ARM_COMPUTE_ERROR_ON_MSG(num_cols != 14U, "The entry should have 14 integer values representing: M, N, K, B, M0, "
+ "N0. K0, V0, H0, INT_LHS, INT_RHS, TRA_LHS, TRA_RHS, IMG_RHS");
ARM_COMPUTE_UNUSED(num_cols);
// Find nearest GeMM workload
// Note: the workload does not depend on the K dimension
- for(size_t y = 0; y < num_rows; ++y)
+ for (size_t y = 0; y < num_rows; ++y)
{
size_t mc0 = static_cast<size_t>(configs[y][0]);
size_t nc0 = static_cast<size_t>(configs[y][1]);
@@ -168,7 +199,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> find_lhs_rhs_info(const GeMMConf
acc += (k - kc0) * (k - kc0);
acc += (b - bc0) * (b - bc0);
acc = std::sqrt(acc);
- if(acc < min_acc)
+ if (acc < min_acc)
{
min_acc = acc;
min_idx = y;
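
The find_lhs_rhs_info() change in the hunks above is formatting only; the routine itself is a nearest-neighbour lookup over the user-provided GeMMConfigsMatrix, accumulating the distance between the query workload (M, N, K, B) and each tuned entry and keeping the closest row. A minimal standalone sketch of that pattern, assuming a non-empty table and using a hypothetical ConfigRow in place of the library's 14-column matrix:

#include <cmath>
#include <cstddef>
#include <cstdio>
#include <limits>
#include <vector>

// Hypothetical stand-in for one row of the GeMMConfigsMatrix: the first four
// values describe the GEMM workload the row was tuned for, the rest are the
// block sizes chosen for it (trimmed to three for brevity).
struct ConfigRow
{
    int m, n, k, b; // Tuned-for GEMM shape
    int m0, n0, k0; // Block sizes selected for that shape
};

// Pick the row whose (m, n, k, b) lies closest (Euclidean distance) to the
// query, mirroring the accumulation loop in find_lhs_rhs_info(). Assumes a
// non-empty table.
static const ConfigRow &nearest_config(const std::vector<ConfigRow> &rows, int m, int n, int k, int b)
{
    double      min_acc = std::numeric_limits<double>::max();
    std::size_t min_idx = 0;
    for (std::size_t y = 0; y < rows.size(); ++y)
    {
        double acc = 0.0;
        acc += double(m - rows[y].m) * double(m - rows[y].m);
        acc += double(n - rows[y].n) * double(n - rows[y].n);
        acc += double(k - rows[y].k) * double(k - rows[y].k);
        acc += double(b - rows[y].b) * double(b - rows[y].b);
        acc = std::sqrt(acc);
        if (acc < min_acc)
        {
            min_acc = acc;
            min_idx = y;
        }
    }
    return rows[min_idx];
}

int main()
{
    // Entries below are made up for illustration, not tuned values.
    const std::vector<ConfigRow> table = {
        {24, 88, 236, 1, 4, 4, 4},
        {1, 1024, 1024, 1, 1, 4, 8},
    };
    const ConfigRow &best = nearest_config(table, 32, 64, 256, 1);
    std::printf("chosen block sizes: m0=%d n0=%d k0=%d\n", best.m0, best.n0, best.k0);
    return 0;
}

The real helper additionally checks that each row has the 14 expected columns and returns a GEMMLHSMatrixInfo/GEMMRHSMatrixInfo pair built from the winning row; the sketch keeps only the distance search.
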
diff --git a/src/gpu/cl/kernels/gemm/ClGemmHelpers.h b/src/gpu/cl/kernels/gemm/ClGemmHelpers.h
index 6689b10e69..84776fb207 100644
--- a/src/gpu/cl/kernels/gemm/ClGemmHelpers.h
+++ b/src/gpu/cl/kernels/gemm/ClGemmHelpers.h
@@ -54,8 +54,18 @@ using GeMMConfigsMatrix = std::vector<std::vector<int32_t>>;
*
* @return @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo
*/
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_lhs_rhs_info(unsigned int m, unsigned int n, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0,
- bool lhs_interleave, bool rhs_interleave, bool lhs_transpose, bool rhs_transpose, bool export_to_cl_image = false);
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_lhs_rhs_info(unsigned int m,
+ unsigned int n,
+ unsigned int m0,
+ unsigned int n0,
+ unsigned int k0,
+ unsigned int v0,
+ unsigned int h0,
+ bool lhs_interleave,
+ bool rhs_interleave,
+ bool lhs_transpose,
+ bool rhs_transpose,
+ bool export_to_cl_image = false);
/** Select @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo
*
@@ -72,9 +82,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_lhs_rhs_info(unsigned
*
* @return @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo
*/
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> select_lhs_rhs_info(std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> info_img,
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> info_buf,
- unsigned int n, unsigned int k, unsigned int b, DataType data_type);
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+select_lhs_rhs_info(std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> info_img,
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> info_buf,
+ unsigned int n,
+ unsigned int k,
+ unsigned int b,
+ DataType data_type);
/** Update padding required to export the OpenCL buffer to OpenCL image2d
*
@@ -103,8 +117,13 @@ Status validate_image2d_support_on_rhs(const ITensorInfo &tensor_reshaped_info,
*
* @return true if MMUL kernel is preferred over kernels w/o MMUL, false otherwise
*/
-bool is_mmul_kernel_preferred(const unsigned int m, const unsigned int n, const unsigned int k, const unsigned int b,
- const DataType data_type, unsigned int &best_m0, unsigned int &best_n0);
+bool is_mmul_kernel_preferred(const unsigned int m,
+ const unsigned int n,
+ const unsigned int k,
+ const unsigned int b,
+ const DataType data_type,
+ unsigned int &best_m0,
+ unsigned int &best_n0);
/** Find the preferred configurations for the LHS and RHS tensor using the GeMMConfigsMatrix provided by the user
*
@@ -116,7 +135,8 @@ bool is_mmul_kernel_preferred(const unsigned int m, const unsigned int n, const
*
* @return @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo
*/
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> find_lhs_rhs_info(const GeMMConfigsMatrix &configs, unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+find_lhs_rhs_info(const GeMMConfigsMatrix &configs, unsigned int m, unsigned int n, unsigned int k, unsigned int b);
} // namespace gemm
} // namespace kernels
} // namespace opencl
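
As a side note on the configure_lhs_rhs_info() declaration re-wrapped above: its v0/h0 handling (first hunk of ClGemmHelpers.cpp) reduces to a clamp — v0 cannot exceed the number of M0 blocks along M, and h0 == 0 requests the maximum H0, i.e. the number of N0 blocks along N. A standalone sketch under those assumptions, using hypothetical names outside the Arm Compute API:

#include <algorithm>
#include <cstdio>

// Hypothetical standalone version of the v0/h0 normalisation performed at the
// top of configure_lhs_rhs_info(): v0 may not exceed the number of M blocks,
// and h0 == 0 is a request for the maximum number of N blocks.
static void normalise_v0_h0(unsigned int m, unsigned int n,
                            unsigned int m0, unsigned int n0,
                            unsigned int &v0, unsigned int &h0)
{
    v0 = std::max(std::min(static_cast<int>(m / m0), static_cast<int>(v0)), 1);
    if (h0 == 0)
    {
        h0 = std::max(n / n0, 1U); // take the maximum H0 possible
    }
}

int main()
{
    unsigned int v0 = 16, h0 = 0;
    normalise_v0_h0(/*m=*/24, /*n=*/88, /*m0=*/4, /*n0=*/4, v0, h0);
    std::printf("v0=%u h0=%u\n", v0, h0); // v0 clamped to 24/4 = 6, h0 set to 88/4 = 22
    return 0;
}
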
diff --git a/src/gpu/cl/kernels/gemm/IClGemmKernelConfig.h b/src/gpu/cl/kernels/gemm/IClGemmKernelConfig.h
index a49836cfda..9d08633963 100644
--- a/src/gpu/cl/kernels/gemm/IClGemmKernelConfig.h
+++ b/src/gpu/cl/kernels/gemm/IClGemmKernelConfig.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/GPUTarget.h"
#include "arm_compute/core/Types.h"
+
#include "src/core/common/Macros.h"
#include <array>
@@ -56,8 +57,7 @@ public:
* @param[in] func_int8 Function to call for GEMM Int8 (QASYMM8, QASYMM8_SIGNED, QSYMM8_PER_CHANNEL)
*
*/
- CLGEMMConfigArray(T func_f32, T func_f16, T func_int8)
- : _configs{ func_f32, func_f16, func_int8 }
+ CLGEMMConfigArray(T func_f32, T func_f16, T func_int8) : _configs{func_f32, func_f16, func_int8}
{
}
@@ -69,7 +69,7 @@ public:
*/
T get_function(DataType data_type)
{
- switch(data_type)
+ switch (data_type)
{
case DataType::F32:
return _configs.at(DT_F32);
@@ -96,8 +96,7 @@ public:
*
* @param[in] arch GPU target
*/
- IClGemmKernelConfig(GPUTarget arch)
- : _target(arch)
+ IClGemmKernelConfig(GPUTarget arch) : _target(arch)
{
}
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(IClGemmKernelConfig);
@@ -111,7 +110,8 @@ public:
* @param[in] b Batch size
* @param[in] data_type Data type
*/
- virtual std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) = 0;
+ virtual std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) = 0;
protected:
GPUTarget _target;
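
The CLGEMMConfigArray constructor and get_function() touched above form a small per-data-type dispatch table: three configuration functions (F32, F16, quantized int8) are stored and one is selected at configure time. A condensed, standalone illustration of the same pattern — the class, function names and returned block sizes below are placeholders, not the library's heuristics:

#include <array>
#include <cstdio>
#include <stdexcept>

enum class DataType { F32, F16, Int8 };

struct BlockSizes { unsigned int m0, n0, k0; };

// Hypothetical heuristic object: one member function per data-type family,
// stored in a small table and selected at configure time, mirroring
// CLGEMMConfigArray<T>::get_function().
class ExampleHeuristic
{
public:
    using ConfigFn = BlockSizes (ExampleHeuristic::*)(unsigned int m, unsigned int n) const;

    BlockSizes configure(unsigned int m, unsigned int n, DataType dt) const
    {
        const std::array<ConfigFn, 3> table = {&ExampleHeuristic::for_f32, &ExampleHeuristic::for_f16,
                                               &ExampleHeuristic::for_int8};
        ConfigFn fn = nullptr;
        switch (dt)
        {
            case DataType::F32:  fn = table[0]; break;
            case DataType::F16:  fn = table[1]; break;
            case DataType::Int8: fn = table[2]; break;
            default: throw std::runtime_error("Data type not supported");
        }
        return (this->*fn)(m, n);
    }

private:
    // Placeholder block sizes, not tuned values.
    BlockSizes for_f32(unsigned int m, unsigned int) const { return {m >= 64 ? 5U : 2U, 4U, 4U}; }
    BlockSizes for_f16(unsigned int, unsigned int) const { return {4U, 4U, 8U}; }
    BlockSizes for_int8(unsigned int, unsigned int) const { return {4U, 2U, 16U}; }
};

int main()
{
    ExampleHeuristic h;
    const BlockSizes bs = h.configure(128, 128, DataType::F32);
    std::printf("m0=%u n0=%u k0=%u\n", bs.m0, bs.n0, bs.k0);
    return 0;
}
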
diff --git a/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp b/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp
index d74c7fac9b..2f37eef31f 100644
--- a/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp
+++ b/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp
@@ -26,6 +26,7 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/GPUTarget.h"
+
#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
#include <utility>
@@ -38,31 +39,34 @@ namespace kernels
{
namespace gemm
{
-ClGemmDefaultConfigNativeBifrost::ClGemmDefaultConfigNativeBifrost(GPUTarget gpu)
- : IClGemmKernelConfig(gpu)
+ClGemmDefaultConfigNativeBifrost::ClGemmDefaultConfigNativeBifrost(GPUTarget gpu) : IClGemmKernelConfig(gpu)
{
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost::configure(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
{
- using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (ClGemmDefaultConfigNativeBifrost::*)(unsigned int m, unsigned int n, unsigned int k,
- unsigned int b);
+ using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (
+ ClGemmDefaultConfigNativeBifrost::*)(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G71(&ClGemmDefaultConfigNativeBifrost::configure_G71_f32,
- &ClGemmDefaultConfigNativeBifrost::configure_G71_f32, // We use the F32 heuristic
- &ClGemmDefaultConfigNativeBifrost::configure_G71_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G71(
+ &ClGemmDefaultConfigNativeBifrost::configure_G71_f32,
+ &ClGemmDefaultConfigNativeBifrost::configure_G71_f32, // We use the F32 heuristic
+ &ClGemmDefaultConfigNativeBifrost::configure_G71_u8);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G76(&ClGemmDefaultConfigNativeBifrost::configure_G76_f32,
- &ClGemmDefaultConfigNativeBifrost::configure_G76_f32, // We use the F32 heuristic
- &ClGemmDefaultConfigNativeBifrost::configure_G76_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G76(
+ &ClGemmDefaultConfigNativeBifrost::configure_G76_f32,
+ &ClGemmDefaultConfigNativeBifrost::configure_G76_f32, // We use the F32 heuristic
+ &ClGemmDefaultConfigNativeBifrost::configure_G76_u8);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G7x(&ClGemmDefaultConfigNativeBifrost::configure_default_f32,
- &ClGemmDefaultConfigNativeBifrost::configure_default_f32, // We use the F32 heuristic
- &ClGemmDefaultConfigNativeBifrost::configure_default_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G7x(
+ &ClGemmDefaultConfigNativeBifrost::configure_default_f32,
+ &ClGemmDefaultConfigNativeBifrost::configure_default_f32, // We use the F32 heuristic
+ &ClGemmDefaultConfigNativeBifrost::configure_default_u8);
ConfigurationFunctionExecutorPtr func = nullptr;
- switch(_target)
+ switch (_target)
{
case GPUTarget::G76:
func = configs_G76.get_function(data_type);
@@ -79,18 +83,19 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost
return (this->*func)(m, n, k, b);
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost::configure_G71_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigNativeBifrost::configure_G71_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
- if(n < 2048)
+ if (n < 2048)
{
return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, 1, false, false, false, false);
}
- else if(n >= 2048 && n < 8192)
+ else if (n >= 2048 && n < 8192)
{
return configure_lhs_rhs_info(m, n, 1, 4, 4, 1, 1, false, false, false, false);
}
@@ -105,20 +110,21 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost::configure_G71_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigNativeBifrost::configure_G71_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(dot8_supported(CLKernelLibrary::get().get_device()))
+ if (dot8_supported(CLKernelLibrary::get().get_device()))
{
- if(m == 1)
+ if (m == 1)
{
- if(n < 2048)
+ if (n < 2048)
{
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 1, false, false, false, false);
}
- else if(n >= 2048 && n < 16384)
+ else if (n >= 2048 && n < 16384)
{
return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 1, false, false, false, false);
}
@@ -129,7 +135,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost
}
else
{
- if(m < 64)
+ if (m < 64)
{
return configure_lhs_rhs_info(m, n, 2, 2, 16, 1, 1, false, false, false, false);
}
@@ -141,9 +147,9 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost
}
else
{
- if(m == 1)
+ if (m == 1)
{
- if(n < 8192)
+ if (n < 8192)
{
return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 1, false, false, false, false);
}
@@ -159,24 +165,25 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost::configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigNativeBifrost::configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
- if(n > 4196)
+ if (n > 4196)
{
return configure_lhs_rhs_info(m, n, 1, 4, 2, 1, 1, false, false, false, false);
}
else
{
- if(k < 2048)
+ if (k < 2048)
{
return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 1, false, false, false, false);
}
- else if(k >= 2048 && k < 16384)
+ else if (k >= 2048 && k < 16384)
{
return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, 1, false, false, false, false);
}
@@ -192,18 +199,19 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost::configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigNativeBifrost::configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
- if(n < 2048)
+ if (n < 2048)
{
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 1, false, false, false, false);
}
- else if(n >= 2048 && n < 16384)
+ else if (n >= 2048 && n < 16384)
{
return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 1, false, false, false, false);
}
@@ -214,7 +222,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost
}
else
{
- if(m < 64)
+ if (m < 64)
{
return configure_lhs_rhs_info(m, n, 2, 2, 16, 1, 1, false, false, false, false);
}
@@ -225,7 +233,8 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost::configure_default_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigNativeBifrost::configure_default_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
@@ -233,7 +242,8 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost
return configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 1, false, false, false, false);
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost::configure_default_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigNativeBifrost::configure_default_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
@@ -243,4 +253,4 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeBifrost
} // namespace gemm
} // namespace kernels
} // namespace opencl
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.h b/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.h
index 9af5dc4135..f822daae53 100644
--- a/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.h
+++ b/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.h
@@ -45,15 +45,22 @@ public:
ClGemmDefaultConfigNativeBifrost(GPUTarget gpu);
// Inherited overridden method
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
private:
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G71_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G71_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_default_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_default_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G71_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G71_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_default_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_default_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
};
} // namespace gemm
} // namespace kernels
diff --git a/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.cpp b/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.cpp
index b9f36c7210..f87fb1b659 100644
--- a/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.cpp
+++ b/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.cpp
@@ -26,6 +26,7 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/GPUTarget.h"
+
#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
#include <utility>
@@ -38,18 +39,17 @@ namespace kernels
{
namespace gemm
{
-ClGemmDefaultConfigNativeMidgard::ClGemmDefaultConfigNativeMidgard(GPUTarget gpu)
- : IClGemmKernelConfig(gpu)
+ClGemmDefaultConfigNativeMidgard::ClGemmDefaultConfigNativeMidgard(GPUTarget gpu) : IClGemmKernelConfig(gpu)
{
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeMidgard::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeMidgard::configure(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
{
- using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (ClGemmDefaultConfigNativeMidgard::*)(unsigned int m, unsigned int n, unsigned int k,
- unsigned int b);
+ using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (
+ ClGemmDefaultConfigNativeMidgard::*)(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_default(nullptr,
- nullptr,
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_default(nullptr, nullptr,
&ClGemmDefaultConfigNativeMidgard::default_q8);
auto func = configs_default.get_function(data_type);
@@ -57,7 +57,8 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeMidgard
return (this->*func)(m, n, k, b);
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeMidgard::default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigNativeMidgard::default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
@@ -70,4 +71,4 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeMidgard
} // namespace gemm
} // namespace kernels
} // namespace opencl
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.h b/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.h
index c055753c48..fa76c5dba7 100644
--- a/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.h
+++ b/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.h
@@ -45,10 +45,12 @@ public:
ClGemmDefaultConfigNativeMidgard(GPUTarget gpu);
// Inherited overridden method
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
private:
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
};
} // namespace gemm
} // namespace kernels
diff --git a/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.cpp b/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.cpp
index 95a4d2bd69..97a1298b0a 100644
--- a/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.cpp
+++ b/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.cpp
@@ -26,6 +26,7 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/GPUTarget.h"
+
#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
#include <utility>
@@ -38,37 +39,38 @@ namespace kernels
{
namespace gemm
{
-ClGemmDefaultConfigNativeValhall::ClGemmDefaultConfigNativeValhall(GPUTarget gpu)
- : IClGemmKernelConfig(gpu)
+ClGemmDefaultConfigNativeValhall::ClGemmDefaultConfigNativeValhall(GPUTarget gpu) : IClGemmKernelConfig(gpu)
{
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeValhall::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeValhall::configure(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
{
- using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (ClGemmDefaultConfigNativeValhall::*)(unsigned int m, unsigned int n, unsigned int k,
- unsigned int b);
+ using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (
+ ClGemmDefaultConfigNativeValhall::*)(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_default(&ClGemmDefaultConfigNativeValhall::configure_G77_f32,
- &ClGemmDefaultConfigNativeValhall::configure_G77_f16,
- &ClGemmDefaultConfigNativeValhall::configure_G77_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_default(
+ &ClGemmDefaultConfigNativeValhall::configure_G77_f32, &ClGemmDefaultConfigNativeValhall::configure_G77_f16,
+ &ClGemmDefaultConfigNativeValhall::configure_G77_u8);
auto func = configs_default.get_function(data_type);
ARM_COMPUTE_ERROR_ON_MSG(func == nullptr, "Data type not support for GEMM");
return (this->*func)(m, n, k, b);
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeValhall::configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigNativeValhall::configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
- if(n < 2048)
+ if (n < 2048)
{
return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, 1, false, false, false, false);
}
- else if(n >= 2048 && n < 8192)
+ else if (n >= 2048 && n < 8192)
{
return configure_lhs_rhs_info(m, n, 1, 4, 4, 1, 1, false, false, false, false);
}
@@ -83,18 +85,19 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeValhall
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeValhall::configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigNativeValhall::configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
- if(n < 2048)
+ if (n < 2048)
{
return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, 1, false, false, false, false);
}
- else if(n >= 2048 && n < 8192)
+ else if (n >= 2048 && n < 8192)
{
return configure_lhs_rhs_info(m, n, 1, 4, 4, 1, 1, false, false, false, false);
}
@@ -109,20 +112,21 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeValhall
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeValhall::configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigNativeValhall::configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(dot8_supported(CLKernelLibrary::get().get_device()))
+ if (dot8_supported(CLKernelLibrary::get().get_device()))
{
- if(m == 1)
+ if (m == 1)
{
- if(n < 2048)
+ if (n < 2048)
{
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 1, false, false, false, false);
}
- else if(n >= 2048 && n < 16384)
+ else if (n >= 2048 && n < 16384)
{
return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 1, false, false, false, false);
}
@@ -133,7 +137,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeValhall
}
else
{
- if(m < 64)
+ if (m < 64)
{
return configure_lhs_rhs_info(m, n, 2, 2, 16, 1, 1, false, false, false, false);
}
@@ -145,9 +149,9 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeValhall
}
else
{
- if(m == 1)
+ if (m == 1)
{
- if(n < 8192)
+ if (n < 8192)
{
return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 1, false, false, false, false);
}
@@ -165,4 +169,4 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigNativeValhall
} // namespace gemm
} // namespace kernels
} // namespace opencl
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.h b/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.h
index f0f812fd46..c91b095279 100644
--- a/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.h
+++ b/src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.h
@@ -45,12 +45,16 @@ public:
ClGemmDefaultConfigNativeValhall(GPUTarget gpu);
// Inherited overridden method
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
private:
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
};
} // namespace gemm
} // namespace kernels
diff --git a/src/gpu/cl/kernels/gemm/native/ClGemmNativeKernelConfig.h b/src/gpu/cl/kernels/gemm/native/ClGemmNativeKernelConfig.h
index cf8412830b..955bb3c01a 100644
--- a/src/gpu/cl/kernels/gemm/native/ClGemmNativeKernelConfig.h
+++ b/src/gpu/cl/kernels/gemm/native/ClGemmNativeKernelConfig.h
@@ -51,7 +51,7 @@ public:
*/
static std::unique_ptr<IClGemmKernelConfig> create(GPUTarget gpu)
{
- switch(get_arch_from_target(gpu))
+ switch (get_arch_from_target(gpu))
{
case GPUTarget::MIDGARD:
return std::make_unique<ClGemmDefaultConfigNativeMidgard>(gpu);
diff --git a/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.cpp b/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.cpp
index 657018eb53..c956c347ef 100644
--- a/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.cpp
+++ b/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.cpp
@@ -29,6 +29,7 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
#include <utility>
@@ -43,30 +44,31 @@ namespace gemm
{
using namespace arm_compute::misc::shape_calculator;
-ClGemmDefaultConfigReshapedBifrost::ClGemmDefaultConfigReshapedBifrost(GPUTarget gpu)
- : IClGemmKernelConfig(gpu)
+ClGemmDefaultConfigReshapedBifrost::ClGemmDefaultConfigReshapedBifrost(GPUTarget gpu) : IClGemmKernelConfig(gpu)
{
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifrost::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifrost::configure(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
{
- using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (ClGemmDefaultConfigReshapedBifrost::*)(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (
+ ClGemmDefaultConfigReshapedBifrost::*)(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G7x(&ClGemmDefaultConfigReshapedBifrost::configure_G7x_f32,
- &ClGemmDefaultConfigReshapedBifrost::configure_G7x_f16,
- &ClGemmDefaultConfigReshapedBifrost::configure_G7x_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G7x(
+ &ClGemmDefaultConfigReshapedBifrost::configure_G7x_f32, &ClGemmDefaultConfigReshapedBifrost::configure_G7x_f16,
+ &ClGemmDefaultConfigReshapedBifrost::configure_G7x_u8);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G52(&ClGemmDefaultConfigReshapedBifrost::configure_G52_f32,
- &ClGemmDefaultConfigReshapedBifrost::configure_G52_f16,
- &ClGemmDefaultConfigReshapedBifrost::configure_G7x_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G52(
+ &ClGemmDefaultConfigReshapedBifrost::configure_G52_f32, &ClGemmDefaultConfigReshapedBifrost::configure_G52_f16,
+ &ClGemmDefaultConfigReshapedBifrost::configure_G7x_u8);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G76(&ClGemmDefaultConfigReshapedBifrost::configure_G76_f32,
- &ClGemmDefaultConfigReshapedBifrost::configure_G76_f16,
- &ClGemmDefaultConfigReshapedBifrost::configure_G76_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G76(
+ &ClGemmDefaultConfigReshapedBifrost::configure_G76_f32, &ClGemmDefaultConfigReshapedBifrost::configure_G76_f16,
+ &ClGemmDefaultConfigReshapedBifrost::configure_G76_u8);
ConfigurationFunctionExecutorPtr func = nullptr;
- switch(_target)
+ switch (_target)
{
case GPUTarget::G76:
func = configs_G76.get_function(data_type);
@@ -83,12 +85,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifro
return (this->*func)(m, n, k, b);
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifrost::configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedBifrost::configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(n <= 4)
+ if (n <= 4)
{
return configure_lhs_rhs_info(m, n, 4, 2, 8, 16, 16, true, false, false, true);
}
@@ -98,12 +101,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifro
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifrost::configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedBifrost::configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(n <= 4)
+ if (n <= 4)
{
return configure_lhs_rhs_info(m, n, 4, 2, 8, 8, 2, true, true, true, false);
}
@@ -113,14 +117,15 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifro
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifrost::configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedBifrost::configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(dot8_supported(CLKernelLibrary::get().get_device()))
+ if (dot8_supported(CLKernelLibrary::get().get_device()))
{
- if(n <= 4)
+ if (n <= 4)
{
return configure_lhs_rhs_info(m, n, 4, 2, 16, 2, 2, true, false, false, true);
}
@@ -131,7 +136,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifro
}
else
{
- if(n <= 4)
+ if (n <= 4)
{
return configure_lhs_rhs_info(m, n, 4, 2, 8, 2, 2, true, false, false, true);
}
@@ -142,7 +147,8 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifro
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifrost::configure_G52_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedBifrost::configure_G52_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
const float r_mn = static_cast<float>(m) / static_cast<float>(n);
const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
@@ -154,100 +160,108 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifro
GEMMLHSMatrixInfo lhs_info_img;
GEMMRHSMatrixInfo rhs_info_img;
- if(workload <= 274.4000f)
+ if (workload <= 274.4000f)
{
- if(r_nk <= 0.7461f)
+ if (r_nk <= 0.7461f)
{
- if(r_mn <= 21.1667f)
+ if (r_mn <= 21.1667f)
{
return configure_lhs_rhs_info(m, n, 4, 2, 4, 4, 4, false, true, true, false, false);
}
else
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, true);
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, false);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, false);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
}
else
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, true);
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, false);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, false);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
}
else
{
- if(r_mk <= 17.3926f)
+ if (r_mk <= 17.3926f)
{
- if(workload <= 542.4000f)
+ if (workload <= 542.4000f)
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, true);
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, false);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, false);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
else
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, true, true, false, true, true);
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, true, true, false, true, false);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, true, true, false, true, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, true, true, false, true, false);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
}
else
{
- if(r_nk <= 0.5463f)
+ if (r_nk <= 0.5463f)
{
- if(workload <= 11767.6001f)
+ if (workload <= 11767.6001f)
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, true);
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, false);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, false);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
else
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, true, true, false, true, true);
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, true, true, false, true, false);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, true, true, false, true, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, true, true, false, true, false);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
}
else
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, true);
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, false);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, true, true, false, true, false);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
}
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifrost::configure_G52_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedBifrost::configure_G52_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
- if(workload <= 323.4000f)
+ if (workload <= 323.4000f)
{
return configure_lhs_rhs_info(m, n, 2, 2, 8, 4, 8, false, false, false, true, false);
}
@@ -257,7 +271,8 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifro
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifrost::configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedBifrost::configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
@@ -268,7 +283,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifro
GEMMRHSMatrixInfo rhs_info_img;
// Get lhs_info/rhs_info in case of OpenCL buffer
- if(n <= 4)
+ if (n <= 4)
{
std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 2, 8, 16, 16, true, false, false, true);
}
@@ -279,15 +294,17 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifro
// Get lhs_info/rhs_info in case of OpenCL image
// Condition on the GPU workload
- if((m / 4) * (n / 4) >= 2560)
+ if ((m / 4) * (n / 4) >= 2560)
{
// Big workload
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 8, true, true, true, false, true);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 8, true, true, true, false, true);
}
else
{
// Small workload
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 2, 4, 4, 1, 1, true, true, true, false, true);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 2, 4, 4, 1, 1, true, true, true, false, true);
}
const TensorInfo tensor_rhs_info(TensorShape(n, k, b), 1, DataType::F32);
@@ -297,7 +314,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifro
// In case of vector by matrix with few work-items, we use the OpenCL buffer rather than the OpenCL image2d
const bool use_cl_image2d = (n <= 4) ? false : true;
- if(bool(validate_image2d_support_on_rhs(tensor_reshaped_info, rhs_info_img)) && use_cl_image2d)
+ if (bool(validate_image2d_support_on_rhs(tensor_reshaped_info, rhs_info_img)) && use_cl_image2d)
{
return std::make_pair(lhs_info_img, rhs_info_img);
}
@@ -307,16 +324,17 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifro
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifrost::configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedBifrost::configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
const float r_mk = static_cast<float>(m) / static_cast<float>(k);
- if(workload <= 1595.2000f)
+ if (workload <= 1595.2000f)
{
- if(r_mk <= 2.1044f)
+ if (r_mk <= 2.1044f)
{
- if(workload <= 870.4000f)
+ if (workload <= 870.4000f)
{
return configure_lhs_rhs_info(m, n, 2, 4, 4, 1, 2, true, false, true, false, false);
}
@@ -336,12 +354,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifro
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedBifrost::configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedBifrost::configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(n <= 4)
+ if (n <= 4)
{
return configure_lhs_rhs_info(m, n, 4, 2, 16, 4, 1, false, false, false, true);
}
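
Several of the reshaped heuristics above (e.g. configure_G76_f32) follow a two-candidate pattern: build one LHS/RHS configuration with the RHS exported to a cl_image and one plain buffer configuration, then keep the image variant only if the device and shape checks in validate_image2d_support_on_rhs() pass, falling back to the buffer otherwise. A condensed standalone sketch of that decision — Candidate and image2d_candidate_is_valid are illustrative stand-ins, not library types:

#include <cstdio>

// Hypothetical stand-in for the image/buffer LHS-RHS candidates produced by
// configure_lhs_rhs_info(..., export_to_cl_image = true/false).
struct Candidate
{
    unsigned int m0, n0, k0;
    bool         export_to_cl_image;
};

// Placeholder for validate_image2d_support_on_rhs(): in the library this checks
// n0/k0, the data type, the cl_khr_image2d_from_buffer extension and the maximum
// image width/height of the reshaped RHS tensor.
static bool image2d_candidate_is_valid(const Candidate &img, unsigned int n, unsigned int k, unsigned int b)
{
    (void)k;
    (void)b;
    // For illustration only: require n0 in {4, 8, 16} and skip the image path
    // for vector-by-matrix cases, as the G76 F32 heuristic above does for n <= 4.
    const bool n0_ok = (img.n0 == 4) || (img.n0 == 8) || (img.n0 == 16);
    return img.export_to_cl_image && n0_ok && (n > 4);
}

// Mirror of the select_lhs_rhs_info() pattern: prefer the image candidate,
// fall back to the buffer candidate otherwise.
static Candidate select_candidate(const Candidate &img, const Candidate &buf,
                                  unsigned int n, unsigned int k, unsigned int b)
{
    return image2d_candidate_is_valid(img, n, k, b) ? img : buf;
}

int main()
{
    const Candidate img{4, 4, 4, true};
    const Candidate buf{5, 4, 4, false};
    const Candidate chosen = select_candidate(img, buf, /*n=*/256, /*k=*/512, /*b=*/1);
    std::printf("export_to_cl_image = %s\n", chosen.export_to_cl_image ? "true" : "false");
    return 0;
}
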
diff --git a/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.h b/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.h
index d86d1ba0a7..9227ec2551 100644
--- a/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.h
+++ b/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.h
@@ -45,17 +45,26 @@ public:
ClGemmDefaultConfigReshapedBifrost(GPUTarget gpu);
// Inherited overridden method
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
private:
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G52_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G52_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G52_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G52_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
};
} // namespace gemm
} // namespace kernels
diff --git a/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.cpp b/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.cpp
index 58d0873b86..70b324eb5a 100644
--- a/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.cpp
+++ b/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.cpp
@@ -26,6 +26,7 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/GPUTarget.h"
+
#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
#include <utility>
@@ -38,26 +39,27 @@ namespace kernels
{
namespace gemm
{
-ClGemmDefaultConfigReshapedValhall::ClGemmDefaultConfigReshapedValhall(GPUTarget gpu)
- : IClGemmKernelConfig(gpu)
+ClGemmDefaultConfigReshapedValhall::ClGemmDefaultConfigReshapedValhall(GPUTarget gpu) : IClGemmKernelConfig(gpu)
{
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValhall::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValhall::configure(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
{
- using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (ClGemmDefaultConfigReshapedValhall::*)(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (
+ ClGemmDefaultConfigReshapedValhall::*)(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G77(&ClGemmDefaultConfigReshapedValhall::configure_G77_f32,
- &ClGemmDefaultConfigReshapedValhall::configure_G77_f16,
- &ClGemmDefaultConfigReshapedValhall::configure_G77_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G77(
+ &ClGemmDefaultConfigReshapedValhall::configure_G77_f32, &ClGemmDefaultConfigReshapedValhall::configure_G77_f16,
+ &ClGemmDefaultConfigReshapedValhall::configure_G77_u8);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G78(&ClGemmDefaultConfigReshapedValhall::configure_G78_f32,
- &ClGemmDefaultConfigReshapedValhall::configure_G78_f16,
- &ClGemmDefaultConfigReshapedValhall::configure_G77_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G78(
+ &ClGemmDefaultConfigReshapedValhall::configure_G78_f32, &ClGemmDefaultConfigReshapedValhall::configure_G78_f16,
+ &ClGemmDefaultConfigReshapedValhall::configure_G77_u8);
ConfigurationFunctionExecutorPtr func = nullptr;
- switch(_target)
+ switch (_target)
{
case GPUTarget::G78:
func = configs_G78.get_function(data_type);
@@ -72,12 +74,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
return (this->*func)(m, n, k, b);
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValhall::configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedValhall::configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(n <= 4)
+ if (n <= 4)
{
return configure_lhs_rhs_info(m, n, 4, 2, 8, 16, 16, 1, 0, 0, 1);
}
@@ -87,7 +90,8 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValhall::configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedValhall::configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
@@ -104,17 +108,17 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, 0, 0, 1, 0, 0);
- if(r_mk <= 0.11824845522642136)
+ if (r_mk <= 0.11824845522642136)
{
- if(workload <= 880.0)
+ if (workload <= 880.0)
{
return configure_lhs_rhs_info(m, n, 2, 4, 4, 1, 4, 0, 0, 1, 0, 0);
}
else
{
- if(r_nk <= 0.42521367967128754)
+ if (r_nk <= 0.42521367967128754)
{
- if(workload <= 1726.4000244140625)
+ if (workload <= 1726.4000244140625)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, 0, 0, 1, 0, 0);
}
@@ -123,13 +127,12 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, 0, 1, 1, 0, 1);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
}
else
{
- if(workload <= 1241.6000366210938)
+ if (workload <= 1241.6000366210938)
{
return configure_lhs_rhs_info(m, n, 2, 4, 4, 1, 4, 0, 0, 1, 0, 0);
}
@@ -142,17 +145,16 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(workload <= 11404.7998046875)
+ if (workload <= 11404.7998046875)
{
- if(r_mk <= 1.0126488208770752)
+ if (r_mk <= 1.0126488208770752)
{
- if(r_mn <= 2.545312523841858)
+ if (r_mn <= 2.545312523841858)
{
std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, 0, 1, 1, 0, 1);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
else
{
@@ -161,43 +163,39 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(workload <= 2881.199951171875)
+ if (workload <= 2881.199951171875)
{
std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 2, 0, 0, 1, 0, 1);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
else
{
std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, 0, 1, 1, 0, 1);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
}
}
else
{
- if(r_nk <= 0.5765306055545807)
+ if (r_nk <= 0.5765306055545807)
{
- if(r_mn <= 6.010416746139526)
+ if (r_mn <= 6.010416746139526)
{
std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, 0, 1, 1, 0, 1);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
else
{
std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, 1, 0, 1, 0, 1);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
}
else
@@ -205,27 +203,27 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 1, 1, 0, 1, 0, 1);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
}
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValhall::configure_G78_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedValhall::configure_G78_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
const float r_mn = static_cast<float>(m) / static_cast<float>(n);
const float r_mk = static_cast<float>(m) / static_cast<float>(k);
const float r_nk = static_cast<float>(n) / static_cast<float>(k);
const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
- if(workload <= 1288.0000f)
+ if (workload <= 1288.0000f)
{
- if(workload <= 505.6000f)
+ if (workload <= 505.6000f)
{
- if(r_mn <= 0.4466f)
+ if (r_mn <= 0.4466f)
{
- if(r_nk <= 0.2384f)
+ if (r_nk <= 0.2384f)
{
return configure_lhs_rhs_info(m, n, 2, 4, 8, 4, 4, 0, 0, 1, 0, 1);
}
@@ -241,9 +239,9 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(r_mn <= 0.2250f)
+ if (r_mn <= 0.2250f)
{
- if(r_mn <= 0.1599f)
+ if (r_mn <= 0.1599f)
{
return configure_lhs_rhs_info(m, n, 2, 4, 8, 4, 4, 0, 0, 1, 0, 1);
}
@@ -254,11 +252,11 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(r_mk <= 0.7609f)
+ if (r_mk <= 0.7609f)
{
- if(r_mn <= 2.5453f)
+ if (r_mn <= 2.5453f)
{
- if(workload <= 1089.6000f)
+ if (workload <= 1089.6000f)
{
return configure_lhs_rhs_info(m, n, 2, 4, 8, 4, 4, 0, 0, 1, 0, 1);
}
@@ -281,29 +279,29 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(workload <= 5434.4001f)
+ if (workload <= 5434.4001f)
{
- if(workload <= 1603.2000f)
+ if (workload <= 1603.2000f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, 0, 0, 1, 0, 1);
}
else
{
- if(r_nk <= 0.6192f)
+ if (r_nk <= 0.6192f)
{
- if(r_mn <= 16.1016f)
+ if (r_mn <= 16.1016f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, 0, 0, 1, 0, 1);
}
else
{
- if(workload <= 2750.0000f)
+ if (workload <= 2750.0000f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, 0, 0, 1, 0, 1);
}
else
{
- if(r_mk <= 6.3151f)
+ if (r_mk <= 6.3151f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, 0, 0, 0, 1, 1);
}
@@ -316,15 +314,15 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(r_mk <= 0.0387f)
+ if (r_mk <= 0.0387f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, 0, 0, 1, 0, 1);
}
else
{
- if(r_mk <= 2.5859f)
+ if (r_mk <= 2.5859f)
{
- if(r_mk <= 0.2734f)
+ if (r_mk <= 0.2734f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, 0, 0, 1, 0, 1);
}
@@ -343,13 +341,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(r_mk <= 25.7500f)
+ if (r_mk <= 25.7500f)
{
- if(r_mk <= 0.3615f)
+ if (r_mk <= 0.3615f)
{
- if(r_mn <= 0.0913f)
+ if (r_mn <= 0.0913f)
{
- if(r_mk <= 0.0683f)
+ if (r_mk <= 0.0683f)
{
return configure_lhs_rhs_info(m, n, 8, 4, 4, 4, 2, 0, 0, 1, 0, 1);
}
@@ -365,15 +363,15 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(workload <= 11174.3999f)
+ if (workload <= 11174.3999f)
{
- if(r_mk <= 0.8047f)
+ if (r_mk <= 0.8047f)
{
return configure_lhs_rhs_info(m, n, 8, 4, 4, 2, 2, 0, 0, 1, 0, 1);
}
else
{
- if(workload <= 7185.5999f)
+ if (workload <= 7185.5999f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, 0, 0, 1, 0, 1);
}
@@ -385,9 +383,9 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(workload <= 17917.5000f)
+ if (workload <= 17917.5000f)
{
- if(r_mk <= 1.5078f)
+ if (r_mk <= 1.5078f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, 0, 0, 1, 0, 1);
}
@@ -398,7 +396,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(workload <= 34449.6016f)
+ if (workload <= 34449.6016f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, 0, 0, 1, 0, 1);
}
@@ -412,11 +410,11 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(r_mk <= 331.1111f)
+ if (r_mk <= 331.1111f)
{
- if(workload <= 53397.5996f)
+ if (workload <= 53397.5996f)
{
- if(r_mn <= 57.8063f)
+ if (r_mn <= 57.8063f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 2, 2, 0, 0, 1, 0, 1);
}
@@ -427,7 +425,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(r_nk <= 0.9211f)
+ if (r_nk <= 0.9211f)
{
return configure_lhs_rhs_info(m, n, 8, 4, 4, 4, 2, 0, 0, 1, 0, 1);
}
@@ -439,7 +437,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(workload <= 38070.4004f)
+ if (workload <= 38070.4004f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 4, 4, 0, 0, 0, 1, 1);
}
@@ -453,27 +451,28 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValhall::configure_G78_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedValhall::configure_G78_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
const float r_mn = static_cast<float>(m) / static_cast<float>(n);
const float r_nk = static_cast<float>(n) / static_cast<float>(k);
const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
- if(workload <= 801.6000f)
+ if (workload <= 801.6000f)
{
return configure_lhs_rhs_info(m, n, 8, 4, 4, 1, 1, 0, 0, 1, 0, 1);
}
else
{
- if(r_mn <= 0.1211f)
+ if (r_mn <= 0.1211f)
{
- if(workload <= 3296.0000f)
+ if (workload <= 3296.0000f)
{
return configure_lhs_rhs_info(m, n, 8, 4, 4, 2, 2, 0, 0, 1, 0, 1);
}
else
{
- if(r_nk <= 1.0625f)
+ if (r_nk <= 1.0625f)
{
return configure_lhs_rhs_info(m, n, 8, 4, 4, 2, 2, 0, 0, 1, 0, 1);
}
@@ -485,15 +484,15 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(workload <= 5068.8000f)
+ if (workload <= 5068.8000f)
{
return configure_lhs_rhs_info(m, n, 8, 4, 4, 1, 1, 0, 0, 1, 0, 1);
}
else
{
- if(r_nk <= 0.2361f)
+ if (r_nk <= 0.2361f)
{
- if(workload <= 12630.0000f)
+ if (workload <= 12630.0000f)
{
return configure_lhs_rhs_info(m, n, 8, 4, 4, 1, 1, 0, 0, 1, 0, 1);
}
@@ -504,7 +503,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
else
{
- if(workload <= 178790.3984f)
+ if (workload <= 178790.3984f)
{
return configure_lhs_rhs_info(m, n, 8, 4, 4, 2, 2, 0, 0, 1, 0, 1);
}
@@ -518,12 +517,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValha
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedValhall::configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ClGemmDefaultConfigReshapedValhall::configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(n <= 4)
+ if (n <= 4)
{
return configure_lhs_rhs_info(m, n, 4, 2, 16, 4, 1, 0, 0, 0, 1);
}
diff --git a/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.h b/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.h
index 466eda00a6..5f62efb59e 100644
--- a/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.h
+++ b/src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.h
@@ -45,14 +45,20 @@ public:
ClGemmDefaultConfigReshapedValhall(GPUTarget gpu);
// Inherited overridden method
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
private:
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G78_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G78_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G78_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G78_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
};
} // namespace gemm
} // namespace kernels
diff --git a/src/gpu/cl/kernels/gemm/reshaped/ClGemmReshapedKernelConfig.h b/src/gpu/cl/kernels/gemm/reshaped/ClGemmReshapedKernelConfig.h
index 1c32f1358b..83928b3f4f 100644
--- a/src/gpu/cl/kernels/gemm/reshaped/ClGemmReshapedKernelConfig.h
+++ b/src/gpu/cl/kernels/gemm/reshaped/ClGemmReshapedKernelConfig.h
@@ -50,7 +50,7 @@ public:
*/
static std::unique_ptr<IClGemmKernelConfig> create(GPUTarget gpu)
{
- switch(get_arch_from_target(gpu))
+ switch (get_arch_from_target(gpu))
{
case GPUTarget::MIDGARD:
case GPUTarget::BIFROST:
diff --git a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.cpp b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.cpp
index 9c23d9c998..c4825bfbeb 100644
--- a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.cpp
+++ b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.cpp
@@ -29,7 +29,9 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
+
#include <utility>
namespace arm_compute
@@ -47,33 +49,39 @@ ClGemmDefaultConfigReshapedRhsOnlyBifrost::ClGemmDefaultConfigReshapedRhsOnlyBif
{
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
{
- using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (ClGemmDefaultConfigReshapedRhsOnlyBifrost::*)(unsigned int m, unsigned int n, unsigned int k,
- unsigned int b);
-
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G51(&ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_f32,
- &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_f16,
- &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_u8);
-
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G52(&ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G52_f32,
- &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G52_f16,
- &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_u8);
-
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G31(&ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f32,
- &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f16,
- &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G31_u8);
-
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G76(&ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_f32,
- &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_f16,
- &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_u8);
-
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G7x(&ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f32,
- &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f16,
- &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_u8);
+ using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (
+ ClGemmDefaultConfigReshapedRhsOnlyBifrost::*)(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G51(
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_f32,
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_f16,
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_u8);
+
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G52(
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G52_f32,
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G52_f16,
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_u8);
+
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G31(
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f32,
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f16,
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G31_u8);
+
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G76(
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_f32,
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_f16,
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_u8);
+
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G7x(
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f32,
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f16,
+ &ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_u8);
ConfigurationFunctionExecutorPtr func = nullptr;
- switch(_target)
+ switch (_target)
{
case GPUTarget::G76:
func = configs_G76.get_function(data_type);
@@ -96,14 +104,15 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
return (this->*func)(m, n, k, b);
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f32(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
- if(n <= 2548)
+ if (n <= 2548)
{
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 4, false, true, false, true, false);
}
@@ -118,12 +127,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G31_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G31_u8(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
const unsigned int h0 = std::max(n / 2, 1U);
return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, h0, 0, 1, 0, 1);
@@ -131,7 +141,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
else
{
const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(256)), static_cast<int>(1));
- if(m >= 28)
+ if (m >= 28)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, h0, 0, 1, 0, 1);
}
@@ -142,7 +152,8 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_f32(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
@@ -154,9 +165,9 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
const bool is_workload_big = ((m * n * b) / 16) >= 2048;
- if(m == 1)
+ if (m == 1)
{
- if(n >= 8192)
+ if (n >= 8192)
{
const unsigned int h0 = std::max(n / 4, 1U);
return configure_lhs_rhs_info(m, n, 1, 4, 8, 1, h0, false, true, false, true, false);
@@ -164,7 +175,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
else
{
const unsigned int h0 = std::max(n / 2, 1U);
- if(n <= 204)
+ if (n <= 204)
{
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, false, true, false, true, false);
}
@@ -177,25 +188,29 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
else
{
const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(16)), static_cast<int>(1));
- if(is_workload_big)
+ if (is_workload_big)
{
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, h0, false, true, false, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 1, h0, false, true, false, true);
}
else
{
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 2, 4, 8, 1, h0, false, true, false, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 2, 4, 8, 1, h0, false, true, false, true);
}
}
// Get lhs_info/rhs_info in case of OpenCL image
const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(16)), static_cast<int>(1));
- if(is_workload_big)
+ if (is_workload_big)
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, h0, false, true, false, false, true);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 1, h0, false, true, false, false, true);
}
else
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 2, 4, 8, 1, h0, false, true, false, true, true);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 2, 4, 8, 1, h0, false, true, false, true, true);
}
const TensorInfo tensor_rhs_info(TensorShape(n, k, b), 1, DataType::F32);
@@ -205,7 +220,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
// In case of vector by matrix or small workloads, we use the OpenCL buffer rather than the OpenCL image2d
const bool use_cl_image2d = ((m == 1) || ((((m * n * b) / 16) < 2048) && n < 128)) ? false : true;
- if(bool(validate_image2d_support_on_rhs(tensor_reshaped_info, rhs_info_img)) && use_cl_image2d)
+ if (bool(validate_image2d_support_on_rhs(tensor_reshaped_info, rhs_info_img)) && use_cl_image2d)
{
return std::make_pair(lhs_info_img, rhs_info_img);
}
@@ -215,7 +230,8 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G52_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G52_f32(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
const float r_nk = static_cast<float>(n) / static_cast<float>(k);
@@ -225,46 +241,49 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
GEMMLHSMatrixInfo lhs_info_img;
GEMMRHSMatrixInfo rhs_info_img;
- if(m == 1)
+ if (m == 1)
{
- if(r_nk <= 0.4664f)
+ if (r_nk <= 0.4664f)
{
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 16, false, true, false, true, false);
}
else
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 1, 4, 8, 1, 16, false, true, false, true, true);
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 1, 4, 8, 1, 16, false, true, false, true, false);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 1, 4, 8, 1, 16, false, true, false, true, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 1, 4, 8, 1, 16, false, true, false, true, false);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
}
else
{
- if(workload <= 274.4000f)
+ if (workload <= 274.4000f)
{
return configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 16, false, false, false, true, false);
}
else
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, false, false, true, true);
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, false, false, true, false);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, false, false, true, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, false, false, true, false);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_f32(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
const unsigned int n0 = n < 1280 ? 2 : 4;
const unsigned int h0 = std::max(n / n0, 1U);
@@ -276,14 +295,15 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_f16(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
- if(n > 2048)
+ if (n > 2048)
{
const unsigned int h0 = std::max(n / 4, 1U);
return configure_lhs_rhs_info(m, n, 1, 4, 4, 1, h0, false, true, false, true);
@@ -300,7 +320,8 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G52_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G52_f16(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
const float r_mn = static_cast<float>(m) / static_cast<float>(n);
const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
@@ -312,57 +333,59 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
GEMMLHSMatrixInfo lhs_info_img;
GEMMRHSMatrixInfo rhs_info_img;
- if(m == 1)
+ if (m == 1)
{
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, false, true, false, false, false);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, false, true, false, false, false);
- if(r_mk <= 0.0026f)
+ if (r_mk <= 0.0026f)
{
- if(r_nk <= 0.4664f)
+ if (r_nk <= 0.4664f)
{
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 32, false, true, false, true, false);
}
else
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, false, true, false, false, true);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, false, true, false, false, true);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
}
else
{
- if(r_mk <= 0.0148f)
+ if (r_mk <= 0.0148f)
{
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 32, false, true, false, true, false);
}
else
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, false, true, false, false, true);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 1, 4, 16, 1, 16, false, true, false, false, true);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
}
}
else
{
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 5, 8, 4, 1, 2, false, false, false, false, false);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 5, 8, 4, 1, 2, false, false, false, false, false);
- if(workload <= 362.6000f)
+ if (workload <= 362.6000f)
{
return configure_lhs_rhs_info(m, n, 2, 2, 8, 1, 16, false, false, false, true, false);
}
else
{
- if(r_mn <= 22.6067f)
+ if (r_mn <= 22.6067f)
{
- if(workload <= 708.8000f)
+ if (workload <= 708.8000f)
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, false, false, false, true);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, false, false, false, true);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
else
{
@@ -371,27 +394,28 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
else
{
- if(r_nk <= 0.0917f)
+ if (r_nk <= 0.0917f)
{
return configure_lhs_rhs_info(m, n, 2, 2, 8, 1, 16, false, false, false, true, false);
}
else
{
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, false, false, false, true);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, false, false, false, true);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
}
}
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_f16(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
- if(m == 1)
+ if (m == 1)
{
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 32, false, true, false, true, false);
}
@@ -400,15 +424,15 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
const float r_mn = static_cast<float>(m) / static_cast<float>(n);
const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
- if(workload <= 7449.60f)
+ if (workload <= 7449.60f)
{
- if(workload <= 691.60f)
+ if (workload <= 691.60f)
{
return configure_lhs_rhs_info(m, n, 2, 2, 8, 1, 8, false, false, false, false, false);
}
else
{
- if(workload <= 4155.20f)
+ if (workload <= 4155.20f)
{
return configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
}
@@ -420,21 +444,22 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
else
{
- if(workload <= 16300.80f)
+ if (workload <= 16300.80f)
{
- if(r_mn <= 44.56f)
+ if (r_mn <= 44.56f)
{
GEMMLHSMatrixInfo lhs_info_buf;
GEMMRHSMatrixInfo rhs_info_buf;
GEMMLHSMatrixInfo lhs_info_img;
GEMMRHSMatrixInfo rhs_info_img;
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 8, 4, 4, 1, 1, false, true, false, false, true);
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 8, 4, 4, 1, 1, false, true, false, false, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
else
{
@@ -448,23 +473,25 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
GEMMLHSMatrixInfo lhs_info_img;
GEMMRHSMatrixInfo rhs_info_img;
- std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, true, false, false, true);
- std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
+ std::tie(lhs_info_img, rhs_info_img) =
+ configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 2, false, true, false, false, true);
+ std::tie(lhs_info_buf, rhs_info_buf) =
+ configure_lhs_rhs_info(m, n, 5, 2, 8, 1, 16, false, false, false, false, false);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F16);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F16);
}
}
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_f16(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
const unsigned int n0 = n < 1280 ? 2 : 4;
const unsigned int h0 = std::max(n / n0, 1U);
@@ -476,14 +503,15 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G7x_u8(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(dot8_supported(CLKernelLibrary::get().get_device()))
+ if (dot8_supported(CLKernelLibrary::get().get_device()))
{
- if(m == 1)
+ if (m == 1)
{
const unsigned int h0 = std::max(n / 2, 1U);
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, false, true, false, true);
@@ -497,7 +525,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
else
{
const int h0 = std::max(std::min(static_cast<int>(n / 2), static_cast<int>(128)), static_cast<int>(1));
- if(m == 1)
+ if (m == 1)
{
return configure_lhs_rhs_info(m, n, 1, 2, 4, 1, h0, false, true, false, true);
}
@@ -508,12 +536,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G76_u8(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
const unsigned int h0 = std::max(n / 2, 1U);
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, h0, false, true, false, true);
@@ -524,12 +553,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyBifrost::configure_G51_u8(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
const unsigned int h0 = std::max(n / 2, 1U);
return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, h0, false, true, false, true);
diff --git a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.h b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.h
index 321cbb5250..77c0c8d500 100644
--- a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.h
+++ b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.h
@@ -45,21 +45,34 @@ public:
ClGemmDefaultConfigReshapedRhsOnlyBifrost(GPUTarget gpu);
// Inherited overridden method
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
private:
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G52_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G51_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G52_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G51_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G51_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G31_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G52_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G51_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G7x_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G52_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G76_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G51_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G51_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G31_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
};
} // namespace gemm
} // namespace kernels
diff --git a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp
index d08bf84c72..da3e2ec912 100644
--- a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp
+++ b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp
@@ -50,30 +50,35 @@ ClGemmDefaultConfigReshapedRhsOnlyValhall::ClGemmDefaultConfigReshapedRhsOnlyVal
{
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type)
{
- using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (ClGemmDefaultConfigReshapedRhsOnlyValhall::*)(unsigned int m, unsigned int n, unsigned int k,
- unsigned int b);
+ using ConfigurationFunctionExecutorPtr = std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> (
+ ClGemmDefaultConfigReshapedRhsOnlyValhall::*)(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G77(&ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f32,
- &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f16,
- &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G77(
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f32,
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f16,
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G78(&ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f32,
- &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f16,
- &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G78(
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f32,
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f16,
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G710(&ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f32,
- &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G710_f16,
- &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G710(
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f32,
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G710_f16,
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8);
- CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G715(&ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G715_f32,
- &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G715_f16,
- &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8);
+ CLGEMMConfigArray<ConfigurationFunctionExecutorPtr> configs_G715(
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G715_f32,
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G715_f16,
+ &ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8);
ConfigurationFunctionExecutorPtr func = nullptr;
- switch(_target)
+ switch (_target)
{
case GPUTarget::G78:
func = configs_G78.get_function(data_type);
@@ -96,29 +101,29 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
return (this->*func)(m, n, k, b);
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f32(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
- if(m == 1)
+ if (m == 1)
{
const float r_mn = static_cast<float>(m) / static_cast<float>(n);
const float r_mk = static_cast<float>(m) / static_cast<float>(k);
- if(r_mk <= 0.0064484127797186375)
+ if (r_mk <= 0.0064484127797186375)
{
- if(r_mn <= 0.0028273810748942196)
+ if (r_mn <= 0.0028273810748942196)
{
GEMMLHSMatrixInfo lhs_info_buf;
GEMMRHSMatrixInfo rhs_info_buf;
GEMMLHSMatrixInfo lhs_info_img;
GEMMRHSMatrixInfo rhs_info_img;
- const unsigned int h0 = std::max(n / 4, 1U);
+ const unsigned int h0 = std::max(n / 4, 1U);
std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 1, 4, 8, 1, 16, 0, 1, 0, 0, 1);
std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 1, 4, 4, 1, h0, 0, 1, 0, 1, 0);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
else
{
@@ -127,7 +132,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
else
{
- if(r_mk <= 0.020312500186264515)
+ if (r_mk <= 0.020312500186264515)
{
return configure_lhs_rhs_info(m, n, 1, 2, 16, 1, 4, 0, 1, 0, 0, 0);
}
@@ -143,9 +148,9 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
const float r_mk = static_cast<float>(m) / static_cast<float>(k);
- if(workload <= 1999.2000122070312)
+ if (workload <= 1999.2000122070312)
{
- if(workload <= 747.1999816894531)
+ if (workload <= 747.1999816894531)
{
return configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 8, 0, 1, 0, 1, 0);
}
@@ -159,15 +164,14 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 8, 0, 1, 0, 1, 0);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
}
else
{
- if(r_mn <= 0.03348214365541935)
+ if (r_mn <= 0.03348214365541935)
{
- if(r_mk <= 0.028125000186264515)
+ if (r_mk <= 0.028125000186264515)
{
return configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 8, 0, 1, 0, 1, 0);
}
@@ -181,8 +185,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 8, 0, 1, 0, 1, 0);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
}
else
@@ -195,168 +198,112 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 16, 0, 1, 0, 1, 0);
return select_lhs_rhs_info(std::make_pair(lhs_info_img, rhs_info_img),
- std::make_pair(lhs_info_buf, rhs_info_buf),
- n, k, b, DataType::F32);
+ std::make_pair(lhs_info_buf, rhs_info_buf), n, k, b, DataType::F32);
}
}
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_f16(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
- const GeMMConfigsMatrix configs_1nkb_best =
- {
- { 1, 8984, 640, 1, 1, 8, 8, 1, 0, 1, 1, 1, 1, 0 },
- { 1, 420, 392, 1, 1, 2, 8, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 644, 5288, 1, 1, 2, 8, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 6512, 6404, 1, 1, 4, 8, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 5304, 640, 1, 1, 4, 4, 1, 0, 1, 0, 1, 1, 0 },
- { 1, 1352, 1520, 1, 1, 2, 8, 1, 0, 1, 1, 1, 1, 0 },
- { 1, 4096, 25088, 1, 1, 2, 16, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 732, 8988, 1, 1, 2, 8, 1, 0, 1, 0, 1, 0, 0 }
- };
-
- const GeMMConfigsMatrix configs_mnkb_n_small_best =
- {
- { 102400, 4, 96, 1, 2, 2, 16, 1, 4, 1, 1, 1, 1, 0 },
- { 102400, 2, 96, 1, 1, 2, 16, 1, 0, 1, 0, 1, 1, 1 },
- { 16384, 4, 128, 1, 1, 2, 16, 1, 0, 1, 0, 1, 1, 1 },
- { 16384, 2, 128, 1, 1, 2, 16, 1, 0, 1, 1, 1, 1, 1 }
- };
-
- const GeMMConfigsMatrix configs_mnkb_n_small_fallback =
- {
- { 102400, 4, 96, 1, 2, 2, 16, 1, 4, 1, 1, 1, 1, 0 },
- { 102400, 2, 96, 1, 1, 2, 16, 1, 0, 1, 1, 1, 1, 0 },
- { 16384, 4, 128, 1, 2, 2, 16, 1, 2, 1, 1, 1, 1, 0 },
- { 16384, 2, 128, 1, 1, 2, 16, 1, 0, 1, 1, 1, 1, 0 }
- };
-
- const GeMMConfigsMatrix configs_mnkb_m_gt_n_best =
- {
- { 25584, 88, 16, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0 },
- { 25584, 16, 68, 1, 4, 4, 8, 1, 16, 1, 1, 1, 0, 1 },
- { 369664, 32, 28, 1, 5, 4, 4, 1, 64, 1, 1, 1, 0, 1 },
- { 65792, 44, 24, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0 },
- { 23036, 56, 736, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1 },
- { 90968, 40, 600, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1 },
- { 8944, 32, 776, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1 },
- { 50176, 64, 300, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0 },
- { 16544, 104, 160, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1 },
- { 12604, 60, 160, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1 },
- { 29584, 32, 28, 1, 4, 4, 4, 1, 128, 1, 1, 1, 0, 0 },
- { 12544, 32, 27, 1, 2, 8, 8, 1, 128, 1, 1, 1, 0, 0 },
- { 2688, 136, 1492, 1, 8, 4, 4, 1, 128, 1, 1, 1, 0, 0 },
- { 3728, 96, 196, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0 }
- };
-
- const GeMMConfigsMatrix configs_mnkb_m_gt_n_fallback =
- {
- { 25584, 88, 16, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0 },
- { 25584, 16, 68, 1, 2, 4, 8, 1, 4, 1, 1, 1, 0, 0 },
- { 369664, 32, 28, 1, 5, 4, 4, 1, 256, 1, 1, 1, 0, 0 },
- { 65792, 44, 24, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0 },
- { 23036, 56, 736, 1, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 90968, 40, 600, 1, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 8944, 32, 776, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 0 },
- { 50176, 64, 300, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0 },
- { 16544, 104, 160, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 0 },
- { 12604, 60, 160, 1, 4, 4, 8, 1, 256, 1, 1, 1, 0, 0 },
- { 29584, 32, 28, 1, 4, 4, 4, 1, 128, 1, 1, 1, 0, 0 },
- { 12544, 32, 27, 1, 2, 8, 8, 1, 128, 1, 1, 1, 0, 0 },
- { 2688, 136, 1492, 1, 8, 4, 4, 1, 128, 1, 1, 1, 0, 0 },
- { 3728, 96, 196, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0 }
- };
-
- const GeMMConfigsMatrix configs_mnkb_n_gt_m_best =
- {
- { 24, 488, 88, 1, 2, 4, 16, 1, 4, 1, 1, 1, 0, 0 },
- { 49, 1024, 512, 1, 4, 4, 8, 1, 128, 1, 1, 1, 0, 1 },
- { 49, 1024, 1024, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1 },
- };
-
- const GeMMConfigsMatrix configs_mnkb_n_gt_m_fallback =
- {
- { 24, 488, 88, 1, 2, 4, 16, 1, 4, 1, 1, 1, 0, 0 },
- { 49, 1024, 512, 1, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0 },
- { 49, 1024, 1024, 1, 4, 4, 8, 1, 256, 1, 1, 1, 0, 0 },
+ const GeMMConfigsMatrix configs_1nkb_best = {
+ {1, 8984, 640, 1, 1, 8, 8, 1, 0, 1, 1, 1, 1, 0}, {1, 420, 392, 1, 1, 2, 8, 1, 0, 1, 0, 1, 0, 0},
+ {1, 644, 5288, 1, 1, 2, 8, 1, 0, 1, 0, 1, 0, 0}, {1, 6512, 6404, 1, 1, 4, 8, 1, 0, 1, 0, 1, 0, 0},
+ {1, 5304, 640, 1, 1, 4, 4, 1, 0, 1, 0, 1, 1, 0}, {1, 1352, 1520, 1, 1, 2, 8, 1, 0, 1, 1, 1, 1, 0},
+ {1, 4096, 25088, 1, 1, 2, 16, 1, 0, 1, 0, 1, 0, 0}, {1, 732, 8988, 1, 1, 2, 8, 1, 0, 1, 0, 1, 0, 0}};
+
+ const GeMMConfigsMatrix configs_mnkb_n_small_best = {{102400, 4, 96, 1, 2, 2, 16, 1, 4, 1, 1, 1, 1, 0},
+ {102400, 2, 96, 1, 1, 2, 16, 1, 0, 1, 0, 1, 1, 1},
+ {16384, 4, 128, 1, 1, 2, 16, 1, 0, 1, 0, 1, 1, 1},
+ {16384, 2, 128, 1, 1, 2, 16, 1, 0, 1, 1, 1, 1, 1}};
+
+ const GeMMConfigsMatrix configs_mnkb_n_small_fallback = {{102400, 4, 96, 1, 2, 2, 16, 1, 4, 1, 1, 1, 1, 0},
+ {102400, 2, 96, 1, 1, 2, 16, 1, 0, 1, 1, 1, 1, 0},
+ {16384, 4, 128, 1, 2, 2, 16, 1, 2, 1, 1, 1, 1, 0},
+ {16384, 2, 128, 1, 1, 2, 16, 1, 0, 1, 1, 1, 1, 0}};
+
+ const GeMMConfigsMatrix configs_mnkb_m_gt_n_best = {
+ {25584, 88, 16, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0}, {25584, 16, 68, 1, 4, 4, 8, 1, 16, 1, 1, 1, 0, 1},
+ {369664, 32, 28, 1, 5, 4, 4, 1, 64, 1, 1, 1, 0, 1}, {65792, 44, 24, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0},
+ {23036, 56, 736, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1}, {90968, 40, 600, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1},
+ {8944, 32, 776, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1}, {50176, 64, 300, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0},
+ {16544, 104, 160, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1}, {12604, 60, 160, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1},
+ {29584, 32, 28, 1, 4, 4, 4, 1, 128, 1, 1, 1, 0, 0}, {12544, 32, 27, 1, 2, 8, 8, 1, 128, 1, 1, 1, 0, 0},
+ {2688, 136, 1492, 1, 8, 4, 4, 1, 128, 1, 1, 1, 0, 0}, {3728, 96, 196, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0}};
+
+ const GeMMConfigsMatrix configs_mnkb_m_gt_n_fallback = {
+ {25584, 88, 16, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0}, {25584, 16, 68, 1, 2, 4, 8, 1, 4, 1, 1, 1, 0, 0},
+ {369664, 32, 28, 1, 5, 4, 4, 1, 256, 1, 1, 1, 0, 0}, {65792, 44, 24, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0},
+ {23036, 56, 736, 1, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}, {90968, 40, 600, 1, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0},
+ {8944, 32, 776, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 0}, {50176, 64, 300, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0},
+ {16544, 104, 160, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 0}, {12604, 60, 160, 1, 4, 4, 8, 1, 256, 1, 1, 1, 0, 0},
+ {29584, 32, 28, 1, 4, 4, 4, 1, 128, 1, 1, 1, 0, 0}, {12544, 32, 27, 1, 2, 8, 8, 1, 128, 1, 1, 1, 0, 0},
+ {2688, 136, 1492, 1, 8, 4, 4, 1, 128, 1, 1, 1, 0, 0}, {3728, 96, 196, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0}};
+
+ const GeMMConfigsMatrix configs_mnkb_n_gt_m_best = {
+ {24, 488, 88, 1, 2, 4, 16, 1, 4, 1, 1, 1, 0, 0},
+ {49, 1024, 512, 1, 4, 4, 8, 1, 128, 1, 1, 1, 0, 1},
+ {49, 1024, 1024, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1},
};
- const GeMMConfigsMatrix configs_mnkb_squared_best =
- {
- { 72, 92, 136, 1, 2, 2, 8, 1, 128, 1, 1, 1, 1, 0 },
- { 268, 824, 5076, 1, 4, 8, 4, 1, 256, 1, 1, 1, 0, 0 },
- { 180, 420, 952, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1 },
- { 1000, 152, 304, 1, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0 },
- { 272, 400, 2116, 1, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 196, 512, 512, 1, 5, 4, 4, 1, 64, 1, 1, 1, 0, 1 },
- { 24, 88, 236, 1, 2, 2, 8, 1, 64, 1, 1, 1, 1, 0 },
- { 24, 88, 488, 1, 2, 2, 8, 1, 64, 1, 1, 1, 1, 0 }
+ const GeMMConfigsMatrix configs_mnkb_n_gt_m_fallback = {
+ {24, 488, 88, 1, 2, 4, 16, 1, 4, 1, 1, 1, 0, 0},
+ {49, 1024, 512, 1, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0},
+ {49, 1024, 1024, 1, 4, 4, 8, 1, 256, 1, 1, 1, 0, 0},
};
- const GeMMConfigsMatrix configs_mnkb_squared_fallback =
- {
- { 72, 92, 136, 1, 2, 2, 8, 1, 128, 1, 1, 1, 1, 0 },
- { 268, 824, 5076, 1, 4, 8, 4, 1, 256, 1, 1, 1, 0, 0 },
- { 180, 420, 952, 1, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0 },
- { 1000, 152, 304, 1, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0 },
- { 272, 400, 2116, 1, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 196, 512, 512, 1, 5, 4, 4, 1, 256, 1, 1, 1, 0, 0 },
- { 24, 88, 236, 1, 2, 2, 8, 1, 64, 1, 1, 1, 1, 0 },
- { 24, 88, 488, 1, 2, 2, 8, 1, 64, 1, 1, 1, 1, 0 }
- };
-
- const GeMMConfigsMatrix configs_mnkb_best_batched =
- {
- { 3136, 64, 64, 36, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 4096, 48, 32, 36, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1 },
- { 688, 92, 68, 32, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 24, 464, 412, 24, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0 },
- { 112, 184, 144, 28, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 5776, 64, 32, 36, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 1568, 64, 40, 36, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 2920, 64, 64, 24, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 }
- };
-
- const GeMMConfigsMatrix configs_mnkb_fallback_batched =
- {
- { 3136, 64, 64, 36, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 4096, 48, 32, 36, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0 },
- { 688, 92, 68, 32, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 24, 464, 412, 24, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0 },
- { 112, 184, 144, 28, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 5776, 64, 32, 36, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 1568, 64, 40, 36, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 },
- { 2920, 64, 64, 24, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0 }
- };
+ const GeMMConfigsMatrix configs_mnkb_squared_best = {
+ {72, 92, 136, 1, 2, 2, 8, 1, 128, 1, 1, 1, 1, 0}, {268, 824, 5076, 1, 4, 8, 4, 1, 256, 1, 1, 1, 0, 0},
+ {180, 420, 952, 1, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1}, {1000, 152, 304, 1, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0},
+ {272, 400, 2116, 1, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}, {196, 512, 512, 1, 5, 4, 4, 1, 64, 1, 1, 1, 0, 1},
+ {24, 88, 236, 1, 2, 2, 8, 1, 64, 1, 1, 1, 1, 0}, {24, 88, 488, 1, 2, 2, 8, 1, 64, 1, 1, 1, 1, 0}};
+
+ const GeMMConfigsMatrix configs_mnkb_squared_fallback = {
+ {72, 92, 136, 1, 2, 2, 8, 1, 128, 1, 1, 1, 1, 0}, {268, 824, 5076, 1, 4, 8, 4, 1, 256, 1, 1, 1, 0, 0},
+ {180, 420, 952, 1, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0}, {1000, 152, 304, 1, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0},
+ {272, 400, 2116, 1, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}, {196, 512, 512, 1, 5, 4, 4, 1, 256, 1, 1, 1, 0, 0},
+ {24, 88, 236, 1, 2, 2, 8, 1, 64, 1, 1, 1, 1, 0}, {24, 88, 488, 1, 2, 2, 8, 1, 64, 1, 1, 1, 1, 0}};
+
+ const GeMMConfigsMatrix configs_mnkb_best_batched = {
+ {3136, 64, 64, 36, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}, {4096, 48, 32, 36, 4, 4, 8, 1, 64, 1, 1, 1, 0, 1},
+ {688, 92, 68, 32, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}, {24, 464, 412, 24, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0},
+ {112, 184, 144, 28, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}, {5776, 64, 32, 36, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0},
+ {1568, 64, 40, 36, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}, {2920, 64, 64, 24, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}};
+
+ const GeMMConfigsMatrix configs_mnkb_fallback_batched = {
+ {3136, 64, 64, 36, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}, {4096, 48, 32, 36, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0},
+ {688, 92, 68, 32, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}, {24, 464, 412, 24, 4, 4, 8, 1, 128, 1, 1, 1, 0, 0},
+ {112, 184, 144, 28, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}, {5776, 64, 32, 36, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0},
+ {1568, 64, 40, 36, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}, {2920, 64, 64, 24, 4, 8, 4, 1, 64, 1, 1, 1, 0, 0}};
const GeMMConfigsMatrix *configs_best_to_use = nullptr;
const GeMMConfigsMatrix *configs_fallback_to_use = nullptr;
- if(b == 1)
+ if (b == 1)
{
constexpr float ratio_m_gt_n = 10.f;
constexpr float ratio_n_gt_m = 0.1f;
constexpr unsigned int n_small_thr = 4;
const float ratio = static_cast<float>(m) / static_cast<float>(n);
- if(m == 1)
+ if (m == 1)
{
// We do not need fallback in this case, as we never use cl_image for the rhs tensor
configs_best_to_use = &configs_1nkb_best;
configs_fallback_to_use = &configs_1nkb_best;
}
- else if(n <= n_small_thr && ratio > ratio_m_gt_n)
+ else if (n <= n_small_thr && ratio > ratio_m_gt_n)
{
configs_best_to_use = &configs_mnkb_n_small_best;
configs_fallback_to_use = &configs_mnkb_n_small_fallback;
}
- else if(ratio > ratio_m_gt_n)
+ else if (ratio > ratio_m_gt_n)
{
configs_best_to_use = &configs_mnkb_m_gt_n_best;
configs_fallback_to_use = &configs_mnkb_m_gt_n_fallback;
}
- else if(ratio < ratio_n_gt_m)
+ else if (ratio < ratio_n_gt_m)
{
configs_best_to_use = &configs_mnkb_n_gt_m_best;
configs_fallback_to_use = &configs_mnkb_n_gt_m_fallback;
@@ -381,17 +328,17 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
std::tie(lhs_info0, rhs_info0) = find_lhs_rhs_info(*configs_best_to_use, m, n, k, b);
std::tie(lhs_info1, rhs_info1) = find_lhs_rhs_info(*configs_fallback_to_use, m, n, k, b);
- return select_lhs_rhs_info(std::make_pair(lhs_info0, rhs_info0),
- std::make_pair(lhs_info1, rhs_info1),
- n, k, b, DataType::F16);
+ return select_lhs_rhs_info(std::make_pair(lhs_info0, rhs_info0), std::make_pair(lhs_info1, rhs_info1), n, k, b,
+ DataType::F16);
}
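For readers decoding the GeMMConfigsMatrix tables above: each 14-column row pairs a reference problem shape with the tile parameters that configure_lhs_rhs_info() receives. Below is a minimal, standalone sketch of that expansion, assuming the columns are ordered {m, n, k, b, m0, n0, k0, v0, h0, lhs_interleave, rhs_interleave, lhs_transpose, rhs_transpose, export_cl_image}; the column meaning and the helper are illustrative assumptions, not part of this patch.

// Hypothetical sketch: expand one config-table row into the argument list of
// configure_lhs_rhs_info(). The column order is an assumption for illustration only.
#include <array>
#include <cstdio>

using ConfigRow = std::array<unsigned int, 14>;

static void print_row_as_call(const ConfigRow &r)
{
    // r[0..3]  : reference m, n, k, b matched against the runtime GEMM shape
    // r[4..8]  : m0, n0, k0, v0, h0 block sizes
    // r[9..13] : interleave / transpose / export-to-cl_image flags
    std::printf("configure_lhs_rhs_info(m, n, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u)\n", r[4], r[5], r[6], r[7],
                r[8], r[9], r[10], r[11], r[12], r[13]);
}

int main()
{
    // First row of the "m_gt_n best" table above.
    print_row_as_call({25584, 88, 16, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0});
    return 0;
}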
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G77_u8(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
ARM_COMPUTE_UNUSED(k);
ARM_COMPUTE_UNUSED(b);
- if(m == 1)
+ if (m == 1)
{
const unsigned int h0 = std::max(n / 2, 1U);
return configure_lhs_rhs_info(m, n, 1, 4, 16, 1, h0, 0, 1, 0, 1);
@@ -399,7 +346,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
else
{
const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(256)), static_cast<int>(1));
- if(m >= 28)
+ if (m >= 28)
{
return configure_lhs_rhs_info(m, n, 4, 4, 16, 1, h0, 0, 1, 0, 1);
}
@@ -410,30 +357,31 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f32(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
const float r_mn = static_cast<float>(m) / static_cast<float>(n);
const float r_mk = static_cast<float>(m) / static_cast<float>(k);
const float r_nk = static_cast<float>(n) / static_cast<float>(k);
const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
- if(m == 1)
+ if (m == 1)
{
- if(workload <= 278.7000f)
+ if (workload <= 278.7000f)
{
- if(workload <= 7.5000f)
+ if (workload <= 7.5000f)
{
return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0);
}
else
{
- if(r_mn <= 0.0031f)
+ if (r_mn <= 0.0031f)
{
- if(workload <= 256.6000f)
+ if (workload <= 256.6000f)
{
- if(workload <= 16.7500f)
+ if (workload <= 16.7500f)
{
- if(r_nk <= 1.6671f)
+ if (r_nk <= 1.6671f)
{
return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 32, 0, 0, 0, 1, 0);
}
@@ -454,15 +402,15 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
else
{
- if(r_mk <= 0.0027f)
+ if (r_mk <= 0.0027f)
{
- if(r_mk <= 0.0014f)
+ if (r_mk <= 0.0014f)
{
return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 32, 0, 0, 0, 1, 0);
}
else
{
- if(workload <= 8.9500f)
+ if (workload <= 8.9500f)
{
return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0);
}
@@ -474,13 +422,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
else
{
- if(workload <= 14.1500f)
+ if (workload <= 14.1500f)
{
return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0);
}
else
{
- if(r_mk <= 0.0041f)
+ if (r_mk <= 0.0041f)
{
return configure_lhs_rhs_info(m, n, 1, 2, 2, 1, 32, 0, 0, 0, 1, 0);
}
@@ -495,9 +443,9 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
else
{
- if(workload <= 363.7000f)
+ if (workload <= 363.7000f)
{
- if(r_mk <= 0.0031f)
+ if (r_mk <= 0.0031f)
{
return configure_lhs_rhs_info(m, n, 1, 4, 2, 1, 32, 0, 1, 0, 1, 0);
}
@@ -514,9 +462,9 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
else
{
- if(workload <= 1384.8000f)
+ if (workload <= 1384.8000f)
{
- if(workload <= 704.0000f)
+ if (workload <= 704.0000f)
{
return configure_lhs_rhs_info(m, n, 2, 2, 4, 1, 32, 0, 1, 0, 1, 0);
}
@@ -527,9 +475,9 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
else
{
- if(workload <= 16761.6006f)
+ if (workload <= 16761.6006f)
{
- if(r_mn <= 187.1250f)
+ if (r_mn <= 187.1250f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 16, 0, 0, 0, 1, 1);
}
@@ -540,7 +488,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
else
{
- if(r_mk <= 432.4630f)
+ if (r_mk <= 432.4630f)
{
return configure_lhs_rhs_info(m, n, 5, 4, 4, 1, 16, 0, 0, 0, 1, 1);
}
@@ -553,42 +501,37 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
}
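As a concrete reading of the decision tree in configure_G78_f32 above: for a GEMV-like call with m = 1, n = 128, b = 1 (k does not enter this particular leaf), the workload is 1 * 128 * 1 / 20 = 6.4, which satisfies workload <= 278.7 and then workload <= 7.5, so the first visible leaf fires and returns configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0). A standalone sketch of just that branch follows; the rest of the tree is elided and this is illustrative, not the library code.

#include <cstdio>

int main()
{
    const unsigned int m = 1, n = 128, b = 1;
    // Same workload metric as the heuristic above: (m * n * b) / 20.
    const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;

    if (m == 1 && workload <= 278.7f && workload <= 7.5f)
    {
        // Corresponds to the leaf above that returns
        // configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0).
        std::printf("workload = %.1f -> configure_lhs_rhs_info(m, n, 1, 2, 8, 1, 2, 0, 1, 1, 0, 0)\n", workload);
    }
    return 0;
}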
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G78_f16(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
const float workload = (static_cast<float>(m) * static_cast<float>(n) * static_cast<float>(b)) / 20.0f;
const float r_mn = static_cast<float>(m) / static_cast<float>(n);
const float r_mk = static_cast<float>(m) / static_cast<float>(k);
const float r_nk = static_cast<float>(n) / static_cast<float>(k);
- if(m == 1)
+ if (m == 1)
{
- const GeMMConfigsMatrix configs_mnkb_best =
- {
- { 1, 8984, 640, 1, 1, 4, 2, 1, 0, 1, 0, 1, 1, 0 },
- { 1, 420, 392, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 644, 5288, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 6512, 6404, 1, 1, 2, 2, 1, 0, 1, 0, 1, 1, 0 },
- { 1, 5304, 640, 1, 1, 2, 2, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 1352, 1520, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 4096, 25088, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 732, 8988, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0 }
- };
+ const GeMMConfigsMatrix configs_mnkb_best = {
+ {1, 8984, 640, 1, 1, 4, 2, 1, 0, 1, 0, 1, 1, 0}, {1, 420, 392, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0},
+ {1, 644, 5288, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0}, {1, 6512, 6404, 1, 1, 2, 2, 1, 0, 1, 0, 1, 1, 0},
+ {1, 5304, 640, 1, 1, 2, 2, 1, 0, 1, 0, 1, 0, 0}, {1, 1352, 1520, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0},
+ {1, 4096, 25088, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0}, {1, 732, 8988, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0}};
return find_lhs_rhs_info(configs_mnkb_best, m, n, k, b);
}
else
{
- if(workload <= 1384.8000f)
+ if (workload <= 1384.8000f)
{
- if(r_nk <= 0.8333f)
+ if (r_nk <= 0.8333f)
{
- if(r_mk <= 0.9119f)
+ if (r_mk <= 0.9119f)
{
return configure_lhs_rhs_info(m, n, 2, 2, 16, 1, 4, 0, 1, 0, 1, 1);
}
else
{
- if(r_nk <= 0.1181f)
+ if (r_nk <= 0.1181f)
{
return configure_lhs_rhs_info(m, n, 2, 2, 8, 1, 32, 0, 0, 1, 0, 0);
}
@@ -600,7 +543,7 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
else
{
- if(r_mk <= 1.0013f)
+ if (r_mk <= 1.0013f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 8, 1, 32, 0, 1, 1, 0, 1);
}
@@ -612,11 +555,11 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
else
{
- if(workload <= 11404.7998f)
+ if (workload <= 11404.7998f)
{
- if(r_mk <= 2.2884f)
+ if (r_mk <= 2.2884f)
{
- if(r_nk <= 0.9286f)
+ if (r_nk <= 0.9286f)
{
return configure_lhs_rhs_info(m, n, 4, 4, 8, 1, 4, 0, 1, 1, 0, 1);
}
@@ -632,9 +575,9 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
else
{
- if(r_nk <= 1.1926f)
+ if (r_nk <= 1.1926f)
{
- if(r_mn <= 1385.7917f)
+ if (r_mn <= 1385.7917f)
{
return configure_lhs_rhs_info(m, n, 6, 4, 8, 1, 4, 0, 1, 1, 0, 1);
}
@@ -652,12 +595,13 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G715_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G715_f32(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
unsigned int best_m0;
unsigned int best_n0;
- if(is_mmul_kernel_preferred(m, n, k, b, DataType::F32, best_m0, best_n0))
+ if (is_mmul_kernel_preferred(m, n, k, b, DataType::F32, best_m0, best_n0))
{
return configure_lhs_rhs_info(m, n, best_m0, best_n0, 1, 1, 4, false, true, false, false, true);
}
@@ -667,153 +611,101 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
}
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G710_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G710_f16(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
- const GeMMConfigsMatrix configs_1nkb_best =
- {
- { 1, 8984, 640, 1, 1, 2, 2, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 420, 392, 1, 1, 2, 8, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 644, 5288, 1, 1, 2, 8, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 6512, 6404, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 5304, 640, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 1352, 1520, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0 },
- { 1, 4096, 25088, 1, 1, 2, 8, 1, 0, 1, 0, 1, 1, 0 },
- { 1, 732, 8988, 1, 1, 2, 8, 1, 0, 1, 0, 1, 0, 0 }
+ const GeMMConfigsMatrix configs_1nkb_best = {
+ {1, 8984, 640, 1, 1, 2, 2, 1, 0, 1, 0, 1, 0, 0}, {1, 420, 392, 1, 1, 2, 8, 1, 0, 1, 0, 1, 0, 0},
+ {1, 644, 5288, 1, 1, 2, 8, 1, 0, 1, 0, 1, 0, 0}, {1, 6512, 6404, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0},
+ {1, 5304, 640, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0}, {1, 1352, 1520, 1, 1, 2, 4, 1, 0, 1, 0, 1, 0, 0},
+ {1, 4096, 25088, 1, 1, 2, 8, 1, 0, 1, 0, 1, 1, 0}, {1, 732, 8988, 1, 1, 2, 8, 1, 0, 1, 0, 1, 0, 0}};
+
+ const GeMMConfigsMatrix configs_mnkb_n_small_best = {{102400, 4, 96, 1, 1, 2, 16, 1, 0, 1, 0, 1, 0, 0},
+ {102400, 2, 96, 1, 1, 2, 16, 1, 0, 1, 0, 1, 0, 0},
+ {16384, 4, 128, 1, 1, 2, 16, 1, 0, 1, 0, 1, 0, 0},
+ {16384, 2, 128, 1, 1, 2, 16, 1, 0, 1, 0, 1, 0, 0}};
+
+ const GeMMConfigsMatrix configs_mnkb_m_gt_n_best = {
+ {25584, 88, 16, 1, 4, 8, 4, 1, 4, 1, 1, 1, 0, 0}, {25584, 16, 68, 1, 2, 4, 16, 1, 8, 1, 1, 1, 0, 1},
+ {369664, 32, 28, 1, 2, 8, 4, 1, 128, 1, 1, 1, 0, 0}, {65792, 44, 24, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0},
+ {23036, 56, 736, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1}, {90968, 40, 600, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1},
+ {8944, 32, 776, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1}, {2688, 136, 1492, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1},
+ {50176, 64, 300, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 1}, {16544, 104, 160, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1},
+ {12604, 60, 160, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1}, {3728, 96, 196, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1},
+ {29584, 32, 28, 1, 2, 8, 4, 1, 16, 1, 1, 1, 0, 0}, {12544, 32, 27, 1, 2, 8, 8, 1, 16, 1, 1, 1, 0, 0},
};
- const GeMMConfigsMatrix configs_mnkb_n_small_best =
- {
- { 102400, 4, 96, 1, 1, 2, 16, 1, 0, 1, 0, 1, 0, 0 },
- { 102400, 2, 96, 1, 1, 2, 16, 1, 0, 1, 0, 1, 0, 0 },
- { 16384, 4, 128, 1, 1, 2, 16, 1, 0, 1, 0, 1, 0, 0 },
- { 16384, 2, 128, 1, 1, 2, 16, 1, 0, 1, 0, 1, 0, 0 }
+ const GeMMConfigsMatrix configs_mnkb_m_gt_n_fallback = {
+ {25584, 88, 16, 1, 4, 8, 4, 1, 4, 1, 1, 1, 0, 0}, {25584, 16, 68, 1, 2, 4, 8, 1, 4, 1, 1, 1, 1, 0},
+ {369664, 32, 28, 1, 2, 8, 4, 1, 128, 1, 1, 1, 0, 0}, {65792, 44, 24, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0},
+ {23036, 56, 736, 1, 4, 8, 4, 1, 16, 1, 1, 1, 0, 0}, {90968, 40, 600, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 0},
+ {8944, 32, 776, 1, 2, 8, 8, 1, 16, 1, 1, 1, 0, 0}, {2688, 136, 1492, 1, 4, 4, 8, 1, 8, 1, 1, 1, 0, 0},
+ {50176, 64, 300, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0}, {16544, 104, 160, 1, 4, 8, 4, 1, 16, 1, 1, 1, 0, 0},
+ {12604, 60, 160, 1, 2, 8, 8, 1, 8, 1, 1, 1, 0, 0}, {3728, 96, 196, 1, 2, 8, 8, 1, 64, 1, 1, 1, 0, 0},
+ {29584, 32, 28, 1, 2, 8, 4, 1, 16, 1, 1, 1, 0, 0}, {12544, 32, 27, 1, 2, 8, 8, 1, 16, 1, 1, 1, 0, 0},
};
- const GeMMConfigsMatrix configs_mnkb_m_gt_n_best =
- {
- { 25584, 88, 16, 1, 4, 8, 4, 1, 4, 1, 1, 1, 0, 0 },
- { 25584, 16, 68, 1, 2, 4, 16, 1, 8, 1, 1, 1, 0, 1 },
- { 369664, 32, 28, 1, 2, 8, 4, 1, 128, 1, 1, 1, 0, 0 },
- { 65792, 44, 24, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0 },
- { 23036, 56, 736, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 90968, 40, 600, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 8944, 32, 776, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 2688, 136, 1492, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 50176, 64, 300, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 1 },
- { 16544, 104, 160, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 12604, 60, 160, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 3728, 96, 196, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 29584, 32, 28, 1, 2, 8, 4, 1, 16, 1, 1, 1, 0, 0 },
- { 12544, 32, 27, 1, 2, 8, 8, 1, 16, 1, 1, 1, 0, 0 },
- };
+ const GeMMConfigsMatrix configs_mnkb_n_gt_m_best = {{24, 488, 88, 1, 2, 2, 8, 1, 8, 1, 1, 1, 1, 0},
+ {49, 1024, 512, 1, 2, 4, 8, 1, 8, 1, 1, 1, 1, 0},
+ {49, 1024, 1024, 1, 2, 4, 8, 1, 4, 1, 1, 1, 1, 0}};
- const GeMMConfigsMatrix configs_mnkb_m_gt_n_fallback =
- {
- { 25584, 88, 16, 1, 4, 8, 4, 1, 4, 1, 1, 1, 0, 0 },
- { 25584, 16, 68, 1, 2, 4, 8, 1, 4, 1, 1, 1, 1, 0 },
- { 369664, 32, 28, 1, 2, 8, 4, 1, 128, 1, 1, 1, 0, 0 },
- { 65792, 44, 24, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0 },
- { 23036, 56, 736, 1, 4, 8, 4, 1, 16, 1, 1, 1, 0, 0 },
- { 90968, 40, 600, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 0 },
- { 8944, 32, 776, 1, 2, 8, 8, 1, 16, 1, 1, 1, 0, 0 },
- { 2688, 136, 1492, 1, 4, 4, 8, 1, 8, 1, 1, 1, 0, 0 },
- { 50176, 64, 300, 1, 4, 8, 4, 1, 128, 1, 1, 1, 0, 0 },
- { 16544, 104, 160, 1, 4, 8, 4, 1, 16, 1, 1, 1, 0, 0 },
- { 12604, 60, 160, 1, 2, 8, 8, 1, 8, 1, 1, 1, 0, 0 },
- { 3728, 96, 196, 1, 2, 8, 8, 1, 64, 1, 1, 1, 0, 0 },
- { 29584, 32, 28, 1, 2, 8, 4, 1, 16, 1, 1, 1, 0, 0 },
- { 12544, 32, 27, 1, 2, 8, 8, 1, 16, 1, 1, 1, 0, 0 },
- };
+ const GeMMConfigsMatrix configs_mnkb_n_gt_m_fallback = {{24, 488, 88, 1, 2, 2, 8, 1, 8, 1, 1, 1, 1, 0},
+ {49, 1024, 512, 1, 2, 4, 8, 1, 8, 1, 1, 1, 1, 0},
+ {49, 1024, 1024, 1, 2, 4, 8, 1, 4, 1, 1, 1, 1, 0}};
- const GeMMConfigsMatrix configs_mnkb_n_gt_m_best =
- {
- { 24, 488, 88, 1, 2, 2, 8, 1, 8, 1, 1, 1, 1, 0 },
- { 49, 1024, 512, 1, 2, 4, 8, 1, 8, 1, 1, 1, 1, 0 },
- { 49, 1024, 1024, 1, 2, 4, 8, 1, 4, 1, 1, 1, 1, 0 }
+ const GeMMConfigsMatrix configs_mnkb_squared_best = {
+ {24, 88, 236, 1, 2, 2, 8, 1, 4, 1, 1, 1, 1, 0}, {24, 88, 488, 1, 2, 2, 8, 1, 4, 1, 1, 1, 1, 0},
+ {72, 92, 136, 1, 2, 2, 8, 1, 32, 1, 1, 1, 1, 0}, {268, 824, 5076, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1},
+ {180, 420, 952, 1, 4, 4, 8, 1, 16, 1, 1, 1, 0, 1}, {1000, 152, 304, 1, 4, 8, 4, 1, 32, 1, 1, 1, 0, 0},
+ {272, 400, 2116, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1}, {196, 512, 512, 1, 5, 2, 8, 1, 4, 1, 1, 1, 1, 1},
};
- const GeMMConfigsMatrix configs_mnkb_n_gt_m_fallback =
- {
- { 24, 488, 88, 1, 2, 2, 8, 1, 8, 1, 1, 1, 1, 0 },
- { 49, 1024, 512, 1, 2, 4, 8, 1, 8, 1, 1, 1, 1, 0 },
- { 49, 1024, 1024, 1, 2, 4, 8, 1, 4, 1, 1, 1, 1, 0 }
- };
-
- const GeMMConfigsMatrix configs_mnkb_squared_best =
- {
- { 24, 88, 236, 1, 2, 2, 8, 1, 4, 1, 1, 1, 1, 0 },
- { 24, 88, 488, 1, 2, 2, 8, 1, 4, 1, 1, 1, 1, 0 },
- { 72, 92, 136, 1, 2, 2, 8, 1, 32, 1, 1, 1, 1, 0 },
- { 268, 824, 5076, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 180, 420, 952, 1, 4, 4, 8, 1, 16, 1, 1, 1, 0, 1 },
- { 1000, 152, 304, 1, 4, 8, 4, 1, 32, 1, 1, 1, 0, 0 },
- { 272, 400, 2116, 1, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 196, 512, 512, 1, 5, 2, 8, 1, 4, 1, 1, 1, 1, 1 },
+ const GeMMConfigsMatrix configs_mnkb_squared_fallback = {
+ {24, 88, 236, 1, 2, 2, 8, 1, 4, 1, 1, 1, 1, 0}, {24, 88, 488, 1, 2, 2, 8, 1, 4, 1, 1, 1, 1, 0},
+ {72, 92, 136, 1, 2, 2, 8, 1, 32, 1, 1, 1, 1, 0}, {268, 824, 5076, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0},
+ {180, 420, 952, 1, 5, 2, 8, 1, 8, 1, 1, 1, 1, 0}, {1000, 152, 304, 1, 4, 8, 4, 1, 32, 1, 1, 1, 0, 0},
+ {272, 400, 2116, 1, 2, 8, 4, 1, 4, 1, 1, 1, 0, 0}, {196, 512, 512, 1, 5, 2, 8, 1, 8, 1, 1, 1, 1, 0},
};
- const GeMMConfigsMatrix configs_mnkb_squared_fallback =
- {
- { 24, 88, 236, 1, 2, 2, 8, 1, 4, 1, 1, 1, 1, 0 },
- { 24, 88, 488, 1, 2, 2, 8, 1, 4, 1, 1, 1, 1, 0 },
- { 72, 92, 136, 1, 2, 2, 8, 1, 32, 1, 1, 1, 1, 0 },
- { 268, 824, 5076, 1, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0 },
- { 180, 420, 952, 1, 5, 2, 8, 1, 8, 1, 1, 1, 1, 0 },
- { 1000, 152, 304, 1, 4, 8, 4, 1, 32, 1, 1, 1, 0, 0 },
- { 272, 400, 2116, 1, 2, 8, 4, 1, 4, 1, 1, 1, 0, 0 },
- { 196, 512, 512, 1, 5, 2, 8, 1, 8, 1, 1, 1, 1, 0 },
- };
+ const GeMMConfigsMatrix configs_mnkb_best_batched = {
+ {3136, 64, 64, 36, 4, 8, 4, 1, 16, 1, 1, 1, 0, 1}, {4096, 48, 32, 36, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1},
+ {688, 92, 68, 32, 4, 8, 4, 1, 32, 1, 1, 1, 0, 1}, {24, 464, 412, 24, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1},
+ {112, 184, 144, 28, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1}, {5776, 64, 32, 36, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1},
+ {1568, 64, 40, 36, 4, 8, 4, 1, 8, 1, 1, 1, 0, 1}, {2920, 64, 64, 24, 4, 8, 4, 1, 8, 1, 1, 1, 0, 1}};
- const GeMMConfigsMatrix configs_mnkb_best_batched =
- {
- { 3136, 64, 64, 36, 4, 8, 4, 1, 16, 1, 1, 1, 0, 1 },
- { 4096, 48, 32, 36, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 688, 92, 68, 32, 4, 8, 4, 1, 32, 1, 1, 1, 0, 1 },
- { 24, 464, 412, 24, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 112, 184, 144, 28, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 5776, 64, 32, 36, 4, 4, 8, 1, 4, 1, 1, 1, 0, 1 },
- { 1568, 64, 40, 36, 4, 8, 4, 1, 8, 1, 1, 1, 0, 1 },
- { 2920, 64, 64, 24, 4, 8, 4, 1, 8, 1, 1, 1, 0, 1 }
- };
-
- const GeMMConfigsMatrix configs_mnkb_fallback_batched =
- {
- { 3136, 64, 64, 36, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0 },
- { 4096, 48, 32, 36, 4, 4, 8, 1, 64, 1, 1, 1, 0, 0 },
- { 688, 92, 68, 32, 4, 8, 4, 1, 32, 1, 1, 1, 0, 0 },
- { 24, 464, 412, 24, 2, 8, 4, 1, 32, 1, 1, 1, 0, 0 },
- { 112, 184, 144, 28, 4, 4, 8, 1, 8, 1, 1, 1, 0, 0 },
- { 5776, 64, 32, 36, 2, 8, 8, 1, 32, 1, 1, 1, 0, 0 },
- { 1568, 64, 40, 36, 4, 8, 4, 1, 16, 1, 1, 1, 0, 0 },
- { 2920, 64, 64, 24, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0 }
- };
+ const GeMMConfigsMatrix configs_mnkb_fallback_batched = {
+ {3136, 64, 64, 36, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0}, {4096, 48, 32, 36, 4, 4, 8, 1, 64, 1, 1, 1, 0, 0},
+ {688, 92, 68, 32, 4, 8, 4, 1, 32, 1, 1, 1, 0, 0}, {24, 464, 412, 24, 2, 8, 4, 1, 32, 1, 1, 1, 0, 0},
+ {112, 184, 144, 28, 4, 4, 8, 1, 8, 1, 1, 1, 0, 0}, {5776, 64, 32, 36, 2, 8, 8, 1, 32, 1, 1, 1, 0, 0},
+ {1568, 64, 40, 36, 4, 8, 4, 1, 16, 1, 1, 1, 0, 0}, {2920, 64, 64, 24, 4, 8, 4, 1, 8, 1, 1, 1, 0, 0}};
const GeMMConfigsMatrix *configs_best_to_use = nullptr;
const GeMMConfigsMatrix *configs_fallback_to_use = nullptr;
- if(b == 1)
+ if (b == 1)
{
constexpr float ratio_m_gt_n = 10.f;
constexpr float ratio_n_gt_m = 0.1f;
constexpr unsigned int n_small_thr = 4;
const float ratio = static_cast<float>(m) / static_cast<float>(n);
- if(m == 1)
+ if (m == 1)
{
// We do not need fallback in this case, as we never use cl_image for the rhs tensor
configs_best_to_use = &configs_1nkb_best;
configs_fallback_to_use = &configs_1nkb_best;
}
- else if(n <= n_small_thr && ratio > ratio_m_gt_n)
+ else if (n <= n_small_thr && ratio > ratio_m_gt_n)
{
configs_best_to_use = &configs_mnkb_n_small_best;
configs_fallback_to_use = &configs_mnkb_n_small_best;
}
- else if(ratio > ratio_m_gt_n)
+ else if (ratio > ratio_m_gt_n)
{
configs_best_to_use = &configs_mnkb_m_gt_n_best;
configs_fallback_to_use = &configs_mnkb_m_gt_n_fallback;
}
- else if(ratio < ratio_n_gt_m)
+ else if (ratio < ratio_n_gt_m)
{
configs_best_to_use = &configs_mnkb_n_gt_m_best;
configs_fallback_to_use = &configs_mnkb_n_gt_m_fallback;
@@ -838,17 +730,17 @@ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOn
std::tie(lhs_info0, rhs_info0) = find_lhs_rhs_info(*configs_best_to_use, m, n, k, b);
std::tie(lhs_info1, rhs_info1) = find_lhs_rhs_info(*configs_fallback_to_use, m, n, k, b);
- return select_lhs_rhs_info(std::make_pair(lhs_info0, rhs_info0),
- std::make_pair(lhs_info1, rhs_info1),
- n, k, b, DataType::F16);
+ return select_lhs_rhs_info(std::make_pair(lhs_info0, rhs_info0), std::make_pair(lhs_info1, rhs_info1), n, k, b,
+ DataType::F16);
}
-std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G715_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b)
+std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> ClGemmDefaultConfigReshapedRhsOnlyValhall::configure_G715_f16(
+ unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
unsigned int best_m0;
unsigned int best_n0;
- if(is_mmul_kernel_preferred(m, n, k, b, DataType::F16, best_m0, best_n0))
+ if (is_mmul_kernel_preferred(m, n, k, b, DataType::F16, best_m0, best_n0))
{
return configure_lhs_rhs_info(m, n, best_m0, best_n0, 1, 1, 4, false, true, false, false, true);
}
diff --git a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.h b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.h
index f2952a3d30..a0ea337eb1 100644
--- a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.h
+++ b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.h
@@ -45,17 +45,26 @@ public:
ClGemmDefaultConfigReshapedRhsOnlyValhall(GPUTarget gpu);
// Inherited overridden method
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
private:
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G78_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G78_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G710_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G715_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
- std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G715_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G77_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G78_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G78_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G77_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G710_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G715_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
+ std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+ configure_G715_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
};
} // namespace gemm
} // namespace kernels
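Taken together with the factory in ClGemmReshapedOnlyRhsKernelConfig.h (the next file in this diff), the expected call pattern is roughly the following. This is a hedged sketch: the namespace spelling and the wrapper function are assumptions for illustration, and it only compiles against the library itself.

#include <utility>

#include "src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmReshapedOnlyRhsKernelConfig.h"

// Illustrative wrapper: obtain lhs/rhs reshape info for the reshaped-only-RHS GEMM kernel.
std::pair<arm_compute::GEMMLHSMatrixInfo, arm_compute::GEMMRHSMatrixInfo>
pick_reshaped_rhs_only_config(arm_compute::GPUTarget target, unsigned int m, unsigned int n, unsigned int k, unsigned int b)
{
    using namespace arm_compute::opencl::kernels::gemm;
    // create() dispatches on the GPU architecture (see the switch in the factory below).
    auto config = ClGemmReshapedOnlyRhsKernelConfig::create(target);
    // configure() then applies the per-target, per-data-type heuristics from the .cpp above.
    return config->configure(m, n, k, b, arm_compute::DataType::F16);
}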
diff --git a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmReshapedOnlyRhsKernelConfig.h b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmReshapedOnlyRhsKernelConfig.h
index 1503e74eb6..e07ad993ed 100644
--- a/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmReshapedOnlyRhsKernelConfig.h
+++ b/src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmReshapedOnlyRhsKernelConfig.h
@@ -50,7 +50,7 @@ public:
*/
static std::unique_ptr<IClGemmKernelConfig> create(GPUTarget gpu)
{
- switch(get_arch_from_target(gpu))
+ switch (get_arch_from_target(gpu))
{
case GPUTarget::MIDGARD:
case GPUTarget::BIFROST: