aboutsummaryrefslogtreecommitdiff
path: root/src/gpu/cl/operators/ClGemm.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/gpu/cl/operators/ClGemm.cpp')
-rw-r--r--src/gpu/cl/operators/ClGemm.cpp472
1 files changed, 299 insertions, 173 deletions
diff --git a/src/gpu/cl/operators/ClGemm.cpp b/src/gpu/cl/operators/ClGemm.cpp
index 7e331a86f3..815c254c69 100644
--- a/src/gpu/cl/operators/ClGemm.cpp
+++ b/src/gpu/cl/operators/ClGemm.cpp
@@ -33,11 +33,12 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/ITensorAllocator.h"
+#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/MemoryHelpers.h"
#include "src/core/utils/helpers/float_ops.h"
@@ -45,8 +46,6 @@
#include "src/gpu/cl/utils/ClAuxTensorHandler.h"
#include "src/runtime/CL/gemm/CLGEMMKernelSelection.h"
#include "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.h"
-
-#include "src/common/utils/Log.h"
#include "support/Cast.h"
#include "utils/TypePrinter.h"
@@ -67,35 +66,43 @@ inline bool validate_gemm_kernel(CLGEMMKernelType kernel_type)
return kernel_type == CLGEMMKernelType::NATIVE ? false : true;
}
//Automatically select between mlgo (prioritized) and default heuristics for gemm kernel type
-inline CLGEMMKernelType auto_select_gemm_kernel(auto_heuristics::CommonQuery query, bool reshape_b_only_on_first_run, bool constant_weights)
+inline CLGEMMKernelType
+auto_select_gemm_kernel(auto_heuristics::CommonQuery query, bool reshape_b_only_on_first_run, bool constant_weights)
{
- if(!constant_weights)
+ if (!constant_weights)
{
return CLGEMMKernelType::NATIVE;
}
auto gemm_kernel = auto_heuristics::select_mlgo_gemm_kernel(query, reshape_b_only_on_first_run);
- if(bool(gemm_kernel))
+ if (bool(gemm_kernel))
{
- if(validate_gemm_kernel(gemm_kernel.gemm_type))
+ if (validate_gemm_kernel(gemm_kernel.gemm_type))
{
- ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use gemm kernel from mlgo heuristics: %s.", to_string(gemm_kernel.gemm_type).c_str());
+ ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use gemm kernel from mlgo heuristics: %s.",
+ to_string(gemm_kernel.gemm_type).c_str());
return gemm_kernel.gemm_type;
}
}
gemm_kernel = auto_heuristics::select_default_gemm_kernel(query, reshape_b_only_on_first_run);
- ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use gemm kernel from default heuristics: %s.", to_string(gemm_kernel.gemm_type).c_str());
+ ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use gemm kernel from default heuristics: %s.",
+ to_string(gemm_kernel.gemm_type).c_str());
return gemm_kernel.gemm_type;
}
// Validate lhs_info and rhs_info for reshaped only rhs kernel
-inline bool validate_lhs_rhs_info_reshaped_only_rhs(const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c,
- const ITensorInfo *output, GEMMKernelInfo gemm_kernel_info)
+inline bool validate_lhs_rhs_info_reshaped_only_rhs(const GEMMLHSMatrixInfo &lhs_info,
+ const GEMMRHSMatrixInfo &rhs_info,
+ const ITensorInfo *a,
+ const ITensorInfo *b,
+ const ITensorInfo *c,
+ const ITensorInfo *output,
+ GEMMKernelInfo gemm_kernel_info)
{
// Validate GEMMLHSMatrixInfo and GEMMRHSMatrixInfo for reshaped only rhs kernel
TensorInfo tmp_b_info{};
// Validate reshape RHS kernel
auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
- if(!bool(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info)))
+ if (!bool(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info)))
{
return false;
}
@@ -103,12 +110,14 @@ inline bool validate_lhs_rhs_info_reshaped_only_rhs(const GEMMLHSMatrixInfo &lhs
gemm_kernel_info.lhs_info = lhs_info;
gemm_kernel_info.rhs_info = rhs_info;
gemm_kernel_info.has_pad_y = false;
- if(!bool(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, 1.f, 0.f, lhs_info, rhs_info, gemm_kernel_info)))
+ if (!bool(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, 1.f, 0.f, lhs_info,
+ rhs_info, gemm_kernel_info)))
{
return false;
}
gemm_kernel_info.has_pad_y = true;
- if(!bool(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, 1.f, 0.f, lhs_info, rhs_info, gemm_kernel_info)))
+ if (!bool(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, 1.f, 0.f, lhs_info,
+ rhs_info, gemm_kernel_info)))
{
return false;
}
@@ -116,49 +125,65 @@ inline bool validate_lhs_rhs_info_reshaped_only_rhs(const GEMMLHSMatrixInfo &lhs
}
//Automatically select between mlgo (prioritized) and default heuristics for reshaped only rhs kernel configs
-inline std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery query, GEMMKernelInfo kernel_info, const ITensorInfo *a,
- const ITensorInfo *b,
- const ITensorInfo *c, const ITensorInfo *output)
+inline std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery query,
+ GEMMKernelInfo kernel_info,
+ const ITensorInfo *a,
+ const ITensorInfo *b,
+ const ITensorInfo *c,
+ const ITensorInfo *output)
{
auto config = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(query);
- if(config)
+ if (config)
{
- if(validate_lhs_rhs_info_reshaped_only_rhs(config.lhs_info, config.rhs_info, a, b, c, output, kernel_info))
+ if (validate_lhs_rhs_info_reshaped_only_rhs(config.lhs_info, config.rhs_info, a, b, c, output, kernel_info))
{
- ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from mlgo heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
- return { config.lhs_info, config.rhs_info };
+ ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE(
+ "Use reshaped_only_rhs config from mlgo heuristics: LHS info: %s ; RHS info: %s ",
+ to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
+ return {config.lhs_info, config.rhs_info};
}
}
config = auto_heuristics::select_default_gemm_config_reshaped_only_rhs(query);
- ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped_only_rhs config from default heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
- return { config.lhs_info, config.rhs_info };
+ ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE(
+ "Use reshaped_only_rhs config from default heuristics: LHS info: %s ; RHS info: %s ",
+ to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
+ return {config.lhs_info, config.rhs_info};
}
// Validate lhs_info and rhs_info for reshaped kernel
-inline bool validate_lhs_rhs_info_reshaped(const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c,
- const ITensorInfo *output, GEMMKernelInfo gemm_kernel_info, bool reinterpret_input_as_3d)
+inline bool validate_lhs_rhs_info_reshaped(const GEMMLHSMatrixInfo &lhs_info,
+ const GEMMRHSMatrixInfo &rhs_info,
+ const ITensorInfo *a,
+ const ITensorInfo *b,
+ const ITensorInfo *c,
+ const ITensorInfo *output,
+ GEMMKernelInfo gemm_kernel_info,
+ bool reinterpret_input_as_3d)
{
// Validate GEMMLHSMatrixInfo and GEMMRHSMatrixInfo for reshaped kernel
TensorInfo tmp_a_info{};
TensorInfo tmp_b_info{};
// Validate reshape LHS kernel
- auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, reinterpret_input_as_3d)));
- if(!bool(ClGemmReshapeLhsMatrixKernel::validate(a, &tmp_a_info, lhs_info, reinterpret_input_as_3d)))
+ auto_init_if_empty(tmp_a_info,
+ a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, reinterpret_input_as_3d)));
+ if (!bool(ClGemmReshapeLhsMatrixKernel::validate(a, &tmp_a_info, lhs_info, reinterpret_input_as_3d)))
{
return false;
}
// Validate reshape RHS kernel
auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
- if(!bool(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info)))
+ if (!bool(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info)))
{
return false;
}
// Validate mm kernel
gemm_kernel_info.lhs_info = lhs_info;
gemm_kernel_info.rhs_info = rhs_info;
- if(!bool(ClGemmMatrixMultiplyReshapedKernel::validate(&tmp_a_info, &tmp_b_info, c, output, 1.f, 0.f, lhs_info, rhs_info, gemm_kernel_info)))
+ if (!bool(ClGemmMatrixMultiplyReshapedKernel::validate(&tmp_a_info, &tmp_b_info, c, output, 1.f, 0.f, lhs_info,
+ rhs_info, gemm_kernel_info)))
{
return false;
}
@@ -166,21 +191,32 @@ inline bool validate_lhs_rhs_info_reshaped(const GEMMLHSMatrixInfo &lhs_info, co
}
//Automatically select between mlgo (prioritized) and default heuristics for reshaped kernel configs
-inline std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> auto_select_gemm_config_reshaped(auto_heuristics::CommonQuery query, GEMMKernelInfo kernel_info, const ITensorInfo *a, const ITensorInfo *b,
- const ITensorInfo *c, const ITensorInfo *output, bool reinterpret_input_as_3d)
+inline std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo>
+auto_select_gemm_config_reshaped(auto_heuristics::CommonQuery query,
+ GEMMKernelInfo kernel_info,
+ const ITensorInfo *a,
+ const ITensorInfo *b,
+ const ITensorInfo *c,
+ const ITensorInfo *output,
+ bool reinterpret_input_as_3d)
{
auto config = auto_heuristics::select_mlgo_gemm_config_reshaped(query);
- if(config)
+ if (config)
{
- if(validate_lhs_rhs_info_reshaped(config.lhs_info, config.rhs_info, a, b, c, output, kernel_info, reinterpret_input_as_3d))
+ if (validate_lhs_rhs_info_reshaped(config.lhs_info, config.rhs_info, a, b, c, output, kernel_info,
+ reinterpret_input_as_3d))
{
- ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped config from mlgo heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
- return { config.lhs_info, config.rhs_info };
+ ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE(
+ "Use reshaped config from mlgo heuristics: LHS info: %s ; RHS info: %s ",
+ to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
+ return {config.lhs_info, config.rhs_info};
}
}
config = auto_heuristics::select_default_gemm_config_reshaped(query);
- ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("Use reshaped config from default heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(), to_string(config.rhs_info).c_str());
- return { config.lhs_info, config.rhs_info };
+ ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE(
+ "Use reshaped config from default heuristics: LHS info: %s ; RHS info: %s ", to_string(config.lhs_info).c_str(),
+ to_string(config.rhs_info).c_str());
+ return {config.lhs_info, config.rhs_info};
}
} // namespace
@@ -200,18 +236,24 @@ ClGemm::ClGemm()
{
}
-void ClGemm::configure_native(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta,
- const GEMMInfo &gemm_info)
+void ClGemm::configure_native(const CLCompileContext &compile_context,
+ ITensorInfo *a,
+ ITensorInfo *b,
+ ITensorInfo *c,
+ ITensorInfo *output,
+ float alpha,
+ float beta,
+ const GEMMInfo &gemm_info)
{
DataType data_type = a->data_type();
bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
- const unsigned int n = b->dimension(0);
- const unsigned int k = a->dimension(0);
- const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
- const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
- const GPUTarget gpu_target = CLScheduler::get().target();
- bool broadcast_bias = gemm_info.broadcast_bias();
+ const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
+ const unsigned int n = b->dimension(0);
+ const unsigned int k = a->dimension(0);
+ const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
+ const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
+ const GPUTarget gpu_target = CLScheduler::get().target();
+ bool broadcast_bias = gemm_info.broadcast_bias();
GEMMKernelInfo kernel_info;
kernel_info.m = m;
@@ -225,24 +267,32 @@ void ClGemm::configure_native(const CLCompileContext &compile_context, ITensorIn
// Set the target for the kernels
_mm_native_kernel->set_target(gpu_target);
- auto config = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size });
+ auto config = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(
+ auto_heuristics::CommonQuery{gpu_target, data_type, m, n, k, batch_size});
// Configure and tune matrix multiply kernel
- _mm_native_kernel->configure(compile_context, a, b, c, output, alpha, beta, config.lhs_info, config.rhs_info, kernel_info);
+ _mm_native_kernel->configure(compile_context, a, b, c, output, alpha, beta, config.lhs_info, config.rhs_info,
+ kernel_info);
}
-void ClGemm::configure_reshaped(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta,
- const GEMMInfo &gemm_info)
+void ClGemm::configure_reshaped(const CLCompileContext &compile_context,
+ ITensorInfo *a,
+ ITensorInfo *b,
+ ITensorInfo *c,
+ ITensorInfo *output,
+ float alpha,
+ float beta,
+ const GEMMInfo &gemm_info)
{
DataType data_type = a->data_type();
bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
- const unsigned int n = b->dimension(0);
- const unsigned int k = a->dimension(0);
- const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
- const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
- const GPUTarget gpu_target = CLScheduler::get().target();
- bool broadcast_bias = gemm_info.broadcast_bias();
+ const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
+ const unsigned int n = b->dimension(0);
+ const unsigned int k = a->dimension(0);
+ const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
+ const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
+ const GPUTarget gpu_target = CLScheduler::get().target();
+ bool broadcast_bias = gemm_info.broadcast_bias();
GEMMKernelInfo kernel_info;
kernel_info.m = m;
@@ -261,32 +311,42 @@ void ClGemm::configure_reshaped(const CLCompileContext &compile_context, ITensor
GEMMRHSMatrixInfo rhs_info{};
// Pick up the GEMM configuration
- std::tie(lhs_info, rhs_info) = auto_select_gemm_config_reshaped(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size }, kernel_info, a, b,
- c, output, gemm_info.reinterpret_input_as_3d());
+ std::tie(lhs_info, rhs_info) =
+ auto_select_gemm_config_reshaped(auto_heuristics::CommonQuery{gpu_target, data_type, m, n, k, batch_size},
+ kernel_info, a, b, c, output, gemm_info.reinterpret_input_as_3d());
_reshape_lhs_kernel->configure(compile_context, a, &_tmp_a, lhs_info, gemm_info.reinterpret_input_as_3d());
_reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info);
// Configure and tune matrix multiply kernel
- _mm_reshaped_kernel->configure(compile_context, &_tmp_a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
+ _mm_reshaped_kernel->configure(compile_context, &_tmp_a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info,
+ kernel_info);
// Request memory for LHS and RHS reshape matrix
_aux_mem[LhsReshape] = MemoryInfo(offset_int_vec(LhsReshape), MemoryLifetime::Temporary, _tmp_a.total_size());
- _aux_mem[RhsReshape] = MemoryInfo(offset_int_vec(RhsReshape), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size());
+ _aux_mem[RhsReshape] = MemoryInfo(
+ offset_int_vec(RhsReshape),
+ _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size());
}
-void ClGemm::configure_reshaped_only_rhs(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta,
- const GEMMInfo &gemm_info)
+void ClGemm::configure_reshaped_only_rhs(const CLCompileContext &compile_context,
+ ITensorInfo *a,
+ ITensorInfo *b,
+ ITensorInfo *c,
+ ITensorInfo *output,
+ float alpha,
+ float beta,
+ const GEMMInfo &gemm_info)
{
DataType data_type = a->data_type();
bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
- const unsigned int n = b->dimension(0);
- const unsigned int k = a->dimension(0);
- const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
- const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
- const GPUTarget gpu_target = CLScheduler::get().target();
- bool broadcast_bias = gemm_info.broadcast_bias();
+ const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
+ const unsigned int n = b->dimension(0);
+ const unsigned int k = a->dimension(0);
+ const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
+ const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
+ const GPUTarget gpu_target = CLScheduler::get().target();
+ bool broadcast_bias = gemm_info.broadcast_bias();
GEMMKernelInfo kernel_info;
kernel_info.m = m;
@@ -304,7 +364,8 @@ void ClGemm::configure_reshaped_only_rhs(const CLCompileContext &compile_context
GEMMRHSMatrixInfo rhs_info{};
// Pick up the GEMM configuration
- std::tie(lhs_info, rhs_info) = auto_select_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size }, kernel_info, a, b, c, output);
+ std::tie(lhs_info, rhs_info) = auto_select_gemm_config_reshaped_only_rhs(
+ auto_heuristics::CommonQuery{gpu_target, data_type, m, n, k, batch_size}, kernel_info, a, b, c, output);
// Transpose matrix
_reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info);
@@ -315,24 +376,33 @@ void ClGemm::configure_reshaped_only_rhs(const CLCompileContext &compile_context
// Configure matrix multiply kernel with no y padding support
kernel_info.has_pad_y = false;
- _mm_reshaped_only_rhs_kernel->configure(compile_context, a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
+ _mm_reshaped_only_rhs_kernel->configure(compile_context, a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info,
+ kernel_info);
// Request memory for RHS reshape matrix
- _aux_mem[RhsReshape] = MemoryInfo(offset_int_vec(RhsReshape), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size());
+ _aux_mem[RhsReshape] = MemoryInfo(
+ offset_int_vec(RhsReshape),
+ _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size());
}
-void ClGemm::configure_reshaped_only_rhs_mmul(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta,
- const GEMMInfo &gemm_info)
+void ClGemm::configure_reshaped_only_rhs_mmul(const CLCompileContext &compile_context,
+ ITensorInfo *a,
+ ITensorInfo *b,
+ ITensorInfo *c,
+ ITensorInfo *output,
+ float alpha,
+ float beta,
+ const GEMMInfo &gemm_info)
{
DataType data_type = a->data_type();
bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
- const unsigned int n = b->dimension(0);
- const unsigned int k = a->dimension(0);
- const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
- const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
- const GPUTarget gpu_target = CLScheduler::get().target();
- bool broadcast_bias = gemm_info.broadcast_bias();
+ const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
+ const unsigned int n = b->dimension(0);
+ const unsigned int k = a->dimension(0);
+ const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
+ const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
+ const GPUTarget gpu_target = CLScheduler::get().target();
+ bool broadcast_bias = gemm_info.broadcast_bias();
GEMMKernelInfo kernel_info;
kernel_info.m = m;
@@ -350,9 +420,10 @@ void ClGemm::configure_reshaped_only_rhs_mmul(const CLCompileContext &compile_co
GEMMRHSMatrixInfo rhs_info{};
// Pick up the GEMM configuration
- auto gemm_config = select_default_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size });
- lhs_info = gemm_config.lhs_info;
- rhs_info = gemm_config.rhs_info;
+ auto gemm_config = select_default_gemm_config_reshaped_only_rhs(
+ auto_heuristics::CommonQuery{gpu_target, data_type, m, n, k, batch_size});
+ lhs_info = gemm_config.lhs_info;
+ rhs_info = gemm_config.rhs_info;
// Force H0 to 4 in order to use the MMUL extension
rhs_info.h0 = 4;
@@ -361,13 +432,22 @@ void ClGemm::configure_reshaped_only_rhs_mmul(const CLCompileContext &compile_co
// Configure matrix multiply kernel with no y padding support
kernel_info.has_pad_y = false;
- _mm_reshaped_only_rhs_mmul_kernel->configure(compile_context, a, &_tmp_b, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
+ _mm_reshaped_only_rhs_mmul_kernel->configure(compile_context, a, &_tmp_b, c, output, alpha, beta, lhs_info,
+ rhs_info, kernel_info);
// Request memory for RHS reshape matrix
- _aux_mem[RhsReshape] = MemoryInfo(offset_int_vec(RhsReshape), _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size());
+ _aux_mem[RhsReshape] = MemoryInfo(
+ offset_int_vec(RhsReshape),
+ _reshape_b_only_on_first_run ? MemoryLifetime::Persistent : MemoryLifetime::Temporary, _tmp_b.total_size());
}
-Status ClGemm::validate_native(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
+Status ClGemm::validate_native(const ITensorInfo *a,
+ const ITensorInfo *b,
+ const ITensorInfo *c,
+ const ITensorInfo *output,
+ float alpha,
+ float beta,
+ const GEMMInfo &gemm_info)
{
ARM_COMPUTE_UNUSED(alpha);
ARM_COMPUTE_UNUSED(output);
@@ -376,12 +456,12 @@ Status ClGemm::validate_native(const ITensorInfo *a, const ITensorInfo *b, const
const GPUTarget gpu_target = CLScheduler::get().target();
DataType data_type = a->data_type();
bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
- const unsigned int n = b->dimension(0);
- const unsigned int k = a->dimension(0);
- const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
- const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
- const bool broadcast_bias = gemm_info.broadcast_bias();
+ const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
+ const unsigned int n = b->dimension(0);
+ const unsigned int k = a->dimension(0);
+ const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
+ const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
+ const bool broadcast_bias = gemm_info.broadcast_bias();
GEMMKernelInfo kernel_info;
kernel_info.m = m;
@@ -392,15 +472,23 @@ Status ClGemm::validate_native(const ITensorInfo *a, const ITensorInfo *b, const
kernel_info.broadcast_bias = broadcast_bias;
kernel_info.activation_info = gemm_info.activation_info();
- auto config = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size });
+ auto config = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(
+ auto_heuristics::CommonQuery{gpu_target, data_type, m, n, k, batch_size});
// Validate matrix multiply
- ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyNativeKernel::validate(a, b, c, output, alpha, beta, config.lhs_info, config.rhs_info, kernel_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyNativeKernel::validate(
+ a, b, c, output, alpha, beta, config.lhs_info, config.rhs_info, kernel_info));
return Status{};
}
-Status ClGemm::validate_reshaped(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
+Status ClGemm::validate_reshaped(const ITensorInfo *a,
+ const ITensorInfo *b,
+ const ITensorInfo *c,
+ const ITensorInfo *output,
+ float alpha,
+ float beta,
+ const GEMMInfo &gemm_info)
{
ARM_COMPUTE_UNUSED(alpha);
ARM_COMPUTE_UNUSED(output);
@@ -412,12 +500,12 @@ Status ClGemm::validate_reshaped(const ITensorInfo *a, const ITensorInfo *b, con
const GPUTarget gpu_target = CLScheduler::get().target();
DataType data_type = a->data_type();
bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
- const unsigned int n = b->dimension(0);
- const unsigned int k = a->dimension(0);
- const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
- const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
- const bool broadcast_bias = gemm_info.broadcast_bias();
+ const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
+ const unsigned int n = b->dimension(0);
+ const unsigned int k = a->dimension(0);
+ const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
+ const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
+ const bool broadcast_bias = gemm_info.broadcast_bias();
GEMMKernelInfo kernel_info;
kernel_info.m = m;
@@ -433,23 +521,33 @@ Status ClGemm::validate_reshaped(const ITensorInfo *a, const ITensorInfo *b, con
// Pick up the GEMM configuration
// NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails
- const auto gemm_config = select_default_gemm_config_reshaped(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size });
- lhs_info = gemm_config.lhs_info;
- rhs_info = gemm_config.rhs_info;
+ const auto gemm_config =
+ select_default_gemm_config_reshaped(auto_heuristics::CommonQuery{gpu_target, data_type, m, n, k, batch_size});
+ lhs_info = gemm_config.lhs_info;
+ rhs_info = gemm_config.rhs_info;
- auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*a, lhs_info, gemm_info.reinterpret_input_as_3d())));
- ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeLhsMatrixKernel::validate(a, &tmp_a_info, lhs_info, gemm_info.reinterpret_input_as_3d()));
+ auto_init_if_empty(tmp_a_info, a->clone()->set_tensor_shape(
+ compute_lhs_reshaped_shape(*a, lhs_info, gemm_info.reinterpret_input_as_3d())));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ ClGemmReshapeLhsMatrixKernel::validate(a, &tmp_a_info, lhs_info, gemm_info.reinterpret_input_as_3d()));
auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info));
// Validate matrix multiply
- ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedKernel::validate(&tmp_a_info, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedKernel::validate(&tmp_a_info, &tmp_b_info, c, output, alpha,
+ beta, lhs_info, rhs_info, kernel_info));
return Status{};
}
-Status ClGemm::validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
+Status ClGemm::validate_reshaped_only_rhs(const ITensorInfo *a,
+ const ITensorInfo *b,
+ const ITensorInfo *c,
+ const ITensorInfo *output,
+ float alpha,
+ float beta,
+ const GEMMInfo &gemm_info)
{
ARM_COMPUTE_UNUSED(alpha);
ARM_COMPUTE_UNUSED(output);
@@ -460,12 +558,12 @@ Status ClGemm::validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInf
const GPUTarget gpu_target = CLScheduler::get().target();
const DataType data_type = a->data_type();
bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
- const unsigned int n = b->dimension(0);
- const unsigned int k = a->dimension(0);
- const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
- const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
- const bool broadcast_bias = gemm_info.broadcast_bias();
+ const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
+ const unsigned int n = b->dimension(0);
+ const unsigned int k = a->dimension(0);
+ const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
+ const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
+ const bool broadcast_bias = gemm_info.broadcast_bias();
GEMMKernelInfo kernel_info;
kernel_info.m = m;
@@ -481,24 +579,33 @@ Status ClGemm::validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInf
// Pick up the GEMM configuration
// NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails
- const auto gemm_config = select_default_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size });
- lhs_info = gemm_config.lhs_info;
- rhs_info = gemm_config.rhs_info;
+ const auto gemm_config = select_default_gemm_config_reshaped_only_rhs(
+ auto_heuristics::CommonQuery{gpu_target, data_type, m, n, k, batch_size});
+ lhs_info = gemm_config.lhs_info;
+ rhs_info = gemm_config.rhs_info;
auto_init_if_empty(tmp_b_info, b->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*b, rhs_info)));
ARM_COMPUTE_RETURN_ON_ERROR(ClGemmReshapeRhsMatrixKernel::validate(b, &tmp_b_info, rhs_info));
// Validate matrix multiply
kernel_info.has_pad_y = false;
- ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(
+ a, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info));
kernel_info.has_pad_y = true;
- ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(a, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedOnlyRhsKernel::validate(
+ a, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info));
return Status{};
}
-Status ClGemm::validate_reshaped_only_rhs_mmul(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
+Status ClGemm::validate_reshaped_only_rhs_mmul(const ITensorInfo *a,
+ const ITensorInfo *b,
+ const ITensorInfo *c,
+ const ITensorInfo *output,
+ float alpha,
+ float beta,
+ const GEMMInfo &gemm_info)
{
ARM_COMPUTE_UNUSED(alpha);
ARM_COMPUTE_UNUSED(output);
@@ -508,12 +615,12 @@ Status ClGemm::validate_reshaped_only_rhs_mmul(const ITensorInfo *a, const ITens
const GPUTarget gpu_target = CLScheduler::get().target();
const DataType data_type = a->data_type();
bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
- const unsigned int n = b->dimension(0);
- const unsigned int k = a->dimension(0);
- const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
- const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
- const bool broadcast_bias = gemm_info.broadcast_bias();
+ const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
+ const unsigned int n = b->dimension(0);
+ const unsigned int k = a->dimension(0);
+ const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
+ const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
+ const bool broadcast_bias = gemm_info.broadcast_bias();
GEMMKernelInfo kernel_info;
kernel_info.m = m;
@@ -529,9 +636,10 @@ Status ClGemm::validate_reshaped_only_rhs_mmul(const ITensorInfo *a, const ITens
// Pick up the GEMM configuration
// NOTE: No need to validate mlgo configurations as they automatically fall back to default heuristics if validation fails
- const auto gemm_config = select_default_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size });
- lhs_info = gemm_config.lhs_info;
- rhs_info = gemm_config.rhs_info;
+ const auto gemm_config = select_default_gemm_config_reshaped_only_rhs(
+ auto_heuristics::CommonQuery{gpu_target, data_type, m, n, k, batch_size});
+ lhs_info = gemm_config.lhs_info;
+ rhs_info = gemm_config.rhs_info;
// Force H0 to 4 in order to use the MMUL extension
rhs_info.h0 = 4;
@@ -540,12 +648,20 @@ Status ClGemm::validate_reshaped_only_rhs_mmul(const ITensorInfo *a, const ITens
// Validate matrix multiply
kernel_info.has_pad_y = false;
- ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel::validate(a, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel::validate(
+ a, &tmp_b_info, c, output, alpha, beta, lhs_info, rhs_info, kernel_info));
return Status{};
}
-void ClGemm::configure(const CLCompileContext &compile_context, ITensorInfo *a, ITensorInfo *b, ITensorInfo *c, ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
+void ClGemm::configure(const CLCompileContext &compile_context,
+ ITensorInfo *a,
+ ITensorInfo *b,
+ ITensorInfo *c,
+ ITensorInfo *output,
+ float alpha,
+ float beta,
+ const GEMMInfo &gemm_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);
@@ -558,20 +674,21 @@ void ClGemm::configure(const CLCompileContext &compile_context, ITensorInfo *a,
_is_prepared = gemm_info.retain_internal_weights();
bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
- const unsigned int n = b->dimension(0);
- const unsigned int k = a->dimension(0);
- const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
+ const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
+ const unsigned int n = b->dimension(0);
+ const unsigned int k = a->dimension(0);
+ const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
// Select GEMMType
- _gemm_kernel_type = auto_select_gemm_kernel(auto_heuristics::CommonQuery{ CLScheduler::get().target(), a->data_type(), m, n, k, batch_size }, _reshape_b_only_on_first_run,
- b->are_values_constant());
+ _gemm_kernel_type = auto_select_gemm_kernel(
+ auto_heuristics::CommonQuery{CLScheduler::get().target(), a->data_type(), m, n, k, batch_size},
+ _reshape_b_only_on_first_run, b->are_values_constant());
const bool fuse_add_c = (!(helpers::float_ops::is_zero(beta)) && c != nullptr);
ITensorInfo *c_to_use = fuse_add_c ? c : nullptr;
- switch(_gemm_kernel_type)
+ switch (_gemm_kernel_type)
{
case CLGEMMKernelType::NATIVE:
{
@@ -600,35 +717,41 @@ void ClGemm::configure(const CLCompileContext &compile_context, ITensorInfo *a,
}
}
-Status ClGemm::validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info)
+Status ClGemm::validate(const ITensorInfo *a,
+ const ITensorInfo *b,
+ const ITensorInfo *c,
+ const ITensorInfo *output,
+ float alpha,
+ float beta,
+ const GEMMInfo &gemm_info)
{
// Get the GPU target
bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
- const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
- const unsigned int n = b->dimension(0);
- const unsigned int k = a->dimension(0);
- const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
+ const unsigned int m = reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
+ const unsigned int n = b->dimension(0);
+ const unsigned int k = a->dimension(0);
+ const unsigned int batch_size = reinterpret_input_as_3d ? a->dimension(3) : a->dimension(2);
// Check data type early because the auto_select_gemm_kernel has assertions on supported data types
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::F32, DataType::F16);
// Select GEMMType
- CLGEMMKernelType gemm_kernel_type = auto_select_gemm_kernel(auto_heuristics::CommonQuery
- {
- CLScheduler::get().target(),
- a->data_type(),
- m,
- n,
- k,
- batch_size,
- },
- gemm_info.reshape_b_only_on_first_run(), b->are_values_constant());
+ CLGEMMKernelType gemm_kernel_type = auto_select_gemm_kernel(
+ auto_heuristics::CommonQuery{
+ CLScheduler::get().target(),
+ a->data_type(),
+ m,
+ n,
+ k,
+ batch_size,
+ },
+ gemm_info.reshape_b_only_on_first_run(), b->are_values_constant());
const bool fuse_add_c = (!(helpers::float_ops::is_zero(beta)) && c != nullptr);
const ITensorInfo *c_to_use = fuse_add_c ? c : nullptr;
- switch(gemm_kernel_type)
+ switch (gemm_kernel_type)
{
case CLGEMMKernelType::NATIVE:
{
@@ -647,7 +770,8 @@ Status ClGemm::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso
}
case CLGEMMKernelType::RESHAPED_ONLY_RHS_MMUL:
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_reshaped_only_rhs_mmul(a, b, c_to_use, output, alpha, beta, gemm_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_reshaped_only_rhs_mmul(a, b, c_to_use, output, alpha, beta, gemm_info));
break;
}
default:
@@ -674,7 +798,7 @@ void ClGemm::run(ITensorPack &tensors)
prepare(tensors);
// Run matrix multiply kernel
- switch(_gemm_kernel_type)
+ switch (_gemm_kernel_type)
{
case CLGEMMKernelType::NATIVE:
{
@@ -684,13 +808,13 @@ void ClGemm::run(ITensorPack &tensors)
case CLGEMMKernelType::RESHAPED:
{
// Run interleave kernel
- ITensorPack reshape_lhs_pack{ { ACL_SRC, lhs }, { ACL_DST, lhs_reshaped.get() } };
+ ITensorPack reshape_lhs_pack{{ACL_SRC, lhs}, {ACL_DST, lhs_reshaped.get()}};
CLScheduler::get().enqueue_op(*_reshape_lhs_kernel, reshape_lhs_pack, false);
- if(!_reshape_b_only_on_first_run)
+ if (!_reshape_b_only_on_first_run)
{
// Run transpose kernel
- ITensorPack reshape_rhs_pack{ { ACL_SRC, rhs }, { ACL_DST, rhs_reshaped.get() } };
+ ITensorPack reshape_rhs_pack{{ACL_SRC, rhs}, {ACL_DST, rhs_reshaped.get()}};
CLScheduler::get().enqueue_op(*_reshape_rhs_kernel, reshape_rhs_pack, false);
}
// Copy original tensor pack and overwrite lhs and rhs with reshaped counterparts
@@ -698,7 +822,7 @@ void ClGemm::run(ITensorPack &tensors)
gemm_reshaped_pack.add_const_tensor(ACL_SRC_0, lhs_reshaped.get());
gemm_reshaped_pack.add_const_tensor(ACL_SRC_1, rhs_reshaped.get());
- if(_gemm_kernel_type == CLGEMMKernelType::RESHAPED)
+ if (_gemm_kernel_type == CLGEMMKernelType::RESHAPED)
{
CLScheduler::get().enqueue_op(*_mm_reshaped_kernel, gemm_reshaped_pack, true);
}
@@ -706,10 +830,10 @@ void ClGemm::run(ITensorPack &tensors)
}
case CLGEMMKernelType::RESHAPED_ONLY_RHS:
{
- if(!_reshape_b_only_on_first_run)
+ if (!_reshape_b_only_on_first_run)
{
// Run transpose kernel
- ITensorPack reshape_rhs_pack{ { ACL_SRC, rhs }, { ACL_DST, rhs_reshaped.get() } };
+ ITensorPack reshape_rhs_pack{{ACL_SRC, rhs}, {ACL_DST, rhs_reshaped.get()}};
CLScheduler::get().enqueue_op(*_reshape_rhs_kernel, reshape_rhs_pack, false);
}
// In case of RESHAPED_ONLY_RHS, we need to check the padding requirement
@@ -722,7 +846,7 @@ void ClGemm::run(ITensorPack &tensors)
ITensorPack gemm_reshaped_onlyrhs_pack(tensors);
gemm_reshaped_onlyrhs_pack.add_const_tensor(ACL_SRC_1, rhs_reshaped.get());
- if(has_pad_y)
+ if (has_pad_y)
{
ARM_COMPUTE_ERROR_ON(has_pad_y);
}
@@ -734,10 +858,10 @@ void ClGemm::run(ITensorPack &tensors)
}
case CLGEMMKernelType::RESHAPED_ONLY_RHS_MMUL:
{
- if(!_reshape_b_only_on_first_run)
+ if (!_reshape_b_only_on_first_run)
{
// Run transpose kernel
- ITensorPack reshape_rhs_pack{ { ACL_SRC, rhs }, { ACL_DST, rhs_reshaped.get() } };
+ ITensorPack reshape_rhs_pack{{ACL_SRC, rhs}, {ACL_DST, rhs_reshaped.get()}};
CLScheduler::get().enqueue_op(*_reshape_rhs_kernel, reshape_rhs_pack, false);
}
// In case of RESHAPED_ONLY_RHS, we need to check the padding requirement
@@ -750,7 +874,7 @@ void ClGemm::run(ITensorPack &tensors)
ITensorPack gemm_reshaped_onlyrhs_pack(tensors);
gemm_reshaped_onlyrhs_pack.add_const_tensor(ACL_SRC_1, rhs_reshaped.get());
- if(has_pad_y)
+ if (has_pad_y)
{
ARM_COMPUTE_ERROR_ON(has_pad_y);
}
@@ -769,20 +893,22 @@ void ClGemm::run(ITensorPack &tensors)
void ClGemm::prepare(ITensorPack &constants)
{
- if(!_is_prepared)
+ if (!_is_prepared)
{
- const ITensor *src1 = constants.get_const_tensor(ACL_SRC_1);
- ICLTensor *rhs_aux = utils::cast::polymorphic_downcast<ICLTensor *>(constants.get_tensor(offset_int_vec(RhsReshape)));
+ const ITensor *src1 = constants.get_const_tensor(ACL_SRC_1);
+ ICLTensor *rhs_aux =
+ utils::cast::polymorphic_downcast<ICLTensor *>(constants.get_tensor(offset_int_vec(RhsReshape)));
// If memory for RHS is persistent and src1 is provided re-transform else assume that RHS is transformed
- if((_aux_mem[AuxTensorIdx::RhsReshape].lifetime == MemoryLifetime::Persistent) && (src1 != nullptr && rhs_aux != nullptr) && rhs_aux)
+ if ((_aux_mem[AuxTensorIdx::RhsReshape].lifetime == MemoryLifetime::Persistent) &&
+ (src1 != nullptr && rhs_aux != nullptr) && rhs_aux)
{
ARM_COMPUTE_LOG_INFO_WITH_FUNCNAME_ACL("Transforming RHS Matrix!");
CLAuxTensorHandler rhs_reshaped(_tmp_b, *rhs_aux);
ARM_COMPUTE_ERROR_ON(rhs_reshaped.get()->cl_buffer().get() == nullptr);
- ITensorPack reshape_rhs_pack{ { ACL_SRC, src1 }, { ACL_DST, rhs_reshaped.get() } };
+ ITensorPack reshape_rhs_pack{{ACL_SRC, src1}, {ACL_DST, rhs_reshaped.get()}};
CLScheduler::get().enqueue_op(*_reshape_rhs_kernel, reshape_rhs_pack, true);
}
_is_prepared = true;