aboutsummaryrefslogtreecommitdiff
path: root/src/gpu/cl/kernels/ClGemmLowpOffsetContributionKernel.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/gpu/cl/kernels/ClGemmLowpOffsetContributionKernel.cpp')
-rw-r--r--src/gpu/cl/kernels/ClGemmLowpOffsetContributionKernel.cpp85
1 files changed, 53 insertions, 32 deletions
diff --git a/src/gpu/cl/kernels/ClGemmLowpOffsetContributionKernel.cpp b/src/gpu/cl/kernels/ClGemmLowpOffsetContributionKernel.cpp
index 9ec0b5182f..d93dbde95a 100644
--- a/src/gpu/cl/kernels/ClGemmLowpOffsetContributionKernel.cpp
+++ b/src/gpu/cl/kernels/ClGemmLowpOffsetContributionKernel.cpp
@@ -28,11 +28,10 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/StringUtils.h"
+#include "arm_compute/core/Validate.h"
#include "src/core/helpers/WindowHelpers.h"
-
#include "support/Cast.h"
#include "support/StringSupport.h"
@@ -44,12 +43,16 @@ namespace kernels
{
namespace
{
-Status validate_arguments(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias,
- int32_t a_offset, int32_t b_offset)
+Status validate_arguments(const ITensorInfo *mm_result,
+ const ITensorInfo *vector_sum_col,
+ const ITensorInfo *vector_sum_row,
+ const ITensorInfo *bias,
+ int32_t a_offset,
+ int32_t b_offset)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(mm_result, 1, DataType::S32);
- if(bias != nullptr)
+ if (bias != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() > 1);
@@ -57,26 +60,28 @@ Status validate_arguments(const ITensorInfo *mm_result, const ITensorInfo *vecto
}
// If a_offset == 0, vector_sum_col can be a nullptr
- if(a_offset != 0)
+ if (a_offset != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(vector_sum_col, 1, DataType::S32);
ARM_COMPUTE_RETURN_ERROR_ON(vector_sum_col->dimension(0) != mm_result->dimension(0));
}
// If b_offset == 0, vector_sum_row can be a nullptr
- if(b_offset != 0)
+ if (b_offset != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(vector_sum_row, 1, DataType::S32);
// Check if input is a 3D reinterpretation
- const bool reinterpret_as_3d = mm_result->num_dimensions() > 1 && mm_result->tensor_shape().y() != vector_sum_row->tensor_shape().x();
+ const bool reinterpret_as_3d =
+ mm_result->num_dimensions() > 1 && mm_result->tensor_shape().y() != vector_sum_row->tensor_shape().x();
// Validate input
- ARM_COMPUTE_RETURN_ERROR_ON(reinterpret_as_3d && vector_sum_row->dimension(0) != (mm_result->dimension(1) * mm_result->dimension(2)));
+ ARM_COMPUTE_RETURN_ERROR_ON(reinterpret_as_3d && vector_sum_row->dimension(0) !=
+ (mm_result->dimension(1) * mm_result->dimension(2)));
ARM_COMPUTE_RETURN_ERROR_ON(!reinterpret_as_3d && vector_sum_row->dimension(0) != mm_result->dimension(1));
TensorShape output_shape = mm_result->tensor_shape();
- if(output_shape.num_dimensions() > 1)
+ if (output_shape.num_dimensions() > 1)
{
const unsigned int output_batch_idx = reinterpret_as_3d ? 3 : 2;
@@ -87,13 +92,15 @@ Status validate_arguments(const ITensorInfo *mm_result, const ITensorInfo *vecto
ARM_COMPUTE_RETURN_ERROR_ON_MSG(vector_sum_row_shape[1] != output_shape[output_batch_idx],
"mm_result tensor must have the same number of batches of output tensor");
- if(a_offset != 0)
+ if (a_offset != 0)
{
TensorShape vector_sum_col_shape = vector_sum_col->tensor_shape();
vector_sum_col_shape.collapse_from(1);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(vector_sum_col_shape[1] != 1 && vector_sum_col_shape[1] != vector_sum_row_shape[1],
- "vector_sum_col tensor must have the same number of batches of vector_sum_row_shape or the number of batches must be set to 1");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(vector_sum_col_shape[1] != 1 &&
+ vector_sum_col_shape[1] != vector_sum_row_shape[1],
+ "vector_sum_col tensor must have the same number of batches of "
+ "vector_sum_row_shape or the number of batches must be set to 1");
}
}
}
@@ -108,29 +115,34 @@ ClGemmLowpOffsetContributionKernel::ClGemmLowpOffsetContributionKernel()
}
void ClGemmLowpOffsetContributionKernel::configure(const CLCompileContext &compile_context,
- const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias,
- int32_t k, int32_t a_offset, int32_t b_offset)
+ const ITensorInfo *mm_result,
+ const ITensorInfo *vector_sum_col,
+ const ITensorInfo *vector_sum_row,
+ const ITensorInfo *bias,
+ int32_t k,
+ int32_t a_offset,
+ int32_t b_offset)
{
// Perform validate step
ARM_COMPUTE_ERROR_ON_NULLPTR(mm_result);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(mm_result, vector_sum_col, vector_sum_row, bias, a_offset, b_offset));
- auto padding_info = get_padding_info({ mm_result, vector_sum_col, vector_sum_row, bias });
+ auto padding_info = get_padding_info({mm_result, vector_sum_col, vector_sum_row, bias});
// Check if input is a 3D reinterpretation
- const bool reinterpret_as_3d = vector_sum_row != nullptr
- && mm_result->num_dimensions() > 1
- && mm_result->tensor_shape().y() != vector_sum_row->tensor_shape().x();
+ const bool reinterpret_as_3d = vector_sum_row != nullptr && mm_result->num_dimensions() > 1 &&
+ mm_result->tensor_shape().y() != vector_sum_row->tensor_shape().x();
const unsigned int num_elems_processed_per_iteration = adjust_vec_size(4, mm_result->dimension(0));
// Set the arguments to pass at compile time
CLBuildOptions build_opts;
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
- build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(mm_result->dimension(0) % num_elems_processed_per_iteration));
+ build_opts.add_option("-DVEC_SIZE_LEFTOVER=" +
+ support::cpp11::to_string(mm_result->dimension(0) % num_elems_processed_per_iteration));
// If a_offset == 0, vector_sum_col can be a nullptr
- if(a_offset != 0)
+ if (a_offset != 0)
{
build_opts.add_option("-DA_OFFSET=" + support::cpp11::to_string(a_offset));
build_opts.add_option_if(vector_sum_col->tensor_shape().num_dimensions() > 1, "-DSUM_COL_HAS_BATCHES");
@@ -138,8 +150,10 @@ void ClGemmLowpOffsetContributionKernel::configure(const CLCompileContext &compi
// If b_offset == 0, vector_sum_row can be a nullptr
build_opts.add_option_if(b_offset != 0, "-DB_OFFSET=" + support::cpp11::to_string(b_offset));
build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(a_offset * b_offset * k));
- build_opts.add_option_if(reinterpret_as_3d, "-DHEIGHT_INPUT3D=" + support::cpp11::to_string(mm_result->dimension(1)));
- build_opts.add_option_if(reinterpret_as_3d, "-DDEPTH_INPUT3D=" + support::cpp11::to_string(mm_result->dimension(2)));
+ build_opts.add_option_if(reinterpret_as_3d,
+ "-DHEIGHT_INPUT3D=" + support::cpp11::to_string(mm_result->dimension(1)));
+ build_opts.add_option_if(reinterpret_as_3d,
+ "-DDEPTH_INPUT3D=" + support::cpp11::to_string(mm_result->dimension(2)));
build_opts.add_option_if(bias != nullptr, "-DADD_BIAS");
std::string kernel_name("gemmlowp_offset_contribution");
@@ -165,10 +179,15 @@ void ClGemmLowpOffsetContributionKernel::configure(const CLCompileContext &compi
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-Status ClGemmLowpOffsetContributionKernel::validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias,
- int32_t a_offset, int32_t b_offset)
+Status ClGemmLowpOffsetContributionKernel::validate(const ITensorInfo *mm_result,
+ const ITensorInfo *vector_sum_col,
+ const ITensorInfo *vector_sum_row,
+ const ITensorInfo *bias,
+ int32_t a_offset,
+ int32_t b_offset)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(mm_result, vector_sum_col, vector_sum_row, bias, a_offset, b_offset));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_arguments(mm_result, vector_sum_col, vector_sum_row, bias, a_offset, b_offset));
return Status{};
}
@@ -177,10 +196,13 @@ void ClGemmLowpOffsetContributionKernel::run_op(ITensorPack &tensors, const Wind
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IClKernel::window(), window);
- const auto vector_sum_col = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_VEC_COL_SUM));
- const auto vector_sum_row = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_VEC_ROW_SUM));
- const auto bias = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_BIAS));
- const auto mm_result = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_SRC_DST));
+ const auto vector_sum_col =
+ utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_VEC_COL_SUM));
+ const auto vector_sum_row =
+ utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_VEC_ROW_SUM));
+ const auto bias =
+ utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_BIAS));
+ const auto mm_result = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_SRC_DST));
Window collapsed = window.collapse_if_possible(IClKernel::window(), Window::DimZ);
Window slice = collapsed.first_slice_window_3D();
@@ -209,8 +231,7 @@ void ClGemmLowpOffsetContributionKernel::run_op(ITensorPack &tensors, const Wind
add_1D_tensor_argument_if((bias != nullptr), idx, bias, biases_slice);
enqueue(queue, *this, slice, lws_hint());
- }
- while(collapsed.slide_window_slice_3D(slice));
+ } while (collapsed.slide_window_slice_3D(slice));
}
} // namespace kernels
} // namespace opencl