about | summary | refs | log | tree | commit | diff
path: root/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp')
-rw-r--r-- src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp 51
1 file changed, 40 insertions, 11 deletions
diff --git a/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp
index 3888353ee7..d348f2c06d 100644
--- a/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp
@@ -46,11 +46,18 @@ class Coordinates;
namespace
{
-Status validate_arguments(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row,
+Status validate_arguments(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias,
int32_t a_offset, int32_t b_offset)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(mm_result, 1, DataType::S32);
+ if(bias != nullptr)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
+ ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() > 1);
+ ARM_COMPUTE_RETURN_ERROR_ON(mm_result->dimension(0) != bias->dimension(0));
+ }
+
// If a_offset == 0, vector_sum_col can be a nullptr
if(a_offset != 0)
{
@@ -64,11 +71,11 @@ Status validate_arguments(const ITensorInfo *mm_result, const ITensorInfo *vecto
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(vector_sum_row, 1, DataType::S32);
// Check if input is a 3D reinterpretation
- const bool reinterpret_as_3d = vector_sum_row != nullptr && mm_result->num_dimensions() > 1 && mm_result->tensor_shape().y() != vector_sum_row->tensor_shape().x();
+ const bool reinterpret_as_3d = mm_result->num_dimensions() > 1 && mm_result->tensor_shape().y() != vector_sum_row->tensor_shape().x();
// Validate input
ARM_COMPUTE_RETURN_ERROR_ON(reinterpret_as_3d && vector_sum_row->dimension(0) != (mm_result->dimension(1) * mm_result->dimension(2)));
- ARM_COMPUTE_RETURN_ERROR_ON(!reinterpret_as_3d && vector_sum_row != nullptr && vector_sum_row->dimension(0) != mm_result->dimension(1));
+ ARM_COMPUTE_RETURN_ERROR_ON(!reinterpret_as_3d && vector_sum_row->dimension(0) != mm_result->dimension(1));
TensorShape output_shape = mm_result->tensor_shape();
if(output_shape.num_dimensions() > 1)
@@ -96,7 +103,7 @@ Status validate_arguments(const ITensorInfo *mm_result, const ITensorInfo *vecto
return Status{};
}
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *mm_result, ITensorInfo *vector_sum_col, ITensorInfo *vector_sum_row,
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *mm_result, ITensorInfo *vector_sum_col, ITensorInfo *vector_sum_row, ITensorInfo *bias,
int32_t a_offset, int32_t b_offset)
{
constexpr unsigned int num_elems_processed_per_iteration = 4;
@@ -119,28 +126,37 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *mm_result,
window_changed = window_changed || update_window_and_padding(win, vector_sum_row_access);
}
+ if(bias != nullptr)
+ {
+ AccessWindowStatic bias_access(bias, 0, 0, ceil_to_multiple(bias->dimension(0), num_elems_processed_per_iteration), bias->tensor_shape()[1]);
+ window_changed = window_changed || update_window_and_padding(win, bias_access);
+ }
+
Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
return std::make_pair(err, win);
}
} // namespace
CLGEMMLowpOffsetContributionKernel::CLGEMMLowpOffsetContributionKernel()
- : _vector_sum_col(nullptr), _vector_sum_row(nullptr), _mm_result(nullptr)
+ : _vector_sum_col(nullptr), _vector_sum_row(nullptr), _mm_result(nullptr), _bias(nullptr)
{
}
-void CLGEMMLowpOffsetContributionKernel::configure(ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, int32_t k, int32_t a_offset, int32_t b_offset)
+void CLGEMMLowpOffsetContributionKernel::configure(ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, int32_t k, int32_t a_offset,
+ int32_t b_offset)
{
// Perform validate step
ARM_COMPUTE_ERROR_ON_NULLPTR(mm_result);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(mm_result->info(),
vector_sum_col != nullptr ? vector_sum_col->info() : nullptr,
vector_sum_row != nullptr ? vector_sum_row->info() : nullptr,
+ bias != nullptr ? bias->info() : nullptr,
a_offset, b_offset)); // NOLINT
_vector_sum_col = vector_sum_col;
_vector_sum_row = vector_sum_row;
_mm_result = mm_result;
+ _bias = bias;
// Check if input is a 3D reinterpretation
const bool reinterpret_as_3d = vector_sum_row != nullptr
@@ -161,20 +177,24 @@ void CLGEMMLowpOffsetContributionKernel::configure(ICLTensor *mm_result, const I
build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(a_offset * b_offset * k));
build_opts.add_option_if(reinterpret_as_3d, "-DHEIGHT_INPUT3D=" + support::cpp11::to_string(mm_result->info()->dimension(1)));
build_opts.add_option_if(reinterpret_as_3d, "-DDEPTH_INPUT3D=" + support::cpp11::to_string(mm_result->info()->dimension(2)));
+ build_opts.add_option_if(bias != nullptr, "-DADD_BIAS");
+
+ std::string kernel_name("gemmlowp_offset_contribution");
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("gemmlowp_offset_contribution", build_opts.options()));
+ _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
// Configure kernel window
auto win_config = validate_and_configure_window(mm_result->info(),
vector_sum_col != nullptr ? vector_sum_col->info() : nullptr,
vector_sum_row != nullptr ? vector_sum_row->info() : nullptr,
+ bias != nullptr ? bias->info() : nullptr,
a_offset, b_offset); // NOLINT
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
ICLKernel::configure_internal(win_config.second);
// Set config_id for enabling LWS tuning
- _config_id = "gemmlowp_offset_contribution_";
+ _config_id = kernel_name + "_";
_config_id += support::cpp11::to_string(mm_result->info()->dimension(0));
_config_id += "_";
_config_id += support::cpp11::to_string(mm_result->info()->dimension(1));
@@ -182,13 +202,14 @@ void CLGEMMLowpOffsetContributionKernel::configure(ICLTensor *mm_result, const I
_config_id += support::cpp11::to_string(mm_result->info()->dimension(2));
}
-Status CLGEMMLowpOffsetContributionKernel::validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row,
+Status CLGEMMLowpOffsetContributionKernel::validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias,
int32_t a_offset, int32_t b_offset)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(mm_result, vector_sum_col, vector_sum_row, a_offset, b_offset));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(mm_result, vector_sum_col, vector_sum_row, bias, a_offset, b_offset));
ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(mm_result->clone().get(),
vector_sum_col != nullptr ? vector_sum_col->clone().get() : nullptr,
vector_sum_row != nullptr ? vector_sum_row->clone().get() : nullptr,
+ bias != nullptr ? bias->clone().get() : nullptr,
a_offset, b_offset)
.first); // NOLINT
@@ -214,6 +235,10 @@ void CLGEMMLowpOffsetContributionKernel::run(const Window &window, cl::CommandQu
win_vector_sum_row.set(Window::DimY, Window::Dimension(0, 0, 0));
win_vector_sum_col.set(Window::DimZ, Window::Dimension(0, 0, 0));
+ Window biases_slice = slice;
+ biases_slice.set(Window::DimY, Window::Dimension(0, 1, 1));
+ biases_slice.set(Window::DimZ, Window::Dimension(0, 1, 1));
+
do
{
unsigned int idx = 0;
@@ -226,7 +251,11 @@ void CLGEMMLowpOffsetContributionKernel::run(const Window &window, cl::CommandQu
{
add_2D_tensor_argument(idx, _vector_sum_row, win_vector_sum_row);
}
- enqueue(queue, *this, slice);
+ if(_bias != nullptr)
+ {
+ add_1D_tensor_argument(idx, _bias, biases_slice);
+ }
+ enqueue(queue, *this, slice, lws_hint());
}
while(collapsed.slide_window_slice_3D(slice));
}