aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
diff options
context:
space:
mode:
authorManuel Bottini <manuel.bottini@arm.com>2020-10-07 17:17:16 +0100
committerSheri Zhang <sheri.zhang@arm.com>2020-10-20 14:12:05 +0000
commited2a8ed7efb54ae0ec539b8735820b2c30756c44 (patch)
tree4de3adfd3fb8fa3d5a8b1df3f36e0db46d32a2b2 /src/core/CL/kernels/CLElementwiseOperationKernel.cpp
parent68dd25fbe6e4d3c3513fa5993863419769aa08fc (diff)
downloadComputeLibrary-ed2a8ed7efb54ae0ec539b8735820b2c30756c44.tar.gz
COMPMID-3715: Remove OpenCL padding - CLElementwiseOperationKernel
Change-Id: I8a685d4ac5de747a0f775bd10be9c411cf394953 Signed-off-by: Manuel Bottini <manuel.bottini@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4140 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-by: Giorgio Arena <giorgio.arena@arm.com> Reviewed-by: Sang-Hoon Park <sang-hoon.park@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLElementwiseOperationKernel.cpp')
-rw-r--r--src/core/CL/kernels/CLElementwiseOperationKernel.cpp50
1 files changed, 15 insertions, 35 deletions
diff --git a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
index f0712f5863..da28f3d886 100644
--- a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
+++ b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
@@ -36,7 +36,7 @@ namespace arm_compute
{
namespace
{
-constexpr unsigned int num_elems_processed_per_iteration = 16;
+constexpr unsigned int vector_size_byte_opencl = 16;
std::map<ArithmeticOperation, std::string> supported_arithmetic_ops =
{
@@ -152,10 +152,15 @@ CLBuildOptions generate_build_options_with_arithmetic_rules(const ITensorInfo &i
{
CLBuildOptions build_opts;
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / output.element_size(), output.dimension(0));
+
build_opts.add_option("-DDATA_TYPE_IN1=" + get_cl_type_from_data_type(input1.data_type()));
build_opts.add_option("-DDATA_TYPE_IN2=" + get_cl_type_from_data_type(input2.data_type()));
build_opts.add_option("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output.data_type()));
- build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
+ build_opts.add_option("-DVEC_SIZE_IN1=" + support::cpp11::to_string(input1.dimension(0) == 1 ? 1 : num_elems_processed_per_iteration));
+ build_opts.add_option("-DVEC_SIZE_IN2=" + support::cpp11::to_string(input2.dimension(0) == 1 ? 1 : num_elems_processed_per_iteration));
+ build_opts.add_option("-DVEC_SIZE_OUT=" + support::cpp11::to_string(num_elems_processed_per_iteration));
+ build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(output.dimension(0) % num_elems_processed_per_iteration));
build_opts.add_option("-DOP=" + operation_string);
if(is_data_type_quantized(input1.data_type()))
{
@@ -173,31 +178,17 @@ CLBuildOptions generate_build_options_with_arithmetic_rules(const ITensorInfo &i
return build_opts;
}
-std::pair<Status, Window> configure_window_arithmetic_common(const ValidRegion &valid_region, ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output)
+std::pair<Status, Window> configure_window_arithmetic_common(ITensorInfo &output)
{
- Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration));
- Window win_input1 = win.broadcast_if_dimension_le_one(input1);
- Window win_input2 = win.broadcast_if_dimension_le_one(input2);
-
- AccessWindowHorizontal input1_access(&input1, 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal input2_access(&input2, 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(&output, 0, num_elems_processed_per_iteration);
-
- bool window_changed = update_window_and_padding(win_input1, input1_access)
- || update_window_and_padding(win_input2, input2_access)
- || update_window_and_padding(win, output_access);
-
- output_access.set_valid_region(win, valid_region);
-
- Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
- return std::make_pair(err, win);
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / output.element_size(), output.dimension(0));
+ Window win = calculate_max_window(output, Steps(num_elems_processed_per_iteration));
+ return std::make_pair(Status{}, win);
}
std::pair<Status, Window> validate_and_configure_window_for_arithmetic_operators(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output)
{
const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(input1, input2);
- const TensorShape &out_shape = broadcast_pair.first;
- const ValidRegion &valid_region = broadcast_pair.second;
+ const TensorShape &out_shape = broadcast_pair.first;
set_shape_if_empty(output, out_shape);
@@ -226,16 +217,15 @@ std::pair<Status, Window> validate_and_configure_window_for_arithmetic_operators
set_data_type_if_unknown(output, DataType::QSYMM16);
}
- return configure_window_arithmetic_common(valid_region, input1, input2, output);
+ return configure_window_arithmetic_common(output);
}
std::pair<Status, Window> validate_and_configure_window_for_division(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output)
{
const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(input1, input2);
- const TensorShape &out_shape = broadcast_pair.first;
- const ValidRegion &valid_region = broadcast_pair.second;
+ const TensorShape &out_shape = broadcast_pair.first;
auto_init_if_empty(output, out_shape, 1, input1.data_type());
- return configure_window_arithmetic_common(valid_region, input1, input2, output);
+ return configure_window_arithmetic_common(output);
}
} // namespace
@@ -315,30 +305,20 @@ void CLElementwiseOperationKernel::run_op(ITensorPack &tensors, const Window &wi
Window slice = collapsed.first_slice_window_3D();
Window slice_input1 = slice.broadcast_if_dimension_le_one(in_shape1_collapsed);
Window slice_input2 = slice.broadcast_if_dimension_le_one(in_shape2_collapsed);
-
do
{
unsigned int idx = 0;
-
add_3D_tensor_argument(idx, src_0, slice_input1);
add_3D_tensor_argument(idx, src_1, slice_input2);
add_3D_tensor_argument(idx, dst, slice);
enqueue(queue, *this, slice, lws_hint());
-
ARM_COMPUTE_UNUSED(collapsed.slide_window_slice_3D(slice_input1));
ARM_COMPUTE_UNUSED(collapsed.slide_window_slice_3D(slice_input2));
}
while(collapsed.slide_window_slice_3D(slice));
}
-BorderSize CLElementwiseOperationKernel::border_size() const
-{
- const unsigned int replicateSize = _output->dimension(0) - std::min(_input1->dimension(0), _input2->dimension(0));
- const unsigned int border = std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize);
- return BorderSize{ 0, border, 0, 0 };
-}
-
/** Arithmetic operations with saturation*/
void CLSaturatedArithmeticOperationKernel::configure(ArithmeticOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ConvertPolicy &policy,