diff options
Diffstat (limited to 'src/core/CL/kernels/CLFillBorderKernel.cpp')
-rw-r--r-- | src/core/CL/kernels/CLFillBorderKernel.cpp | 63 |
1 files changed, 36 insertions, 27 deletions
diff --git a/src/core/CL/kernels/CLFillBorderKernel.cpp b/src/core/CL/kernels/CLFillBorderKernel.cpp index 840ed0ca2f..86bb502da3 100644 --- a/src/core/CL/kernels/CLFillBorderKernel.cpp +++ b/src/core/CL/kernels/CLFillBorderKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,16 +29,18 @@ #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" +#include "arm_compute/core/utils/StringUtils.h" #include "arm_compute/core/Validate.h" + #include "src/core/helpers/WindowHelpers.h" #include "support/Cast.h" #include "support/StringSupport.h" namespace arm_compute { -CLFillBorderKernel::CLFillBorderKernel() - : ICLKernel(), _tensor(nullptr) +CLFillBorderKernel::CLFillBorderKernel() : ICLKernel(), _tensor(nullptr) { + _type = CLKernelType::ELEMENTWISE; } bool CLFillBorderKernel::is_parallelisable() const @@ -54,27 +56,38 @@ void CLFillBorderKernel::set_constant_border(unsigned int idx, const PixelValue ICLKernel::add_argument<T>(idx, static_cast<T>(value)); } -void CLFillBorderKernel::configure(ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value) +void CLFillBorderKernel::configure(ICLTensor *tensor, + BorderSize border_size, + BorderMode border_mode, + const PixelValue &constant_border_value) { configure(CLKernelLibrary::get().get_compile_context(), tensor, border_size, border_mode, constant_border_value); } -void CLFillBorderKernel::configure(const CLCompileContext &compile_context, ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value) +void CLFillBorderKernel::configure(const CLCompileContext &compile_context, + ICLTensor *tensor, + BorderSize border_size, + BorderMode border_mode, + const PixelValue &constant_border_value) { _tensor = tensor; configure(compile_context, tensor->info(), border_size, border_mode, constant_border_value); } -void CLFillBorderKernel::configure(const CLCompileContext &compile_context, ITensorInfo *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value) +void CLFillBorderKernel::configure(const CLCompileContext &compile_context, + ITensorInfo *tensor, + BorderSize border_size, + BorderMode border_mode, + const PixelValue &constant_border_value) { ARM_COMPUTE_ERROR_ON(tensor == nullptr); ARM_COMPUTE_ERROR_ON(tensor->num_channels() != 1); - auto padding_info = get_padding_info({ tensor }); + auto padding_info = get_padding_info({tensor}); border_size.limit(tensor->padding()); // If there is no border: early exit - if(border_size.empty() || border_mode == BorderMode::UNDEFINED) + if (border_size.empty() || border_mode == BorderMode::UNDEFINED) { return; } @@ -96,25 +109,22 @@ void CLFillBorderKernel::configure(const CLCompileContext &compile_context, ITen _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Create static kernel arguments - const unsigned int valid_width = tensor->valid_region().shape[0]; - const unsigned int valid_height = tensor->valid_region().shape[1]; - const cl_int2 valid_region_coords = - { - { - static_cast<cl_int>(tensor->valid_region().anchor[0]), - static_cast<cl_int>(tensor->valid_region().anchor[1]), - } - }; - const unsigned int total_valid_width = border_size.left + valid_width + border_size.right; + const unsigned int valid_width = tensor->valid_region().shape[0]; + const unsigned int valid_height = tensor->valid_region().shape[1]; + const cl_int2 valid_region_coords = {{ + static_cast<cl_int>(tensor->valid_region().anchor[0]), + static_cast<cl_int>(tensor->valid_region().anchor[1]), + }}; + const unsigned int total_valid_width = border_size.left + valid_width + border_size.right; // Set static kernel arguments unsigned int idx = num_arguments_per_3D_tensor(); //Skip the tensor parameters ICLKernel::add_argument<cl_uint>(idx, valid_width); ICLKernel::add_argument<cl_uint>(idx, valid_height); ICLKernel::add_argument<cl_int2>(idx, valid_region_coords); - if(BorderMode::CONSTANT == border_mode) + if (BorderMode::CONSTANT == border_mode) { - switch(dt) + switch (dt) { case DataType::U8: case DataType::QASYMM8: @@ -173,12 +183,13 @@ void CLFillBorderKernel::configure(const CLCompileContext &compile_context, ITen void CLFillBorderKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { // Border mode undefined or border width == 0 - if(_kernel() == nullptr) + if (_kernel() == nullptr) { return; } - const auto tensor = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC)); + const auto tensor = + utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC)); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window); @@ -191,14 +202,13 @@ void CLFillBorderKernel::run_op(ITensorPack &tensors, const Window &window, cl:: unsigned int idx = 0; add_3D_tensor_argument(idx, tensor, slice); enqueue(queue, *this, slice, lws_hint()); - } - while(collapsed.slide_window_slice_3D(slice)); + } while (collapsed.slide_window_slice_3D(slice)); } void CLFillBorderKernel::run(const Window &window, cl::CommandQueue &queue) { // Border mode undefined or border width == 0 - if(_kernel() == nullptr) + if (_kernel() == nullptr) { return; } @@ -214,7 +224,6 @@ void CLFillBorderKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_3D_tensor_argument(idx, _tensor, slice); enqueue(queue, *this, slice, lws_hint()); - } - while(collapsed.slide_window_slice_3D(slice)); + } while (collapsed.slide_window_slice_3D(slice)); } } // namespace arm_compute |