From afd38f0c617d6f89b2b4532c6c44f116617e2b6f Mon Sep 17 00:00:00 2001 From: Felix Thomasmathibalan Date: Wed, 27 Sep 2023 17:46:17 +0100 Subject: Apply clang-format on repository Code is formatted as per a revised clang format configuration file(not part of this delivery). Version 14.0.6 is used. Exclusion List: - files with .cl extension - files that are not strictly C/C++ (e.g. Android.bp, Sconscript ...) And the following directories - compute_kernel_writer/validation/ - tests/ - include/ - src/core/NEON/kernels/convolution/ - src/core/NEON/kernels/arm_gemm/ - src/core/NEON/kernels/arm_conv/ - data/ There will be a follow up for formatting of .cl files and the files under tests/ and compute_kernel_writer/validation/. Signed-off-by: Felix Thomasmathibalan Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391 Benchmark: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Gunes Bayir --- src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp | 80 ++++++--- src/core/CL/kernels/CLArgMinMaxLayerKernel.h | 10 +- .../CL/kernels/CLBatchNormalizationLayerKernel.cpp | 125 ++++++++----- .../CL/kernels/CLBatchNormalizationLayerKernel.h | 32 +++- src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp | 76 +++++--- src/core/CL/kernels/CLBatchToSpaceLayerKernel.h | 25 ++- src/core/CL/kernels/CLBitwiseKernel.cpp | 25 +-- src/core/CL/kernels/CLBitwiseKernel.h | 6 +- .../CL/kernels/CLBoundingBoxTransformKernel.cpp | 42 +++-- src/core/CL/kernels/CLBoundingBoxTransformKernel.h | 16 +- .../CL/kernels/CLChannelShuffleLayerKernel.cpp | 64 ++++--- src/core/CL/kernels/CLChannelShuffleLayerKernel.h | 5 +- src/core/CL/kernels/CLComparisonKernel.cpp | 75 ++++---- src/core/CL/kernels/CLComparisonKernel.h | 14 +- .../kernels/CLDeconvolutionLayerUpsampleKernel.cpp | 25 +-- .../kernels/CLDeconvolutionLayerUpsampleKernel.h | 5 +- .../kernels/CLDeconvolutionReshapeOutputKernel.cpp | 84 ++++++--- .../kernels/CLDeconvolutionReshapeOutputKernel.h | 23 ++- src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp | 29 +-- src/core/CL/kernels/CLDepthToSpaceLayerKernel.h | 4 +- .../CLDepthwiseConvolutionLayerNativeKernel.cpp | 199 +++++++++++++-------- .../CLDepthwiseConvolutionLayerNativeKernel.h | 45 +++-- src/core/CL/kernels/CLFFTDigitReverseKernel.cpp | 42 +++-- src/core/CL/kernels/CLFFTDigitReverseKernel.h | 18 +- src/core/CL/kernels/CLFFTRadixStageKernel.cpp | 46 ++--- src/core/CL/kernels/CLFFTRadixStageKernel.h | 9 +- src/core/CL/kernels/CLFFTScaleKernel.cpp | 26 +-- src/core/CL/kernels/CLFFTScaleKernel.h | 9 +- src/core/CL/kernels/CLFillBorderKernel.cpp | 59 +++--- src/core/CL/kernels/CLFillBorderKernel.h | 18 +- .../CL/kernels/CLFuseBatchNormalizationKernel.cpp | 129 ++++++++----- .../CL/kernels/CLFuseBatchNormalizationKernel.h | 41 +++-- src/core/CL/kernels/CLGatherKernel.cpp | 36 ++-- src/core/CL/kernels/CLGatherKernel.h | 10 +- .../CL/kernels/CLGenerateProposalsLayerKernel.cpp | 33 ++-- .../CL/kernels/CLGenerateProposalsLayerKernel.h | 7 +- .../kernels/CLInstanceNormalizationLayerKernel.cpp | 54 ++++-- .../kernels/CLInstanceNormalizationLayerKernel.h | 16 +- src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp | 45 +++-- src/core/CL/kernels/CLL2NormalizeLayerKernel.h | 11 +- src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp | 45 +++-- src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h | 11 +- .../CL/kernels/CLMeanStdDevNormalizationKernel.cpp | 19 +- .../CL/kernels/CLMeanStdDevNormalizationKernel.h | 5 +- src/core/CL/kernels/CLNormalizationLayerKernel.cpp | 72 +++++--- src/core/CL/kernels/CLNormalizationLayerKernel.h | 7 +- .../CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp | 56 +++--- .../CL/kernels/CLNormalizePlanarYUVLayerKernel.h | 9 +- src/core/CL/kernels/CLPadLayerKernel.cpp | 95 ++++++---- src/core/CL/kernels/CLPadLayerKernel.h | 20 ++- src/core/CL/kernels/CLPriorBoxLayerKernel.cpp | 83 ++++++--- src/core/CL/kernels/CLPriorBoxLayerKernel.h | 27 ++- .../CL/kernels/CLQLSTMLayerNormalizationKernel.cpp | 47 +++-- .../CL/kernels/CLQLSTMLayerNormalizationKernel.h | 9 +- src/core/CL/kernels/CLROIAlignLayerKernel.cpp | 51 ++++-- src/core/CL/kernels/CLROIAlignLayerKernel.h | 14 +- src/core/CL/kernels/CLROIPoolingLayerKernel.cpp | 38 ++-- src/core/CL/kernels/CLROIPoolingLayerKernel.h | 14 +- src/core/CL/kernels/CLRangeKernel.cpp | 38 ++-- src/core/CL/kernels/CLRangeKernel.h | 1 + src/core/CL/kernels/CLReductionOperationKernel.cpp | 103 ++++++----- src/core/CL/kernels/CLReductionOperationKernel.h | 10 +- src/core/CL/kernels/CLReorgLayerKernel.cpp | 41 +++-- src/core/CL/kernels/CLReorgLayerKernel.h | 1 + src/core/CL/kernels/CLReverseKernel.cpp | 16 +- src/core/CL/kernels/CLReverseKernel.h | 5 +- src/core/CL/kernels/CLSelectKernel.cpp | 33 ++-- src/core/CL/kernels/CLSelectKernel.h | 7 +- src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp | 100 ++++++++--- src/core/CL/kernels/CLSpaceToBatchLayerKernel.h | 35 +++- src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp | 23 ++- src/core/CL/kernels/CLSpaceToDepthLayerKernel.h | 4 +- src/core/CL/kernels/CLStackLayerKernel.cpp | 38 ++-- src/core/CL/kernels/CLStackLayerKernel.h | 17 +- src/core/CL/kernels/CLStridedSliceKernel.cpp | 101 ++++++----- src/core/CL/kernels/CLStridedSliceKernel.h | 24 ++- src/core/CL/kernels/CLTileKernel.cpp | 30 ++-- src/core/CL/kernels/CLTileKernel.h | 5 +- 78 files changed, 1903 insertions(+), 969 deletions(-) (limited to 'src/core/CL/kernels') diff --git a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp index 2728958add..5b72354abe 100644 --- a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp @@ -31,6 +31,7 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/helpers/AdjustVecSize.h" #include "arm_compute/core/Validate.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -44,16 +45,20 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S32, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, + DataType::S32, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S32, DataType::S64); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX && op != ReductionOperation::ARG_IDX_MIN, "Only ARG_IDX_MAX and ARG_IDX_MIN are supported"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX && op != ReductionOperation::ARG_IDX_MIN, + "Only ARG_IDX_MAX and ARG_IDX_MIN are supported"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, + "Reduction axis greater than max number of dimensions"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis"); - if(output->total_size() != 0) + if (output->total_size() != 0) { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U32, DataType::S32, DataType::S64, DataType::U64); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U32, DataType::S32, DataType::S64, + DataType::U64); } return Status{}; @@ -66,22 +71,34 @@ CLArgMinMaxLayerKernel::CLArgMinMaxLayerKernel() _type = CLKernelType::ELEMENTWISE; } -void CLArgMinMaxLayerKernel::configure(const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op) +void CLArgMinMaxLayerKernel::configure(const ICLTensor *input, + ICLTensor *output, + unsigned int axis, + ReductionOperation op) { configure(CLKernelLibrary::get().get_compile_context(), input, output, axis, op); } -void CLArgMinMaxLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op) +void CLArgMinMaxLayerKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + ICLTensor *output, + unsigned int axis, + ReductionOperation op) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - TensorShape output_shape{ input->info()->tensor_shape() }; + TensorShape output_shape{input->info()->tensor_shape()}; output_shape.set(axis, 1); - auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape).set_data_type(DataType::S32).reset_padding().set_is_resizable(true)); + auto_init_if_empty(*output->info(), input->info() + ->clone() + ->set_tensor_shape(output_shape) + .set_data_type(DataType::S32) + .reset_padding() + .set_is_resizable(true)); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), axis, op)); - auto padding_info = get_padding_info({ input, output }); + auto padding_info = get_padding_info({input, output}); _input = input; _output = output; @@ -90,11 +107,14 @@ void CLArgMinMaxLayerKernel::configure(const CLCompileContext &compile_context, // Set build options const auto adjusted_vector_size = adjust_vec_size(16U, input->info()->dimension(0)); - const auto vector_size = (adjusted_vector_size == 3U && axis == 0U) ? 2U : adjusted_vector_size; // the opencl kernel only supports sizes 2, 4, 8 and 16. + const auto vector_size = (adjusted_vector_size == 3U && axis == 0U) + ? 2U + : adjusted_vector_size; // the opencl kernel only supports sizes 2, 4, 8 and 16. CLBuildOptions build_opts; build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->info()->dimension(0) % vector_size)); + build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + + support::cpp11::to_string(input->info()->dimension(0) % vector_size)); build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vector_size)); build_opts.add_option_if(is_data_type_float(input->info()->data_type()), "-DFLOAT_DATA_TYPE"); build_opts.add_option_if_else(op == ReductionOperation::ARG_IDX_MAX, "-DARG_MAX", "-DARG_MIN"); @@ -104,7 +124,7 @@ void CLArgMinMaxLayerKernel::configure(const CLCompileContext &compile_context, // Create kernel std::string kernel_axis_name; - switch(axis) + switch (axis) { case 0: build_opts.add_option("-DWIDTH=" + support::cpp11::to_string(input->info()->dimension(0))); @@ -135,7 +155,10 @@ void CLArgMinMaxLayerKernel::configure(const CLCompileContext &compile_context, ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -Status CLArgMinMaxLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op) +Status CLArgMinMaxLayerKernel::validate(const ITensorInfo *input, + const ITensorInfo *output, + unsigned int axis, + ReductionOperation op) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, axis, op)); return Status{}; @@ -146,7 +169,7 @@ void CLArgMinMaxLayerKernel::run(const Window &window, cl::CommandQueue &queue) ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - switch(_reduction_axis) + switch (_reduction_axis) { case 0: { @@ -154,7 +177,8 @@ void CLArgMinMaxLayerKernel::run(const Window &window, cl::CommandQueue &queue) Window out_window(window); Window in_window(window); out_window.set(Window::DimX, Window::Dimension(0, 0, 0)); - in_window.set(Window::DimX, Window::Dimension(0, _input->info()->dimension(0), _input->info()->dimension(0))); + in_window.set(Window::DimX, + Window::Dimension(0, _input->info()->dimension(0), _input->info()->dimension(0))); in_window.set(Window::DimY, Window::Dimension(0, _input->info()->dimension(1), 1u)); // Get first input and output slices @@ -166,15 +190,15 @@ void CLArgMinMaxLayerKernel::run(const Window &window, cl::CommandQueue &queue) add_2D_tensor_argument(idx, _input, in_slice); add_2D_tensor_argument(idx, _output, out_slice); enqueue(queue, *this, in_slice, lws_hint()); - } - while(in_window.slide_window_slice_2D(in_slice) && out_window.slide_window_slice_2D(out_slice)); + } while (in_window.slide_window_slice_2D(in_slice) && out_window.slide_window_slice_2D(out_slice)); } break; case 1: { // Get first input and output slices - Window window_in{ window }; - window_in.set(Window::DimY, Window::Dimension(0, _input->info()->dimension(1), _input->info()->dimension(1))); + Window window_in{window}; + window_in.set(Window::DimY, + Window::Dimension(0, _input->info()->dimension(1), _input->info()->dimension(1))); Window in_slice = window_in.first_slice_window_2D(); Window out_slice = window.first_slice_window_2D(); @@ -184,15 +208,15 @@ void CLArgMinMaxLayerKernel::run(const Window &window, cl::CommandQueue &queue) add_2D_tensor_argument(idx, _input, in_slice); add_2D_tensor_argument(idx, _output, out_slice); enqueue(queue, *this, in_slice, lws_hint()); - } - while(window_in.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(out_slice)); + } while (window_in.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(out_slice)); } break; case 2: { // Get first input and output slices - Window window_in{ window }; - window_in.set(Window::DimZ, Window::Dimension(0, _input->info()->dimension(2), _input->info()->dimension(2))); + Window window_in{window}; + window_in.set(Window::DimZ, + Window::Dimension(0, _input->info()->dimension(2), _input->info()->dimension(2))); Window in_slice = window_in.first_slice_window_3D(); Window out_slice = window.first_slice_window_3D(); @@ -202,14 +226,13 @@ void CLArgMinMaxLayerKernel::run(const Window &window, cl::CommandQueue &queue) add_3D_tensor_argument(idx, _input, in_slice); add_3D_tensor_argument(idx, _output, out_slice); enqueue(queue, *this, in_slice, lws_hint()); - } - while(window_in.slide_window_slice_3D(in_slice) && window.slide_window_slice_3D(out_slice)); + } while (window_in.slide_window_slice_3D(in_slice) && window.slide_window_slice_3D(out_slice)); } break; case 3: { // Get first input and output slices - Window window_in{ window }; + Window window_in{window}; window_in.set(3, Window::Dimension(0, 1, 1)); Window in_slice = window_in.first_slice_window_4D(); Window out_slice = window.first_slice_window_4D(); @@ -220,8 +243,7 @@ void CLArgMinMaxLayerKernel::run(const Window &window, cl::CommandQueue &queue) add_4D_tensor_argument(idx, _input, in_slice); add_4D_tensor_argument(idx, _output, out_slice); enqueue(queue, *this, in_slice, lws_hint()); - } - while(window_in.slide_window_slice_4D(in_slice) && window.slide_window_slice_4D(out_slice)); + } while (window_in.slide_window_slice_4D(in_slice) && window.slide_window_slice_4D(out_slice)); } break; default: diff --git a/src/core/CL/kernels/CLArgMinMaxLayerKernel.h b/src/core/CL/kernels/CLArgMinMaxLayerKernel.h index 5f36bdf113..fb3b41b0de 100644 --- a/src/core/CL/kernels/CLArgMinMaxLayerKernel.h +++ b/src/core/CL/kernels/CLArgMinMaxLayerKernel.h @@ -25,6 +25,7 @@ #define ARM_COMPUTE_CLARGMINMAXLAYERKERNEL_H #include "arm_compute/core/Types.h" + #include "src/core/CL/ICLKernel.h" namespace arm_compute @@ -72,7 +73,11 @@ public: * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported. */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input, + ICLTensor *output, + unsigned int axis, + ReductionOperation op); /** Static function to check if given info will lead to a valid configuration of @ref CLArgMinMaxLayerKernel. * @@ -84,7 +89,8 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op); + static Status + validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp index 3fa8a8edaa..c88a852a44 100644 --- a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp @@ -23,58 +23,64 @@ */ #include "src/core/CL/kernels/CLBatchNormalizationLayerKernel.h" -#include "arm_compute/core/utils/ActivationFunctionUtils.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" +#include "arm_compute/core/utils/ActivationFunctionUtils.h" #include "arm_compute/core/utils/helpers/AdjustVecSize.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" - #include "support/StringSupport.h" using namespace arm_compute; namespace { -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *mean, const ITensorInfo *var, - const ITensorInfo *beta, const ITensorInfo *gamma, - float epsilon, ActivationLayerInfo act_info) +Status validate_arguments(const ITensorInfo *input, + const ITensorInfo *output, + const ITensorInfo *mean, + const ITensorInfo *var, + const ITensorInfo *beta, + const ITensorInfo *gamma, + float epsilon, + ActivationLayerInfo act_info) { ARM_COMPUTE_UNUSED(epsilon); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, var); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, mean, var); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL)) != mean->dimension(0)); - if(beta != nullptr) + ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(get_data_layout_dimension_index( + input->data_layout(), DataLayoutDimension::CHANNEL)) != mean->dimension(0)); + if (beta != nullptr) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, beta); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, beta); } - if(gamma != nullptr) + if (gamma != nullptr) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, gamma); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, gamma); } - if(act_info.enabled()) + if (act_info.enabled()) { ActivationLayerInfo::ActivationFunction act = act_info.activation(); ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() != DataType::F32 && input->data_type() != DataType::F16); - ARM_COMPUTE_RETURN_ERROR_ON(act != ActivationLayerInfo::ActivationLayerInfo::ActivationFunction::RELU - && act != ActivationLayerInfo::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU - && act != ActivationLayerInfo::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU); + ARM_COMPUTE_RETURN_ERROR_ON(act != ActivationLayerInfo::ActivationLayerInfo::ActivationFunction::RELU && + act != ActivationLayerInfo::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU && + act != + ActivationLayerInfo::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU); ARM_COMPUTE_RETURN_ERROR_ON(act_info.b() > act_info.a()); } - if(output != nullptr && output->total_size() != 0) + if (output != nullptr && output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output); @@ -86,14 +92,15 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, std::pair validate_and_configure_window_nchw(ITensorInfo *input, ITensorInfo *output) { - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / input->element_size(), input->dimension(0)); + const unsigned int num_elems_processed_per_iteration = + adjust_vec_size(16 / input->element_size(), input->dimension(0)); // Configure kernel window Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); bool window_changed = false; - if(output != nullptr) + if (output != nullptr) { AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); window_changed = update_window_and_padding(win, input_access, output_access); @@ -104,30 +111,50 @@ std::pair validate_and_configure_window_nchw(ITensorInfo *input, window_changed = update_window_and_padding(win, input_access); } - Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; + Status err = + (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; return std::make_pair(err, win); } } // namespace CLBatchNormalizationLayerKernel::CLBatchNormalizationLayerKernel() - : _input(nullptr), _output(nullptr), _mean(nullptr), _var(nullptr), _beta(nullptr), _gamma(nullptr), _epsilon(0), _run_in_place(false) + : _input(nullptr), + _output(nullptr), + _mean(nullptr), + _var(nullptr), + _beta(nullptr), + _gamma(nullptr), + _epsilon(0), + _run_in_place(false) { _type = CLKernelType::ELEMENTWISE; } -void CLBatchNormalizationLayerKernel::configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, - float epsilon, ActivationLayerInfo act_info) +void CLBatchNormalizationLayerKernel::configure(ICLTensor *input, + ICLTensor *output, + const ICLTensor *mean, + const ICLTensor *var, + const ICLTensor *beta, + const ICLTensor *gamma, + float epsilon, + ActivationLayerInfo act_info) { configure(CLKernelLibrary::get().get_compile_context(), input, output, mean, var, beta, gamma, epsilon, act_info); } -void CLBatchNormalizationLayerKernel::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, - const ICLTensor *gamma, - float epsilon, ActivationLayerInfo act_info) +void CLBatchNormalizationLayerKernel::configure(const CLCompileContext &compile_context, + ICLTensor *input, + ICLTensor *output, + const ICLTensor *mean, + const ICLTensor *var, + const ICLTensor *beta, + const ICLTensor *gamma, + float epsilon, + ActivationLayerInfo act_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, mean, var); - auto padding_info = get_padding_info({ input, output, mean, var, beta, gamma }); + auto padding_info = get_padding_info({input, output, mean, var, beta, gamma}); _input = input; _output = output; _mean = mean; @@ -142,13 +169,15 @@ void CLBatchNormalizationLayerKernel::configure(const CLCompileContext &compile_ mean->info(), var->info(), (beta != nullptr) ? beta->info() : nullptr, (gamma != nullptr) ? gamma->info() : nullptr, epsilon, act_info)); - unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / input->info()->element_size(), input->info()->dimension(0)); + unsigned int num_elems_processed_per_iteration = + adjust_vec_size(16 / input->info()->element_size(), input->info()->dimension(0)); // Set build options CLBuildOptions build_opts; build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->info()->dimension(0) % num_elems_processed_per_iteration)); + build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + + support::cpp11::to_string(input->info()->dimension(0) % num_elems_processed_per_iteration)); build_opts.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_info.activation()))); build_opts.add_option_if(act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(act_info.a())); build_opts.add_option_if(act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(act_info.b())); @@ -157,29 +186,33 @@ void CLBatchNormalizationLayerKernel::configure(const CLCompileContext &compile_ build_opts.add_option_if(gamma == nullptr, "-DUSE_DEFAULT_GAMMA"); // Create kernel - _kernel = create_kernel(compile_context, "batchnormalization_layer_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options()); + _kernel = + create_kernel(compile_context, + "batchnormalization_layer_" + lower_string(string_from_data_layout(input->info()->data_layout())), + build_opts.options()); // Set kernel static arguments unsigned int include_output = (!_run_in_place) ? 1 : 0; - unsigned int idx = (1 + include_output) * num_arguments_per_3D_tensor() + 2 * num_arguments_per_1D_tensor(); // Skip the input and output parameters - if(_beta != nullptr) + unsigned int idx = (1 + include_output) * num_arguments_per_3D_tensor() + + 2 * num_arguments_per_1D_tensor(); // Skip the input and output parameters + if (_beta != nullptr) { idx += num_arguments_per_1D_tensor(); // Skip beta parameter } - if(_gamma != nullptr) + if (_gamma != nullptr) { idx += num_arguments_per_1D_tensor(); // Skip gamma parameter } _kernel.setArg(idx++, _epsilon); - if(output != nullptr) + if (output != nullptr) { // Output tensor auto initialization if not yet initialized auto_init_if_empty(*output->info(), *input->info()->clone()); } // Configure kernel window - if(input->info()->data_layout() == DataLayout::NHWC) + if (input->info()->data_layout() == DataLayout::NHWC) { Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); ICLKernel::configure_internal(win); @@ -205,18 +238,23 @@ void CLBatchNormalizationLayerKernel::configure(const CLCompileContext &compile_ _config_id += lower_string(string_from_data_layout(input->info()->data_layout())); } -Status CLBatchNormalizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *mean, const ITensorInfo *var, - const ITensorInfo *beta, const ITensorInfo *gamma, - float epsilon, ActivationLayerInfo act_info) +Status CLBatchNormalizationLayerKernel::validate(const ITensorInfo *input, + const ITensorInfo *output, + const ITensorInfo *mean, + const ITensorInfo *var, + const ITensorInfo *beta, + const ITensorInfo *gamma, + float epsilon, + ActivationLayerInfo act_info) { const bool run_in_place = (output == nullptr) || (output == input); ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, mean, var, beta, gamma, epsilon, act_info)); - if(input->data_layout() != DataLayout::NHWC) + if (input->data_layout() != DataLayout::NHWC) { - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window_nchw(input->clone().get(), (run_in_place) ? nullptr : output->clone().get()) - .first); + ARM_COMPUTE_RETURN_ON_ERROR( + validate_and_configure_window_nchw(input->clone().get(), (run_in_place) ? nullptr : output->clone().get()) + .first); } return Status{}; @@ -236,11 +274,11 @@ void CLBatchNormalizationLayerKernel::run(const Window &window, cl::CommandQueue unsigned int idx = (1 + include_output) * num_arguments_per_3D_tensor(); add_1D_tensor_argument(idx, _mean, vector_slice); add_1D_tensor_argument(idx, _var, vector_slice); - if(_beta != nullptr) + if (_beta != nullptr) { add_1D_tensor_argument(idx, _beta, vector_slice); } - if(_gamma != nullptr) + if (_gamma != nullptr) { add_1D_tensor_argument(idx, _gamma, vector_slice); } @@ -249,11 +287,10 @@ void CLBatchNormalizationLayerKernel::run(const Window &window, cl::CommandQueue { idx = 0; add_3D_tensor_argument(idx, _input, slice); - if(!_run_in_place) + if (!_run_in_place) { add_3D_tensor_argument(idx, _output, slice); } enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_3D(slice)); + } while (window.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.h index acbe0f2a26..1a88d2a8c5 100644 --- a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.h +++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.h @@ -25,6 +25,7 @@ #define ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H #include "arm_compute/function_info/ActivationLayerInfo.h" + #include "src/core/CL/ICLKernel.h" namespace arm_compute @@ -64,7 +65,13 @@ public: * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. */ - void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr, const ICLTensor *gamma = nullptr, float epsilon = 0.001f, + void configure(ICLTensor *input, + ICLTensor *output, + const ICLTensor *mean, + const ICLTensor *var, + const ICLTensor *beta = nullptr, + const ICLTensor *gamma = nullptr, + float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo()); /** Set the input and output tensors. * @@ -82,8 +89,15 @@ public: * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr, - const ICLTensor *gamma = nullptr, float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo()); + void configure(const CLCompileContext &compile_context, + ICLTensor *input, + ICLTensor *output, + const ICLTensor *mean, + const ICLTensor *var, + const ICLTensor *beta = nullptr, + const ICLTensor *gamma = nullptr, + float epsilon = 0.001f, + ActivationLayerInfo act_info = ActivationLayerInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLBatchNormalizationLayerKernel * * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result. @@ -99,10 +113,14 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *mean, const ITensorInfo *var, - const ITensorInfo *beta = nullptr, const ITensorInfo *gamma = nullptr, - float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo()); + static Status validate(const ITensorInfo *input, + const ITensorInfo *output, + const ITensorInfo *mean, + const ITensorInfo *var, + const ITensorInfo *beta = nullptr, + const ITensorInfo *gamma = nullptr, + float epsilon = 0.001f, + ActivationLayerInfo act_info = ActivationLayerInfo()); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp index 143a842d02..c640b5a8d6 100644 --- a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp +++ b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp @@ -25,13 +25,14 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" -#include "arm_compute/core/TensorInfo.h" using namespace arm_compute::misc::shape_calculator; namespace arm_compute @@ -46,7 +47,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *block_inf ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); // Validate output if initialized - if(output->total_size() != 0) + if (output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 4); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); @@ -54,7 +55,11 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *block_inf return Status{}; } -Status validate_arguments_static(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const ITensorInfo *output, const CropInfo &crop_info) +Status validate_arguments_static(const ITensorInfo *input, + const int block_shape_x, + const int block_shape_y, + const ITensorInfo *output, + const CropInfo &crop_info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); @@ -66,10 +71,11 @@ Status validate_arguments_static(const ITensorInfo *input, const int block_shape ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_batch] % (block_shape_x * block_shape_y) != 0); // Validate output if initialized - if(output->total_size() != 0) + if (output->total_size() != 0) { - const TensorShape expected_output_shape = compute_batch_to_space_shape(input->data_layout(), input->tensor_shape(), block_shape_x, block_shape_y, crop_info); - const TensorInfo expected_output = output->clone()->set_tensor_shape(expected_output_shape); + const TensorShape expected_output_shape = compute_batch_to_space_shape( + input->data_layout(), input->tensor_shape(), block_shape_x, block_shape_y, crop_info); + const TensorInfo expected_output = output->clone()->set_tensor_shape(expected_output_shape); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &expected_output); ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 4); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); @@ -79,8 +85,7 @@ Status validate_arguments_static(const ITensorInfo *input, const int block_shape } } // namespace -CLBatchToSpaceLayerKernel::CLBatchToSpaceLayerKernel() - : _input(nullptr), _block_shape(nullptr), _output(nullptr) +CLBatchToSpaceLayerKernel::CLBatchToSpaceLayerKernel() : _input(nullptr), _block_shape(nullptr), _output(nullptr) { _type = CLKernelType::ELEMENTWISE; } @@ -90,11 +95,14 @@ void CLBatchToSpaceLayerKernel::configure(const ICLTensor *input, const ICLTenso configure(CLKernelLibrary::get().get_compile_context(), input, block_shape, output); } -void CLBatchToSpaceLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output) +void CLBatchToSpaceLayerKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + const ICLTensor *block_shape, + ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - auto padding_info = get_padding_info({ input, block_shape, output }); + auto padding_info = get_padding_info({input, block_shape, output}); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), block_shape->info(), output->info())); @@ -106,8 +114,9 @@ void CLBatchToSpaceLayerKernel::configure(const CLCompileContext &compile_contex CLBuildOptions build_opts; build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type())); build_opts.add_option("-DBATCH_SIZE=" + support::cpp11::to_string(output->info()->dimension(3))); - _kernel = create_kernel(compile_context, "batch_to_space_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options()); - + _kernel = create_kernel(compile_context, + "batch_to_space_" + lower_string(string_from_data_layout(input->info()->data_layout())), + build_opts.options()); // Configure kernel window Window win = calculate_max_window(*output->info(), Steps()); @@ -116,47 +125,65 @@ void CLBatchToSpaceLayerKernel::configure(const CLCompileContext &compile_contex ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -void CLBatchToSpaceLayerKernel::configure(const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output, const CropInfo &crop_info) +void CLBatchToSpaceLayerKernel::configure(const ICLTensor *input, + const int32_t block_shape_x, + const int32_t block_shape_y, + ICLTensor *output, + const CropInfo &crop_info) { configure(CLKernelLibrary::get().get_compile_context(), input, block_shape_x, block_shape_y, output, crop_info); } -void CLBatchToSpaceLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output, - const CropInfo &crop_info) +void CLBatchToSpaceLayerKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + const int32_t block_shape_x, + const int32_t block_shape_y, + ICLTensor *output, + const CropInfo &crop_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - const TensorShape output_shape = compute_batch_to_space_shape(input->info()->data_layout(), input->info()->tensor_shape(), block_shape_x, block_shape_y); + const TensorShape output_shape = compute_batch_to_space_shape( + input->info()->data_layout(), input->info()->tensor_shape(), block_shape_x, block_shape_y); auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape)); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_static(input->info(), block_shape_x, block_shape_y, output->info(), crop_info)); + ARM_COMPUTE_ERROR_THROW_ON( + validate_arguments_static(input->info(), block_shape_x, block_shape_y, output->info(), crop_info)); _input = input; _output = output; // Create kernel CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(data_size_from_type(input->info()->data_type()))); + build_opts.add_option("-DDATA_TYPE=" + + get_cl_unsigned_type_from_element_size(data_size_from_type(input->info()->data_type()))); build_opts.add_option("-DBATCH_SIZE=" + support::cpp11::to_string(output->info()->dimension(3))); build_opts.add_option("-DBLOCK_SHAPE_X=" + support::cpp11::to_string(block_shape_x)); build_opts.add_option("-DBLOCK_SHAPE_Y=" + support::cpp11::to_string(block_shape_y)); build_opts.add_option("-DCROP_LEFT=" + support::cpp11::to_string(crop_info.left)); build_opts.add_option("-DCROP_TOP=" + support::cpp11::to_string(crop_info.top)); - _kernel = create_kernel(compile_context, "batch_to_space_static_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options()); + _kernel = create_kernel( + compile_context, "batch_to_space_static_" + lower_string(string_from_data_layout(input->info()->data_layout())), + build_opts.options()); // Configure kernel window Window win = calculate_max_window(*output->info(), Steps()); ICLKernel::configure_internal(win); } -Status CLBatchToSpaceLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output) +Status +CLBatchToSpaceLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, block_shape, output); ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, block_shape, output)); return Status{}; } -Status CLBatchToSpaceLayerKernel::validate(const ITensorInfo *input, const int32_t block_shape_x, const int32_t block_shape_y, const ITensorInfo *output, const CropInfo &crop_info) +Status CLBatchToSpaceLayerKernel::validate(const ITensorInfo *input, + const int32_t block_shape_x, + const int32_t block_shape_y, + const ITensorInfo *output, + const CropInfo &crop_info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_static(input, block_shape_x, block_shape_y, output, crop_info)); @@ -185,7 +212,7 @@ void CLBatchToSpaceLayerKernel::run(const Window &window, cl::CommandQueue &queu unsigned int idx = 0; add_4D_tensor_argument(idx, _input, slice_in); add_argument(idx, batch_id); - if(_block_shape != nullptr) + if (_block_shape != nullptr) { add_1D_tensor_argument(idx, _block_shape, vector_slice); } @@ -193,7 +220,6 @@ void CLBatchToSpaceLayerKernel::run(const Window &window, cl::CommandQueue &queu enqueue(queue, *this, slice_out, lws_hint()); ++batch_id; - } - while(window.slide_window_slice_3D(slice_out)); + } while (window.slide_window_slice_3D(slice_out)); } -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.h b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.h index a05184cd5b..b9d3e66fe2 100644 --- a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.h +++ b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.h @@ -25,6 +25,7 @@ #define ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H #include "arm_compute/core/Types.h" + #include "src/core/CL/ICLKernel.h" namespace arm_compute @@ -65,7 +66,10 @@ public: * * @deprecated This method for dynamic block shape is not fully mature and will be removed in 23.08 release */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input, + const ICLTensor *block_shape, + ICLTensor *output); /** Initialise the kernel's inputs and output (Static block shape). * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. @@ -74,7 +78,11 @@ public: * @param[out] output Tensor output. Data types supported: same as @p input * @param[in] crop_info Specifies how the output shape is cropped after batch to space is performed */ - void configure(const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output, const CropInfo &crop_info); + void configure(const ICLTensor *input, + const int32_t block_shape_x, + const int32_t block_shape_y, + ICLTensor *output, + const CropInfo &crop_info); /** Initialise the kernel's inputs and output (Static block shape). * * @param[in] compile_context The compile context to be used. @@ -84,7 +92,12 @@ public: * @param[out] output Tensor output. Data types supported: same as @p input * @param[in] crop_info Specifies how the output shape is cropped after batch to space is performed */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output, const CropInfo &crop_info); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input, + const int32_t block_shape_x, + const int32_t block_shape_y, + ICLTensor *output, + const CropInfo &crop_info); /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayerKernel * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. @@ -106,7 +119,11 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const int32_t block_shape_x, const int32_t block_shape_y, const ITensorInfo *output, const CropInfo &crop_info); + static Status validate(const ITensorInfo *input, + const int32_t block_shape_x, + const int32_t block_shape_y, + const ITensorInfo *output, + const CropInfo &crop_info); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/src/core/CL/kernels/CLBitwiseKernel.cpp b/src/core/CL/kernels/CLBitwiseKernel.cpp index 11e6d021a5..de3fb43de8 100644 --- a/src/core/CL/kernels/CLBitwiseKernel.cpp +++ b/src/core/CL/kernels/CLBitwiseKernel.cpp @@ -28,25 +28,29 @@ #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/helpers/AdjustVecSize.h" +#include "arm_compute/core/Validate.h" + #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" namespace arm_compute { -CLBitwiseKernel::CLBitwiseKernel() - : _input1(nullptr), _input2(nullptr), _output(nullptr) +CLBitwiseKernel::CLBitwiseKernel() : _input1(nullptr), _input2(nullptr), _output(nullptr) { _type = CLKernelType::ELEMENTWISE; } -void CLBitwiseKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, BitwiseOperation op) +void CLBitwiseKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input1, + const ICLTensor *input2, + ICLTensor *output, + BitwiseOperation op) { ARM_COMPUTE_ERROR_ON_NULLPTR(input1); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8); - if(op != BitwiseOperation::NOT) + if (op != BitwiseOperation::NOT) { ARM_COMPUTE_ERROR_ON_NULLPTR(input2); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8); @@ -56,7 +60,7 @@ void CLBitwiseKernel::configure(const CLCompileContext &compile_context, const I // Output auto inizialitation if not yet initialized auto_init_if_empty(*(output->info()), *(input1->info())); - auto padding_info = get_padding_info({ input1, input2, output }); + auto padding_info = get_padding_info({input1, input2, output}); // Configure kernel window const unsigned int vec_size_x = adjust_vec_size(16 / output->info()->element_size(), output->info()->dimension(0)); @@ -68,7 +72,7 @@ void CLBitwiseKernel::configure(const CLCompileContext &compile_context, const I // Create kernel std::string kernel_name = ""; - switch(op) + switch (op) { case BitwiseOperation::AND: kernel_name = "bitwise_and"; @@ -107,13 +111,12 @@ void CLBitwiseKernel::run(const Window &window, cl::CommandQueue &queue) { unsigned int idx = 0; add_2D_tensor_argument(idx, _input1, slice); - if(_input2 != nullptr) + if (_input2 != nullptr) { add_2D_tensor_argument(idx, _input2, slice); } add_2D_tensor_argument(idx, _output, slice); enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); + } while (window.slide_window_slice_2D(slice)); } -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/src/core/CL/kernels/CLBitwiseKernel.h b/src/core/CL/kernels/CLBitwiseKernel.h index c5a999643d..2c74955ae4 100644 --- a/src/core/CL/kernels/CLBitwiseKernel.h +++ b/src/core/CL/kernels/CLBitwiseKernel.h @@ -59,7 +59,11 @@ public: * @param[out] output Destination tensor. Data types supported: U8. * @param[in] op Bitwise operation to perform. Supported: AND, OR, NOT, XOR. */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, BitwiseOperation op); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input1, + const ICLTensor *input2, + ICLTensor *output, + BitwiseOperation op); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp index 72de854afb..f32c518e29 100644 --- a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp +++ b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp @@ -31,6 +31,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -40,7 +41,10 @@ namespace arm_compute { namespace { -Status validate_arguments(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info) +Status validate_arguments(const ITensorInfo *boxes, + const ITensorInfo *pred_boxes, + const ITensorInfo *deltas, + const BoundingBoxTransformInfo &info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(boxes, pred_boxes, deltas); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(boxes); @@ -53,7 +57,7 @@ Status validate_arguments(const ITensorInfo *boxes, const ITensorInfo *pred_boxe ARM_COMPUTE_RETURN_ERROR_ON(boxes->num_dimensions() > 2); const bool is_qasymm16 = boxes->data_type() == DataType::QASYMM16; - if(is_qasymm16) + if (is_qasymm16) { const UniformQuantizationInfo boxes_qinfo = boxes->quantization_info().uniform(); ARM_COMPUTE_RETURN_ERROR_ON(boxes_qinfo.scale != 0.125f); @@ -65,12 +69,12 @@ Status validate_arguments(const ITensorInfo *boxes, const ITensorInfo *pred_boxe ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(boxes, deltas); } - if(pred_boxes->total_size() > 0) + if (pred_boxes->total_size() > 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(pred_boxes->tensor_shape(), deltas->tensor_shape()); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(pred_boxes, boxes); ARM_COMPUTE_RETURN_ERROR_ON(pred_boxes->num_dimensions() > 2); - if(is_qasymm16) + if (is_qasymm16) { const UniformQuantizationInfo pred_boxes_qinfo = pred_boxes->quantization_info().uniform(); ARM_COMPUTE_RETURN_ERROR_ON(pred_boxes_qinfo.scale != 0.125f); @@ -83,22 +87,31 @@ Status validate_arguments(const ITensorInfo *boxes, const ITensorInfo *pred_boxe } } // namespace -CLBoundingBoxTransformKernel::CLBoundingBoxTransformKernel() - : _boxes(nullptr), _pred_boxes(nullptr), _deltas(nullptr) +CLBoundingBoxTransformKernel::CLBoundingBoxTransformKernel() : _boxes(nullptr), _pred_boxes(nullptr), _deltas(nullptr) { _type = CLKernelType::ELEMENTWISE; } -void CLBoundingBoxTransformKernel::configure(const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info) +void CLBoundingBoxTransformKernel::configure(const ICLTensor *boxes, + ICLTensor *pred_boxes, + const ICLTensor *deltas, + const BoundingBoxTransformInfo &info) { configure(CLKernelLibrary::get().get_compile_context(), boxes, pred_boxes, deltas, info); } -void CLBoundingBoxTransformKernel::configure(const CLCompileContext &compile_context, const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info) +void CLBoundingBoxTransformKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *boxes, + ICLTensor *pred_boxes, + const ICLTensor *deltas, + const BoundingBoxTransformInfo &info) { ARM_COMPUTE_ERROR_ON_NULLPTR(boxes, pred_boxes, deltas); - auto padding_info = get_padding_info({ boxes, pred_boxes, deltas }); - auto_init_if_empty(*pred_boxes->info(), deltas->info()->clone()->set_data_type(boxes->info()->data_type()).set_quantization_info(boxes->info()->quantization_info())); + auto padding_info = get_padding_info({boxes, pred_boxes, deltas}); + auto_init_if_empty(*pred_boxes->info(), deltas->info() + ->clone() + ->set_data_type(boxes->info()->data_type()) + .set_quantization_info(boxes->info()->quantization_info())); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(boxes->info(), pred_boxes->info(), deltas->info(), info)); @@ -128,7 +141,7 @@ void CLBoundingBoxTransformKernel::configure(const CLCompileContext &compile_con build_opts.add_option_if(info.apply_scale(), "-DSCALE_AFTER=" + float_to_string_with_full_precision(info.scale())); build_opts.add_option_if(info.correct_transform_coords(), "-DOFFSET=1"); - if(is_quantized) + if (is_quantized) { build_opts.add_option("-DDATA_TYPE_DELTAS=" + get_cl_type_from_data_type(deltas->info()->data_type())); const UniformQuantizationInfo boxes_qinfo = boxes->info()->quantization_info().uniform(); @@ -148,12 +161,15 @@ void CLBoundingBoxTransformKernel::configure(const CLCompileContext &compile_con // Since the number of columns is a multiple of 4 by definition, we don't need to pad the tensor const unsigned int num_elems_processed_per_iteration = 4; - Window win = calculate_max_window(*deltas->info(), Steps(num_elems_processed_per_iteration)); + Window win = calculate_max_window(*deltas->info(), Steps(num_elems_processed_per_iteration)); ICLKernel::configure_internal(win); ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -Status CLBoundingBoxTransformKernel::validate(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info) +Status CLBoundingBoxTransformKernel::validate(const ITensorInfo *boxes, + const ITensorInfo *pred_boxes, + const ITensorInfo *deltas, + const BoundingBoxTransformInfo &info) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(boxes, pred_boxes, deltas, info)); return Status{}; diff --git a/src/core/CL/kernels/CLBoundingBoxTransformKernel.h b/src/core/CL/kernels/CLBoundingBoxTransformKernel.h index 08f350e86a..9a1bb49bb9 100644 --- a/src/core/CL/kernels/CLBoundingBoxTransformKernel.h +++ b/src/core/CL/kernels/CLBoundingBoxTransformKernel.h @@ -58,7 +58,10 @@ public: * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. * */ - void configure(const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info); + void configure(const ICLTensor *boxes, + ICLTensor *pred_boxes, + const ICLTensor *deltas, + const BoundingBoxTransformInfo &info); /** Set the input and output tensors. * * @param[in] compile_context The compile context to be used. @@ -71,7 +74,11 @@ public: * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. * */ - void configure(const CLCompileContext &compile_context, const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info); + void configure(const CLCompileContext &compile_context, + const ICLTensor *boxes, + ICLTensor *pred_boxes, + const ICLTensor *deltas, + const BoundingBoxTransformInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref CLBoundingBoxTransform * @@ -85,7 +92,10 @@ public: * * @return a Status */ - static Status validate(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info); + static Status validate(const ITensorInfo *boxes, + const ITensorInfo *pred_boxes, + const ITensorInfo *deltas, + const BoundingBoxTransformInfo &info); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp index a2a0bc4fb4..ec58bf9e7a 100644 --- a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp +++ b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp @@ -31,6 +31,7 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/helpers/AdjustVecSize.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -46,15 +47,19 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups < 2, "Channel shuffling with less than 2 groups would be inefficient"); - const unsigned int channels = input->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL)); + const unsigned int channels = + input->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL)); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups == channels, "Channel shuffling with same number of groups as number of channels would be inefficient"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG( + num_groups == channels, + "Channel shuffling with same number of groups as number of channels would be inefficient"); // There cannot be more groups than channels ARM_COMPUTE_RETURN_ERROR_ON(num_groups > channels); - ARM_COMPUTE_RETURN_ERROR_ON_MSG((channels % num_groups) != 0, "The number of channels must be a multiple of the number of groups"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG((channels % num_groups) != 0, + "The number of channels must be a multiple of the number of groups"); // Checks performed when output is configured - if(output->total_size() != 0) + if (output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output); @@ -70,11 +75,12 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen auto_init_if_empty(*output, *input->clone()); const bool is_nhwc = input->data_layout() == DataLayout::NHWC; - if(is_nhwc) + if (is_nhwc) { - unsigned int num_elems_processed_per_iteration_x = adjust_vec_size(max_cl_vector_width / input->element_size(), input->dimension(0)); - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration_x)); - Window win_collapsed = win.collapse(win, Window::DimZ); + unsigned int num_elems_processed_per_iteration_x = + adjust_vec_size(max_cl_vector_width / input->element_size(), input->dimension(0)); + Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration_x)); + Window win_collapsed = win.collapse(win, Window::DimZ); return std::make_pair(Status{}, win_collapsed); } else @@ -83,22 +89,25 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen constexpr unsigned int num_elems_processed_per_iteration_y = 2; // Configure kernel window - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); - AccessWindowRectangle input_access(input, 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y); - AccessWindowRectangle output_access(output, 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y); + Window win = calculate_max_window( + *input, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); + AccessWindowRectangle input_access(input, 0, 0, num_elems_processed_per_iteration_x, + num_elems_processed_per_iteration_y); + AccessWindowRectangle output_access(output, 0, 0, num_elems_processed_per_iteration_x, + num_elems_processed_per_iteration_y); const bool window_changed = update_window_and_padding(win, input_access, output_access); Window win_collapsed = win.collapse(win, Window::DimZ); - Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; + Status err = + (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; return std::make_pair(err, win_collapsed); } } } // namespace -CLChannelShuffleLayerKernel::CLChannelShuffleLayerKernel() - : _input(nullptr), _output(nullptr) +CLChannelShuffleLayerKernel::CLChannelShuffleLayerKernel() : _input(nullptr), _output(nullptr) { _type = CLKernelType::ELEMENTWISE; } @@ -108,23 +117,27 @@ void CLChannelShuffleLayerKernel::configure(const ICLTensor *input, ICLTensor *o configure(CLKernelLibrary::get().get_compile_context(), input, output, num_groups); } -void CLChannelShuffleLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int num_groups) +void CLChannelShuffleLayerKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + ICLTensor *output, + unsigned int num_groups) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), num_groups)); - auto padding_info = get_padding_info({ input, output }); + auto padding_info = get_padding_info({input, output}); _input = input; _output = output; - const DataLayout data_layout = input->info()->data_layout(); - const bool is_nhwc = data_layout == DataLayout::NHWC; - const unsigned int channels = input->info()->dimension(get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL)); - unsigned int vec_size_x = 0; - unsigned int vec_size_x_leftovers = 0; - if(is_nhwc) + const DataLayout data_layout = input->info()->data_layout(); + const bool is_nhwc = data_layout == DataLayout::NHWC; + const unsigned int channels = + input->info()->dimension(get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL)); + unsigned int vec_size_x = 0; + unsigned int vec_size_x_leftovers = 0; + if (is_nhwc) { - vec_size_x = adjust_vec_size(max_cl_vector_width / input->info()->element_size(), input->info()->dimension(0)); + vec_size_x = adjust_vec_size(max_cl_vector_width / input->info()->element_size(), input->info()->dimension(0)); vec_size_x_leftovers = input->info()->dimension(0) % vec_size_x; } else @@ -170,13 +183,14 @@ void CLChannelShuffleLayerKernel::configure(const CLCompileContext &compile_cont _config_id += support::cpp11::to_string(output->info()->dimension(1)); _config_id += "_"; _config_id += support::cpp11::to_string(output->info()->dimension(2)); - if(data_layout == DataLayout::NHWC) + if (data_layout == DataLayout::NHWC) { ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } } -Status CLChannelShuffleLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups) +Status +CLChannelShuffleLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, num_groups)); ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get()).first); diff --git a/src/core/CL/kernels/CLChannelShuffleLayerKernel.h b/src/core/CL/kernels/CLChannelShuffleLayerKernel.h index 31c007f17e..43c939ebd8 100644 --- a/src/core/CL/kernels/CLChannelShuffleLayerKernel.h +++ b/src/core/CL/kernels/CLChannelShuffleLayerKernel.h @@ -60,7 +60,10 @@ public: * @param[out] output Output tensor. Data type supported: Same as @p input * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int num_groups); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input, + ICLTensor *output, + unsigned int num_groups); /** Static function to check if given info will lead to a valid configuration of @ref CLChannelShuffleLayerKernel * * @param[in] input Input tensor info. Data types supported: All. diff --git a/src/core/CL/kernels/CLComparisonKernel.cpp b/src/core/CL/kernels/CLComparisonKernel.cpp index f4d6316517..f27270733e 100644 --- a/src/core/CL/kernels/CLComparisonKernel.cpp +++ b/src/core/CL/kernels/CLComparisonKernel.cpp @@ -26,6 +26,7 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -38,14 +39,10 @@ namespace arm_compute namespace { // Create supported comparisons map -const std::map supported_comparison_ops = -{ - { ComparisonOperation::Equal, "EQUAL" }, - { ComparisonOperation::NotEqual, "NOTEQUAL" }, - { ComparisonOperation::Greater, "GREATER" }, - { ComparisonOperation::GreaterEqual, "GREATEREQUAL" }, - { ComparisonOperation::Less, "LESS" }, - { ComparisonOperation::LessEqual, "LESSEQUAL" }, +const std::map supported_comparison_ops = { + {ComparisonOperation::Equal, "EQUAL"}, {ComparisonOperation::NotEqual, "NOTEQUAL"}, + {ComparisonOperation::Greater, "GREATER"}, {ComparisonOperation::GreaterEqual, "GREATEREQUAL"}, + {ComparisonOperation::Less, "LESS"}, {ComparisonOperation::LessEqual, "LESSEQUAL"}, }; int calculate_num_elems_processed_per_iteration(const ITensorInfo &input) @@ -53,7 +50,10 @@ int calculate_num_elems_processed_per_iteration(const ITensorInfo &input) return 16 / input.element_size(); } -Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output, ComparisonOperation operation) +Status validate_arguments(const ITensorInfo &input1, + const ITensorInfo &input2, + const ITensorInfo &output, + ComparisonOperation operation) { ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&input1); ARM_COMPUTE_RETURN_ERROR_ON(input1.data_type() == DataType::UNKNOWN); @@ -64,7 +64,7 @@ Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible"); // Validate in case of configured output - if(output.total_size() > 0) + if (output.total_size() > 0) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8); ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, output.tensor_shape(), 0), @@ -76,7 +76,7 @@ Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, std::pair validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) { - const TensorShape &out_shape = TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape()); + const TensorShape &out_shape = TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape()); const unsigned int num_elems_processed_per_iteration = calculate_num_elems_processed_per_iteration(input1); // Auto initialize output if not initialized @@ -90,27 +90,34 @@ std::pair validate_and_configure_window(ITensorInfo &input1, ITe AccessWindowHorizontal input2_access(&input2, 0, num_elems_processed_per_iteration); AccessWindowHorizontal output_access(&output, 0, num_elems_processed_per_iteration); - bool window_changed = update_window_and_padding(win_input1, input1_access) - || update_window_and_padding(win_input2, input2_access) - || update_window_and_padding(win, output_access); + bool window_changed = update_window_and_padding(win_input1, input1_access) || + update_window_and_padding(win_input2, input2_access) || + update_window_and_padding(win, output_access); - Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; + Status err = + (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; return std::make_pair(err, win); } } // namespace -CLComparisonKernel::CLComparisonKernel() - : _input1(nullptr), _input2(nullptr), _output(nullptr) +CLComparisonKernel::CLComparisonKernel() : _input1(nullptr), _input2(nullptr), _output(nullptr) { _type = CLKernelType::ELEMENTWISE; } -void CLComparisonKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation) +void CLComparisonKernel::configure(const ICLTensor *input1, + const ICLTensor *input2, + ICLTensor *output, + ComparisonOperation operation) { configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, operation); } -void CLComparisonKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation) +void CLComparisonKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input1, + const ICLTensor *input2, + ICLTensor *output, + ComparisonOperation operation) { ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1->info(), *input2->info(), *output->info(), operation)); @@ -129,10 +136,11 @@ void CLComparisonKernel::configure(const CLCompileContext &compile_context, cons // Set kernel build options std::set build_opts; build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input1->info()->data_type())); - build_opts.emplace("-DVEC_SIZE=" + support::cpp11::to_string(calculate_num_elems_processed_per_iteration(*input1->info()))); + build_opts.emplace("-DVEC_SIZE=" + + support::cpp11::to_string(calculate_num_elems_processed_per_iteration(*input1->info()))); build_opts.emplace("-DOP=" + operation_name); build_opts.emplace("-DOP_NAME=" + lower_string(operation_name)); - if(is_data_type_quantized(input1->info()->data_type())) + if (is_data_type_quantized(input1->info()->data_type())) { const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform(); const UniformQuantizationInfo iq2_info = input2->info()->quantization_info().uniform(); @@ -160,12 +168,16 @@ void CLComparisonKernel::configure(const CLCompileContext &compile_context, cons _config_id += lower_string(string_from_data_layout(input1->info()->data_layout())); } -Status CLComparisonKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation operation) +Status CLComparisonKernel::validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + ComparisonOperation operation) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output); ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input1, *input2, *output, operation)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(*input1->clone(), *input2->clone(), *output->clone()).first); + ARM_COMPUTE_RETURN_ON_ERROR( + validate_and_configure_window(*input1->clone(), *input2->clone(), *output->clone()).first); return Status{}; } @@ -181,17 +193,18 @@ void CLComparisonKernel::run(const Window &window, cl::CommandQueue &queue) bool can_collapse = true; const bool is_vector = in_shape1.num_dimensions() == 1 || in_shape2.num_dimensions() == 1; - if(std::min(in_shape1.total_size(), in_shape2.total_size()) > 1 && !is_vector) + if (std::min(in_shape1.total_size(), in_shape2.total_size()) > 1 && !is_vector) { can_collapse = (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ); - for(size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); d++) + for (size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); d++) { can_collapse = (in_shape1[d] == in_shape2[d]); } } bool has_collapsed = false; - Window collapsed = can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed) : window; + Window collapsed = + can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed) : window; const TensorShape &in_shape1_collapsed = has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1; const TensorShape &in_shape2_collapsed = has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2; @@ -212,16 +225,16 @@ void CLComparisonKernel::run(const Window &window, cl::CommandQueue &queue) ARM_COMPUTE_UNUSED(collapsed.slide_window_slice_3D(slice_input1)); ARM_COMPUTE_UNUSED(collapsed.slide_window_slice_3D(slice_input2)); - } - while(collapsed.slide_window_slice_3D(slice)); + } while (collapsed.slide_window_slice_3D(slice)); } BorderSize CLComparisonKernel::border_size() const { const int num_elems_processed_per_iteration = calculate_num_elems_processed_per_iteration(*_input1->info()); - const unsigned int replicateSize = _output->info()->dimension(0) - std::min(_input1->info()->dimension(0), _input2->info()->dimension(0)); - const unsigned int border = std::min(num_elems_processed_per_iteration - 1U, replicateSize); - return BorderSize{ 0, border, 0, 0 }; + const unsigned int replicateSize = + _output->info()->dimension(0) - std::min(_input1->info()->dimension(0), _input2->info()->dimension(0)); + const unsigned int border = std::min(num_elems_processed_per_iteration - 1U, replicateSize); + return BorderSize{0, border, 0, 0}; } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLComparisonKernel.h b/src/core/CL/kernels/CLComparisonKernel.h index 0b94190183..174a6c9bf9 100644 --- a/src/core/CL/kernels/CLComparisonKernel.h +++ b/src/core/CL/kernels/CLComparisonKernel.h @@ -25,6 +25,7 @@ #define ARM_COMPUTE_CLCOMPARISONKERNEL_H #include "arm_compute/core/Types.h" + #include "src/core/CL/ICLKernel.h" namespace arm_compute @@ -64,7 +65,11 @@ public: * @param[out] output Destination tensor. Data types supported: U8. * @param[in] operation Comparison operation to use. */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input1, + const ICLTensor *input2, + ICLTensor *output, + ComparisonOperation operation); /** Static function to check if given info will lead to a valid configuration of @ref CLComparisonKernel * * @param[in] input1 Source tensor. Data types supported: All. @@ -74,10 +79,13 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation operation); + static Status validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + ComparisonOperation operation); // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; + void run(const Window &window, cl::CommandQueue &queue) override; BorderSize border_size() const override; private: diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp index 76af5d564a..f8ecc4c098 100644 --- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp +++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp @@ -29,6 +29,7 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/WindowHelpers.h" @@ -40,7 +41,8 @@ CLDeconvolutionLayerUpsampleKernel::CLDeconvolutionLayerUpsampleKernel() _type = CLKernelType::ELEMENTWISE; } -Status CLDeconvolutionLayerUpsampleKernel::validate(const ITensorInfo *input, const ITensorInfo *output, +Status CLDeconvolutionLayerUpsampleKernel::validate(const ITensorInfo *input, + const ITensorInfo *output, const PadStrideInfo &info) { ARM_COMPUTE_UNUSED(info); @@ -60,7 +62,7 @@ Status CLDeconvolutionLayerUpsampleKernel::validate(const ITensorInfo *input, co ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_h) == 0); ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_c) != output->dimension(idx_c)); - for(size_t i = 3; i < Coordinates::num_max_dimensions; ++i) + for (size_t i = 3; i < Coordinates::num_max_dimensions; ++i) { ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != output->dimension(i)); } @@ -68,20 +70,21 @@ Status CLDeconvolutionLayerUpsampleKernel::validate(const ITensorInfo *input, co return Status{}; } -void CLDeconvolutionLayerUpsampleKernel::configure(const ICLTensor *input, ICLTensor *output, - const PadStrideInfo &info) +void CLDeconvolutionLayerUpsampleKernel::configure(const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info) { configure(CLKernelLibrary::get().get_compile_context(), input, output, info); } -void CLDeconvolutionLayerUpsampleKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, - const PadStrideInfo &info) +void CLDeconvolutionLayerUpsampleKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + ICLTensor *output, + const PadStrideInfo &info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); // Perform validation step ARM_COMPUTE_ERROR_THROW_ON(CLDeconvolutionLayerUpsampleKernel::validate(input->info(), output->info(), info)); - auto padding_info = get_padding_info({ input, output }); + auto padding_info = get_padding_info({input, output}); _input = input; _output = output; @@ -119,7 +122,7 @@ void CLDeconvolutionLayerUpsampleKernel::run(const Window &window, cl::CommandQu const int out_end_y = _output->info()->dimension(idx_h) - _info.pad_bottom() + _info.stride().second - 1; const int out_step_y = _info.stride().second; - switch(_data_layout) + switch (_data_layout) { case DataLayout::NCHW: { @@ -137,8 +140,7 @@ void CLDeconvolutionLayerUpsampleKernel::run(const Window &window, cl::CommandQu add_3D_tensor_argument(idx, _input, slice_in); add_3D_tensor_argument(idx, _output, slice_out); enqueue(queue, *this, slice_out, lws_hint()); - } - while(collapsed.slide_window_slice_3D(slice_in) && collapsed.slide_window_slice_3D(slice_out)); + } while (collapsed.slide_window_slice_3D(slice_in) && collapsed.slide_window_slice_3D(slice_out)); break; } case DataLayout::NHWC: @@ -156,8 +158,7 @@ void CLDeconvolutionLayerUpsampleKernel::run(const Window &window, cl::CommandQu add_3D_tensor_argument(idx, _input, slice_in); add_3D_tensor_argument(idx, _output, slice_out); enqueue(queue, *this, slice_out, lws_hint()); - } - while(window.slide_window_slice_3D(slice_in) && window.slide_window_slice_3D(slice_out)); + } while (window.slide_window_slice_3D(slice_in) && window.slide_window_slice_3D(slice_out)); break; } default: diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h index e0d1322341..762989a836 100644 --- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h +++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h @@ -62,7 +62,10 @@ public: * @param[out] output Destination tensor. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. * @param[in] info Contains padding and stride information described in @ref PadStrideInfo. */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input, + ICLTensor *output, + const PadStrideInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample * * @param[in] input Source tensor info. Data types supported: All. diff --git a/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp index 0fc0ff8168..b33e0a8b6f 100644 --- a/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp +++ b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp @@ -27,9 +27,10 @@ #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/StringUtils.h" +#include "arm_compute/core/Validate.h" + #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" @@ -38,7 +39,11 @@ namespace arm_compute { namespace { -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, +Status validate_arguments(const ITensorInfo *input, + const ITensorInfo *bias, + const ITensorInfo *output, + const ITensorInfo *input_info, + const ITensorInfo *weights_info, const PadStrideInfo &deconv_info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output, input_info, weights_info); @@ -53,19 +58,21 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con ARM_COMPUTE_RETURN_ERROR_ON(weights_info->dimension(idx_w) != deconv_info.stride().first); ARM_COMPUTE_RETURN_ERROR_ON(weights_info->dimension(idx_h) != deconv_info.stride().second); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S32); - if(!is_qasymm) + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16, DataType::QASYMM8, + DataType::QASYMM8_SIGNED, DataType::S32); + if (!is_qasymm) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, input_info, weights_info); } - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_info->dimension(idx_w) * weights_info->dimension(idx_h) * weights_info->dimension(idx_b)); + ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_info->dimension(idx_w) * weights_info->dimension(idx_h) * + weights_info->dimension(idx_b)); ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(1) != input_info->dimension(idx_w)); ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(2) != input_info->dimension(idx_h)); ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(3) != input_info->dimension(idx_b)); - if(bias != nullptr) + if (bias != nullptr) { - if(is_qasymm) + if (is_qasymm) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32); } @@ -76,19 +83,26 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con ARM_COMPUTE_RETURN_ERROR_ON(bias->dimension(0) != weights_info->dimension(idx_b)); } - if(output->total_size() != 0) + if (output->total_size() != 0) { const PadStrideInfo stride_info(deconv_info.stride().first, deconv_info.stride().second); - auto out_dims = deconvolution_output_dimensions(input_info->dimension(idx_w), input_info->dimension(idx_h), weights_info->dimension(idx_w), weights_info->dimension(idx_h), stride_info); + auto out_dims = deconvolution_output_dimensions(input_info->dimension(idx_w), input_info->dimension(idx_h), + weights_info->dimension(idx_w), weights_info->dimension(idx_h), + stride_info); - const TensorShape output_shape = misc::shape_calculator::compute_deconvolution_output_shape(out_dims, *input_info, *weights_info); + const TensorShape output_shape = + misc::shape_calculator::compute_deconvolution_output_shape(out_dims, *input_info, *weights_info); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape); } return Status{}; } -std::pair validate_and_configure_window(const ITensorInfo *input, ITensorInfo *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info) +std::pair validate_and_configure_window(const ITensorInfo *input, + ITensorInfo *output, + const ITensorInfo *input_info, + const ITensorInfo *weights_info, + const PadStrideInfo &deconv_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -97,11 +111,17 @@ std::pair validate_and_configure_window(const ITensorInfo *input const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); const PadStrideInfo stride_info(deconv_info.stride().first, deconv_info.stride().second); - auto out_dims = deconvolution_output_dimensions(input_info->dimension(idx_w), input_info->dimension(idx_h), weights_info->dimension(idx_w), weights_info->dimension(idx_h), stride_info); + auto out_dims = + deconvolution_output_dimensions(input_info->dimension(idx_w), input_info->dimension(idx_h), + weights_info->dimension(idx_w), weights_info->dimension(idx_h), stride_info); - const TensorShape output_shape = misc::shape_calculator::compute_deconvolution_output_shape(out_dims, *input_info, *weights_info); + const TensorShape output_shape = + misc::shape_calculator::compute_deconvolution_output_shape(out_dims, *input_info, *weights_info); - auto_init_if_empty(*output, input->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout).set_quantization_info(input->quantization_info())); + auto_init_if_empty(*output, input->clone() + ->set_tensor_shape(output_shape) + .set_data_layout(data_layout) + .set_quantization_info(input->quantization_info())); Window win = calculate_max_window(*input); @@ -109,29 +129,37 @@ std::pair validate_and_configure_window(const ITensorInfo *input } } // namespace -CLDeconvolutionReshapeOutputKernel::CLDeconvolutionReshapeOutputKernel() - : _add_bias(false), - _bias(nullptr) +CLDeconvolutionReshapeOutputKernel::CLDeconvolutionReshapeOutputKernel() : _add_bias(false), _bias(nullptr) { _type = CLKernelType::ELEMENTWISE; } -void CLDeconvolutionReshapeOutputKernel::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, +void CLDeconvolutionReshapeOutputKernel::configure(const ICLTensor *input, + const ICLTensor *bias, + ICLTensor *output, + const ITensorInfo *input_info, + const ITensorInfo *weights_info, const PadStrideInfo &deconv_info) { configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, input_info, weights_info, deconv_info); } -void CLDeconvolutionReshapeOutputKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, - const ITensorInfo *weights_info, - const PadStrideInfo &deconv_info) +void CLDeconvolutionReshapeOutputKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + const ICLTensor *bias, + ICLTensor *output, + const ITensorInfo *input_info, + const ITensorInfo *weights_info, + const PadStrideInfo &deconv_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, input_info, weights_info); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (bias != nullptr ? bias->info() : nullptr), output->info(), input_info, weights_info, deconv_info)); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (bias != nullptr ? bias->info() : nullptr), + output->info(), input_info, weights_info, deconv_info)); - auto padding_info = get_padding_info({ input, bias, output }); + auto padding_info = get_padding_info({input, bias, output}); // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), output->info(), input_info, weights_info, deconv_info); + auto win_config = + validate_and_configure_window(input->info(), output->info(), input_info, weights_info, deconv_info); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); const DataLayout data_layout = input_info->data_layout(); @@ -178,7 +206,11 @@ void CLDeconvolutionReshapeOutputKernel::configure(const CLCompileContext &compi ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -Status CLDeconvolutionReshapeOutputKernel::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, +Status CLDeconvolutionReshapeOutputKernel::validate(const ITensorInfo *input, + const ITensorInfo *bias, + const ITensorInfo *output, + const ITensorInfo *input_info, + const ITensorInfo *weights_info, const PadStrideInfo &deconv_info) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, bias, output, input_info, weights_info, deconv_info)); @@ -194,7 +226,7 @@ void CLDeconvolutionReshapeOutputKernel::run(const Window &window, cl::CommandQu unsigned int idx = 0; add_3D_tensor_argument(idx, _input, collapsed); add_3D_tensor_argument(idx, _output, collapsed); - if(_add_bias) + if (_add_bias) { add_1D_tensor_argument(idx, _bias, collapsed); } diff --git a/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h index ce354fa86f..8f436b07e3 100644 --- a/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h +++ b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h @@ -67,7 +67,12 @@ public: * @param[in] weights_info Deconvolution weights tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported. */ - void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info); + void configure(const ICLTensor *input, + const ICLTensor *bias, + ICLTensor *output, + const ITensorInfo *input_info, + const ITensorInfo *weights_info, + const PadStrideInfo &deconv_info); /** Initialise the kernel's source and destination. * * @param[in] compile_context The compile context to be used. @@ -79,8 +84,13 @@ public: * @param[in] weights_info Deconvolution weights tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input. * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported. */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, - const PadStrideInfo &deconv_info); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input, + const ICLTensor *bias, + ICLTensor *output, + const ITensorInfo *input_info, + const ITensorInfo *weights_info, + const PadStrideInfo &deconv_info); /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionReshapeOutputKernel. * @@ -93,7 +103,12 @@ public: * * @return a Status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info); + static Status validate(const ITensorInfo *input, + const ITensorInfo *bias, + const ITensorInfo *output, + const ITensorInfo *input_info, + const ITensorInfo *weights_info, + const PadStrideInfo &deconv_info); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp index 5c1dc4fbf6..cdf19ab2e1 100644 --- a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp @@ -27,6 +27,7 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -49,12 +50,14 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, i ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_channel] % (block_shape * block_shape) != 0); // Validate output if initialized - if(output->total_size() != 0) + if (output->total_size() != 0) { const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[idx_width] != (block_shape * input->tensor_shape()[idx_width])); - ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[idx_height] != (block_shape * input->tensor_shape()[idx_height])); + ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[idx_width] != + (block_shape * input->tensor_shape()[idx_width])); + ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[idx_height] != + (block_shape * input->tensor_shape()[idx_height])); ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 4); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); } @@ -63,8 +66,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, i } } // namespace -CLDepthToSpaceLayerKernel::CLDepthToSpaceLayerKernel() - : _input(nullptr), _output(nullptr), _block_shape() +CLDepthToSpaceLayerKernel::CLDepthToSpaceLayerKernel() : _input(nullptr), _output(nullptr), _block_shape() { _type = CLKernelType::ELEMENTWISE; } @@ -74,14 +76,18 @@ void CLDepthToSpaceLayerKernel::configure(const ICLTensor *input, ICLTensor *out configure(CLKernelLibrary::get().get_compile_context(), input, output, block_shape); } -void CLDepthToSpaceLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape) +void CLDepthToSpaceLayerKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + ICLTensor *output, + int32_t block_shape) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - TensorShape output_shape = compute_depth_to_space_shape(input->info()->tensor_shape(), input->info()->data_layout(), block_shape); + TensorShape output_shape = + compute_depth_to_space_shape(input->info()->tensor_shape(), input->info()->data_layout(), block_shape); auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type()); - auto padding_info = get_padding_info({ input, output }); + auto padding_info = get_padding_info({input, output}); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), block_shape)); @@ -98,7 +104,9 @@ void CLDepthToSpaceLayerKernel::configure(const CLCompileContext &compile_contex build_opts.add_option("-DCHANNEL_SIZE=" + support::cpp11::to_string(input->info()->dimension(idx_channel))); build_opts.add_option("-DBLOCK_SHAPE=" + support::cpp11::to_string(block_shape)); build_opts.add_option("-DWIDTH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_width))); - _kernel = create_kernel(compile_context, "depth_to_space_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options()); + _kernel = create_kernel(compile_context, + "depth_to_space_" + lower_string(string_from_data_layout(input->info()->data_layout())), + build_opts.options()); // Configure kernel window Window win = calculate_max_window(*input->info(), Steps()); @@ -137,7 +145,6 @@ void CLDepthToSpaceLayerKernel::run(const Window &window, cl::CommandQueue &queu enqueue(queue, *this, slice_in, lws_hint()); ++batch_id; - } - while(window.slide_window_slice_3D(slice_in)); + } while (window.slide_window_slice_3D(slice_in)); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.h b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.h index 1f7f77b569..cef70c4dda 100644 --- a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.h +++ b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.h @@ -25,6 +25,7 @@ #define ARM_COMPUTE_CLDEPTHTOSPACELAYERKERNEL_H #include "arm_compute/core/Types.h" + #include "src/core/CL/ICLKernel.h" namespace arm_compute @@ -61,7 +62,8 @@ public: * @param[out] output Tensor output. Data types supported: same as @p input * @param[in] block_shape Block shape value. */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape); + void + configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape); /** Static function to check if given info will lead to a valid configuration of @ref CLDepthToSpaceLayerKernel. * * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp index e34b6929e7..b95abe795f 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp @@ -23,16 +23,17 @@ */ #include "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h" -#include "arm_compute/core/utils/ActivationFunctionUtils.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/utils/ActivationFunctionUtils.h" #include "arm_compute/core/utils/helpers/AdjustVecSize.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/CL/CLUtils.h" #include "src/core/CL/CLValidate.h" #include "src/core/CL/ICLKernel.h" @@ -45,12 +46,18 @@ namespace arm_compute { namespace { -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const DWCComputeKernelInfo &dwc_info, - const ConvolutionInfo &conv_info, const ITensorInfo *output_multipliers, const ITensorInfo *output_shifts) +Status validate_arguments(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const DWCComputeKernelInfo &dwc_info, + const ConvolutionInfo &conv_info, + const ITensorInfo *output_multipliers, + const ITensorInfo *output_shifts) { ARM_COMPUTE_UNUSED(dwc_info); bool in_place = false; - if(output == nullptr || output == input) + if (output == nullptr || output == input) { in_place = true; output = input; @@ -58,11 +65,14 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(input, DataLayout::NHWC); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, + DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_stride_info.stride().first > 1 && dwc_info.m0 != 1); ARM_COMPUTE_RETURN_ERROR_ON(conv_info.dilation.x() > 1 && dwc_info.m0 != 1); ARM_COMPUTE_RETURN_ERROR_ON((dwc_info.export_input_to_cl_image == true)); - ARM_COMPUTE_RETURN_ERROR_ON_MSG((dwc_info.export_weights_to_cl_image == true) && (export_to_cl_image(weights) == false), "Weights cannot be exported to cl_image!"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG((dwc_info.export_weights_to_cl_image == true) && + (export_to_cl_image(weights) == false), + "Weights cannot be exported to cl_image!"); ARM_COMPUTE_RETURN_ERROR_ON((dwc_info.export_weights_to_cl_image == true) && ((dwc_info.n0 % 4) != 0)); ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_stride_info.stride().first < 1); ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_stride_info.stride().second < 1); @@ -72,33 +82,40 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_c) != (input->dimension(idx_c) * conv_info.depth_multiplier)); // In place restrictions - if(in_place) + if (in_place) { - const int weights_width_idx = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH); - const int weights_height_idx = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::HEIGHT); - ARM_COMPUTE_RETURN_ERROR_ON(weights->tensor_shape()[weights_width_idx] != 1U || weights->tensor_shape()[weights_height_idx] != 1U); + const int weights_width_idx = + get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH); + const int weights_height_idx = + get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::HEIGHT); + ARM_COMPUTE_RETURN_ERROR_ON(weights->tensor_shape()[weights_width_idx] != 1U || + weights->tensor_shape()[weights_height_idx] != 1U); ARM_COMPUTE_RETURN_ERROR_ON(conv_info.depth_multiplier != 1U); ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_stride_info.stride() != std::make_pair(1U, 1U)); ARM_COMPUTE_RETURN_ERROR_ON(conv_info.dilation != Size2D(1U, 1U)); - ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_stride_info.has_padding()); // Note that in princple padding can be supported with in_place but we choose not to support it + ARM_COMPUTE_RETURN_ERROR_ON( + conv_info.pad_stride_info + .has_padding()); // Note that in princple padding can be supported with in_place but we choose not to support it } - const ConvolutionInfo info{ conv_info.pad_stride_info, conv_info.depth_multiplier, ActivationLayerInfo(), conv_info.dilation }; - const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info); + const ConvolutionInfo info{conv_info.pad_stride_info, conv_info.depth_multiplier, ActivationLayerInfo(), + conv_info.dilation}; + const TensorShape output_shape = + arm_compute::misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info); - if(conv_info.depth_multiplier > 1 && dwc_info.n0 > 1) + if (conv_info.depth_multiplier > 1 && dwc_info.n0 > 1) { ARM_COMPUTE_RETURN_ERROR_ON((conv_info.depth_multiplier % dwc_info.n0) != 0); } const bool is_quantized = is_data_type_quantized(input->data_type()); - if(biases != nullptr) + if (biases != nullptr) { ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != output_shape[idx_c]); ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1); - if(is_quantized) + if (is_quantized) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32); } @@ -108,7 +125,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, } } - if(is_quantized) + if (is_quantized) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output_multipliers, output_shifts); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_multipliers, 1, DataType::S32); @@ -116,7 +133,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, ARM_COMPUTE_RETURN_ERROR_ON(output_multipliers->num_dimensions() > 1); ARM_COMPUTE_RETURN_ERROR_ON(output_shifts->num_dimensions() > 1); - if(is_data_type_quantized_per_channel(weights->data_type())) + if (is_data_type_quantized_per_channel(weights->data_type())) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QSYMM8_PER_CHANNEL); ARM_COMPUTE_RETURN_ERROR_ON(output_shape[idx_c] != output_multipliers->dimension(0)); @@ -134,22 +151,24 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); } - if(output->total_size() != 0) + if (output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); } - if(is_data_type_quantized(input->data_type())) + if (is_data_type_quantized(input->data_type())) { const UniformQuantizationInfo iq_info = input->quantization_info().uniform(); const UniformQuantizationInfo wq_info = weights->quantization_info().uniform(); - const UniformQuantizationInfo oq_info = (output->total_size() != 0) ? output->quantization_info().uniform() : iq_info; + const UniformQuantizationInfo oq_info = + (output->total_size() != 0) ? output->quantization_info().uniform() : iq_info; float multiplier = iq_info.scale * wq_info.scale / oq_info.scale; int output_multiplier = 0; int output_shift = 0; - ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift)); + ARM_COMPUTE_RETURN_ON_ERROR( + quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift)); } return Status{}; @@ -171,30 +190,48 @@ CLDepthwiseConvolutionLayerNativeKernel::CLDepthwiseConvolutionLayerNativeKernel _type = CLKernelType::DEPTHWISE; } -void CLDepthwiseConvolutionLayerNativeKernel::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, - const DWCComputeKernelInfo &dwc_info, const ConvolutionInfo &conv_info, - const ICLTensor *output_multipliers, const ICLTensor *output_shifts) +void CLDepthwiseConvolutionLayerNativeKernel::configure(ICLTensor *input, + const ICLTensor *weights, + const ICLTensor *biases, + ICLTensor *output, + const DWCComputeKernelInfo &dwc_info, + const ConvolutionInfo &conv_info, + const ICLTensor *output_multipliers, + const ICLTensor *output_shifts) { - configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, dwc_info, conv_info, output_multipliers, output_shifts); + configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, dwc_info, conv_info, + output_multipliers, output_shifts); } -void CLDepthwiseConvolutionLayerNativeKernel::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, - const DWCComputeKernelInfo &dwc_info, const ConvolutionInfo &conv_info, - const ICLTensor *output_multipliers, const ICLTensor *output_shifts) +void CLDepthwiseConvolutionLayerNativeKernel::configure(const CLCompileContext &compile_context, + ICLTensor *input, + const ICLTensor *weights, + const ICLTensor *biases, + ICLTensor *output, + const DWCComputeKernelInfo &dwc_info, + const ConvolutionInfo &conv_info, + const ICLTensor *output_multipliers, + const ICLTensor *output_shifts) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights); - if(output == nullptr) + if (output == nullptr) { // In-place output = input; } - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), - dwc_info, conv_info, (output_multipliers != nullptr) ? output_multipliers->info() : nullptr, (output_shifts != nullptr) ? output_shifts->info() : nullptr)); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments( + input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), dwc_info, + conv_info, (output_multipliers != nullptr) ? output_multipliers->info() : nullptr, + (output_shifts != nullptr) ? output_shifts->info() : nullptr)); - auto padding_info = get_padding_info({ input, output }); + auto padding_info = get_padding_info({input, output}); - const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_depthwise_convolution_shape(*(input->info()), *(weights->info()), conv_info); - auto_init_if_empty(*(output->info()), input->info()->clone()->set_tensor_shape(output_shape).set_quantization_info(output->info()->quantization_info())); + const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_depthwise_convolution_shape( + *(input->info()), *(weights->info()), conv_info); + auto_init_if_empty(*(output->info()), input->info() + ->clone() + ->set_tensor_shape(output_shape) + .set_quantization_info(output->info()->quantization_info())); _input = input; _output = output; @@ -214,12 +251,12 @@ void CLDepthwiseConvolutionLayerNativeKernel::configure(const CLCompileContext & CLBuildOptions build_opts; // Update the padding for the input/weights tensor if we can export to cl_image - if(_export_input_to_cl_image) + if (_export_input_to_cl_image) { arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(input->info()); } - if(_export_weights_to_cl_image) + if (_export_weights_to_cl_image) { arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(weights->info()); } @@ -229,9 +266,10 @@ void CLDepthwiseConvolutionLayerNativeKernel::configure(const CLCompileContext & const auto act_function = conv_info.act_info.activation(); const auto dst_data_type = _output->info()->data_type(); - if((gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST) - && (act_function == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU || act_function == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) - && (dst_data_type == DataType::F32 || dst_data_type == DataType::F16)) + if ((gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST) && + (act_function == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU || + act_function == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) && + (dst_data_type == DataType::F32 || dst_data_type == DataType::F16)) { // -cl-fast-relaxed-math also sets -cl-finite-math-only and -cl-unsafe-math-optimizations // to disable -cl-finite-math-only, we only include -cl-unsafe-math-optimizations @@ -268,23 +306,24 @@ void CLDepthwiseConvolutionLayerNativeKernel::configure(const CLCompileContext & build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); build_opts.add_option("-DM0_A=" + support::cpp11::to_string(_weights->info()->dimension(1) + m0 - 1)); - build_opts.add_option_if_else(conv_info.depth_multiplier > 1, "-DN0_A=1", "-DN0_A=" + support::cpp11::to_string(n0)); + build_opts.add_option_if_else(conv_info.depth_multiplier > 1, "-DN0_A=1", + "-DN0_A=" + support::cpp11::to_string(n0)); build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(_output->info()->dimension(0) % n0)); build_opts.add_option_if(_input->info()->num_dimensions() > 3, "-DBATCHED_EXECUTION"); // Force unroll with pragma when any of the following values exceed the maximum number of manual unroll - set_unroll_with_pragma(build_opts, { static_cast(_weights->info()->dimension(1) + m0 - 1), - static_cast(_weights->info()->dimension(1)), - static_cast(_weights->info()->dimension(2)) - }); + set_unroll_with_pragma(build_opts, {static_cast(_weights->info()->dimension(1) + m0 - 1), + static_cast(_weights->info()->dimension(1)), + static_cast(_weights->info()->dimension(2))}); - if(biases != nullptr) + if (biases != nullptr) { build_opts.add_option(std::string("-DHAS_BIAS")); - build_opts.add_option(std::string("-DBIA_DATA_TYPE=" + get_cl_type_from_data_type(biases->info()->data_type()))); + build_opts.add_option( + std::string("-DBIA_DATA_TYPE=" + get_cl_type_from_data_type(biases->info()->data_type()))); } - if(_is_quantized) + if (_is_quantized) { kernel_name = "dwc_native_quantized_nhwc"; const UniformQuantizationInfo iqinfo = input->info()->quantization_info().uniform(); @@ -306,13 +345,17 @@ void CLDepthwiseConvolutionLayerNativeKernel::configure(const CLCompileContext & build_opts.add_option("-DDST_OFFSET=" + support::cpp11::to_string(oqinfo.offset)); build_opts.add_option("-DZERO_VALUE=" + support::cpp11::to_string(zero_value_s32)); build_opts.add_option("-DACC_DATA_TYPE=" + get_cl_type_from_data_type(DataType::S32)); - build_opts.add_option("-DDST_MULTIPLIERS_DATA_TYPE=" + get_cl_type_from_data_type(_output_multipliers->info()->data_type())); - build_opts.add_option("-DDST_SHIFTS_DATA_TYPE=" + get_cl_type_from_data_type(_output_shifts->info()->data_type())); - build_opts.add_option_if_else(weights->info()->data_type() == DataType::QSYMM8_PER_CHANNEL, "-DQUANTIZATION_TYPE=PER_CHANNEL", "-DQUANTIZATION_TYPE=PER_TENSOR"); + build_opts.add_option("-DDST_MULTIPLIERS_DATA_TYPE=" + + get_cl_type_from_data_type(_output_multipliers->info()->data_type())); + build_opts.add_option("-DDST_SHIFTS_DATA_TYPE=" + + get_cl_type_from_data_type(_output_shifts->info()->data_type())); + build_opts.add_option_if_else(weights->info()->data_type() == DataType::QSYMM8_PER_CHANNEL, + "-DQUANTIZATION_TYPE=PER_CHANNEL", "-DQUANTIZATION_TYPE=PER_TENSOR"); // Note: We expect the input and output tensors to always adopt a per-tensor quantization approach int a_val{}; int b_val{}; - std::tie(b_val, a_val) = get_quantized_activation_min_max(conv_info.act_info, input->info()->data_type(), oqinfo); + std::tie(b_val, a_val) = + get_quantized_activation_min_max(conv_info.act_info, input->info()->data_type(), oqinfo); build_opts.add_option_if(conv_info.act_info.enabled(), "-DA_VAL=" + support::cpp11::to_string(a_val)); build_opts.add_option_if(conv_info.act_info.enabled(), "-DB_VAL=" + support::cpp11::to_string(b_val)); @@ -321,8 +364,10 @@ void CLDepthwiseConvolutionLayerNativeKernel::configure(const CLCompileContext & { kernel_name = "dwc_native_fp_nhwc"; build_opts.add_option("-DACC_DATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.add_option_if(conv_info.act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(conv_info.act_info.a())); - build_opts.add_option_if(conv_info.act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(conv_info.act_info.b())); + build_opts.add_option_if(conv_info.act_info.enabled(), + "-DA_VAL=" + float_to_string_with_full_precision(conv_info.act_info.a())); + build_opts.add_option_if(conv_info.act_info.enabled(), + "-DB_VAL=" + float_to_string_with_full_precision(conv_info.act_info.b())); } Window win = calculate_max_window(*(output->info()), Steps(n0, m0)); @@ -350,10 +395,17 @@ void CLDepthwiseConvolutionLayerNativeKernel::configure(const CLCompileContext & _config_id += string_from_data_type(input->info()->data_type()); } -Status CLDepthwiseConvolutionLayerNativeKernel::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, - const DWCComputeKernelInfo &dwc_info, const ConvolutionInfo &conv_info, const ITensorInfo *output_multipliers, const ITensorInfo *output_shifts) +Status CLDepthwiseConvolutionLayerNativeKernel::validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const DWCComputeKernelInfo &dwc_info, + const ConvolutionInfo &conv_info, + const ITensorInfo *output_multipliers, + const ITensorInfo *output_shifts) { - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, biases, output, dwc_info, conv_info, output_multipliers, output_shifts)); + ARM_COMPUTE_RETURN_ON_ERROR( + validate_arguments(input, weights, biases, output, dwc_info, conv_info, output_multipliers, output_shifts)); return Status{}; } @@ -370,47 +422,52 @@ void CLDepthwiseConvolutionLayerNativeKernel::run(const Window &window, cl::Comm cl::Image2D input_cl_image; cl::Image2D weights_cl_image; - if(_export_input_to_cl_image || _export_weights_to_cl_image) + if (_export_input_to_cl_image || _export_weights_to_cl_image) { // Export cl_buffer to cl_image - if(_export_input_to_cl_image) + if (_export_input_to_cl_image) { - const size_t image_w = _input->info()->dimension(0) / 4; - const size_t image_h = _input->info()->dimension(1) * _input->info()->dimension(2) * _input->info()->dimension(3); + const size_t image_w = _input->info()->dimension(0) / 4; + const size_t image_h = + _input->info()->dimension(1) * _input->info()->dimension(2) * _input->info()->dimension(3); const TensorShape shape2d(image_w, image_h); const size_t image_row_pitch = _input->info()->strides_in_bytes()[1]; - input_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), _input->cl_buffer(), shape2d, _input->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly); + input_cl_image = + create_image2d_from_buffer(CLKernelLibrary::get().context(), _input->cl_buffer(), shape2d, + _input->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly); } - if(_export_weights_to_cl_image) + if (_export_weights_to_cl_image) { - const size_t image_w = _weights->info()->dimension(0) / 4; - const size_t image_h = _weights->info()->dimension(1) * _weights->info()->dimension(2) * _weights->info()->dimension(3); + const size_t image_w = _weights->info()->dimension(0) / 4; + const size_t image_h = + _weights->info()->dimension(1) * _weights->info()->dimension(2) * _weights->info()->dimension(3); const TensorShape shape2d(image_w, image_h); const size_t image_row_pitch = _weights->info()->strides_in_bytes()[1]; - weights_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), _weights->cl_buffer(), shape2d, _weights->info()->data_type(), image_row_pitch, - CLImage2DType::ReadOnly); + weights_cl_image = + create_image2d_from_buffer(CLKernelLibrary::get().context(), _weights->cl_buffer(), shape2d, + _weights->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly); } } unsigned int idx = 0; - if(_export_input_to_cl_image) + if (_export_input_to_cl_image) { _kernel.setArg(idx++, input_cl_image); } add_4d_tensor_nhwc_argument(idx, _input); add_4d_tensor_nhwc_argument(idx, _output); - if(_export_weights_to_cl_image) + if (_export_weights_to_cl_image) { _kernel.setArg(idx++, weights_cl_image); } add_4d_tensor_nhwc_argument(idx, _weights); - if(_is_quantized) + if (_is_quantized) { add_1D_tensor_argument(idx, _output_multipliers, slice); add_1D_tensor_argument(idx, _output_shifts, slice); } - if(_biases != nullptr) + if (_biases != nullptr) { add_1D_tensor_argument(idx, _biases, slice); } diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h index 8eee7b2500..d34a662966 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h @@ -24,11 +24,11 @@ #ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H #define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H -#include "src/core/CL/ICLKernel.h" - #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/function_info/ConvolutionInfo.h" +#include "src/core/CL/ICLKernel.h" + namespace arm_compute { class ICLTensor; @@ -74,15 +74,28 @@ public: * * no padding * * no change of data layout after configure */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCComputeKernelInfo &dwc_info, - const ConvolutionInfo &conv_info, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); + void configure(const CLCompileContext &compile_context, + ICLTensor *input, + const ICLTensor *weights, + const ICLTensor *biases, + ICLTensor *output, + const DWCComputeKernelInfo &dwc_info, + const ConvolutionInfo &conv_info, + const ICLTensor *output_multipliers = nullptr, + const ICLTensor *output_shifts = nullptr); /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerNativeKernel * * Similar to @ref CLDepthwiseConvolutionLayerNativeKernel::configure() */ - void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCComputeKernelInfo &dwc_info, - const ConvolutionInfo &conv_info, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr); + void configure(ICLTensor *input, + const ICLTensor *weights, + const ICLTensor *biases, + ICLTensor *output, + const DWCComputeKernelInfo &dwc_info, + const ConvolutionInfo &conv_info, + const ICLTensor *output_multipliers = nullptr, + const ICLTensor *output_shifts = nullptr); /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerNativeKernel * @@ -90,23 +103,29 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const DWCComputeKernelInfo &dwc_info, - const ConvolutionInfo &conv_info, const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr); + static Status validate(const ITensorInfo *input, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *output, + const DWCComputeKernelInfo &dwc_info, + const ConvolutionInfo &conv_info, + const ITensorInfo *output_multipliers = nullptr, + const ITensorInfo *output_shifts = nullptr); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; private: - const ICLTensor *_input {}; + const ICLTensor *_input{}; const ICLTensor *_weights{}; const ICLTensor *_biases{}; ICLTensor *_output{}; - unsigned int _depth_multiplier{ 0 }; + unsigned int _depth_multiplier{0}; const ICLTensor *_output_multipliers{}; const ICLTensor *_output_shifts{}; - bool _export_input_to_cl_image{ false }; - bool _export_weights_to_cl_image{ true }; - bool _is_quantized{ false }; + bool _export_input_to_cl_image{false}; + bool _export_weights_to_cl_image{true}; + bool _is_quantized{false}; }; } // namespace arm_compute #endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H */ diff --git a/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp b/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp index 9b514ed705..3d8f875ef7 100644 --- a/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp +++ b/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp @@ -28,6 +28,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -37,17 +38,20 @@ namespace arm_compute { namespace { -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config) +Status validate_arguments(const ITensorInfo *input, + const ITensorInfo *output, + const ITensorInfo *idx, + const FFTDigitReverseKernelInfo &config) { ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(input, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON(input->num_channels() != 1 && input->num_channels() != 2); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(idx, 1, DataType::U32); - ARM_COMPUTE_RETURN_ERROR_ON(std::set({ 0, 1 }).count(config.axis) == 0); + ARM_COMPUTE_RETURN_ERROR_ON(std::set({0, 1}).count(config.axis) == 0); ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[config.axis] != idx->tensor_shape().x()); // Checks performed when output is configured - if((output != nullptr) && (output->total_size() != 0)) + if ((output != nullptr) && (output->total_size() != 0)) { ARM_COMPUTE_RETURN_ERROR_ON(output->num_channels() != 2); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); @@ -57,7 +61,10 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c return Status{}; } -std::pair validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, ITensorInfo *idx, const FFTDigitReverseKernelInfo &config) +std::pair validate_and_configure_window(ITensorInfo *input, + ITensorInfo *output, + ITensorInfo *idx, + const FFTDigitReverseKernelInfo &config) { ARM_COMPUTE_UNUSED(idx, config); @@ -69,21 +76,27 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen } } // namespace -CLFFTDigitReverseKernel::CLFFTDigitReverseKernel() - : _input(nullptr), _output(nullptr), _idx(nullptr) +CLFFTDigitReverseKernel::CLFFTDigitReverseKernel() : _input(nullptr), _output(nullptr), _idx(nullptr) { _type = CLKernelType::ELEMENTWISE; } -void CLFFTDigitReverseKernel::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config) +void CLFFTDigitReverseKernel::configure(const ICLTensor *input, + ICLTensor *output, + const ICLTensor *idx, + const FFTDigitReverseKernelInfo &config) { configure(CLKernelLibrary::get().get_compile_context(), input, output, idx, config); } -void CLFFTDigitReverseKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config) +void CLFFTDigitReverseKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + ICLTensor *output, + const ICLTensor *idx, + const FFTDigitReverseKernelInfo &config) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, idx); - auto padding_info = get_padding_info({ input, output, idx }); + auto padding_info = get_padding_info({input, output, idx}); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), idx->info(), config)); _input = input; @@ -114,10 +127,14 @@ void CLFFTDigitReverseKernel::configure(const CLCompileContext &compile_context, ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -Status CLFFTDigitReverseKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config) +Status CLFFTDigitReverseKernel::validate(const ITensorInfo *input, + const ITensorInfo *output, + const ITensorInfo *idx, + const FFTDigitReverseKernelInfo &config) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, idx, config)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), idx->clone().get(), config).first); + ARM_COMPUTE_RETURN_ON_ERROR( + validate_and_configure_window(input->clone().get(), output->clone().get(), idx->clone().get(), config).first); return Status{}; } @@ -137,7 +154,6 @@ void CLFFTDigitReverseKernel::run(const Window &window, cl::CommandQueue &queue) add_3D_tensor_argument(idx, _output, slice); add_1D_tensor_argument(idx, _idx, slice); enqueue(queue, *this, slice, lws_hint()); - } - while(collapsed.slide_window_slice_3D(slice)); + } while (collapsed.slide_window_slice_3D(slice)); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLFFTDigitReverseKernel.h b/src/core/CL/kernels/CLFFTDigitReverseKernel.h index e5583a4c22..fdd1bcc3d3 100644 --- a/src/core/CL/kernels/CLFFTDigitReverseKernel.h +++ b/src/core/CL/kernels/CLFFTDigitReverseKernel.h @@ -24,10 +24,10 @@ #ifndef ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H #define ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H -#include "src/core/CL/ICLKernel.h" - #include "arm_compute/core/KernelDescriptors.h" +#include "src/core/CL/ICLKernel.h" + namespace arm_compute { // Forward declarations @@ -56,7 +56,8 @@ public: * @param[in] idx Digit reverse index tensor. Data type supported: U32 * @param[in] config Kernel configuration. */ - void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config); + void + configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config); /** Set the input and output tensors. * * @param[in] compile_context The compile context to be used. @@ -65,7 +66,11 @@ public: * @param[in] idx Digit reverse index tensor. Data type supported: U32 * @param[in] config Kernel configuration. */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input, + ICLTensor *output, + const ICLTensor *idx, + const FFTDigitReverseKernelInfo &config); /** Static function to check if given info will lead to a valid configuration of @ref CLFFTDigitReverseKernel * * @param[in] input Source tensor info. Data types supported: F16/F32. @@ -75,7 +80,10 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config); + static Status validate(const ITensorInfo *input, + const ITensorInfo *output, + const ITensorInfo *idx, + const FFTDigitReverseKernelInfo &config); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/src/core/CL/kernels/CLFFTRadixStageKernel.cpp b/src/core/CL/kernels/CLFFTRadixStageKernel.cpp index 95f4b640bd..3729e6b77d 100644 --- a/src/core/CL/kernels/CLFFTRadixStageKernel.cpp +++ b/src/core/CL/kernels/CLFFTRadixStageKernel.cpp @@ -29,6 +29,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -46,11 +47,11 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 2, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON(CLFFTRadixStageKernel::supported_radix().count(config.radix) == 0); - ARM_COMPUTE_RETURN_ERROR_ON(std::set({ 0, 1 }).count(config.axis) == 0); + ARM_COMPUTE_RETURN_ERROR_ON(std::set({0, 1}).count(config.axis) == 0); ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[config.axis] % config.radix); // Checks performed when output is configured - if((output != nullptr) && (output->total_size() != 0)) + if ((output != nullptr) && (output->total_size() != 0)) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); @@ -59,9 +60,10 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c return Status{}; } -std::pair validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const FFTRadixStageKernelInfo &config) +std::pair +validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const FFTRadixStageKernelInfo &config) { - if(output != nullptr) + if (output != nullptr) { auto_init_if_empty(*output, *input); } @@ -76,8 +78,7 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen } } // namespace -CLFFTRadixStageKernel::CLFFTRadixStageKernel() - : _input(nullptr), _output(nullptr), _run_in_place(false) +CLFFTRadixStageKernel::CLFFTRadixStageKernel() : _input(nullptr), _output(nullptr), _run_in_place(false) { _type = CLKernelType::ELEMENTWISE; } @@ -87,11 +88,15 @@ void CLFFTRadixStageKernel::configure(ICLTensor *input, ICLTensor *output, const configure(CLKernelLibrary::get().get_compile_context(), input, output, config); } -void CLFFTRadixStageKernel::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config) +void CLFFTRadixStageKernel::configure(const CLCompileContext &compile_context, + ICLTensor *input, + ICLTensor *output, + const FFTRadixStageKernelInfo &config) { ARM_COMPUTE_ERROR_ON_NULLPTR(input); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (output != nullptr) ? output->info() : nullptr, config)); - auto padding_info = get_padding_info({ input, output }); + ARM_COMPUTE_ERROR_THROW_ON( + validate_arguments(input->info(), (output != nullptr) ? output->info() : nullptr, config)); + auto padding_info = get_padding_info({input, output}); _input = input; _output = output; @@ -110,11 +115,12 @@ void CLFFTRadixStageKernel::configure(const CLCompileContext &compile_context, I _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Set static arguments if not the first stage - if(!config.is_first_stage) + if (!config.is_first_stage) { const unsigned int Ni = config.Nx * config.radix; const float exp_const = (-2.0 * M_PI) / static_cast(Ni); - unsigned int idx = (1 + (_run_in_place ? 0 : 1)) * num_arguments_per_3D_tensor(); // Skip the input and output parameters + unsigned int idx = + (1 + (_run_in_place ? 0 : 1)) * num_arguments_per_3D_tensor(); // Skip the input and output parameters _kernel.setArg(idx++, config.Nx); _kernel.setArg(idx++, Ni); _kernel.setArg(idx, exp_const); @@ -136,21 +142,22 @@ void CLFFTRadixStageKernel::configure(const CLCompileContext &compile_context, I ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -Status CLFFTRadixStageKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const FFTRadixStageKernelInfo &config) +Status CLFFTRadixStageKernel::validate(const ITensorInfo *input, + const ITensorInfo *output, + const FFTRadixStageKernelInfo &config) { const bool run_in_place = (output == nullptr) || (output == input); ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, config)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), - (run_in_place) ? nullptr : output->clone().get(), - config) - .first); + ARM_COMPUTE_RETURN_ON_ERROR( + validate_and_configure_window(input->clone().get(), (run_in_place) ? nullptr : output->clone().get(), config) + .first); return Status{}; } std::set CLFFTRadixStageKernel::supported_radix() { - return std::set { 2, 3, 4, 5, 7, 8 }; + return std::set{2, 3, 4, 5, 7, 8}; } void CLFFTRadixStageKernel::run(const Window &window, cl::CommandQueue &queue) @@ -165,12 +172,11 @@ void CLFFTRadixStageKernel::run(const Window &window, cl::CommandQueue &queue) { unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); - if(!_run_in_place) + if (!_run_in_place) { add_3D_tensor_argument(idx, _output, slice); } enqueue(queue, *this, slice, lws_hint()); - } - while(collapsed.slide_window_slice_3D(slice)); + } while (collapsed.slide_window_slice_3D(slice)); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLFFTRadixStageKernel.h b/src/core/CL/kernels/CLFFTRadixStageKernel.h index 9bb310db83..de80bfced3 100644 --- a/src/core/CL/kernels/CLFFTRadixStageKernel.h +++ b/src/core/CL/kernels/CLFFTRadixStageKernel.h @@ -24,10 +24,10 @@ #ifndef ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H #define ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H -#include "src/core/CL/ICLKernel.h" - #include "arm_compute/core/KernelDescriptors.h" +#include "src/core/CL/ICLKernel.h" + #include namespace arm_compute @@ -69,7 +69,10 @@ public: * @param[out] output Destination tensor. Can be nullptr. Data type supported: same as @p input * @param[in] config FFT descriptor metadata. */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config); + void configure(const CLCompileContext &compile_context, + ICLTensor *input, + ICLTensor *output, + const FFTRadixStageKernelInfo &config); /** Static function to check if given info will lead to a valid configuration of @ref CLFFTRadixStageKernel * * @param[in] input Source tensor info. Data types supported: F16/F32. diff --git a/src/core/CL/kernels/CLFFTScaleKernel.cpp b/src/core/CL/kernels/CLFFTScaleKernel.cpp index 8a714d71bf..be6e16b074 100644 --- a/src/core/CL/kernels/CLFFTScaleKernel.cpp +++ b/src/core/CL/kernels/CLFFTScaleKernel.cpp @@ -28,6 +28,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -43,7 +44,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 2, DataType::F16, DataType::F32); // Checks performed when output is configured - if((output != nullptr) && (output->total_size() != 0)) + if ((output != nullptr) && (output->total_size() != 0)) { ARM_COMPUTE_RETURN_ERROR_ON(output->num_channels() != 1 && output->num_channels() != 2); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); @@ -54,8 +55,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) } } // namespace -CLFFTScaleKernel::CLFFTScaleKernel() - : _input(nullptr), _output(nullptr), _run_in_place(false) +CLFFTScaleKernel::CLFFTScaleKernel() : _input(nullptr), _output(nullptr), _run_in_place(false) { _type = CLKernelType::ELEMENTWISE; } @@ -65,11 +65,14 @@ void CLFFTScaleKernel::configure(ICLTensor *input, ICLTensor *output, const FFTS configure(CLKernelLibrary::get().get_compile_context(), input, output, config); } -void CLFFTScaleKernel::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config) +void CLFFTScaleKernel::configure(const CLCompileContext &compile_context, + ICLTensor *input, + ICLTensor *output, + const FFTScaleKernelInfo &config) { ARM_COMPUTE_ERROR_ON_NULLPTR(input); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (output != nullptr) ? output->info() : nullptr)); - auto padding_info = get_padding_info({ input, output }); + auto padding_info = get_padding_info({input, output}); _input = input; _output = output; @@ -78,20 +81,22 @@ void CLFFTScaleKernel::configure(const CLCompileContext &compile_context, ICLTen // Create kernel CLBuildOptions build_opts; build_opts.add_option_if(_run_in_place, "-DIN_PLACE"); - build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(output != nullptr ? output->info()->num_channels() : input->info()->num_channels())); + build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(output != nullptr ? output->info()->num_channels() + : input->info()->num_channels())); build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); build_opts.add_option_if(config.conjugate, "-DCONJ"); std::string kernel_name = "fft_scale_conj"; _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Set static arguments - unsigned int idx = (1 + (_run_in_place ? 0 : 1)) * num_arguments_per_3D_tensor(); // Skip the input and output parameters + unsigned int idx = + (1 + (_run_in_place ? 0 : 1)) * num_arguments_per_3D_tensor(); // Skip the input and output parameters _kernel.setArg(idx, config.scale); // Configure kernel window Window win = calculate_max_window(*input->info(), Steps()); - if(output != nullptr) + if (output != nullptr) { // Output auto inizialitation if not yet initialized auto_init_if_empty(*output->info(), *input->info()->clone()); @@ -130,12 +135,11 @@ void CLFFTScaleKernel::run(const Window &window, cl::CommandQueue &queue) { unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); - if(!_run_in_place) + if (!_run_in_place) { add_3D_tensor_argument(idx, _output, slice); } enqueue(queue, *this, slice, lws_hint()); - } - while(collapsed.slide_window_slice_3D(slice)); + } while (collapsed.slide_window_slice_3D(slice)); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLFFTScaleKernel.h b/src/core/CL/kernels/CLFFTScaleKernel.h index cc518be193..b995282e02 100644 --- a/src/core/CL/kernels/CLFFTScaleKernel.h +++ b/src/core/CL/kernels/CLFFTScaleKernel.h @@ -24,10 +24,10 @@ #ifndef ARM_COMPUTE_CLFFTSCALEKERNEL_H #define ARM_COMPUTE_CLFFTSCALEKERNEL_H -#include "src/core/CL/ICLKernel.h" - #include "arm_compute/core/KernelDescriptors.h" +#include "src/core/CL/ICLKernel.h" + namespace arm_compute { // Forward declarations @@ -63,7 +63,10 @@ public: * @param[out] output Destination tensor. Data type supported: same as @p input * @param[in] config Kernel configuration */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config); + void configure(const CLCompileContext &compile_context, + ICLTensor *input, + ICLTensor *output, + const FFTScaleKernelInfo &config); /** Static function to check if given info will lead to a valid configuration of @ref CLFFTScaleKernel * * @param[in] input Source tensor info. Data types supported: F16/F32. diff --git a/src/core/CL/kernels/CLFillBorderKernel.cpp b/src/core/CL/kernels/CLFillBorderKernel.cpp index fcd99a4ed9..86bb502da3 100644 --- a/src/core/CL/kernels/CLFillBorderKernel.cpp +++ b/src/core/CL/kernels/CLFillBorderKernel.cpp @@ -31,14 +31,14 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/StringUtils.h" #include "arm_compute/core/Validate.h" + #include "src/core/helpers/WindowHelpers.h" #include "support/Cast.h" #include "support/StringSupport.h" namespace arm_compute { -CLFillBorderKernel::CLFillBorderKernel() - : ICLKernel(), _tensor(nullptr) +CLFillBorderKernel::CLFillBorderKernel() : ICLKernel(), _tensor(nullptr) { _type = CLKernelType::ELEMENTWISE; } @@ -56,27 +56,38 @@ void CLFillBorderKernel::set_constant_border(unsigned int idx, const PixelValue ICLKernel::add_argument(idx, static_cast(value)); } -void CLFillBorderKernel::configure(ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value) +void CLFillBorderKernel::configure(ICLTensor *tensor, + BorderSize border_size, + BorderMode border_mode, + const PixelValue &constant_border_value) { configure(CLKernelLibrary::get().get_compile_context(), tensor, border_size, border_mode, constant_border_value); } -void CLFillBorderKernel::configure(const CLCompileContext &compile_context, ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value) +void CLFillBorderKernel::configure(const CLCompileContext &compile_context, + ICLTensor *tensor, + BorderSize border_size, + BorderMode border_mode, + const PixelValue &constant_border_value) { _tensor = tensor; configure(compile_context, tensor->info(), border_size, border_mode, constant_border_value); } -void CLFillBorderKernel::configure(const CLCompileContext &compile_context, ITensorInfo *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value) +void CLFillBorderKernel::configure(const CLCompileContext &compile_context, + ITensorInfo *tensor, + BorderSize border_size, + BorderMode border_mode, + const PixelValue &constant_border_value) { ARM_COMPUTE_ERROR_ON(tensor == nullptr); ARM_COMPUTE_ERROR_ON(tensor->num_channels() != 1); - auto padding_info = get_padding_info({ tensor }); + auto padding_info = get_padding_info({tensor}); border_size.limit(tensor->padding()); // If there is no border: early exit - if(border_size.empty() || border_mode == BorderMode::UNDEFINED) + if (border_size.empty() || border_mode == BorderMode::UNDEFINED) { return; } @@ -98,25 +109,22 @@ void CLFillBorderKernel::configure(const CLCompileContext &compile_context, ITen _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Create static kernel arguments - const unsigned int valid_width = tensor->valid_region().shape[0]; - const unsigned int valid_height = tensor->valid_region().shape[1]; - const cl_int2 valid_region_coords = - { - { - static_cast(tensor->valid_region().anchor[0]), - static_cast(tensor->valid_region().anchor[1]), - } - }; - const unsigned int total_valid_width = border_size.left + valid_width + border_size.right; + const unsigned int valid_width = tensor->valid_region().shape[0]; + const unsigned int valid_height = tensor->valid_region().shape[1]; + const cl_int2 valid_region_coords = {{ + static_cast(tensor->valid_region().anchor[0]), + static_cast(tensor->valid_region().anchor[1]), + }}; + const unsigned int total_valid_width = border_size.left + valid_width + border_size.right; // Set static kernel arguments unsigned int idx = num_arguments_per_3D_tensor(); //Skip the tensor parameters ICLKernel::add_argument(idx, valid_width); ICLKernel::add_argument(idx, valid_height); ICLKernel::add_argument(idx, valid_region_coords); - if(BorderMode::CONSTANT == border_mode) + if (BorderMode::CONSTANT == border_mode) { - switch(dt) + switch (dt) { case DataType::U8: case DataType::QASYMM8: @@ -175,12 +183,13 @@ void CLFillBorderKernel::configure(const CLCompileContext &compile_context, ITen void CLFillBorderKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { // Border mode undefined or border width == 0 - if(_kernel() == nullptr) + if (_kernel() == nullptr) { return; } - const auto tensor = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC)); + const auto tensor = + utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC)); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window); @@ -193,14 +202,13 @@ void CLFillBorderKernel::run_op(ITensorPack &tensors, const Window &window, cl:: unsigned int idx = 0; add_3D_tensor_argument(idx, tensor, slice); enqueue(queue, *this, slice, lws_hint()); - } - while(collapsed.slide_window_slice_3D(slice)); + } while (collapsed.slide_window_slice_3D(slice)); } void CLFillBorderKernel::run(const Window &window, cl::CommandQueue &queue) { // Border mode undefined or border width == 0 - if(_kernel() == nullptr) + if (_kernel() == nullptr) { return; } @@ -216,7 +224,6 @@ void CLFillBorderKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_3D_tensor_argument(idx, _tensor, slice); enqueue(queue, *this, slice, lws_hint()); - } - while(collapsed.slide_window_slice_3D(slice)); + } while (collapsed.slide_window_slice_3D(slice)); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLFillBorderKernel.h b/src/core/CL/kernels/CLFillBorderKernel.h index 7951f48171..5782143cf9 100644 --- a/src/core/CL/kernels/CLFillBorderKernel.h +++ b/src/core/CL/kernels/CLFillBorderKernel.h @@ -26,6 +26,7 @@ #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/Types.h" + #include "src/core/CL/ICLKernel.h" namespace arm_compute @@ -57,7 +58,11 @@ public: * @param[in] border_mode Border mode to use for the convolution. * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. */ - void configure(const CLCompileContext &compile_context, ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); + void configure(const CLCompileContext &compile_context, + ICLTensor *tensor, + BorderSize border_size, + BorderMode border_mode, + const PixelValue &constant_border_value = PixelValue()); /** Initialise the kernel's input, output and border mode. * * @param[in,out] tensor Tensor to process Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32. @@ -65,7 +70,10 @@ public: * @param[in] border_mode Border mode to use for the convolution. * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. */ - void configure(ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); + void configure(ICLTensor *tensor, + BorderSize border_size, + BorderMode border_mode, + const PixelValue &constant_border_value = PixelValue()); /** Initialise the kernel's input, output and border mode. * * @param[in] compile_context The compile context to be used. @@ -74,7 +82,11 @@ public: * @param[in] border_mode Border mode to use for the convolution. * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. */ - void configure(const CLCompileContext &compile_context, ITensorInfo *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); + void configure(const CLCompileContext &compile_context, + ITensorInfo *tensor, + BorderSize border_size, + BorderMode border_mode, + const PixelValue &constant_border_value = PixelValue()); /** Function to set the constant value on fill border kernel depending on type. * diff --git a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp index 68fe324df6..7da0679ae4 100644 --- a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp +++ b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp @@ -30,20 +30,26 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" - #include "support/StringSupport.h" namespace arm_compute { namespace { -Status validate_arguments(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var, - const ITensorInfo *fused_weights, const ITensorInfo *fused_bias, - const ITensorInfo *input_bias, const ITensorInfo *bn_beta, const ITensorInfo *bn_gamma, - float epsilon, FuseBatchNormalizationType fbn_type) +Status validate_arguments(const ITensorInfo *input_weights, + const ITensorInfo *bn_mean, + const ITensorInfo *bn_var, + const ITensorInfo *fused_weights, + const ITensorInfo *fused_bias, + const ITensorInfo *input_bias, + const ITensorInfo *bn_beta, + const ITensorInfo *bn_gamma, + float epsilon, + FuseBatchNormalizationType fbn_type) { ARM_COMPUTE_UNUSED(epsilon); ARM_COMPUTE_ERROR_ON_NULLPTR(input_weights, bn_mean, bn_var); @@ -54,43 +60,44 @@ Status validate_arguments(const ITensorInfo *input_weights, const ITensorInfo *b ARM_COMPUTE_RETURN_ERROR_ON(input_bias == nullptr && fused_bias == nullptr); ARM_COMPUTE_RETURN_ERROR_ON(bn_mean->num_dimensions() > 1); - if(fbn_type == FuseBatchNormalizationType::CONVOLUTION) + if (fbn_type == FuseBatchNormalizationType::CONVOLUTION) { ARM_COMPUTE_RETURN_ERROR_ON(input_weights->dimension(3) != bn_mean->dimension(0)); } else { - const size_t channel_idx = get_data_layout_dimension_index(input_weights->data_layout(), DataLayoutDimension::CHANNEL); + const size_t channel_idx = + get_data_layout_dimension_index(input_weights->data_layout(), DataLayoutDimension::CHANNEL); ARM_COMPUTE_RETURN_ERROR_ON(input_weights->dimension(channel_idx) != bn_mean->dimension(0)); } // Validate bias - if(input_bias != nullptr) + if (input_bias != nullptr) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(bn_mean, input_bias); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_weights, input_bias); } // Validate beta - if(bn_beta != nullptr) + if (bn_beta != nullptr) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(bn_mean, bn_beta); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_weights, bn_beta); } // Validate gamma - if(bn_gamma != nullptr) + if (bn_gamma != nullptr) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(bn_mean, bn_gamma); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_weights, bn_gamma); } // Validate output weights - if(fused_weights != nullptr && fused_weights->total_size() != 0) + if (fused_weights != nullptr && fused_weights->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input_weights, fused_weights); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input_weights, fused_weights); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_weights, fused_weights); } // Validate output bias - if(fused_bias != nullptr && fused_bias->total_size() != 0) + if (fused_bias != nullptr && fused_bias->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(bn_mean, fused_bias); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_weights, fused_bias); @@ -101,28 +108,52 @@ Status validate_arguments(const ITensorInfo *input_weights, const ITensorInfo *b } // namespace CLFuseBatchNormalizationKernel::CLFuseBatchNormalizationKernel() - : _input_weights(nullptr), _input_bias(nullptr), _bn_mean(nullptr), _bn_var(nullptr), _bn_gamma(nullptr), _bn_beta(nullptr), _fused_weights(nullptr), _fused_bias(nullptr), _epsilon(), - _run_in_place_weights(false), _run_in_place_bias(false) + : _input_weights(nullptr), + _input_bias(nullptr), + _bn_mean(nullptr), + _bn_var(nullptr), + _bn_gamma(nullptr), + _bn_beta(nullptr), + _fused_weights(nullptr), + _fused_bias(nullptr), + _epsilon(), + _run_in_place_weights(false), + _run_in_place_bias(false) { _type = CLKernelType::ELEMENTWISE; } -void CLFuseBatchNormalizationKernel::configure(const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, - ICLTensor *fused_weights, ICLTensor *fused_bias, - const ICLTensor *input_bias, const ICLTensor *bn_beta, const ICLTensor *bn_gamma, - float epsilon, FuseBatchNormalizationType fbn_type) +void CLFuseBatchNormalizationKernel::configure(const ICLTensor *input_weights, + const ICLTensor *bn_mean, + const ICLTensor *bn_var, + ICLTensor *fused_weights, + ICLTensor *fused_bias, + const ICLTensor *input_bias, + const ICLTensor *bn_beta, + const ICLTensor *bn_gamma, + float epsilon, + FuseBatchNormalizationType fbn_type) { - configure(CLKernelLibrary::get().get_compile_context(), input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type); + configure(CLKernelLibrary::get().get_compile_context(), input_weights, bn_mean, bn_var, fused_weights, fused_bias, + input_bias, bn_beta, bn_gamma, epsilon, fbn_type); } -void CLFuseBatchNormalizationKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, - ICLTensor *fused_weights, ICLTensor *fused_bias, - const ICLTensor *input_bias, const ICLTensor *bn_beta, const ICLTensor *bn_gamma, - float epsilon, FuseBatchNormalizationType fbn_type) +void CLFuseBatchNormalizationKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input_weights, + const ICLTensor *bn_mean, + const ICLTensor *bn_var, + ICLTensor *fused_weights, + ICLTensor *fused_bias, + const ICLTensor *input_bias, + const ICLTensor *bn_beta, + const ICLTensor *bn_gamma, + float epsilon, + FuseBatchNormalizationType fbn_type) { ARM_COMPUTE_ERROR_ON_NULLPTR(input_weights, bn_mean, bn_var); - auto padding_info = get_padding_info({ input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma }); + auto padding_info = + get_padding_info({input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma}); _input_weights = input_weights; _input_bias = input_bias; @@ -135,28 +166,28 @@ void CLFuseBatchNormalizationKernel::configure(const CLCompileContext &compile_c _epsilon = epsilon; _run_in_place_weights = (fused_weights == nullptr) || (fused_weights == input_weights); - _run_in_place_bias = (input_bias != nullptr && fused_bias == nullptr) || (input_bias != nullptr && fused_bias == input_bias); + _run_in_place_bias = + (input_bias != nullptr && fused_bias == nullptr) || (input_bias != nullptr && fused_bias == input_bias); // Auto initialize outputs - if(_fused_weights != nullptr) + if (_fused_weights != nullptr) { // Output tensor auto initialization if not yet initialized auto_init_if_empty(*_fused_weights->info(), *_input_weights->info()->clone()); } - if(_fused_bias != nullptr) + if (_fused_bias != nullptr) { // Output tensor auto initialization if not yet initialized auto_init_if_empty(*_fused_bias->info(), *_bn_mean->info()->clone()); } // Validate arguments - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input_weights->info(), bn_mean->info(), bn_var->info(), - (fused_weights != nullptr) ? fused_weights->info() : nullptr, - (fused_bias != nullptr) ? fused_bias->info() : nullptr, - (input_bias != nullptr) ? input_bias->info() : nullptr, - (bn_beta != nullptr) ? bn_beta->info() : nullptr, - (bn_gamma != nullptr) ? bn_gamma->info() : nullptr, - epsilon, fbn_type)); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments( + input_weights->info(), bn_mean->info(), bn_var->info(), + (fused_weights != nullptr) ? fused_weights->info() : nullptr, + (fused_bias != nullptr) ? fused_bias->info() : nullptr, (input_bias != nullptr) ? input_bias->info() : nullptr, + (bn_beta != nullptr) ? bn_beta->info() : nullptr, (bn_gamma != nullptr) ? bn_gamma->info() : nullptr, epsilon, + fbn_type)); // Configure kernel window Window win = calculate_max_window(*input_weights->info()); @@ -165,7 +196,8 @@ void CLFuseBatchNormalizationKernel::configure(const CLCompileContext &compile_c // Set build options CLBuildOptions build_opts; build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input_weights->info()->data_type())); - build_opts.add_option_if(fbn_type == FuseBatchNormalizationType::CONVOLUTION, "-DDIM2=" + support::cpp11::to_string(input_weights->info()->dimension(2))); + build_opts.add_option_if(fbn_type == FuseBatchNormalizationType::CONVOLUTION, + "-DDIM2=" + support::cpp11::to_string(input_weights->info()->dimension(2))); build_opts.add_option("-DEPSILON=" + float_to_string_with_full_precision(epsilon)); build_opts.add_option_if(_input_weights->info()->data_layout() == DataLayout::NHWC, "-DNHWC"); build_opts.add_option_if(_run_in_place_weights, "-DIN_PLACE_W"); @@ -180,12 +212,19 @@ void CLFuseBatchNormalizationKernel::configure(const CLCompileContext &compile_c ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -Status CLFuseBatchNormalizationKernel::validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var, - const ITensorInfo *fused_weights, const ITensorInfo *fused_bias, - const ITensorInfo *input_bias, const ITensorInfo *bn_beta, const ITensorInfo *bn_gamma, - float epsilon, FuseBatchNormalizationType fbn_type) +Status CLFuseBatchNormalizationKernel::validate(const ITensorInfo *input_weights, + const ITensorInfo *bn_mean, + const ITensorInfo *bn_var, + const ITensorInfo *fused_weights, + const ITensorInfo *fused_bias, + const ITensorInfo *input_bias, + const ITensorInfo *bn_beta, + const ITensorInfo *bn_gamma, + float epsilon, + FuseBatchNormalizationType fbn_type) { - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input_weights, bn_mean, bn_var, fused_weights, fused_bias, + input_bias, bn_beta, bn_gamma, epsilon, fbn_type)); return Status{}; } @@ -202,25 +241,25 @@ void CLFuseBatchNormalizationKernel::run(const arm_compute::Window &window, cl:: // Add kernel arguments unsigned int idx = 0; add_3D_tensor_argument(idx, _input_weights, slice_3d); - if(_input_bias != nullptr) + if (_input_bias != nullptr) { add_1D_tensor_argument(idx, _input_bias, slice_1d); } add_1D_tensor_argument(idx, _bn_mean, slice_1d); add_1D_tensor_argument(idx, _bn_var, slice_1d); - if(!_run_in_place_weights) + if (!_run_in_place_weights) { add_3D_tensor_argument(idx, _fused_weights, slice_3d); } - if(!_run_in_place_bias) + if (!_run_in_place_bias) { add_1D_tensor_argument(idx, _fused_bias, slice_1d); } - if(_bn_beta != nullptr) + if (_bn_beta != nullptr) { add_1D_tensor_argument(idx, _bn_beta, slice_1d); } - if(_bn_gamma != nullptr) + if (_bn_gamma != nullptr) { add_1D_tensor_argument(idx, _bn_gamma, slice_1d); } diff --git a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.h b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.h index 78b1e74cab..76ec7a759f 100644 --- a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.h +++ b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.h @@ -62,9 +62,16 @@ public: * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION. */ - void configure(const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias, - const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr, - float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); + void configure(const ICLTensor *input_weights, + const ICLTensor *bn_mean, + const ICLTensor *bn_var, + ICLTensor *fused_weights, + ICLTensor *fused_bias, + const ICLTensor *input_bias = nullptr, + const ICLTensor *bn_beta = nullptr, + const ICLTensor *bn_gamma = nullptr, + float epsilon = 0.001f, + FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); /** Set the source, destination of the kernel * * @param[in] compile_context The compile context to be used. @@ -81,9 +88,17 @@ public: * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION. */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias, - const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr, - float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input_weights, + const ICLTensor *bn_mean, + const ICLTensor *bn_var, + ICLTensor *fused_weights, + ICLTensor *fused_bias, + const ICLTensor *input_bias = nullptr, + const ICLTensor *bn_beta = nullptr, + const ICLTensor *bn_gamma = nullptr, + float epsilon = 0.001f, + FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); /** Static function to check if given info will lead to a valid configuration of @ref CLFuseBatchNormalizationKernel * * @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC @@ -101,10 +116,16 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var, - const ITensorInfo *fused_weights, const ITensorInfo *fused_bias, - const ITensorInfo *input_bias = nullptr, const ITensorInfo *bn_beta = nullptr, const ITensorInfo *bn_gamma = nullptr, - float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); + static Status validate(const ITensorInfo *input_weights, + const ITensorInfo *bn_mean, + const ITensorInfo *bn_var, + const ITensorInfo *fused_weights, + const ITensorInfo *fused_bias, + const ITensorInfo *input_bias = nullptr, + const ITensorInfo *bn_beta = nullptr, + const ITensorInfo *bn_gamma = nullptr, + float epsilon = 0.001f, + FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/src/core/CL/kernels/CLGatherKernel.cpp b/src/core/CL/kernels/CLGatherKernel.cpp index 5495023b80..c11a18940a 100644 --- a/src/core/CL/kernels/CLGatherKernel.cpp +++ b/src/core/CL/kernels/CLGatherKernel.cpp @@ -22,8 +22,10 @@ * SOFTWARE. */ #include "src/core/CL/kernels/CLGatherKernel.h" + #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" + #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" @@ -34,7 +36,8 @@ namespace arm_compute { namespace { -inline Status validate_arguments(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis) +inline Status +validate_arguments(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, indices, output); const uint32_t actual_axis = wrap_around(axis, static_cast(input->num_dimensions())); @@ -43,11 +46,12 @@ inline Status validate_arguments(const ITensorInfo *input, const ITensorInfo *in ARM_COMPUTE_RETURN_ERROR_ON(actual_axis >= input->num_dimensions()); ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); - if(output->total_size() != 0) + if (output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output); - TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape(input->tensor_shape(), indices->tensor_shape(), actual_axis); + TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape( + input->tensor_shape(), indices->tensor_shape(), actual_axis); ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size()); } @@ -56,12 +60,14 @@ inline Status validate_arguments(const ITensorInfo *input, const ITensorInfo *in return Status{}; } -std::pair validate_and_configure_window(ITensorInfo *input, ITensorInfo *indices, ITensorInfo *output, int axis) +std::pair +validate_and_configure_window(ITensorInfo *input, ITensorInfo *indices, ITensorInfo *output, int axis) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, indices); const uint32_t actual_axis = wrap_around(axis, static_cast(input->num_dimensions())); // Output auto initialization if not yet initialized - TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape(input->tensor_shape(), indices->tensor_shape(), actual_axis); + TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape( + input->tensor_shape(), indices->tensor_shape(), actual_axis); auto_init_if_empty((*output), output_shape, 1, input->data_type()); // Create window @@ -72,8 +78,7 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen } // namespace -CLGatherKernel::CLGatherKernel() - : _input(nullptr), _indices(nullptr), _output(nullptr), _axis(0) +CLGatherKernel::CLGatherKernel() : _input(nullptr), _indices(nullptr), _output(nullptr), _axis(0) { _type = CLKernelType::ELEMENTWISE; } @@ -83,10 +88,14 @@ void CLGatherKernel::configure(const ICLTensor *input, const ICLTensor *indices, configure(CLKernelLibrary::get().get_compile_context(), input, indices, output, axis); } -void CLGatherKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis) +void CLGatherKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + const ICLTensor *indices, + ICLTensor *output, + int axis) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, indices); - auto padding_info = get_padding_info({ input, output, indices }); + auto padding_info = get_padding_info({input, output, indices}); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), indices->info(), output->info(), axis)); // Configure kernel window @@ -100,7 +109,8 @@ void CLGatherKernel::configure(const CLCompileContext &compile_context, const IC // Set build options CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(data_size_from_type(input->info()->data_type()))); + build_opts.add_option("-DDATA_TYPE=" + + get_cl_unsigned_type_from_element_size(data_size_from_type(input->info()->data_type()))); build_opts.add_option("-DOUTPUT_DIM_Z=" + support::cpp11::to_string(output->info()->dimension(2))); build_opts.add_option("-DINDICES_DIM_Z=" + support::cpp11::to_string(indices->info()->dimension(2))); build_opts.add_option("-DINPUT_DIM_Z=" + support::cpp11::to_string(input->info()->dimension(2))); @@ -114,10 +124,12 @@ void CLGatherKernel::configure(const CLCompileContext &compile_context, const IC ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -Status CLGatherKernel::validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis) +Status +CLGatherKernel::validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, indices, output, axis)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), indices->clone().get(), output->clone().get(), axis).first); + ARM_COMPUTE_RETURN_ON_ERROR( + validate_and_configure_window(input->clone().get(), indices->clone().get(), output->clone().get(), axis).first); return Status{}; } diff --git a/src/core/CL/kernels/CLGatherKernel.h b/src/core/CL/kernels/CLGatherKernel.h index 8f472a4696..db4b49d2f5 100644 --- a/src/core/CL/kernels/CLGatherKernel.h +++ b/src/core/CL/kernels/CLGatherKernel.h @@ -25,6 +25,7 @@ #define ARM_COMPUTE_CLGATHERKERNEL_H #include "arm_compute/core/Types.h" + #include "src/core/CL/ICLKernel.h" namespace arm_compute @@ -63,7 +64,11 @@ public: * @param[out] output Destination tensor. Data type supported: Same as @p input * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0 */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input, + const ICLTensor *indices, + ICLTensor *output, + int axis = 0); /** Static function to check if given info will lead to a valid configuration of @ref CLGatherKernel * @@ -74,7 +79,8 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis = 0); + static Status + validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis = 0); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp index 088c454f3c..b9ff72b928 100644 --- a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp +++ b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp @@ -31,6 +31,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -47,7 +48,7 @@ Status validate_arguments(const ITensorInfo *anchors, const ITensorInfo *all_anc ARM_COMPUTE_RETURN_ERROR_ON(anchors->dimension(0) != info.values_per_roi()); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(anchors, DataType::QSYMM16, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON(anchors->num_dimensions() > 2); - if(all_anchors->total_size() > 0) + if (all_anchors->total_size() > 0) { size_t feature_height = info.feat_height(); size_t feature_width = info.feat_width(); @@ -57,7 +58,7 @@ Status validate_arguments(const ITensorInfo *anchors, const ITensorInfo *all_anc ARM_COMPUTE_RETURN_ERROR_ON(all_anchors->dimension(0) != info.values_per_roi()); ARM_COMPUTE_RETURN_ERROR_ON(all_anchors->dimension(1) != feature_height * feature_width * num_anchors); - if(is_data_type_quantized(anchors->data_type())) + if (is_data_type_quantized(anchors->data_type())) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(anchors, all_anchors); } @@ -66,21 +67,25 @@ Status validate_arguments(const ITensorInfo *anchors, const ITensorInfo *all_anc } } // namespace -CLComputeAllAnchorsKernel::CLComputeAllAnchorsKernel() - : _anchors(nullptr), _all_anchors(nullptr) +CLComputeAllAnchorsKernel::CLComputeAllAnchorsKernel() : _anchors(nullptr), _all_anchors(nullptr) { _type = CLKernelType::ELEMENTWISE; } -void CLComputeAllAnchorsKernel::configure(const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info) +void CLComputeAllAnchorsKernel::configure(const ICLTensor *anchors, + ICLTensor *all_anchors, + const ComputeAnchorsInfo &info) { configure(CLKernelLibrary::get().get_compile_context(), anchors, all_anchors, info); } -void CLComputeAllAnchorsKernel::configure(const CLCompileContext &compile_context, const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info) +void CLComputeAllAnchorsKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *anchors, + ICLTensor *all_anchors, + const ComputeAnchorsInfo &info) { ARM_COMPUTE_ERROR_ON_NULLPTR(anchors, all_anchors); - auto padding_info = get_padding_info({ anchors, all_anchors }); + auto padding_info = get_padding_info({anchors, all_anchors}); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(anchors->info(), all_anchors->info(), info)); // Metadata @@ -91,7 +96,8 @@ void CLComputeAllAnchorsKernel::configure(const CLCompileContext &compile_contex // Initialize the output if empty const TensorShape output_shape(info.values_per_roi(), width * height * num_anchors); - auto_init_if_empty(*all_anchors->info(), TensorInfo(output_shape, 1, data_type, anchors->info()->quantization_info())); + auto_init_if_empty(*all_anchors->info(), + TensorInfo(output_shape, 1, data_type, anchors->info()->quantization_info())); // Set instance variables _anchors = anchors; @@ -108,7 +114,7 @@ void CLComputeAllAnchorsKernel::configure(const CLCompileContext &compile_contex build_opts.add_option("-DNUM_ANCHORS=" + support::cpp11::to_string(num_anchors)); build_opts.add_option("-DNUM_ROI_FIELDS=" + support::cpp11::to_string(info.values_per_roi())); - if(is_quantized) + if (is_quantized) { const UniformQuantizationInfo qinfo = anchors->info()->quantization_info().uniform(); build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(qinfo.scale)); @@ -116,8 +122,9 @@ void CLComputeAllAnchorsKernel::configure(const CLCompileContext &compile_contex } // Create kernel - const std::string kernel_name = (is_quantized) ? "generate_proposals_compute_all_anchors_quantized" : "generate_proposals_compute_all_anchors"; - _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); + const std::string kernel_name = + (is_quantized) ? "generate_proposals_compute_all_anchors_quantized" : "generate_proposals_compute_all_anchors"; + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // The tensor all_anchors can be interpreted as an array of structs (each structs has values_per_roi fields). // This means we don't need to pad on the X dimension, as we know in advance how many fields @@ -127,7 +134,9 @@ void CLComputeAllAnchorsKernel::configure(const CLCompileContext &compile_contex ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -Status CLComputeAllAnchorsKernel::validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info) +Status CLComputeAllAnchorsKernel::validate(const ITensorInfo *anchors, + const ITensorInfo *all_anchors, + const ComputeAnchorsInfo &info) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(anchors, all_anchors, info)); return Status{}; diff --git a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.h b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.h index d26795ac7d..e08f281d6c 100644 --- a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.h +++ b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.h @@ -62,7 +62,10 @@ public: * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo * */ - void configure(const CLCompileContext &compile_context, const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info); + void configure(const CLCompileContext &compile_context, + const ICLTensor *anchors, + ICLTensor *all_anchors, + const ComputeAnchorsInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref CLComputeAllAnchorsKernel * @@ -81,5 +84,5 @@ private: const ICLTensor *_anchors; ICLTensor *_all_anchors; }; -} // arm_compute +} // namespace arm_compute #endif // ARM_COMPUTE_CLGENERATEPROSPOSALSLAYERKERNEL_H diff --git a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp index 7ed323c950..b13eb16556 100644 --- a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp @@ -30,6 +30,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -39,17 +40,20 @@ namespace arm_compute { namespace { -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info) +Status validate_arguments(const ITensorInfo *input, + const ITensorInfo *output, + const InstanceNormalizationLayerKernelInfo &info) { ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.epsilon == 0.f, "Epsilon must be different than 0"); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(input, DataType::F16, DataType::F32); - if(output != nullptr && output->total_size() != 0) + if (output != nullptr && output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_channels() != output->num_channels(), "Input and output have different number of channels"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_channels() != output->num_channels(), + "Input and output have different number of channels"); } return Status{}; @@ -59,27 +63,30 @@ Status validate_arguments_meanvar(const ITensorInfo *input, const ITensorInfo *o { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(input, DataType::F16, DataType::F32); - if(output != nullptr && output->total_size() != 0) + if (output != nullptr && output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_channels() != output->num_channels(), "Input and output have different number of channels"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_channels() != output->num_channels(), + "Input and output have different number of channels"); } return Status{}; } } // namespace -CLComputeMeanVariance::CLComputeMeanVariance() - : _input(nullptr), _output(nullptr) +CLComputeMeanVariance::CLComputeMeanVariance() : _input(nullptr), _output(nullptr) { _type = CLKernelType::ELEMENTWISE; } -void CLComputeMeanVariance::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, bool use_mixed_precision) +void CLComputeMeanVariance::configure(const CLCompileContext &compile_context, + ICLTensor *input, + ICLTensor *output, + bool use_mixed_precision) { ARM_COMPUTE_ERROR_ON_NULLPTR(input); - auto padding_info = get_padding_info({ input, output }); + auto padding_info = get_padding_info({input, output}); _input = input; _output = output == nullptr ? input : output; @@ -88,7 +95,8 @@ void CLComputeMeanVariance::configure(const CLCompileContext &compile_context, I const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size(); CLBuildOptions build_opts; - build_opts.add_option("-DINTERNAL_DATA_TYPE=" + (use_mixed_precision ? "float" : get_cl_type_from_data_type(input->info()->data_type()))); + build_opts.add_option("-DINTERNAL_DATA_TYPE=" + + (use_mixed_precision ? "float" : get_cl_type_from_data_type(input->info()->data_type()))); build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); build_opts.add_option("-DDIM_X=" + support::cpp11::to_string(input->info()->dimension(0))); @@ -108,7 +116,7 @@ void CLComputeMeanVariance::configure(const CLCompileContext &compile_context, I const TensorShape out_shape(input_channel, 2u, input_batches); // Output auto initialization if not yet initialized - if(use_mixed_precision) + if (use_mixed_precision) { auto_init_if_empty(*_output->info(), out_shape, 1, DataType::F32); } @@ -134,7 +142,7 @@ void CLComputeMeanVariance::run(const Window &window, cl::CommandQueue &queue) Window collapsed_window = window.collapse(window, Window::DimZ); // We will process the planes together - if(_input->info()->data_layout() == DataLayout::NCHW) + if (_input->info()->data_layout() == DataLayout::NCHW) { collapsed_window.set(Window::DimX, Window::Dimension(0, 1, 1)); collapsed_window.set(Window::DimY, Window::Dimension(0, 1, 1)); @@ -157,10 +165,14 @@ CLInstanceNormalizationLayerKernel::CLInstanceNormalizationLayerKernel() _type = CLKernelType::ELEMENTWISE; } -void CLInstanceNormalizationLayerKernel::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *mean_var, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info) +void CLInstanceNormalizationLayerKernel::configure(const CLCompileContext &compile_context, + ICLTensor *input, + ICLTensor *mean_var, + ICLTensor *output, + const InstanceNormalizationLayerKernelInfo &info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input); - auto padding_info = get_padding_info({ input, output }); + auto padding_info = get_padding_info({input, output}); _input = input; _output = output == nullptr ? input : output; @@ -172,7 +184,9 @@ void CLInstanceNormalizationLayerKernel::configure(const CLCompileContext &compi CLBuildOptions build_opts; build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.add_option("-DINTERNAL_DATA_TYPE=" + (info.use_mixed_precision ? "float" : get_cl_type_from_data_type(input->info()->data_type()))); + build_opts.add_option("-DINTERNAL_DATA_TYPE=" + (info.use_mixed_precision + ? "float" + : get_cl_type_from_data_type(input->info()->data_type()))); build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); build_opts.add_option("-DDIM_X=" + support::cpp11::to_string(input->info()->dimension(0))); build_opts.add_option("-DDIM_Y=" + support::cpp11::to_string(input->info()->dimension(1))); @@ -188,7 +202,7 @@ void CLInstanceNormalizationLayerKernel::configure(const CLCompileContext &compi // Configure kernel window Window win = calculate_max_window(*input->info(), Steps(1)); - if(output != nullptr) + if (output != nullptr) { auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type()); } @@ -197,7 +211,9 @@ void CLInstanceNormalizationLayerKernel::configure(const CLCompileContext &compi ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -Status CLInstanceNormalizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info) +Status CLInstanceNormalizationLayerKernel::validate(const ITensorInfo *input, + const ITensorInfo *output, + const InstanceNormalizationLayerKernelInfo &info) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, info)); return Status{}; @@ -211,7 +227,7 @@ void CLInstanceNormalizationLayerKernel::run(const Window &window, cl::CommandQu Window collapsed_window = window.collapse(window, Window::DimZ); // We will process the planes together - if(_input->info()->data_layout() == DataLayout::NCHW) + if (_input->info()->data_layout() == DataLayout::NCHW) { collapsed_window.set(Window::DimX, Window::Dimension(0, 1, 1)); collapsed_window.set(Window::DimY, Window::Dimension(0, 1, 1)); @@ -226,7 +242,7 @@ void CLInstanceNormalizationLayerKernel::run(const Window &window, cl::CommandQu add_4D_tensor_argument(idx, _input, collapsed_window); add_3D_tensor_argument(idx, _mean, collapsed_window); - if(!_run_in_place) + if (!_run_in_place) { add_4D_tensor_argument(idx, _output, collapsed_window); } diff --git a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h index 2f9014a651..9f436da7f6 100644 --- a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h +++ b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h @@ -24,10 +24,10 @@ #ifndef ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H #define ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H -#include "src/core/CL/ICLKernel.h" - #include "arm_compute/core/KernelDescriptors.h" +#include "src/core/CL/ICLKernel.h" + namespace arm_compute { // Forward declarations @@ -59,7 +59,11 @@ public: * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. * @param[in] info Kernel meta-data descriptor */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *mean_var, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info); + void configure(const CLCompileContext &compile_context, + ICLTensor *input, + ICLTensor *mean_var, + ICLTensor *output, + const InstanceNormalizationLayerKernelInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref CLInstanceNormalizationLayer. * @@ -69,7 +73,8 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info); + static Status + validate(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; @@ -106,7 +111,8 @@ public: * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. * @param[in] use_mixed_precision Use mixed precision in case of FP16 execution */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, bool use_mixed_precision); + void + configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, bool use_mixed_precision); /** Static function to check if given info will lead to a valid configuration of @ref CLInstanceNormalizationLayer. * diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp index 542d380e4a..9ed9d7c5b0 100644 --- a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp +++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp @@ -31,10 +31,10 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/helpers/AdjustVecSize.h" #include "arm_compute/core/Validate.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" - #include "support/StringSupport.h" namespace arm_compute @@ -43,7 +43,8 @@ namespace { constexpr int max_input_tensor_dim = 3; -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon) +Status +validate_arguments(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon) { ARM_COMPUTE_UNUSED(epsilon); @@ -53,14 +54,15 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *sum, cons ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MSG(actual_axis > 2, "Actual axis greater than 2 is not supported"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(actual_axis >= TensorShape::num_max_dimensions, "Actual normalization axis greater than max number of dimensions"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(actual_axis >= TensorShape::num_max_dimensions, + "Actual normalization axis greater than max number of dimensions"); // Reduce shape on axis TensorShape sum_shape = input->tensor_shape(); sum_shape.set(actual_axis, 1); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(sum->tensor_shape(), sum_shape); - if(output->total_size() != 0) + if (output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output); @@ -78,16 +80,22 @@ CLL2NormalizeLayerKernel::CLL2NormalizeLayerKernel() _type = CLKernelType::ELEMENTWISE; } -void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon) +void CLL2NormalizeLayerKernel::configure( + const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon) { configure(CLKernelLibrary::get().get_compile_context(), input, sum, output, axis, epsilon); } -void CLL2NormalizeLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon) +void CLL2NormalizeLayerKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + const ICLTensor *sum, + ICLTensor *output, + int axis, + float epsilon) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, sum, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), sum->info(), output->info(), axis, epsilon)); - auto padding_info = get_padding_info({ input, sum, output }); + auto padding_info = get_padding_info({input, sum, output}); _input = input; _sum = sum; @@ -95,8 +103,9 @@ void CLL2NormalizeLayerKernel::configure(const CLCompileContext &compile_context _actual_axis = wrap_around(axis, max_input_tensor_dim); _epsilon = epsilon; - const unsigned int vec_size_x = adjust_vec_size(max_cl_vector_width / input->info()->element_size(), input->info()->dimension(0)); - const int vec_size_x_leftovers = input->info()->dimension(0) % vec_size_x; + const unsigned int vec_size_x = + adjust_vec_size(max_cl_vector_width / input->info()->element_size(), input->info()->dimension(0)); + const int vec_size_x_leftovers = input->info()->dimension(0) % vec_size_x; // Set build options CLBuildOptions build_opts; @@ -107,7 +116,7 @@ void CLL2NormalizeLayerKernel::configure(const CLCompileContext &compile_context // Create kernel std::string kernel_name; unsigned int idx = 0; - switch(_actual_axis) + switch (_actual_axis) { case 0: kernel_name = "l2_normalize_x"; @@ -127,7 +136,7 @@ void CLL2NormalizeLayerKernel::configure(const CLCompileContext &compile_context _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Set epsilon argument - if(input->info()->data_type() == DataType::F32) + if (input->info()->data_type() == DataType::F32) { _kernel.setArg(idx, _epsilon); } @@ -146,7 +155,8 @@ void CLL2NormalizeLayerKernel::configure(const CLCompileContext &compile_context ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -Status CLL2NormalizeLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon) +Status CLL2NormalizeLayerKernel::validate( + const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, sum, output, axis, epsilon)); return Status{}; @@ -159,7 +169,7 @@ void CLL2NormalizeLayerKernel::run(const Window &window, cl::CommandQueue &queue Window window_sum(window); - switch(_actual_axis) + switch (_actual_axis) { case 0: { @@ -173,8 +183,7 @@ void CLL2NormalizeLayerKernel::run(const Window &window, cl::CommandQueue &queue add_2D_tensor_argument(idx, _sum, sum_slice); add_2D_tensor_argument(idx, _output, in_slice); enqueue(queue, *this, in_slice, lws_hint()); - } - while(window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(sum_slice)); + } while (window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(sum_slice)); } break; case 1: @@ -189,8 +198,7 @@ void CLL2NormalizeLayerKernel::run(const Window &window, cl::CommandQueue &queue add_2D_tensor_argument(idx, _sum, sum_slice); add_2D_tensor_argument(idx, _output, in_slice); enqueue(queue, *this, in_slice, lws_hint()); - } - while(window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(sum_slice)); + } while (window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(sum_slice)); } break; case 2: @@ -205,8 +213,7 @@ void CLL2NormalizeLayerKernel::run(const Window &window, cl::CommandQueue &queue add_3D_tensor_argument(idx, _sum, sum_slice); add_3D_tensor_argument(idx, _output, in_slice); enqueue(queue, *this, in_slice, lws_hint()); - } - while(window.slide_window_slice_3D(in_slice) && window.slide_window_slice_3D(sum_slice)); + } while (window.slide_window_slice_3D(in_slice) && window.slide_window_slice_3D(sum_slice)); } break; default: diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.h b/src/core/CL/kernels/CLL2NormalizeLayerKernel.h index edc0585217..5c9ab94ce5 100644 --- a/src/core/CL/kernels/CLL2NormalizeLayerKernel.h +++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.h @@ -25,6 +25,7 @@ #define ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H #include "arm_compute/core/Types.h" + #include "src/core/CL/ICLKernel.h" namespace arm_compute @@ -70,7 +71,12 @@ public: * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 * @param[in] epsilon Lower bound value for the normalization. */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input, + const ICLTensor *sum, + ICLTensor *output, + int axis, + float epsilon); /** Static function to check if given info will lead to a valid configuration of @ref CLL2NormalizeLayerKernel. * @@ -84,7 +90,8 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon); + static Status + validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp index dc9d68626d..e560f1de4a 100644 --- a/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp @@ -31,6 +31,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -42,26 +43,31 @@ using namespace misc::shape_calculator; namespace { -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices) +Status validate_arguments(const ITensorInfo *input, + const ITensorInfo *output, + const PoolingLayerInfo &pool_info, + const ITensorInfo *indices) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output, indices); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, + DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(indices, 1, DataType::U32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, indices); - int pool_stride_x = 0; - int pool_stride_y = 0; - PoolingType pool_type = pool_info.pool_type; - const PadStrideInfo pad_stride_info = pool_info.pad_stride_info; + int pool_stride_x = 0; + int pool_stride_y = 0; + PoolingType pool_type = pool_info.pool_type; + const PadStrideInfo pad_stride_info = pool_info.pad_stride_info; std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride(); - const int pool_size_x = pool_info.pool_size.width; - const int pool_size_y = pool_info.pool_size.height; + const int pool_size_x = pool_info.pool_size.width; + const int pool_size_y = pool_info.pool_size.height; const Size2D pool_size(pool_size_x, pool_size_y); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(pool_type != PoolingType::MAX, "Pooling indices only supported for MAX pooling method"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(pool_type != PoolingType::MAX, + "Pooling indices only supported for MAX pooling method"); ARM_COMPUTE_RETURN_ERROR_ON_MSG((pool_size != Size2D(2, 2)), "Pooling indices only supported for pool size 2x2"); - if(output->total_size() != 0) + if (output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output); @@ -71,17 +77,20 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c } } // namespace -CLMaxUnpoolingLayerKernel::CLMaxUnpoolingLayerKernel() - : _input(nullptr), _output(nullptr), _indices(nullptr) +CLMaxUnpoolingLayerKernel::CLMaxUnpoolingLayerKernel() : _input(nullptr), _output(nullptr), _indices(nullptr) { _type = CLKernelType::POOL; } -void CLMaxUnpoolingLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, const PoolingLayerInfo &pool_info) +void CLMaxUnpoolingLayerKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + const ICLTensor *indices, + ICLTensor *output, + const PoolingLayerInfo &pool_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info, indices->info())); - auto padding_info = get_padding_info({ input, indices, output }); + auto padding_info = get_padding_info({input, indices, output}); _input = input; _output = output; @@ -119,7 +128,10 @@ void CLMaxUnpoolingLayerKernel::configure(const CLCompileContext &compile_contex ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -Status CLMaxUnpoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info) +Status CLMaxUnpoolingLayerKernel::validate(const ITensorInfo *input, + const ITensorInfo *indices, + const ITensorInfo *output, + const PoolingLayerInfo &pool_info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, indices, output); ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, pool_info, indices)); @@ -140,7 +152,6 @@ void CLMaxUnpoolingLayerKernel::run(const Window &window, cl::CommandQueue &queu add_3D_tensor_argument(idx, _output, slice); add_3D_tensor_argument(idx, _indices, slice); enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_3D(slice)); + } while (window.slide_window_slice_3D(slice)); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h index 45481d0507..eb18a46784 100644 --- a/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h +++ b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h @@ -59,7 +59,11 @@ public: * @param[out] output Destination tensor. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, const PoolingLayerInfo &pool_info); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input, + const ICLTensor *indices, + ICLTensor *output, + const PoolingLayerInfo &pool_info); /** Static function to check if given info will lead to a valid configuration of @ref CLMaxUnpoolingLayerKernel * * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. @@ -72,7 +76,10 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info); + static Status validate(const ITensorInfo *input, + const ITensorInfo *indices, + const ITensorInfo *output, + const PoolingLayerInfo &pool_info); // Inherited methods overridden void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp index ac33468ad8..8632bdf623 100644 --- a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp +++ b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp @@ -31,6 +31,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/utils/helpers/AdjustVecSize.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -49,7 +50,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, f ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); // Checks performed when output is configured - if((output != nullptr) && (output->total_size() != 0)) + if ((output != nullptr) && (output->total_size() != 0)) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); @@ -69,15 +70,19 @@ void CLMeanStdDevNormalizationKernel::configure(ICLTensor *input, ICLTensor *out configure(CLKernelLibrary::get().get_compile_context(), input, output, epsilon); } -void CLMeanStdDevNormalizationKernel::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, float epsilon) +void CLMeanStdDevNormalizationKernel::configure(const CLCompileContext &compile_context, + ICLTensor *input, + ICLTensor *output, + float epsilon) { ARM_COMPUTE_ERROR_ON_NULLPTR(input); _run_in_place = (output == nullptr) || (output == input); - ARM_COMPUTE_ERROR_THROW_ON(CLMeanStdDevNormalizationKernel::validate(input->info(), (output != nullptr) ? output->info() : nullptr, epsilon)); + ARM_COMPUTE_ERROR_THROW_ON(CLMeanStdDevNormalizationKernel::validate( + input->info(), (output != nullptr) ? output->info() : nullptr, epsilon)); - if(output != nullptr) + if (output != nullptr) { auto_init_if_empty(*output->info(), *input->info()); } @@ -85,7 +90,8 @@ void CLMeanStdDevNormalizationKernel::configure(const CLCompileContext &compile_ _input = input; _output = output; - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / input->info()->element_size(), input->info()->dimension(0)); + const unsigned int num_elems_processed_per_iteration = + adjust_vec_size(16 / input->info()->element_size(), input->info()->dimension(0)); // Set build options CLBuildOptions build_opts; @@ -134,7 +140,6 @@ void CLMeanStdDevNormalizationKernel::run(const Window &window, cl::CommandQueue add_2D_tensor_argument_if((!_run_in_place), idx, _output, slice); enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); + } while (window.slide_window_slice_2D(slice)); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h index a1ba2b905e..e02a3c58a3 100644 --- a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h +++ b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h @@ -66,7 +66,10 @@ public: * @param[out] output (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. */ - void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f); + void configure(const CLCompileContext &compile_context, + ICLTensor *input, + ICLTensor *output = nullptr, + float epsilon = 1e-8f); /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevNormalizationKernel * * @param[in] input Source tensor info with 2 dimensions. In case of @p output tensor info = nullptr, diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp index c6c4229c00..b636c485e7 100644 --- a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp @@ -32,6 +32,7 @@ #include "arm_compute/core/utils/helpers/AdjustVecSize.h" #include "arm_compute/core/utils/StringUtils.h" #include "arm_compute/core/Window.h" + #include "src/core/AccessWindowStatic.h" #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" @@ -53,7 +54,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, N ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(norm_info.norm_size() % 2), "Normalization size should be odd"); // Checks performed when output is configured - if(output->total_size() != 0) + if (output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output); @@ -63,7 +64,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, N return Status{}; } -std::pair validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, NormalizationLayerInfo norm_info) +std::pair +validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, NormalizationLayerInfo norm_info) { // Output tensor auto initialization if not yet initialized auto_init_if_empty(*output, *input->clone()); @@ -71,9 +73,10 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen bool window_changed = false; Window win; const DataLayout data_layout = input->data_layout(); - if(data_layout == DataLayout::NCHW) + if (data_layout == DataLayout::NCHW) { - const unsigned int vec_size_x = adjust_vec_size(max_cl_vector_width / input->element_size(), input->dimension(0)); + const unsigned int vec_size_x = + adjust_vec_size(max_cl_vector_width / input->element_size(), input->dimension(0)); const unsigned int norm_idx = get_normalization_dimension_index(input->data_layout(), norm_info); const bool is_norm_across_width = norm_idx == 0; @@ -87,15 +90,16 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen // The output has 1 right padding because of the vec_size_x. // The input has 1 left padding because radius = 1. // The input has 2 right padding because of radius = 1 AND because of the extra output padding - const unsigned int border_width_left = is_norm_across_width ? norm_radius : 0; - const unsigned int border_width_right = is_norm_across_width ? norm_radius + (vec_size_x - input->dimension(0) % vec_size_x) : 0; - const BorderSize border_size = BorderSize(0, border_width_right, 0, border_width_left); + const unsigned int border_width_left = is_norm_across_width ? norm_radius : 0; + const unsigned int border_width_right = + is_norm_across_width ? norm_radius + (vec_size_x - input->dimension(0) % vec_size_x) : 0; + const BorderSize border_size = BorderSize(0, border_width_right, 0, border_width_left); win = calculate_max_window(*input, Steps(vec_size_x)); // We do not use a Rectangle window for IN_MAP_2D as we clamp the top and bottom accesses inside the kernel, avoiding padding // Reads can occur within the valid region of the input - if(is_norm_across_width) + if (is_norm_across_width) { AccessWindowStatic input_access(input, -border_size.left, 0, input->dimension(0) + border_size.right, 0); window_changed = window_changed || update_window_and_padding(win, input_access); @@ -112,13 +116,14 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen else { unsigned int vec_size_x = adjust_vec_size(max_cl_vector_width / input->element_size(), input->dimension(0)); - if(norm_info.is_cross_map()) + if (norm_info.is_cross_map()) { vec_size_x = 1; } win = calculate_max_window(*input, Steps(vec_size_x)); } - Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; + Status err = + (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; return std::make_pair(err, win); } } // namespace @@ -139,10 +144,13 @@ void CLNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *ou configure(CLKernelLibrary::get().get_compile_context(), input, output, norm_info); } -void CLNormalizationLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info) +void CLNormalizationLayerKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + ICLTensor *output, + NormalizationLayerInfo norm_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - auto padding_info = get_padding_info({ input, output }); + auto padding_info = get_padding_info({input, output}); // Perform validation step ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), norm_info)); @@ -152,16 +160,17 @@ void CLNormalizationLayerKernel::configure(const CLCompileContext &compile_conte _input = input; _output = output; - const DataLayout data_layout = input->info()->data_layout(); - unsigned int vec_size_x = adjust_vec_size(max_cl_vector_width / input->info()->element_size(), input->info()->dimension(0)); - int vec_size_x_leftovers = input->info()->dimension(0) % vec_size_x; - if(norm_info.is_cross_map() && data_layout == DataLayout::NHWC) + const DataLayout data_layout = input->info()->data_layout(); + unsigned int vec_size_x = + adjust_vec_size(max_cl_vector_width / input->info()->element_size(), input->info()->dimension(0)); + int vec_size_x_leftovers = input->info()->dimension(0) % vec_size_x; + if (norm_info.is_cross_map() && data_layout == DataLayout::NHWC) { vec_size_x = 1; vec_size_x_leftovers = 0; } - if(data_layout == DataLayout::NCHW) + if (data_layout == DataLayout::NCHW) { const unsigned int norm_idx = get_normalization_dimension_index(data_layout, norm_info); _is_norm_across_width = norm_idx == 0; @@ -175,9 +184,10 @@ void CLNormalizationLayerKernel::configure(const CLCompileContext &compile_conte // The output has 1 right padding because of the vec_size_x. // The input has 1 left padding because radius = 1. // The input has 2 right padding because of radius = 1 AND the extra output padding - const unsigned int border_width_left = _is_norm_across_width ? norm_radius : 0; - const unsigned int border_width_right = _is_norm_across_width ? norm_radius + (vec_size_x - input->info()->dimension(0) % vec_size_x) : 0; - _border_size = BorderSize(0, border_width_right, 0, border_width_left); + const unsigned int border_width_left = _is_norm_across_width ? norm_radius : 0; + const unsigned int border_width_right = + _is_norm_across_width ? norm_radius + (vec_size_x - input->info()->dimension(0) % vec_size_x) : 0; + _border_size = BorderSize(0, border_width_right, 0, border_width_left); } const bool is_in_map_2D = (norm_info.type() == NormType::IN_MAP_2D); @@ -193,12 +203,14 @@ void CLNormalizationLayerKernel::configure(const CLCompileContext &compile_conte build_opts.add_option(("-DRADIUS=" + support::cpp11::to_string(norm_info.norm_size() / 2))); build_opts.add_option(("-DNUM_SLICES=" + support::cpp11::to_string(input->info()->dimension(2)))); build_opts.add_option_if(is_in_map_2D, "-DIN_MAP_2D"); - build_opts.add_option_if(norm_info.is_in_map() || (data_layout == DataLayout::NHWC && norm_info.is_cross_map()), "-DWIDTH_SIZE=" + support::cpp11::to_string(input->info()->dimension(0))); - build_opts.add_option_if(norm_info.is_in_map() && data_layout == DataLayout::NHWC, "-DDIM1_SIZE=" + support::cpp11::to_string(input->info()->dimension(1))); + build_opts.add_option_if(norm_info.is_in_map() || (data_layout == DataLayout::NHWC && norm_info.is_cross_map()), + "-DWIDTH_SIZE=" + support::cpp11::to_string(input->info()->dimension(0))); + build_opts.add_option_if(norm_info.is_in_map() && data_layout == DataLayout::NHWC, + "-DDIM1_SIZE=" + support::cpp11::to_string(input->info()->dimension(1))); // Create kernel std::string kernel_name; - if(norm_info.is_in_map()) + if (norm_info.is_in_map()) { kernel_name = "normalization_layer_in_map_" + lower_string(string_from_data_layout(data_layout)); } @@ -222,16 +234,19 @@ void CLNormalizationLayerKernel::configure(const CLCompileContext &compile_conte _config_id += support::cpp11::to_string(input->info()->dimension(0)); _config_id += "_"; _config_id += support::cpp11::to_string(input->info()->dimension(1)); - if(data_layout == DataLayout::NHWC) + if (data_layout == DataLayout::NHWC) { ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } } -Status CLNormalizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, NormalizationLayerInfo norm_info) +Status CLNormalizationLayerKernel::validate(const ITensorInfo *input, + const ITensorInfo *output, + NormalizationLayerInfo norm_info) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, norm_info)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), norm_info).first); + ARM_COMPUTE_RETURN_ON_ERROR( + validate_and_configure_window(input->clone().get(), output->clone().get(), norm_info).first); return Status{}; } @@ -251,7 +266,6 @@ void CLNormalizationLayerKernel::run(const Window &window, cl::CommandQueue &que add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _output, slice); enqueue(queue, *this, slice, lws_hint()); - } - while(window_collapsed.slide_window_slice_3D(slice)); + } while (window_collapsed.slide_window_slice_3D(slice)); } -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.h b/src/core/CL/kernels/CLNormalizationLayerKernel.h index 739a2ae9f1..5517ba6904 100644 --- a/src/core/CL/kernels/CLNormalizationLayerKernel.h +++ b/src/core/CL/kernels/CLNormalizationLayerKernel.h @@ -63,7 +63,10 @@ public: * Data layouts supported: same as @p input. * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input, + ICLTensor *output, + NormalizationLayerInfo norm_info); /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizationLayerKernel * * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], @@ -77,7 +80,7 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *output, NormalizationLayerInfo norm_info); // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; + void run(const Window &window, cl::CommandQueue &queue) override; BorderSize border_size() const override; private: diff --git a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp index 6b0400d50e..59352a8fb7 100644 --- a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp +++ b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp @@ -31,32 +31,35 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/helpers/AdjustVecSize.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/AccessWindowStatic.h" #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" - #include "support/StringSupport.h" namespace arm_compute { namespace { -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std) +Status +validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, + DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, mean, std); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, std); ARM_COMPUTE_RETURN_ERROR_ON_MSG(mean->num_dimensions() > 1, "mean and std must be vectors"); - const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL); + const unsigned int channel_idx = + get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL); ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(channel_idx) != mean->dimension(0)); // Checks performed when output is configured - if(output->total_size() != 0) + if (output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); @@ -77,7 +80,8 @@ std::pair validate_and_configure_window_nchw(ITensorInfo *input, bool window_changed = update_window_and_padding(win, input_access, output_access); - Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; + Status err = + (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; return std::make_pair(err, win); } } // namespace @@ -88,12 +92,19 @@ CLNormalizePlanarYUVLayerKernel::CLNormalizePlanarYUVLayerKernel() _type = CLKernelType::ELEMENTWISE; } -void CLNormalizePlanarYUVLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std) +void CLNormalizePlanarYUVLayerKernel::configure(const ICLTensor *input, + ICLTensor *output, + const ICLTensor *mean, + const ICLTensor *std) { configure(CLKernelLibrary::get().get_compile_context(), input, output, mean, std); } -void CLNormalizePlanarYUVLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std) +void CLNormalizePlanarYUVLayerKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + ICLTensor *output, + const ICLTensor *mean, + const ICLTensor *std) { // Perform validation step ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, mean, std); @@ -102,7 +113,7 @@ void CLNormalizePlanarYUVLayerKernel::configure(const CLCompileContext &compile_ // Output tensor auto initialization if not yet initialized auto_init_if_empty(*output->info(), *input->info()->clone()); - auto padding_info = get_padding_info({ input, output }); + auto padding_info = get_padding_info({input, output}); _input = input; _output = output; @@ -112,9 +123,10 @@ void CLNormalizePlanarYUVLayerKernel::configure(const CLCompileContext &compile_ const DataLayout data_layout = input->info()->data_layout(); // Get number of elements to process per iterations - const unsigned int num_elems_processed_per_iteration = (data_layout == DataLayout::NHWC) ? adjust_vec_size(16 / input->info()->element_size(), - input->info()->dimension(0)) : - (16 / input->info()->element_size()); + const unsigned int num_elems_processed_per_iteration = + (data_layout == DataLayout::NHWC) + ? adjust_vec_size(16 / input->info()->element_size(), input->info()->dimension(0)) + : (16 / input->info()->element_size()); const unsigned int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); const DataType dt = input->info()->data_type(); @@ -122,11 +134,12 @@ void CLNormalizePlanarYUVLayerKernel::configure(const CLCompileContext &compile_ CLBuildOptions build_opts; build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(dt))); build_opts.add_option(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); - build_opts.add_option(("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->info()->dimension(0) % num_elems_processed_per_iteration))); + build_opts.add_option(("-DVEC_SIZE_LEFTOVER=" + + support::cpp11::to_string(input->info()->dimension(0) % num_elems_processed_per_iteration))); build_opts.add_option(("-DNUM_CHANNELS=" + support::cpp11::to_string(input->info()->dimension(channel_idx)))); std::string kernel_name = "normalize_planar_yuv_layer_"; - if(is_data_type_quantized(dt)) + if (is_data_type_quantized(dt)) { const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform(); build_opts.add_option(("-DOFFSET=" + support::cpp11::to_string(qinfo.offset))); @@ -139,7 +152,7 @@ void CLNormalizePlanarYUVLayerKernel::configure(const CLCompileContext &compile_ _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Configure kernel window - if(data_layout == DataLayout::NHWC) + if (data_layout == DataLayout::NHWC) { Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); ICLKernel::configure_internal(win); @@ -165,12 +178,16 @@ void CLNormalizePlanarYUVLayerKernel::configure(const CLCompileContext &compile_ _config_id += support::cpp11::to_string(input->info()->dimension(2)); } -Status CLNormalizePlanarYUVLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std) +Status CLNormalizePlanarYUVLayerKernel::validate(const ITensorInfo *input, + const ITensorInfo *output, + const ITensorInfo *mean, + const ITensorInfo *std) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, mean, std)); - if(input->data_layout() == DataLayout::NCHW) + if (input->data_layout() == DataLayout::NCHW) { - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window_nchw(input->clone().get(), output->clone().get()).first); + ARM_COMPUTE_RETURN_ON_ERROR( + validate_and_configure_window_nchw(input->clone().get(), output->clone().get()).first); } return Status{}; } @@ -196,7 +213,6 @@ void CLNormalizePlanarYUVLayerKernel::run(const Window &window, cl::CommandQueue add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _output, slice); enqueue(queue, *this, slice, lws_hint()); - } - while(collapsed.slide_window_slice_3D(slice)); + } while (collapsed.slide_window_slice_3D(slice)); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h index 6db4433e78..341b404e3d 100644 --- a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h +++ b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h @@ -67,7 +67,11 @@ public: * @param[in] std Standard deviation values tensor. 1 dimension with size equal to the number of input channels. * Data types supported: same as @p input */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input, + ICLTensor *output, + const ICLTensor *mean, + const ICLTensor *std); /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizePlanarYUVLayerKernel * * @param[in] input Source tensor info. 3 lower dimensions represent a single input with dimensions [width, height, channels]. @@ -79,7 +83,8 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std); + static Status + validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/src/core/CL/kernels/CLPadLayerKernel.cpp b/src/core/CL/kernels/CLPadLayerKernel.cpp index 53f313c0d3..0ac285038e 100644 --- a/src/core/CL/kernels/CLPadLayerKernel.cpp +++ b/src/core/CL/kernels/CLPadLayerKernel.cpp @@ -27,6 +27,7 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/utils/helpers/AdjustVecSize.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" + #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" @@ -35,25 +36,29 @@ namespace arm_compute { namespace { -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode) +Status validate_arguments(const ITensorInfo *input, + const ITensorInfo *output, + const PaddingList &padding, + PixelValue constant_value, + PaddingMode mode) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_UNUSED(constant_value); ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON((padding.size() < 1) || (padding.size() > input->num_dimensions())); - if(mode == PaddingMode::REFLECT || mode == PaddingMode::SYMMETRIC) + if (mode == PaddingMode::REFLECT || mode == PaddingMode::SYMMETRIC) { ARM_COMPUTE_RETURN_ERROR_ON(padding.size() > 3); const auto is_reflect = static_cast(mode == PaddingMode::REFLECT); - for(size_t i = 0; i < padding.size(); ++i) + for (size_t i = 0; i < padding.size(); ++i) { ARM_COMPUTE_RETURN_ERROR_ON(padding.at(i).first > (input->dimension(i) - is_reflect)); ARM_COMPUTE_RETURN_ERROR_ON(padding.at(i).second > (input->dimension(i) - is_reflect)); } } - if(output->total_size() > 0) + if (output->total_size() > 0) { TensorShape padded_shape = misc::shape_calculator::compute_padded_shape(input->tensor_shape(), padding); @@ -65,41 +70,51 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c } } // namespace -CLPadLayerKernel::CLPadLayerKernel() - : _input(nullptr), _output(nullptr), _4d_enabled(false) +CLPadLayerKernel::CLPadLayerKernel() : _input(nullptr), _output(nullptr), _4d_enabled(false) { _type = CLKernelType::ELEMENTWISE; } -void CLPadLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode) +void CLPadLayerKernel::configure( + const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode) { configure(CLKernelLibrary::get().get_compile_context(), input, output, padding, constant_value, mode); } -void CLPadLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode) +void CLPadLayerKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + ICLTensor *output, + const PaddingList &padding, + PixelValue constant_value, + PaddingMode mode) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(misc::shape_calculator::compute_padded_shape(input->info()->tensor_shape(), padding))); + auto_init_if_empty(*output->info(), + input->info()->clone()->set_tensor_shape( + misc::shape_calculator::compute_padded_shape(input->info()->tensor_shape(), padding))); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), padding, constant_value, mode)); - auto padding_info = get_padding_info({ input, output }); + auto padding_info = get_padding_info({input, output}); _input = input; _output = output; _4d_enabled = (mode == PaddingMode::CONSTANT) && (padding.size() > 3); // Set build options - const DataType &data_type = input->info()->data_type(); - const unsigned int input_width = input->info()->dimension(0); - const unsigned int input_height = input->info()->dimension(1); - const unsigned int input_depth = input->info()->dimension(2); - const unsigned int pad_x_before = padding.at(0).first; - const unsigned int pad_y_before = padding.size() > 1 ? padding.at(1).first : 0; - const unsigned int pad_z_before = padding.size() > 2 ? padding.at(2).first : 0; - const unsigned int vec_size = adjust_vec_size(std::min(16U, 32U / static_cast(element_size_from_data_type(input->info()->data_type()))), input_width); - const unsigned int pad_right_start = input_width + pad_x_before; - const unsigned int pad_x_before_remainder = pad_x_before % vec_size; - const unsigned int vec_size_leftover_write = vec_size - (ceil_to_multiple(output->info()->dimension(0), vec_size) - output->info()->dimension(0)); + const DataType &data_type = input->info()->data_type(); + const unsigned int input_width = input->info()->dimension(0); + const unsigned int input_height = input->info()->dimension(1); + const unsigned int input_depth = input->info()->dimension(2); + const unsigned int pad_x_before = padding.at(0).first; + const unsigned int pad_y_before = padding.size() > 1 ? padding.at(1).first : 0; + const unsigned int pad_z_before = padding.size() > 2 ? padding.at(2).first : 0; + const unsigned int vec_size = adjust_vec_size( + std::min(16U, 32U / static_cast(element_size_from_data_type(input->info()->data_type()))), + input_width); + const unsigned int pad_right_start = input_width + pad_x_before; + const unsigned int pad_x_before_remainder = pad_x_before % vec_size; + const unsigned int vec_size_leftover_write = + vec_size - (ceil_to_multiple(output->info()->dimension(0), vec_size) - output->info()->dimension(0)); CLBuildOptions build_opts; build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type)); @@ -108,12 +123,12 @@ void CLPadLayerKernel::configure(const CLCompileContext &compile_context, const build_opts.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(input_width)); build_opts.add_option("-DPAD_X_BEFORE_REMAINDER=" + support::cpp11::to_string(pad_x_before_remainder)); build_opts.add_option("-DVEC_SIZE_LEFTOVER_WRITE=" + support::cpp11::to_string(vec_size_leftover_write)); - if(padding.size() > 1) + if (padding.size() > 1) { build_opts.add_option("-DPAD_Y_BEFORE=" + support::cpp11::to_string(pad_y_before)); build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(input_height)); - if(padding.size() > 2) + if (padding.size() > 2) { build_opts.add_option("-DPAD_Z_BEFORE=" + support::cpp11::to_string(pad_z_before)); build_opts.add_option("-DSRC_DEPTH=" + support::cpp11::to_string(input_depth)); @@ -121,23 +136,25 @@ void CLPadLayerKernel::configure(const CLCompileContext &compile_context, const } std::string kernel_name = "pad_layer_"; - switch(mode) + switch (mode) { case PaddingMode::CONSTANT: { kernel_name += "constant"; - const unsigned int vec_size_leftover_read = vec_size - (ceil_to_multiple(pad_right_start, vec_size) - pad_right_start); + const unsigned int vec_size_leftover_read = + vec_size - (ceil_to_multiple(pad_right_start, vec_size) - pad_right_start); build_opts.add_option("-DCONST_VAL=" + string_from_pixel_value(constant_value, data_type)); build_opts.add_option("-DVEC_SIZE_LEFTOVER_READ=" + support::cpp11::to_string(vec_size_leftover_read)); - if(pad_x_before >= vec_size) + if (pad_x_before >= vec_size) { build_opts.add_option("-DTHREADS_TO_SKIP_BEFORE=" + support::cpp11::to_string(pad_x_before / vec_size)); - build_opts.add_option("-DTHREADS_TO_SKIP_AFTER=" + support::cpp11::to_string(pad_right_start / vec_size)); + build_opts.add_option("-DTHREADS_TO_SKIP_AFTER=" + + support::cpp11::to_string(pad_right_start / vec_size)); } - if(_4d_enabled) + if (_4d_enabled) { build_opts.add_option("-DPAD_W_BEFORE=" + support::cpp11::to_string(padding.at(3).first)); build_opts.add_option("-DSRC_BATCH=" + support::cpp11::to_string(input->info()->dimension(3))); @@ -154,14 +171,17 @@ void CLPadLayerKernel::configure(const CLCompileContext &compile_context, const const unsigned int pad_x_after_remainder = pad_right_start % vec_size; const unsigned int after_pad_fact_x = (2 * input_width + pad_x_before) - is_reflect; - const unsigned int output_last_x = ceil_to_multiple(pad_right_start + padding.at(0).second, vec_size); + const unsigned int output_last_x = ceil_to_multiple(pad_right_start + padding.at(0).second, vec_size); build_opts.add_option("-DIS_REFLECT=" + support::cpp11::to_string(is_reflect)); build_opts.add_option("-DPAD_X_AFTER_REMAINDER=" + support::cpp11::to_string(pad_x_after_remainder)); - build_opts.add_option("-DPAD_X_BEFORE_REMAINDER_REFL=" + support::cpp11::to_string((pad_x_before_remainder + is_reflect) % vec_size)); - build_opts.add_option("-DPAD_X_AFTER_REMAINDER_REFL=" + support::cpp11::to_string((pad_x_after_remainder - is_reflect) % vec_size)); + build_opts.add_option("-DPAD_X_BEFORE_REMAINDER_REFL=" + + support::cpp11::to_string((pad_x_before_remainder + is_reflect) % vec_size)); + build_opts.add_option("-DPAD_X_AFTER_REMAINDER_REFL=" + + support::cpp11::to_string((pad_x_after_remainder - is_reflect) % vec_size)); build_opts.add_option("-DAFTER_PAD_FACT_X=" + support::cpp11::to_string(after_pad_fact_x)); - build_opts.add_option_if(after_pad_fact_x < output_last_x, "-DAFTER_PAD_REM=" + support::cpp11::to_string(after_pad_fact_x % vec_size)); + build_opts.add_option_if(after_pad_fact_x < output_last_x, + "-DAFTER_PAD_REM=" + support::cpp11::to_string(after_pad_fact_x % vec_size)); break; } @@ -179,7 +199,11 @@ void CLPadLayerKernel::configure(const CLCompileContext &compile_context, const ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -Status CLPadLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode) +Status CLPadLayerKernel::validate(const ITensorInfo *input, + const ITensorInfo *output, + const PaddingList &padding, + PixelValue constant_value, + PaddingMode mode) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, padding, constant_value, mode)); return Status{}; @@ -197,13 +221,12 @@ void CLPadLayerKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _output, slice); - if(_4d_enabled) + if (_4d_enabled) { add_argument(idx, batch++); } enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_3D(slice)); + } while (window.slide_window_slice_3D(slice)); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLPadLayerKernel.h b/src/core/CL/kernels/CLPadLayerKernel.h index 90af337f94..dca121b6a1 100644 --- a/src/core/CL/kernels/CLPadLayerKernel.h +++ b/src/core/CL/kernels/CLPadLayerKernel.h @@ -56,7 +56,11 @@ public: * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT, * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT). */ - void configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), PaddingMode mode = PaddingMode::CONSTANT); + void configure(const ICLTensor *input, + ICLTensor *output, + const PaddingList &padding, + PixelValue constant_value = PixelValue(), + PaddingMode mode = PaddingMode::CONSTANT); /** Set the input and output tensor. * * @param[in] compile_context The compile context to be used. @@ -68,8 +72,12 @@ public: * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT, * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT). */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), - PaddingMode mode = PaddingMode::CONSTANT); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input, + ICLTensor *output, + const PaddingList &padding, + PixelValue constant_value = PixelValue(), + PaddingMode mode = PaddingMode::CONSTANT); /** Static function to check if given info will lead to a valid configuration of @ref CLPadLayerKernel * * @param[in] input Source tensor info. Data types supported: All. @@ -80,7 +88,11 @@ public: * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT, * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT). */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), PaddingMode mode = PaddingMode::CONSTANT); + static Status validate(const ITensorInfo *input, + const ITensorInfo *output, + const PaddingList &padding, + PixelValue constant_value = PixelValue(), + PaddingMode mode = PaddingMode::CONSTANT); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp b/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp index bf1b874dd0..7dcdf1de6f 100644 --- a/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp +++ b/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp @@ -30,10 +30,10 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" - #include "support/StringSupport.h" using namespace arm_compute::misc::shape_calculator; @@ -42,7 +42,10 @@ namespace arm_compute { namespace { -Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info) +Status validate_arguments(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + const PriorBoxLayerInfo &info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::F32); @@ -51,10 +54,10 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, // Check variances const int var_size = info.variances().size(); - if(var_size > 1) + if (var_size > 1) { ARM_COMPUTE_RETURN_ERROR_ON_MSG(var_size != 4, "Must provide 4 variance values"); - for(int i = 0; i < var_size; ++i) + for (int i = 0; i < var_size; ++i) { ARM_COMPUTE_RETURN_ERROR_ON_MSG(var_size <= 0, "Must be greater than 0"); } @@ -62,17 +65,19 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.steps()[0] < 0.f, "Step x should be greater or equal to 0"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.steps()[1] < 0.f, "Step y should be greater or equal to 0"); - if(!info.max_sizes().empty()) + if (!info.max_sizes().empty()) { - ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.max_sizes().size() != info.min_sizes().size(), "Max and min sizes dimensions should match"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.max_sizes().size() != info.min_sizes().size(), + "Max and min sizes dimensions should match"); } - for(unsigned int i = 0; i < info.max_sizes().size(); ++i) + for (unsigned int i = 0; i < info.max_sizes().size(); ++i) { - ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.max_sizes()[i] < info.min_sizes()[i], "Max size should be greater than min size"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.max_sizes()[i] < info.min_sizes()[i], + "Max size should be greater than min size"); } - if(output != nullptr && output->total_size() != 0) + if (output != nullptr && output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != 2); } @@ -80,7 +85,11 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, return Status{}; } -std::pair validate_and_configure_window(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, const PriorBoxLayerInfo &info, int num_priors) +std::pair validate_and_configure_window(const ITensorInfo *input1, + const ITensorInfo *input2, + ITensorInfo *output, + const PriorBoxLayerInfo &info, + int num_priors) { ARM_COMPUTE_UNUSED(input2); // Output tensor auto initialization if not yet initialized @@ -88,10 +97,11 @@ std::pair validate_and_configure_window(const ITensorInfo *input auto_init_if_empty(*output, output_shape, 1, input1->data_type()); const unsigned int num_elems_processed_per_iteration = 4 * num_priors; - Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); + Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); bool window_changed = update_window_and_padding(win, output_access); - Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; + Status err = + (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; return std::make_pair(err, win); } } // namespace @@ -102,13 +112,25 @@ CLPriorBoxLayerKernel::CLPriorBoxLayerKernel() _type = CLKernelType::ELEMENTWISE; } -void CLPriorBoxLayerKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max, cl::Buffer *aspect_ratios) +void CLPriorBoxLayerKernel::configure(const ICLTensor *input1, + const ICLTensor *input2, + ICLTensor *output, + const PriorBoxLayerInfo &info, + cl::Buffer *min, + cl::Buffer *max, + cl::Buffer *aspect_ratios) { configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, info, min, max, aspect_ratios); } -void CLPriorBoxLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, - cl::Buffer *max, cl::Buffer *aspect_ratios) +void CLPriorBoxLayerKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input1, + const ICLTensor *input2, + ICLTensor *output, + const PriorBoxLayerInfo &info, + cl::Buffer *min, + cl::Buffer *max, + cl::Buffer *aspect_ratios) { ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); @@ -135,7 +157,7 @@ void CLPriorBoxLayerKernel::configure(const CLCompileContext &compile_context, c int img_width = info.img_size().x; int img_height = info.img_size().y; - if(img_width == 0 || img_height == 0) + if (img_width == 0 || img_height == 0) { img_width = input2->info()->dimension(width_idx); img_height = input2->info()->dimension(height_idx); @@ -143,7 +165,7 @@ void CLPriorBoxLayerKernel::configure(const CLCompileContext &compile_context, c float step_x = info.steps()[0]; float step_y = info.steps()[0]; - if(step_x == 0.f || step_y == 0.f) + if (step_x == 0.f || step_y == 0.f) { step_x = static_cast(img_width) / layer_width; step_y = static_cast(img_height) / layer_height; @@ -162,18 +184,20 @@ void CLPriorBoxLayerKernel::configure(const CLCompileContext &compile_context, c build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(info.offset())); build_opts.add_option_if(info.clip(), "-DIN_PLACE"); - if(info.variances().size() > 1) + if (info.variances().size() > 1) { - for(unsigned int i = 0; i < info.variances().size(); ++i) + for (unsigned int i = 0; i < info.variances().size(); ++i) { - build_opts.add_option("-DVARIANCE_" + support::cpp11::to_string(i) + "=" + support::cpp11::to_string(info.variances().at(i))); + build_opts.add_option("-DVARIANCE_" + support::cpp11::to_string(i) + "=" + + support::cpp11::to_string(info.variances().at(i))); } } else { - for(unsigned int i = 0; i < 4; ++i) + for (unsigned int i = 0; i < 4; ++i) { - build_opts.add_option("-DVARIANCE_" + support::cpp11::to_string(i) + "=" + support::cpp11::to_string(info.variances().at(0))); + build_opts.add_option("-DVARIANCE_" + support::cpp11::to_string(i) + "=" + + support::cpp11::to_string(info.variances().at(0))); } } @@ -194,13 +218,17 @@ void CLPriorBoxLayerKernel::configure(const CLCompileContext &compile_context, c ICLKernel::configure_internal(win_config.second); } -Status CLPriorBoxLayerKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info) +Status CLPriorBoxLayerKernel::validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + const PriorBoxLayerInfo &info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, output, info)); const int num_priors = info.aspect_ratios().size() * info.min_sizes().size() + info.max_sizes().size(); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input1->clone().get(), input2->clone().get(), output->clone().get(), info, num_priors) - .first); + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input1->clone().get(), input2->clone().get(), + output->clone().get(), info, num_priors) + .first); return Status{}; } @@ -211,8 +239,9 @@ void CLPriorBoxLayerKernel::run(const Window &window, cl::CommandQueue &queue) ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); queue.enqueueWriteBuffer(*_min, CL_TRUE, 0, _info.min_sizes().size() * sizeof(float), _info.min_sizes().data()); - queue.enqueueWriteBuffer(*_aspect_ratios, CL_TRUE, 0, _info.aspect_ratios().size() * sizeof(float), _info.aspect_ratios().data()); - if(!_info.max_sizes().empty()) + queue.enqueueWriteBuffer(*_aspect_ratios, CL_TRUE, 0, _info.aspect_ratios().size() * sizeof(float), + _info.aspect_ratios().data()); + if (!_info.max_sizes().empty()) { queue.enqueueWriteBuffer(*_max, CL_TRUE, 0, _info.max_sizes().size() * sizeof(float), _info.max_sizes().data()); } diff --git a/src/core/CL/kernels/CLPriorBoxLayerKernel.h b/src/core/CL/kernels/CLPriorBoxLayerKernel.h index 6c369a7a4e..a50e0c5ff5 100644 --- a/src/core/CL/kernels/CLPriorBoxLayerKernel.h +++ b/src/core/CL/kernels/CLPriorBoxLayerKernel.h @@ -57,7 +57,13 @@ public: * @param[in] max Maximum prior box values * @param[in] aspect_ratios Aspect ratio values */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max, cl::Buffer *aspect_ratios); + void configure(const ICLTensor *input1, + const ICLTensor *input2, + ICLTensor *output, + const PriorBoxLayerInfo &info, + cl::Buffer *min, + cl::Buffer *max, + cl::Buffer *aspect_ratios); /** Set the input and output tensors. * * @param[in] compile_context The compile context to be used. @@ -69,8 +75,14 @@ public: * @param[in] max Maximum prior box values * @param[in] aspect_ratios Aspect ratio values */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max, - cl::Buffer *aspect_ratios); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input1, + const ICLTensor *input2, + ICLTensor *output, + const PriorBoxLayerInfo &info, + cl::Buffer *min, + cl::Buffer *max, + cl::Buffer *aspect_ratios); /** Static function to check if given info will lead to a valid configuration of @ref CLPriorBoxLayerKernel * * @param[in] input1 First source tensor info. Data types supported: F32. Data layouts supported: NCHW/NHWC. @@ -80,14 +92,17 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info); + static Status validate(const ITensorInfo *input1, + const ITensorInfo *input2, + const ITensorInfo *output, + const PriorBoxLayerInfo &info); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; private: - const ICLTensor *_input1; - const ICLTensor *_input2; + const ICLTensor *_input1; + const ICLTensor *_input2; ICLTensor *_output; PriorBoxLayerInfo _info; int _num_priors; diff --git a/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp b/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp index bd573e54c8..731fcb8e04 100644 --- a/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp +++ b/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp @@ -22,10 +22,12 @@ * SOFTWARE. */ #include "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h" + #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/core/Utils.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" @@ -49,14 +51,19 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen const uint32_t temp_num_elems_processed_per_iteration = max_cl_vector_width / input->element_size(); /* If width is less then step, then make step same as width to avoid global size being step instead of actual width. */ /* Or we should fix in arm_compute::enqueue() or arm_compute::calculate_max_window(). */ - const uint32_t num_elems_processed_per_iteration = (input->dimension(0) < temp_num_elems_processed_per_iteration) ? input->dimension(0) : temp_num_elems_processed_per_iteration; + const uint32_t num_elems_processed_per_iteration = (input->dimension(0) < temp_num_elems_processed_per_iteration) + ? input->dimension(0) + : temp_num_elems_processed_per_iteration; // This kernel doesn't need padding Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); return std::make_pair(Status{}, win); } -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias) +Status validate_arguments(const ITensorInfo *input, + const ITensorInfo *output, + const ITensorInfo *weight, + const ITensorInfo *bias) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weight, bias, output); @@ -72,7 +79,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(weight, bias); // Checks performed when output is configured - if(output->total_size() != 0) + if (output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); @@ -87,10 +94,14 @@ CLQLSTMLayerNormalizationKernel::CLQLSTMLayerNormalizationKernel() _type = CLKernelType::ELEMENTWISE; } -void CLQLSTMLayerNormalizationKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *weight, const ICLTensor *bias) +void CLQLSTMLayerNormalizationKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + ICLTensor *output, + const ICLTensor *weight, + const ICLTensor *bias) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weight, bias, output); - auto padding_info = get_padding_info({ input, weight, bias, output }); + auto padding_info = get_padding_info({input, weight, bias, output}); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), weight->info(), bias->info())); @@ -104,7 +115,8 @@ void CLQLSTMLayerNormalizationKernel::configure(const CLCompileContext &compile_ int32_t output_multiplier{}; int32_t output_shift{}; const UniformQuantizationInfo quan_info = _weight->info()->quantization_info().uniform(); - const Status status = quantization::calculate_quantized_multiplier(quan_info.scale, &output_multiplier, &output_shift); + const Status status = + quantization::calculate_quantized_multiplier(quan_info.scale, &output_multiplier, &output_shift); output_shift *= -1; // Set build options @@ -114,8 +126,12 @@ void CLQLSTMLayerNormalizationKernel::configure(const CLCompileContext &compile_ build_opts.add_option("-DWIDTH=" + support::cpp11::to_string(input->info()->dimension(0))); build_opts.add_option("-DOUTPUT_MULTIPLIER=" + support::cpp11::to_string(output_multiplier)); build_opts.add_option("-DOUTPUT_SHIFT=" + support::cpp11::to_string(output_shift)); - build_opts.add_option("-DMIN_BOUND=" + support::cpp11::to_string(std::get<0>(quantization::get_min_max_values_from_quantized_data_type(input->info()->data_type())))); - build_opts.add_option("-DMAX_BOUND=" + support::cpp11::to_string(std::get<1>(quantization::get_min_max_values_from_quantized_data_type(input->info()->data_type())))); + build_opts.add_option("-DMIN_BOUND=" + + support::cpp11::to_string(std::get<0>( + quantization::get_min_max_values_from_quantized_data_type(input->info()->data_type())))); + build_opts.add_option("-DMAX_BOUND=" + + support::cpp11::to_string(std::get<1>( + quantization::get_min_max_values_from_quantized_data_type(input->info()->data_type())))); // Create kernel _kernel = create_kernel(compile_context, "qlstm_layer_normalization", build_opts.options()); @@ -135,12 +151,18 @@ void CLQLSTMLayerNormalizationKernel::configure(const CLCompileContext &compile_ ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -void CLQLSTMLayerNormalizationKernel::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *weight, const ICLTensor *bias) +void CLQLSTMLayerNormalizationKernel::configure(const ICLTensor *input, + ICLTensor *output, + const ICLTensor *weight, + const ICLTensor *bias) { configure(CLKernelLibrary::get().get_compile_context(), input, output, weight, bias); } -Status CLQLSTMLayerNormalizationKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias) +Status CLQLSTMLayerNormalizationKernel::validate(const ITensorInfo *input, + const ITensorInfo *output, + const ITensorInfo *weight, + const ITensorInfo *bias) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, weight, bias)); ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get()).first); @@ -171,7 +193,6 @@ void CLQLSTMLayerNormalizationKernel::run(const Window &window, cl::CommandQueue add_2D_tensor_argument(idx, _output, slice); enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); + } while (window.slide_window_slice_2D(slice)); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h b/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h index 31085c37ba..ba912e1d2d 100644 --- a/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h +++ b/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h @@ -63,7 +63,11 @@ public: * @param[in] weight Weight tensor. Data types supported: Same as @p input. * @param[in] bias Bias tensor. Data types supported: S32. */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *weight, const ICLTensor *bias); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input, + ICLTensor *output, + const ICLTensor *weight, + const ICLTensor *bias); /** Static function to check if given info will lead to a valid configuration of @ref CLQLSTMLayerNormalizationKernel * * @param[in] input Source tensor info with 2 dimensions. Data types supported: QSYMM16. @@ -73,7 +77,8 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias); + static Status + validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp index 69a6fa5fa0..c97910ef79 100644 --- a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp +++ b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp @@ -31,6 +31,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -42,24 +43,29 @@ namespace arm_compute { namespace { -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info) +Status validate_arguments(const ITensorInfo *input, + const ITensorInfo *rois, + ITensorInfo *output, + const ROIPoolingLayerInfo &pool_info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, rois, output); ARM_COMPUTE_RETURN_ERROR_ON(rois->dimension(0) != 5); ARM_COMPUTE_RETURN_ERROR_ON(rois->num_dimensions() > 2); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F32, DataType::F16); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, + DataType::F32, DataType::F16); ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(input, DataLayout::NHWC, DataLayout::NCHW); ARM_COMPUTE_RETURN_ERROR_ON((pool_info.pooled_width() == 0) || (pool_info.pooled_height() == 0)); - if(output->total_size() != 0) + if (output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(compute_roi_align_shape(*input, *rois, pool_info), output->tensor_shape()); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(compute_roi_align_shape(*input, *rois, pool_info), + output->tensor_shape()); } - if(is_data_type_quantized_asymmetric(input->data_type())) + if (is_data_type_quantized_asymmetric(input->data_type())) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(rois, 1, DataType::QASYMM16); @@ -82,12 +88,19 @@ CLROIAlignLayerKernel::CLROIAlignLayerKernel() _type = CLKernelType::ELEMENTWISE; } -void CLROIAlignLayerKernel::configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info) +void CLROIAlignLayerKernel::configure(const ICLTensor *input, + const ICLTensor *rois, + ICLTensor *output, + const ROIPoolingLayerInfo &pool_info) { configure(CLKernelLibrary::get().get_compile_context(), input, rois, output, pool_info); } -void CLROIAlignLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info) +void CLROIAlignLayerKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + const ICLTensor *rois, + ICLTensor *output, + const ROIPoolingLayerInfo &pool_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, rois); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), rois->info(), output->info(), pool_info)); @@ -97,7 +110,7 @@ void CLROIAlignLayerKernel::configure(const CLCompileContext &compile_context, c auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type()); output->info()->set_data_layout(input->info()->data_layout()); - auto padding_info = get_padding_info({ input, rois, output }); + auto padding_info = get_padding_info({input, rois, output}); _input = input; _output = output; @@ -111,16 +124,23 @@ void CLROIAlignLayerKernel::configure(const CLCompileContext &compile_context, c CLBuildOptions build_opts; build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type)); build_opts.add_option("-DDATA_SIZE=" + get_data_size_from_data_type(input->info()->data_type())); - build_opts.add_option("-DMAX_DIM_X=" + support::cpp11::to_string(_input->info()->dimension(get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::WIDTH)))); - build_opts.add_option("-DMAX_DIM_Y=" + support::cpp11::to_string(_input->info()->dimension(get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT)))); - build_opts.add_option("-DMAX_DIM_Z=" + support::cpp11::to_string(_input->info()->dimension(get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::CHANNEL)))); + build_opts.add_option("-DMAX_DIM_X=" + + support::cpp11::to_string(_input->info()->dimension(get_data_layout_dimension_index( + input->info()->data_layout(), DataLayoutDimension::WIDTH)))); + build_opts.add_option("-DMAX_DIM_Y=" + + support::cpp11::to_string(_input->info()->dimension(get_data_layout_dimension_index( + input->info()->data_layout(), DataLayoutDimension::HEIGHT)))); + build_opts.add_option("-DMAX_DIM_Z=" + + support::cpp11::to_string(_input->info()->dimension(get_data_layout_dimension_index( + input->info()->data_layout(), DataLayoutDimension::CHANNEL)))); build_opts.add_option("-DPOOLED_DIM_X=" + support::cpp11::to_string(pool_info.pooled_width())); build_opts.add_option("-DPOOLED_DIM_Y=" + support::cpp11::to_string(pool_info.pooled_height())); build_opts.add_option("-DSPATIAL_SCALE=" + float_to_string_with_full_precision(pool_info.spatial_scale())); build_opts.add_option_if(input->info()->data_layout() == DataLayout::NHWC, "-DNHWC"); - build_opts.add_option_if(pool_info.sampling_ratio() > 0, "-DSAMPLING_RATIO=" + support::cpp11::to_string(pool_info.sampling_ratio())); + build_opts.add_option_if(pool_info.sampling_ratio() > 0, + "-DSAMPLING_RATIO=" + support::cpp11::to_string(pool_info.sampling_ratio())); - if(is_qasymm) + if (is_qasymm) { const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); const UniformQuantizationInfo roisq_info = rois->info()->quantization_info().uniform(); @@ -144,7 +164,10 @@ void CLROIAlignLayerKernel::configure(const CLCompileContext &compile_context, c ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -Status CLROIAlignLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info) +Status CLROIAlignLayerKernel::validate(const ITensorInfo *input, + const ITensorInfo *rois, + ITensorInfo *output, + const ROIPoolingLayerInfo &pool_info) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, rois, output, pool_info)); return Status{}; diff --git a/src/core/CL/kernels/CLROIAlignLayerKernel.h b/src/core/CL/kernels/CLROIAlignLayerKernel.h index 5284a5913f..2e84e5d303 100644 --- a/src/core/CL/kernels/CLROIAlignLayerKernel.h +++ b/src/core/CL/kernels/CLROIAlignLayerKernel.h @@ -61,7 +61,8 @@ public: * @note The z dimensions of @p output tensor and @p input tensor must be the same. * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. */ - void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); + void + configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); /** Set the input and output tensors. * * @param[in] compile_context The compile context to be used. @@ -77,7 +78,11 @@ public: * @note The z dimensions of @p output tensor and @p input tensor must be the same. * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input, + const ICLTensor *rois, + ICLTensor *output, + const ROIPoolingLayerInfo &pool_info); /** Static function to check if given info will lead to a valid configuration of @ref CLROIAlignLayerKernel * * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. @@ -93,7 +98,10 @@ public: * * @return a Status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info); + static Status validate(const ITensorInfo *input, + const ITensorInfo *rois, + ITensorInfo *output, + const ROIPoolingLayerInfo &pool_info); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue); diff --git a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp index f6933c6cfd..1b2c414a49 100644 --- a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp @@ -31,6 +31,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -48,7 +49,10 @@ CLROIPoolingLayerKernel::CLROIPoolingLayerKernel() _type = CLKernelType::ELEMENTWISE; } -Status CLROIPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *rois, const ITensorInfo *output, const ROIPoolingLayerInfo &pool_info) +Status CLROIPoolingLayerKernel::validate(const ITensorInfo *input, + const ITensorInfo *rois, + const ITensorInfo *output, + const ROIPoolingLayerInfo &pool_info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, rois, output); @@ -61,10 +65,11 @@ Status CLROIPoolingLayerKernel::validate(const ITensorInfo *input, const ITensor ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16, DataType::QASYMM8); ARM_COMPUTE_RETURN_ERROR_ON((pool_info.pooled_width() == 0) || (pool_info.pooled_height() == 0)); - if(output->total_size() != 0) + if (output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON((output->dimension(0) != pool_info.pooled_width()) || (output->dimension(1) != pool_info.pooled_height())); + ARM_COMPUTE_RETURN_ERROR_ON((output->dimension(0) != pool_info.pooled_width()) || + (output->dimension(1) != pool_info.pooled_height())); ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(2) != output->dimension(2)); ARM_COMPUTE_RETURN_ERROR_ON(rois->dimension(1) != output->dimension(3)); } @@ -72,20 +77,30 @@ Status CLROIPoolingLayerKernel::validate(const ITensorInfo *input, const ITensor return Status{}; } -void CLROIPoolingLayerKernel::configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info) +void CLROIPoolingLayerKernel::configure(const ICLTensor *input, + const ICLTensor *rois, + ICLTensor *output, + const ROIPoolingLayerInfo &pool_info) { configure(CLKernelLibrary::get().get_compile_context(), input, rois, output, pool_info); } -void CLROIPoolingLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, const ICLTensor *output, const ROIPoolingLayerInfo &pool_info) +void CLROIPoolingLayerKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + const ICLTensor *rois, + const ICLTensor *output, + const ROIPoolingLayerInfo &pool_info) { - ARM_COMPUTE_ERROR_THROW_ON(CLROIPoolingLayerKernel::validate(input->info(), rois->info(), output->info(), pool_info)); + ARM_COMPUTE_ERROR_THROW_ON( + CLROIPoolingLayerKernel::validate(input->info(), rois->info(), output->info(), pool_info)); - auto padding_info = get_padding_info({ input, rois, output }); + auto padding_info = get_padding_info({input, rois, output}); // Output auto initialization if not yet initialized - TensorShape output_shape(pool_info.pooled_width(), pool_info.pooled_height(), input->info()->dimension(2), rois->info()->dimension(1)); - auto_init_if_empty(*(output->info()), output_shape, 1, input->info()->data_type(), output->info()->quantization_info()); + TensorShape output_shape(pool_info.pooled_width(), pool_info.pooled_height(), input->info()->dimension(2), + rois->info()->dimension(1)); + auto_init_if_empty(*(output->info()), output_shape, 1, input->info()->data_type(), + output->info()->quantization_info()); // Set instance variables _input = input; @@ -107,11 +122,12 @@ void CLROIPoolingLayerKernel::configure(const CLCompileContext &compile_context, build_opts.add_option("-DPOOLED_DIM_Y=" + support::cpp11::to_string(pool_info.pooled_height())); build_opts.add_option("-DSPATIAL_SCALE=" + support::cpp11::to_string(pool_info.spatial_scale())); - if(is_qasymm) + if (is_qasymm) { // Determine quantization info scale, offset UniformQuantizationInfo uqinfo = UniformQuantizationInfo(); - uqinfo = compute_requantization_scale_offset(_input->info()->quantization_info().uniform(), _output->info()->quantization_info().uniform()); + uqinfo = compute_requantization_scale_offset(_input->info()->quantization_info().uniform(), + _output->info()->quantization_info().uniform()); build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(uqinfo.offset)); build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(uqinfo.scale)); diff --git a/src/core/CL/kernels/CLROIPoolingLayerKernel.h b/src/core/CL/kernels/CLROIPoolingLayerKernel.h index 7b7b457632..80bfb63092 100644 --- a/src/core/CL/kernels/CLROIPoolingLayerKernel.h +++ b/src/core/CL/kernels/CLROIPoolingLayerKernel.h @@ -59,7 +59,8 @@ public: * @note The z dimensions of @p output tensor and @p input tensor must be the same. * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. */ - void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); + void + configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info); /** Set the input and output tensors. * * @param[in] compile_context The compile context to be used. @@ -74,7 +75,11 @@ public: * @note The z dimensions of @p output tensor and @p input tensor must be the same. * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, const ICLTensor *output, const ROIPoolingLayerInfo &pool_info); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input, + const ICLTensor *rois, + const ICLTensor *output, + const ROIPoolingLayerInfo &pool_info); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; @@ -92,7 +97,10 @@ public: * @note The z dimensions of @p output tensor and @p input tensor must be the same. * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. */ - static Status validate(const ITensorInfo *input, const ITensorInfo *rois, const ITensorInfo *output, const ROIPoolingLayerInfo &pool_info); + static Status validate(const ITensorInfo *input, + const ITensorInfo *rois, + const ITensorInfo *output, + const ROIPoolingLayerInfo &pool_info); private: const ICLTensor *_input; diff --git a/src/core/CL/kernels/CLRangeKernel.cpp b/src/core/CL/kernels/CLRangeKernel.cpp index a06c2eed75..622f6210b9 100644 --- a/src/core/CL/kernels/CLRangeKernel.cpp +++ b/src/core/CL/kernels/CLRangeKernel.cpp @@ -28,6 +28,7 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/helpers/AdjustVecSize.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -42,11 +43,8 @@ constexpr unsigned int vector_size_byte_opencl = 16; Status validate_arguments(const ITensorInfo *output, const float start, const float end, const float step) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, - 1, - DataType::U8, DataType::S8, DataType::QASYMM8, - DataType::U16, DataType::S16, - DataType::U32, DataType::S32, + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S8, DataType::QASYMM8, + DataType::U16, DataType::S16, DataType::U32, DataType::S32, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(output); @@ -56,19 +54,22 @@ Status validate_arguments(const ITensorInfo *output, const float start, const fl ARM_COMPUTE_RETURN_ERROR_ON_MSG((start == end), "start of the requested sequence must not be equal to the end"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(!check_value_range(start, output->data_type(), output->quantization_info()), "start value is outside the range of the data type"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(!check_value_range(end, output->data_type(), output->quantization_info()), "end value is outside the range of the data type"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(!check_value_range(step, output->data_type(), output->quantization_info()), "step value is outside the range of the data type"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(!check_value_range(start, output->data_type(), output->quantization_info()), + "start value is outside the range of the data type"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(!check_value_range(end, output->data_type(), output->quantization_info()), + "end value is outside the range of the data type"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(!check_value_range(step, output->data_type(), output->quantization_info()), + "step value is outside the range of the data type"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->num_dimensions() != 1, "Output has to be a 1-D tensor"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->tensor_shape().total_size() < num_of_elements_in_range(start, end, step), "Output tensor size is incorrect"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->tensor_shape().total_size() < num_of_elements_in_range(start, end, step), + "Output tensor size is incorrect"); return Status{}; } } // namespace -CLRangeKernel::CLRangeKernel() - : _start(0), _end(1), _step(1), _output(nullptr) +CLRangeKernel::CLRangeKernel() : _start(0), _end(1), _step(1), _output(nullptr) { _type = CLKernelType::ELEMENTWISE; } @@ -78,16 +79,18 @@ void CLRangeKernel::configure(ICLTensor *output, const float start, const float configure(CLKernelLibrary::get().get_compile_context(), output, start, end, step); } -void CLRangeKernel::configure(const CLCompileContext &compile_context, ICLTensor *output, const float start, const float end, const float step) +void CLRangeKernel::configure( + const CLCompileContext &compile_context, ICLTensor *output, const float start, const float end, const float step) { ARM_COMPUTE_ERROR_ON_NULLPTR(output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(output->info(), start, end, step)); // Configure kernel window - unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / output->info()->element_size(), output->info()->dimension(0)); - Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); + unsigned int num_elems_processed_per_iteration = + adjust_vec_size(vector_size_byte_opencl / output->info()->element_size(), output->info()->dimension(0)); + Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); - auto padding_info = get_padding_info({ output }); + auto padding_info = get_padding_info({output}); _start = start; _end = end; @@ -100,10 +103,11 @@ void CLRangeKernel::configure(const CLCompileContext &compile_context, ICLTensor CLBuildOptions build_opts; build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type())); build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(output->info()->dimension(0) % num_elems_processed_per_iteration)); + build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + + support::cpp11::to_string(output->info()->dimension(0) % num_elems_processed_per_iteration)); build_opts.add_option("-DSTART=" + support::cpp11::to_string(start)); build_opts.add_option("-DSTEP=" + support::cpp11::to_string(step)); - if(is_data_type_quantized_asymmetric(output->info()->data_type())) + if (is_data_type_quantized_asymmetric(output->info()->data_type())) { const UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform(); build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(qinfo.offset)); diff --git a/src/core/CL/kernels/CLRangeKernel.h b/src/core/CL/kernels/CLRangeKernel.h index 1b94a099ed..65251a11e5 100644 --- a/src/core/CL/kernels/CLRangeKernel.h +++ b/src/core/CL/kernels/CLRangeKernel.h @@ -25,6 +25,7 @@ #define ARM_COMPUTE_CLRANGEKERNEL_H #include "arm_compute/core/Types.h" + #include "src/core/CL/ICLKernel.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp index e5cfb997ca..70875a2d40 100644 --- a/src/core/CL/kernels/CLReductionOperationKernel.cpp +++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp @@ -28,15 +28,15 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/helpers/AdjustVecSize.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/StringUtils.h" +#include "arm_compute/core/Validate.h" + #include "src/core/AccessWindowStatic.h" #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" - #include "support/StringSupport.h" namespace arm_compute @@ -47,23 +47,28 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); - if(input->num_channels() == 1) + if (input->num_channels() == 1) { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S32, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, + DataType::S32, DataType::F16, DataType::F32); } else { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 2, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON(axis == 0); } - ARM_COMPUTE_RETURN_ERROR_ON_MSG(op == ReductionOperation::SUM_SQUARE && input->data_type() == DataType::QASYMM8, "Not supported reduction operation for QASYMM8"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(op == ReductionOperation::SUM_SQUARE && input->data_type() == DataType::QASYMM8, + "Not supported reduction operation for QASYMM8"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, + "Reduction axis greater than max number of dimensions"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis"); - ARM_COMPUTE_RETURN_ERROR_ON((op == ReductionOperation::MEAN_SUM) && (axis == 0) && (input->dimension(0) == 0) && (input->data_type() != DataType::QASYMM8) - && (input->data_type() != DataType::QASYMM8_SIGNED)); - ARM_COMPUTE_RETURN_ERROR_ON_MSG((op == ReductionOperation::ARG_IDX_MAX) || (op == ReductionOperation::ARG_IDX_MIN), "Not supported reduction operation, use CLArgMinMaxLayer"); + ARM_COMPUTE_RETURN_ERROR_ON((op == ReductionOperation::MEAN_SUM) && (axis == 0) && (input->dimension(0) == 0) && + (input->data_type() != DataType::QASYMM8) && + (input->data_type() != DataType::QASYMM8_SIGNED)); + ARM_COMPUTE_RETURN_ERROR_ON_MSG((op == ReductionOperation::ARG_IDX_MAX) || (op == ReductionOperation::ARG_IDX_MIN), + "Not supported reduction operation, use CLArgMinMaxLayer"); - if(output->total_size() != 0) + if (output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output); @@ -79,33 +84,42 @@ CLReductionOperationKernel::CLReductionOperationKernel() _type = CLKernelType::ELEMENTWISE; } -void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op) +void CLReductionOperationKernel::configure(const ICLTensor *input, + ICLTensor *output, + unsigned int axis, + ReductionOperation op) { configure(CLKernelLibrary::get().get_compile_context(), input, output, axis, op); } -void CLReductionOperationKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op) +void CLReductionOperationKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + ICLTensor *output, + unsigned int axis, + ReductionOperation op) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), axis, op)); - auto padding_info = get_padding_info({ input, output }); + auto padding_info = get_padding_info({input, output}); _input = input; _output = output; _reduction_axis = axis; _op = op; - const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(input->info()->tensor_shape(), axis, true); - auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape).reset_padding().set_is_resizable(true)); + const TensorShape output_shape = + arm_compute::misc::shape_calculator::compute_reduced_shape(input->info()->tensor_shape(), axis, true); + auto_init_if_empty(*output->info(), + input->info()->clone()->set_tensor_shape(output_shape).reset_padding().set_is_resizable(true)); // Set build options CLBuildOptions build_opts; DataType data_type = input->info()->data_type(); std::string data_type_promoted{}; - if(is_data_type_quantized(data_type)) + if (is_data_type_quantized(data_type)) { data_type_promoted = "int"; } @@ -130,10 +144,14 @@ void CLReductionOperationKernel::configure(const CLCompileContext &compile_conte build_opts.add_option_if(op == ReductionOperation::PROD, "-DPROD"); build_opts.add_option_if(op == ReductionOperation::MIN, "-DMIN"); build_opts.add_option_if(op == ReductionOperation::MAX, "-DMAX"); - build_opts.add_option_if(is_data_type_quantized(data_type), "-DOFFSET=" + support::cpp11::to_string(input->info()->quantization_info().uniform().offset)); - build_opts.add_option_if(is_data_type_quantized(data_type), "-DSCALE=" + float_to_string_with_full_precision(input->info()->quantization_info().uniform().scale)); - - switch(op) + build_opts.add_option_if(is_data_type_quantized(data_type), + "-DOFFSET=" + + support::cpp11::to_string(input->info()->quantization_info().uniform().offset)); + build_opts.add_option_if( + is_data_type_quantized(data_type), + "-DSCALE=" + float_to_string_with_full_precision(input->info()->quantization_info().uniform().scale)); + + switch (op) { case ReductionOperation::SUM_SQUARE: build_opts.add_option(("-DOPERATION=square_sum")); @@ -159,7 +177,7 @@ void CLReductionOperationKernel::configure(const CLCompileContext &compile_conte std::string kernel_axis_name; const bool is_serial_op = needs_serialized_reduction(_op, _input->info()->data_type(), _reduction_axis); - switch(axis) + switch (axis) { case 0: { @@ -187,13 +205,17 @@ void CLReductionOperationKernel::configure(const CLCompileContext &compile_conte // Configure kernel window Window win = calculate_max_window(*input->info(), Steps(vec_size)); - win.set(Window::DimX, Window::Dimension(win.x().start(), win.x().end() * _input->info()->num_channels(), win.x().step())); + win.set(Window::DimX, + Window::Dimension(win.x().start(), win.x().end() * _input->info()->num_channels(), win.x().step())); ICLKernel::configure_internal(win); ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -Status CLReductionOperationKernel::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op) +Status CLReductionOperationKernel::validate(const ITensorInfo *input, + const ITensorInfo *output, + unsigned int axis, + ReductionOperation op) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, axis, op)); return Status{}; @@ -205,18 +227,19 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); const bool is_serial_op = needs_serialized_reduction(_op, _input->info()->data_type(), _reduction_axis); - switch(_reduction_axis) + switch (_reduction_axis) { case 0: { // We use parallel reduction only in non quantized types - if(is_serial_op) + if (is_serial_op) { // Get first input and output slices - Window window_in{ window }; - window_in.set(Window::DimX, Window::Dimension(0, _input->info()->dimension(0), _input->info()->dimension(0))); + Window window_in{window}; + window_in.set(Window::DimX, + Window::Dimension(0, _input->info()->dimension(0), _input->info()->dimension(0))); - Window out_window{ window }; + Window out_window{window}; out_window.set(Window::DimX, Window::Dimension(0, 0, 0)); Window in_slice = window_in.first_slice_window_1D(); @@ -228,8 +251,7 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que add_1D_tensor_argument(idx, _input, in_slice); add_1D_tensor_argument(idx, _output, out_slice); enqueue(queue, *this, in_slice); - } - while(window_in.slide_window_slice_1D(in_slice) && out_window.slide_window_slice_1D(out_slice)); + } while (window_in.slide_window_slice_1D(in_slice) && out_window.slide_window_slice_1D(out_slice)); } else { @@ -251,8 +273,9 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que case 1: { // Get first input and output slices - Window window_in{ window }; - window_in.set(Window::DimY, Window::Dimension(0, _input->info()->dimension(1), _input->info()->dimension(1))); + Window window_in{window}; + window_in.set(Window::DimY, + Window::Dimension(0, _input->info()->dimension(1), _input->info()->dimension(1))); Window in_slice = window_in.first_slice_window_2D(); Window out_slice = window.first_slice_window_2D(); @@ -262,15 +285,15 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que add_2D_tensor_argument(idx, _input, in_slice); add_2D_tensor_argument(idx, _output, out_slice); enqueue(queue, *this, in_slice); - } - while(window_in.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(out_slice)); + } while (window_in.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(out_slice)); } break; case 2: { // Get first input and output slices - Window window_in{ window }; - window_in.set(Window::DimZ, Window::Dimension(0, _input->info()->dimension(2), _input->info()->dimension(2))); + Window window_in{window}; + window_in.set(Window::DimZ, + Window::Dimension(0, _input->info()->dimension(2), _input->info()->dimension(2))); Window in_slice = window_in.first_slice_window_3D(); Window out_slice = window.first_slice_window_3D(); @@ -280,14 +303,13 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que add_3D_tensor_argument(idx, _input, in_slice); add_3D_tensor_argument(idx, _output, out_slice); enqueue(queue, *this, in_slice); - } - while(window_in.slide_window_slice_3D(in_slice) && window.slide_window_slice_3D(out_slice)); + } while (window_in.slide_window_slice_3D(in_slice) && window.slide_window_slice_3D(out_slice)); } break; case 3: { // Get first input and output slices - Window window_in{ window }; + Window window_in{window}; window_in.set(3, Window::Dimension(0, 1, 1)); Window in_slice = window_in.first_slice_window_4D(); Window out_slice = window.first_slice_window_4D(); @@ -298,8 +320,7 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que add_4D_tensor_argument(idx, _input, in_slice); add_4D_tensor_argument(idx, _output, out_slice); enqueue(queue, *this, in_slice); - } - while(window_in.slide_window_slice_4D(in_slice) && window.slide_window_slice_4D(out_slice)); + } while (window_in.slide_window_slice_4D(in_slice) && window.slide_window_slice_4D(out_slice)); } break; default: diff --git a/src/core/CL/kernels/CLReductionOperationKernel.h b/src/core/CL/kernels/CLReductionOperationKernel.h index b456378746..2f94b2add3 100644 --- a/src/core/CL/kernels/CLReductionOperationKernel.h +++ b/src/core/CL/kernels/CLReductionOperationKernel.h @@ -25,6 +25,7 @@ #define ARM_COMPUTE_CLREDUCTIONOPERATIONKERNEL_H #include "arm_compute/core/Types.h" + #include "src/core/CL/ICLKernel.h" namespace arm_compute @@ -67,7 +68,11 @@ public: * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3 * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input, + ICLTensor *output, + unsigned int axis, + ReductionOperation op); /** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperationKernel. * @@ -79,7 +84,8 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op); + static Status + validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/src/core/CL/kernels/CLReorgLayerKernel.cpp b/src/core/CL/kernels/CLReorgLayerKernel.cpp index 3c74e80d33..9fd21943e8 100644 --- a/src/core/CL/kernels/CLReorgLayerKernel.cpp +++ b/src/core/CL/kernels/CLReorgLayerKernel.cpp @@ -28,9 +28,10 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/StringUtils.h" +#include "arm_compute/core/Validate.h" + #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" @@ -51,13 +52,16 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, i const size_t idx_height = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT); ARM_COMPUTE_RETURN_ERROR_ON(stride <= 0); - ARM_COMPUTE_RETURN_ERROR_ON_MSG((input->tensor_shape()[idx_width] % stride) != 0, "The width of the input tensor must be a multiple of stride"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG((input->tensor_shape()[idx_height] % stride) != 0, "The height of the input tensor must be a multiple of stride"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG((input->tensor_shape()[idx_width] % stride) != 0, + "The width of the input tensor must be a multiple of stride"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG((input->tensor_shape()[idx_height] % stride) != 0, + "The height of the input tensor must be a multiple of stride"); // Validate output if initialized - if(output->total_size() != 0) + if (output->total_size() != 0) { - const TensorInfo tensor_info_output = output->clone()->set_tensor_shape(misc::shape_calculator::compute_reorg_output_shape(*input, stride)); + const TensorInfo tensor_info_output = + output->clone()->set_tensor_shape(misc::shape_calculator::compute_reorg_output_shape(*input, stride)); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info_output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); } @@ -66,8 +70,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, i } } // namespace -CLReorgLayerKernel::CLReorgLayerKernel() - : _input(nullptr), _output(nullptr) +CLReorgLayerKernel::CLReorgLayerKernel() : _input(nullptr), _output(nullptr) { _type = CLKernelType::ELEMENTWISE; } @@ -77,17 +80,22 @@ void CLReorgLayerKernel::configure(const ICLTensor *input, ICLTensor *output, in configure(CLKernelLibrary::get().get_compile_context(), input, output, stride); } -void CLReorgLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t stride) +void CLReorgLayerKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + ICLTensor *output, + int32_t stride) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), stride)); - auto padding_info = get_padding_info({ input, output }); + auto padding_info = get_padding_info({input, output}); _input = input; _output = output; - std::string kernel_name = std::string("reorg_layer_") + lower_string(string_from_data_layout(input->info()->data_layout())); - const size_t idx_channel = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::CHANNEL); + std::string kernel_name = + std::string("reorg_layer_") + lower_string(string_from_data_layout(input->info()->data_layout())); + const size_t idx_channel = + get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::CHANNEL); // Create kernel CLBuildOptions build_opts; @@ -98,7 +106,9 @@ void CLReorgLayerKernel::configure(const CLCompileContext &compile_context, cons // Configure window // auto inizialize the output tensor if not yet initialized - auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(misc::shape_calculator::compute_reorg_output_shape(*input->info(), stride))); + auto_init_if_empty(*output->info(), + input->info()->clone()->set_tensor_shape( + misc::shape_calculator::compute_reorg_output_shape(*input->info(), stride))); Window win = calculate_max_window(*output->info(), Steps()); @@ -119,7 +129,9 @@ void CLReorgLayerKernel::configure(const CLCompileContext &compile_context, cons ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -Status CLReorgLayerKernel::validate(const arm_compute::ITensorInfo *input, const arm_compute::ITensorInfo *output, int32_t stride) +Status CLReorgLayerKernel::validate(const arm_compute::ITensorInfo *input, + const arm_compute::ITensorInfo *output, + int32_t stride) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, stride)); @@ -139,7 +151,6 @@ void CLReorgLayerKernel::run(const Window &window, cl::CommandQueue &queue) add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _output, slice); enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_3D(slice)); + } while (window.slide_window_slice_3D(slice)); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLReorgLayerKernel.h b/src/core/CL/kernels/CLReorgLayerKernel.h index 455a6170c6..f335071e9f 100644 --- a/src/core/CL/kernels/CLReorgLayerKernel.h +++ b/src/core/CL/kernels/CLReorgLayerKernel.h @@ -25,6 +25,7 @@ #define ARM_COMPUTE_CLREORGLAYERKERNEL_H #include "arm_compute/core/Types.h" + #include "src/core/CL/ICLKernel.h" namespace arm_compute diff --git a/src/core/CL/kernels/CLReverseKernel.cpp b/src/core/CL/kernels/CLReverseKernel.cpp index 0d70ff4f3c..79a0f03b1e 100644 --- a/src/core/CL/kernels/CLReverseKernel.cpp +++ b/src/core/CL/kernels/CLReverseKernel.cpp @@ -30,6 +30,7 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -49,7 +50,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis->dimension(0) > 4, "Only up to 4 dimensions can be reversed"); // Checks performed when output is configured - if(output->total_size() != 0) + if (output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); @@ -60,8 +61,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c } } // namespace -CLReverseKernel::CLReverseKernel() - : _input(nullptr), _output(nullptr), _axis(nullptr) +CLReverseKernel::CLReverseKernel() : _input(nullptr), _output(nullptr), _axis(nullptr) { _type = CLKernelType::ELEMENTWISE; } @@ -71,10 +71,13 @@ void CLReverseKernel::configure(const ICLTensor *input, ICLTensor *output, const configure(CLKernelLibrary::get().get_compile_context(), input, output, axis); } -void CLReverseKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *axis) +void CLReverseKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + ICLTensor *output, + const ICLTensor *axis) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, axis); - auto padding_info = get_padding_info({ input, output, axis }); + auto padding_info = get_padding_info({input, output, axis}); _input = input; _output = output; @@ -138,7 +141,6 @@ void CLReverseKernel::run(const Window &window, cl::CommandQueue &queue) add_1D_tensor_argument(idx, _axis, axis_slice); add_4D_tensor_argument(idx, _output, slice); enqueue(queue, *this, slice, lws_hint()); - } - while(collapsed.slide_window_slice_4D(slice)); + } while (collapsed.slide_window_slice_4D(slice)); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLReverseKernel.h b/src/core/CL/kernels/CLReverseKernel.h index 4a21e4f802..fbd99dc883 100644 --- a/src/core/CL/kernels/CLReverseKernel.h +++ b/src/core/CL/kernels/CLReverseKernel.h @@ -60,7 +60,10 @@ public: * @param[out] output Output tensor. Data type supported: Same as @p input * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32 */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *axis); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input, + ICLTensor *output, + const ICLTensor *axis); /** Static function to check if given info will lead to a valid configuration of @ref CLReverseKernel * diff --git a/src/core/CL/kernels/CLSelectKernel.cpp b/src/core/CL/kernels/CLSelectKernel.cpp index c0e014e8b8..703c64d8d3 100644 --- a/src/core/CL/kernels/CLSelectKernel.cpp +++ b/src/core/CL/kernels/CLSelectKernel.cpp @@ -30,10 +30,10 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/helpers/AdjustVecSize.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" - #include "support/StringSupport.h" namespace arm_compute @@ -51,9 +51,11 @@ Status validate_arguments(const ITensorInfo *c, const ITensorInfo *x, const ITen const bool is_same_rank = (c->tensor_shape().num_dimensions() == x->tensor_shape().num_dimensions()); ARM_COMPUTE_RETURN_ERROR_ON(is_same_rank && (x->tensor_shape() != c->tensor_shape())); - ARM_COMPUTE_RETURN_ERROR_ON(!is_same_rank && ((c->tensor_shape().num_dimensions() > 1) || (c->tensor_shape().x() != x->tensor_shape()[x->tensor_shape().num_dimensions() - 1]))); + ARM_COMPUTE_RETURN_ERROR_ON(!is_same_rank && + ((c->tensor_shape().num_dimensions() > 1) || + (c->tensor_shape().x() != x->tensor_shape()[x->tensor_shape().num_dimensions() - 1]))); - if(output->total_size() != 0) + if (output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(x, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(x, output); @@ -63,13 +65,16 @@ Status validate_arguments(const ITensorInfo *c, const ITensorInfo *x, const ITen } } // namespace -CLSelectKernel::CLSelectKernel() - : _c(nullptr), _x(nullptr), _y(nullptr), _output(nullptr), _has_same_rank(false) +CLSelectKernel::CLSelectKernel() : _c(nullptr), _x(nullptr), _y(nullptr), _output(nullptr), _has_same_rank(false) { _type = CLKernelType::ELEMENTWISE; } -void CLSelectKernel::configure(const CLCompileContext &compile_context, const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output) +void CLSelectKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *c, + const ICLTensor *x, + const ICLTensor *y, + ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(c, x, y, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(c->info(), x->info(), y->info(), output->info())); @@ -80,7 +85,7 @@ void CLSelectKernel::configure(const CLCompileContext &compile_context, const IC _output = output; _has_same_rank = (c->info()->tensor_shape().num_dimensions() == x->info()->tensor_shape().num_dimensions()); - auto padding_info = get_padding_info({ c, x, y, output }); + auto padding_info = get_padding_info({c, x, y, output}); const unsigned int vec_size_x = adjust_vec_size(16 / x->info()->element_size(), x->info()->dimension(0)); const int vec_size_x_leftovers = output->info()->dimension(0) % vec_size_x; @@ -92,14 +97,14 @@ void CLSelectKernel::configure(const CLCompileContext &compile_context, const IC // Create kernel std::string kernel_name = "select"; - if(_has_same_rank) + if (_has_same_rank) { kernel_name += "_same_rank"; } else { const bool is_input_rank_greater_than_two = x->info()->tensor_shape().num_dimensions() > 2; - if(is_input_rank_greater_than_two) + if (is_input_rank_greater_than_two) { const size_t width = x->info()->tensor_shape().x(); const size_t height = x->info()->tensor_shape().y(); @@ -128,7 +133,8 @@ void CLSelectKernel::configure(const CLCompileContext &compile_context, const IC ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -Status CLSelectKernel::validate(const ITensorInfo *c, const ITensorInfo *x, const ITensorInfo *y, const ITensorInfo *output) +Status +CLSelectKernel::validate(const ITensorInfo *c, const ITensorInfo *x, const ITensorInfo *y, const ITensorInfo *output) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(c, x, y, output)); return Status{}; @@ -142,7 +148,7 @@ void CLSelectKernel::run(const arm_compute::Window &window, cl::CommandQueue &qu Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); Window slice = collapsed.first_slice_window_3D(); - if(!_has_same_rank) + if (!_has_same_rank) { Window vector_slice = window.first_slice_window_1D(); vector_slice.set(Window::DimX, Window::Dimension(0, 0, 0)); @@ -153,7 +159,7 @@ void CLSelectKernel::run(const arm_compute::Window &window, cl::CommandQueue &qu do { unsigned int idx = _has_same_rank ? 0 : num_arguments_per_1D_tensor(); - if(_has_same_rank) + if (_has_same_rank) { add_3D_tensor_argument(idx, _c, slice); } @@ -162,7 +168,6 @@ void CLSelectKernel::run(const arm_compute::Window &window, cl::CommandQueue &qu add_3D_tensor_argument(idx, _output, slice); enqueue(queue, *this, slice, lws_hint()); - } - while(collapsed.slide_window_slice_3D(slice)); + } while (collapsed.slide_window_slice_3D(slice)); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLSelectKernel.h b/src/core/CL/kernels/CLSelectKernel.h index b8c10cd7cf..c4256fd743 100644 --- a/src/core/CL/kernels/CLSelectKernel.h +++ b/src/core/CL/kernels/CLSelectKernel.h @@ -25,6 +25,7 @@ #define ARM_COMPUTE_CLSELECTKERNEL_H #include "arm_compute/core/Types.h" + #include "src/core/CL/ICLKernel.h" namespace arm_compute @@ -60,7 +61,11 @@ public: * @param[out] y Second input tensor. Data types supported: Same as @p x * @param[in] output Output tensor. Data types supported: Same as @p x. */ - void configure(const CLCompileContext &compile_context, const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output); + void configure(const CLCompileContext &compile_context, + const ICLTensor *c, + const ICLTensor *x, + const ICLTensor *y, + ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLSelectKernel * * @param[in] c Condition input tensor. Data types supported: U8. diff --git a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp index 3632ae2b03..f4c0839ad2 100644 --- a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp +++ b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp @@ -27,6 +27,7 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -38,19 +39,22 @@ namespace arm_compute { namespace { -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *block_info, const ITensorInfo *paddings, const ITensorInfo *output) +Status validate_arguments(const ITensorInfo *input, + const ITensorInfo *block_info, + const ITensorInfo *paddings, + const ITensorInfo *output) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, block_info, paddings, output); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(block_info, 1, DataType::S32); ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); ARM_COMPUTE_RETURN_ERROR_ON(block_info->num_dimensions() > 1); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(block_info->tensor_shape(), TensorShape{ 2 }); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(block_info->tensor_shape(), TensorShape{2}); ARM_COMPUTE_RETURN_ERROR_ON(paddings->num_dimensions() > 2); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(paddings->tensor_shape(), TensorShape{ 2, 2 }); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(paddings->tensor_shape(), TensorShape{2, 2}); // Validate output if initialized - if(output->total_size() != 0) + if (output->total_size() != 0) { const DataLayout data_layout = input->data_layout(); const int idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); @@ -61,7 +65,11 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *block_inf return Status{}; } -Status validate_arguments_static(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, +Status validate_arguments_static(const ITensorInfo *input, + const int block_shape_x, + const int block_shape_y, + const Size2D &padding_left, + const Size2D &padding_right, const ITensorInfo *output) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); @@ -70,9 +78,10 @@ Status validate_arguments_static(const ITensorInfo *input, const int block_shape ARM_COMPUTE_RETURN_ERROR_ON(block_shape_x < 1 || block_shape_y < 1); // Validate output if initialized - if(output->total_size() != 0) + if (output->total_size() != 0) { - TensorShape expected_output_shape = misc::shape_calculator::compute_space_to_batch_shape(input, block_shape_x, block_shape_y, padding_left, padding_right); + TensorShape expected_output_shape = misc::shape_calculator::compute_space_to_batch_shape( + input, block_shape_x, block_shape_y, padding_left, padding_right); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), expected_output_shape); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output); @@ -88,16 +97,24 @@ CLSpaceToBatchLayerKernel::CLSpaceToBatchLayerKernel() _type = CLKernelType::ELEMENTWISE; } -void CLSpaceToBatchLayerKernel::configure(const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output) +void CLSpaceToBatchLayerKernel::configure(const ICLTensor *input, + const ICLTensor *block_shape, + const ICLTensor *paddings, + ICLTensor *output) { configure(CLKernelLibrary::get().get_compile_context(), input, block_shape, paddings, output); } -void CLSpaceToBatchLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output) +void CLSpaceToBatchLayerKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + const ICLTensor *block_shape, + const ICLTensor *paddings, + ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, block_shape, paddings, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), block_shape->info(), paddings->info(), output->info())); - auto padding_info = get_padding_info({ input, block_shape, paddings, output }); + ARM_COMPUTE_ERROR_THROW_ON( + validate_arguments(input->info(), block_shape->info(), paddings->info(), output->info())); + auto padding_info = get_padding_info({input, block_shape, paddings, output}); _input = input; _block_shape = block_shape; @@ -111,14 +128,17 @@ void CLSpaceToBatchLayerKernel::configure(const CLCompileContext &compile_contex // Create kernel CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(data_size_from_type(input->info()->data_type()))); + build_opts.add_option("-DDATA_TYPE=" + + get_cl_unsigned_type_from_element_size(data_size_from_type(input->info()->data_type()))); build_opts.add_option("-DWIDTH_OUT=" + support::cpp11::to_string(output->info()->dimension(idx_width))); build_opts.add_option("-DHEIGHT_OUT=" + support::cpp11::to_string(output->info()->dimension(idx_height))); build_opts.add_option("-DBATCH_SIZE=" + support::cpp11::to_string(output->info()->dimension(idx_batch))); build_opts.add_option("-DWIDTH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_width))); build_opts.add_option("-DHEIGHT_IN=" + support::cpp11::to_string(input->info()->dimension(idx_height))); build_opts.add_option("-DBATCH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_batch))); - _kernel = create_kernel(compile_context, "space_to_batch_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options()); + _kernel = create_kernel(compile_context, + "space_to_batch_" + lower_string(string_from_data_layout(input->info()->data_layout())), + build_opts.options()); // Configure kernel window Window win = calculate_max_window(*output->info(), Steps()); @@ -126,22 +146,34 @@ void CLSpaceToBatchLayerKernel::configure(const CLCompileContext &compile_contex ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -void CLSpaceToBatchLayerKernel::configure(const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, - ICLTensor *output) +void CLSpaceToBatchLayerKernel::configure(const ICLTensor *input, + const int block_shape_x, + const int block_shape_y, + const Size2D &padding_left, + const Size2D &padding_right, + ICLTensor *output) { - configure(CLKernelLibrary::get().get_compile_context(), input, block_shape_x, block_shape_y, padding_left, padding_right, output); + configure(CLKernelLibrary::get().get_compile_context(), input, block_shape_x, block_shape_y, padding_left, + padding_right, output); } -void CLSpaceToBatchLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, - const Size2D &padding_right, - ICLTensor *output) +void CLSpaceToBatchLayerKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + const int block_shape_x, + const int block_shape_y, + const Size2D &padding_left, + const Size2D &padding_right, + ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - TensorShape output_shape = misc::shape_calculator::compute_space_to_batch_shape(input->info(), block_shape_x, block_shape_y, padding_left, padding_right); - auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->quantization_info()); + TensorShape output_shape = misc::shape_calculator::compute_space_to_batch_shape( + input->info(), block_shape_x, block_shape_y, padding_left, padding_right); + auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), + input->info()->quantization_info()); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_static(input->info(), block_shape_x, block_shape_y, padding_left, padding_right, output->info())); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_static(input->info(), block_shape_x, block_shape_y, padding_left, + padding_right, output->info())); _input = input; _output = output; @@ -153,7 +185,8 @@ void CLSpaceToBatchLayerKernel::configure(const CLCompileContext &compile_contex // Create kernel CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(data_size_from_type(input->info()->data_type()))); + build_opts.add_option("-DDATA_TYPE=" + + get_cl_unsigned_type_from_element_size(data_size_from_type(input->info()->data_type()))); build_opts.add_option("-DWIDTH_OUT=" + support::cpp11::to_string(output->info()->dimension(idx_width))); build_opts.add_option("-DHEIGHT_OUT=" + support::cpp11::to_string(output->info()->dimension(idx_height))); build_opts.add_option("-DBATCH_SIZE=" + support::cpp11::to_string(output->info()->dimension(idx_batch))); @@ -166,22 +199,32 @@ void CLSpaceToBatchLayerKernel::configure(const CLCompileContext &compile_contex build_opts.add_option("-DPAD_RIGHT_X=" + support::cpp11::to_string(padding_right.x())); build_opts.add_option("-DPAD_LEFT_Y=" + support::cpp11::to_string(padding_left.y())); build_opts.add_option("-DPAD_RIGHT_Y=" + support::cpp11::to_string(padding_right.y())); - _kernel = create_kernel(compile_context, "space_to_batch_static_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options()); + _kernel = create_kernel( + compile_context, "space_to_batch_static_" + lower_string(string_from_data_layout(input->info()->data_layout())), + build_opts.options()); // Configure kernel window Window win = calculate_max_window(*output->info(), Steps()); ICLKernel::configure_internal(win); } -Status CLSpaceToBatchLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output) +Status CLSpaceToBatchLayerKernel::validate(const ITensorInfo *input, + const ITensorInfo *block_shape, + const ITensorInfo *paddings, + const ITensorInfo *output) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, block_shape, paddings, output)); return Status{}; } -Status CLSpaceToBatchLayerKernel::validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, +Status CLSpaceToBatchLayerKernel::validate(const ITensorInfo *input, + const int block_shape_x, + const int block_shape_y, + const Size2D &padding_left, + const Size2D &padding_right, const ITensorInfo *output) { - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_static(input, block_shape_x, block_shape_y, padding_left, padding_right, output)); + ARM_COMPUTE_RETURN_ON_ERROR( + validate_arguments_static(input, block_shape_x, block_shape_y, padding_left, padding_right, output)); return Status{}; } @@ -218,7 +261,6 @@ void CLSpaceToBatchLayerKernel::run(const Window &window, cl::CommandQueue &queu add_3D_tensor_argument(idx, _output, slice_out); enqueue(queue, *this, slice_out, lws_hint()); ++batch_id; - } - while(window.slide_window_slice_3D(slice_out)); + } while (window.slide_window_slice_3D(slice_out)); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.h b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.h index 4817cfeef2..f9dce9db47 100644 --- a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.h +++ b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.h @@ -25,6 +25,7 @@ #define ARM_COMPUTE_CLSPACETOBATCHLAYERKERNEL_H #include "arm_compute/core/Types.h" + #include "src/core/CL/ICLKernel.h" namespace arm_compute @@ -63,7 +64,11 @@ public: * @param[in] paddings 2-D tensor with shape [2, M] (First dimension is the fastest-changing dimension). Supported M: 2. Data types supported: S32 * @param[out] output Tensor output. Data types supported: same as @p input */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input, + const ICLTensor *block_shape, + const ICLTensor *paddings, + ICLTensor *output); /** Initialise the kernel's input and output. (Static block shape and paddings) * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. @@ -73,7 +78,12 @@ public: * @param[in] padding_right The padding at the end of every dimension of the output tensor. * @param[out] output Tensor output. Data types supported: same as @p input */ - void configure(const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output); + void configure(const ICLTensor *input, + const int block_shape_x, + const int block_shape_y, + const Size2D &padding_left, + const Size2D &padding_right, + ICLTensor *output); /** Initialise the kernel's input and output. (Static block shape and paddings) * * @param[in] compile_context The compile context to be used. @@ -84,8 +94,13 @@ public: * @param[in] padding_right The padding at the end of every dimension of the output tensor. * @param[out] output Tensor output. Data types supported: same as @p input */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, - ICLTensor *output); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input, + const int block_shape_x, + const int block_shape_y, + const Size2D &padding_left, + const Size2D &padding_right, + ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToBatchLayerKernel * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. @@ -95,7 +110,10 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output); + static Status validate(const ITensorInfo *input, + const ITensorInfo *block_shape, + const ITensorInfo *paddings, + const ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToBatchLayerKernel (Static block shape and paddings) * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. @@ -107,7 +125,12 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, const ITensorInfo *output); + static Status validate(const ITensorInfo *input, + const int block_shape_x, + const int block_shape_y, + const Size2D &padding_left, + const Size2D &padding_right, + const ITensorInfo *output); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp index c5ffdb588b..25662b5c62 100644 --- a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp +++ b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp @@ -27,6 +27,7 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -45,7 +46,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, i ARM_COMPUTE_RETURN_ERROR_ON(block_shape < 1); // Validate output if initialized - if(output->total_size() != 0) + if (output->total_size() != 0) { const DataLayout data_layout = input->data_layout(); const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); @@ -64,8 +65,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, i } } // namespace -CLSpaceToDepthLayerKernel::CLSpaceToDepthLayerKernel() - : _input(nullptr), _output(nullptr), _block_shape() +CLSpaceToDepthLayerKernel::CLSpaceToDepthLayerKernel() : _input(nullptr), _output(nullptr), _block_shape() { _type = CLKernelType::ELEMENTWISE; } @@ -75,10 +75,13 @@ void CLSpaceToDepthLayerKernel::configure(const ICLTensor *input, ICLTensor *out configure(CLKernelLibrary::get().get_compile_context(), input, output, block_shape); } -void CLSpaceToDepthLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape) +void CLSpaceToDepthLayerKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + ICLTensor *output, + int32_t block_shape) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - auto padding_info = get_padding_info({ input, output }); + auto padding_info = get_padding_info({input, output}); TensorShape output_shape = compute_space_to_depth_shape(input->info(), block_shape); auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type()); @@ -94,11 +97,14 @@ void CLSpaceToDepthLayerKernel::configure(const CLCompileContext &compile_contex // Create kernel CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(data_size_from_type(output->info()->data_type()))); + build_opts.add_option("-DDATA_TYPE=" + + get_cl_unsigned_type_from_element_size(data_size_from_type(output->info()->data_type()))); build_opts.add_option("-DCHANNEL_SIZE=" + support::cpp11::to_string(output->info()->dimension(idx_channel))); build_opts.add_option("-DBLOCK_SHAPE=" + support::cpp11::to_string(block_shape)); build_opts.add_option("-DWIDTH_IN=" + support::cpp11::to_string(output->info()->dimension(idx_width))); - _kernel = create_kernel(compile_context, "space_to_depth_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options()); + _kernel = create_kernel(compile_context, + "space_to_depth_" + lower_string(string_from_data_layout(input->info()->data_layout())), + build_opts.options()); // Configure kernel window Window win = calculate_max_window(*output->info(), Steps()); @@ -136,7 +142,6 @@ void CLSpaceToDepthLayerKernel::run(const Window &window, cl::CommandQueue &queu enqueue(queue, *this, slice_out, lws_hint()); ++batch_id; - } - while(window.slide_window_slice_3D(slice_out)); + } while (window.slide_window_slice_3D(slice_out)); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.h b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.h index bb1ac5f9a6..d0932919e0 100644 --- a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.h +++ b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.h @@ -25,6 +25,7 @@ #define ARM_COMPUTE_CLSPACETODEPTHLAYERKERNEL_H #include "arm_compute/core/Types.h" + #include "src/core/CL/ICLKernel.h" namespace arm_compute @@ -61,7 +62,8 @@ public: * @param[out] output Tensor output. Data types supported: same as @p input * @param[in] block_shape Block shape value. */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape); + void + configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape); /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToDepthLayerKernel. * * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. diff --git a/src/core/CL/kernels/CLStackLayerKernel.cpp b/src/core/CL/kernels/CLStackLayerKernel.cpp index 075c93ab60..23e26716e7 100644 --- a/src/core/CL/kernels/CLStackLayerKernel.cpp +++ b/src/core/CL/kernels/CLStackLayerKernel.cpp @@ -30,10 +30,10 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" - #include "support/StringSupport.h" using namespace arm_compute::misc::shape_calculator; @@ -42,7 +42,11 @@ namespace arm_compute { namespace { -Status validate_arguments(const ITensorInfo *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, const ITensorInfo *output) +Status validate_arguments(const ITensorInfo *input, + unsigned int axis, + unsigned int idx_input, + unsigned int num_tensors, + const ITensorInfo *output) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); @@ -51,9 +55,10 @@ Status validate_arguments(const ITensorInfo *input, unsigned int axis, unsigned ARM_COMPUTE_RETURN_ERROR_ON(axis > input->num_dimensions()); ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); - if(output->total_size() != 0) + if (output->total_size() != 0) { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), compute_stack_shape(*input, axis, num_tensors)); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), + compute_stack_shape(*input, axis, num_tensors)); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output); } @@ -61,7 +66,8 @@ Status validate_arguments(const ITensorInfo *input, unsigned int axis, unsigned return Status{}; } -std::pair validate_and_configure_window(ITensorInfo *input, unsigned int axis, unsigned int num_tensors, ITensorInfo *output) +std::pair +validate_and_configure_window(ITensorInfo *input, unsigned int axis, unsigned int num_tensors, ITensorInfo *output) { // Output auto inizialitation if not yet initialized auto_init_if_empty(*output, input->clone()->set_tensor_shape(compute_stack_shape(*input, axis, num_tensors))); @@ -73,18 +79,23 @@ std::pair validate_and_configure_window(ITensorInfo *input, unsi } } // namespace -CLStackLayerKernel::CLStackLayerKernel() - : _input(nullptr), _output(nullptr) +CLStackLayerKernel::CLStackLayerKernel() : _input(nullptr), _output(nullptr) { _type = CLKernelType::ELEMENTWISE; } -void CLStackLayerKernel::configure(const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output) +void CLStackLayerKernel::configure( + const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output) { configure(CLKernelLibrary::get().get_compile_context(), input, axis, idx_input, num_tensors, output); } -void CLStackLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output) +void CLStackLayerKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + unsigned int axis, + unsigned int idx_input, + unsigned int num_tensors, + ICLTensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), axis, idx_input, num_tensors, output->info())); @@ -112,10 +123,15 @@ void CLStackLayerKernel::configure(const CLCompileContext &compile_context, cons _kernel.setArg(idx, idx_input); } -Status CLStackLayerKernel::validate(const ITensorInfo *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, const ITensorInfo *output) +Status CLStackLayerKernel::validate(const ITensorInfo *input, + unsigned int axis, + unsigned int idx_input, + unsigned int num_tensors, + const ITensorInfo *output) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, axis, idx_input, num_tensors, output)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), axis, num_tensors, output->clone().get()).first); + ARM_COMPUTE_RETURN_ON_ERROR( + validate_and_configure_window(input->clone().get(), axis, num_tensors, output->clone().get()).first); return Status{}; } diff --git a/src/core/CL/kernels/CLStackLayerKernel.h b/src/core/CL/kernels/CLStackLayerKernel.h index 2865127a90..d3c17f529c 100644 --- a/src/core/CL/kernels/CLStackLayerKernel.h +++ b/src/core/CL/kernels/CLStackLayerKernel.h @@ -26,6 +26,7 @@ #define ARM_COMPUTE_CLSTACKLAYERKERNEL_H #include "arm_compute/core/Types.h" + #include "src/core/CL/ICLKernel.h" namespace arm_compute @@ -60,7 +61,8 @@ public: * @param[out] output Output tensor. Data types supported: Same as @p input. * */ - void configure(const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output); + void configure( + const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output); /** Initialise the kernel's inputs and output * * @note Supported input tensor rank: up to 4 @@ -74,7 +76,12 @@ public: * @param[out] output Output tensor. Data types supported: Same as @p input. * */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input, + unsigned int axis, + unsigned int idx_input, + unsigned int num_tensors, + ICLTensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLStackLayerKernel * * @note Supported input tensor rank: up to 4 @@ -88,7 +95,11 @@ public: * * @return a status */ - static Status validate(const ITensorInfo *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, const ITensorInfo *output); + static Status validate(const ITensorInfo *input, + unsigned int axis, + unsigned int idx_input, + unsigned int num_tensors, + const ITensorInfo *output); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/src/core/CL/kernels/CLStridedSliceKernel.cpp b/src/core/CL/kernels/CLStridedSliceKernel.cpp index 9acbafdb19..a8f6112820 100644 --- a/src/core/CL/kernels/CLStridedSliceKernel.cpp +++ b/src/core/CL/kernels/CLStridedSliceKernel.cpp @@ -22,11 +22,13 @@ * SOFTWARE. */ #include "src/core/CL/kernels/CLStridedSliceKernel.h" + #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/utils/helpers/tensor_transform.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" #include "src/core/utils/helpers/bit_ops.h" @@ -37,9 +39,14 @@ namespace arm_compute { namespace { -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) +Status validate_arguments(const ITensorInfo *input, + const ITensorInfo *output, + const Coordinates &starts, + const Coordinates &ends, + const BiStrides &strides, + int32_t begin_mask, + int32_t end_mask, + int32_t shrink_axis_mask) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); @@ -48,19 +55,16 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, ARM_COMPUTE_RETURN_ERROR_ON(starts.num_dimensions() > input->num_dimensions()); ARM_COMPUTE_RETURN_ERROR_ON(ends.num_dimensions() > input->num_dimensions()); ARM_COMPUTE_RETURN_ERROR_ON(strides.num_dimensions() > input->num_dimensions()); - ARM_COMPUTE_RETURN_ERROR_ON(std::any_of(strides.cbegin(), strides.cbegin() + strides.num_dimensions(), [](int i) - { - return i == 0; - })); + ARM_COMPUTE_RETURN_ERROR_ON( + std::any_of(strides.cbegin(), strides.cbegin() + strides.num_dimensions(), [](int i) { return i == 0; })); // Get expected output shape - const TensorShape exp_output_shape = arm_compute::misc::shape_calculator::compute_strided_slice_shape(*input, - starts, ends, strides, - begin_mask, end_mask, shrink_axis_mask); + const TensorShape exp_output_shape = arm_compute::misc::shape_calculator::compute_strided_slice_shape( + *input, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); ARM_COMPUTE_RETURN_ERROR_ON(exp_output_shape.total_size() == 0); // Checks output if configured - if(output->total_size() != 0) + if (output->total_size() != 0) { const TensorInfo exp_output_info = output->clone()->set_tensor_shape(exp_output_shape); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &exp_output_info); @@ -76,28 +80,33 @@ CLStridedSliceKernel::CLStridedSliceKernel() _type = CLKernelType::ELEMENTWISE; } -void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) +void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, + const ITensorInfo *input, + ITensorInfo *output, + const Coordinates &starts, + const Coordinates &ends, + const BiStrides &strides, + int32_t begin_mask, + int32_t end_mask, + int32_t shrink_axis_mask) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - auto padding_info = get_padding_info({ input, output }); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask)); + auto padding_info = get_padding_info({input, output}); + ARM_COMPUTE_ERROR_THROW_ON( + validate_arguments(input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask)); const TensorShape &input_shape = input->tensor_shape(); Coordinates starts_abs; Coordinates ends_abs; Coordinates final_strides; - std::tie(starts_abs, ends_abs, final_strides) = arm_compute::helpers::tensor_transform::calculate_strided_slice_coords( - input_shape, - starts, ends, strides, - begin_mask, end_mask, shrink_axis_mask); + std::tie(starts_abs, ends_abs, final_strides) = + arm_compute::helpers::tensor_transform::calculate_strided_slice_coords(input_shape, starts, ends, strides, + begin_mask, end_mask, shrink_axis_mask); // Configure kernel window - const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_strided_slice_shape(*input, - starts, ends, strides, - begin_mask, end_mask, shrink_axis_mask); + const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_strided_slice_shape( + *input, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask); auto_init_if_empty(*output, input->clone()->set_tensor_shape(output_shape)); Window win = calculate_max_window(*output, Steps()); @@ -108,29 +117,33 @@ void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, co const bool multi_access_x = !is_shrink_on_x && (final_strides.x() == 1) && (output_width_x / vec_size_x > 0); // Update window if needed - if(multi_access_x) + if (multi_access_x) { Window &updated_window = win; updated_window.set(Window::DimX, - Window::Dimension(updated_window.x().start(), ceil_to_multiple(updated_window.x().end(), vec_size_x), vec_size_x)); + Window::Dimension(updated_window.x().start(), + ceil_to_multiple(updated_window.x().end(), vec_size_x), vec_size_x)); } ICLKernel::configure_internal(win); // Create build options CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(data_size_from_type(input->data_type()))); - for(unsigned int i = 0; i < input_shape.num_dimensions(); ++i) + build_opts.add_option("-DDATA_TYPE=" + + get_cl_unsigned_type_from_element_size(data_size_from_type(input->data_type()))); + for (unsigned int i = 0; i < input_shape.num_dimensions(); ++i) { const bool is_shrink = arm_compute::helpers::bit_ops::is_bit_set(shrink_axis_mask, i); - build_opts.add_option("-DSTART_" + support::cpp11::to_string(i) + "=" + support::cpp11::to_string(starts_abs[i])); - build_opts.add_option("-DSTRIDE_" + support::cpp11::to_string(i) + "=" + support::cpp11::to_string(final_strides[i])); + build_opts.add_option("-DSTART_" + support::cpp11::to_string(i) + "=" + + support::cpp11::to_string(starts_abs[i])); + build_opts.add_option("-DSTRIDE_" + support::cpp11::to_string(i) + "=" + + support::cpp11::to_string(final_strides[i])); build_opts.add_option_if(is_shrink, "-DSHRINK_" + support::cpp11::to_string(i)); } - build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max(output_width_x - vec_size_x, 0))); + build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string( + std::max(output_width_x - vec_size_x, 0))); build_opts.add_option_if(multi_access_x, "-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x)); build_opts.add_option_if_else(input_shape.num_dimensions() > 2, - "-DSRC_DEPTH=" + support::cpp11::to_string(input_shape.z()), - "-DSRC_DEPTH=1"); + "-DSRC_DEPTH=" + support::cpp11::to_string(input_shape.z()), "-DSRC_DEPTH=1"); build_opts.add_option_if_else(output->num_dimensions() > 2, "-DDST_DEPTH=" + support::cpp11::to_string(output->tensor_shape().z()), "-DDST_DEPTH=1"); @@ -142,7 +155,7 @@ void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, co _config_id = "strided_slice"; _config_id += "_"; _config_id += lower_string(string_from_data_type(input->data_type())); - for(unsigned int i = 0; i < input_shape.num_dimensions(); ++i) + for (unsigned int i = 0; i < input_shape.num_dimensions(); ++i) { _config_id += "_"; _config_id += support::cpp11::to_string(input->dimension(i)); @@ -156,11 +169,17 @@ void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, co ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); } -Status CLStridedSliceKernel::validate(const ITensorInfo *input, const ITensorInfo *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) +Status CLStridedSliceKernel::validate(const ITensorInfo *input, + const ITensorInfo *output, + const Coordinates &starts, + const Coordinates &ends, + const BiStrides &strides, + int32_t begin_mask, + int32_t end_mask, + int32_t shrink_axis_mask) { - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask)); + ARM_COMPUTE_RETURN_ON_ERROR( + validate_arguments(input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask)); return Status{}; } @@ -170,8 +189,9 @@ void CLStridedSliceKernel::run_op(ITensorPack &tensors, const Window &window, cl ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - const auto src = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC)); - auto dst = utils::cast::polymorphic_downcast(tensors.get_tensor(TensorType::ACL_DST)); + const auto src = + utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC)); + auto dst = utils::cast::polymorphic_downcast(tensors.get_tensor(TensorType::ACL_DST)); Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); Window slice = window_collapsed.first_slice_window_4D(); @@ -182,7 +202,6 @@ void CLStridedSliceKernel::run_op(ITensorPack &tensors, const Window &window, cl add_4D_tensor_argument(idx, src, slice); add_4D_tensor_argument(idx, dst, slice); enqueue(queue, *this, slice, lws_hint()); - } - while(window_collapsed.slide_window_slice_4D(slice)); + } while (window_collapsed.slide_window_slice_4D(slice)); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLStridedSliceKernel.h b/src/core/CL/kernels/CLStridedSliceKernel.h index 4c201504f5..1cf5bcacec 100644 --- a/src/core/CL/kernels/CLStridedSliceKernel.h +++ b/src/core/CL/kernels/CLStridedSliceKernel.h @@ -25,6 +25,7 @@ #define ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H #include "arm_compute/core/Types.h" + #include "src/core/CL/ICLKernel.h" #include @@ -53,9 +54,15 @@ public: * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. * A slice of size 1 starting from starts[i] in the dimension must be preserved. */ - void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); + void configure(const CLCompileContext &compile_context, + const ITensorInfo *input, + ITensorInfo *output, + const Coordinates &starts, + const Coordinates &ends, + const BiStrides &strides, + int32_t begin_mask, + int32_t end_mask, + int32_t shrink_axis_mask); /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel * @@ -71,9 +78,14 @@ public: * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. * A slice of size 1 starting from starts[i] in the dimension must be preserved. */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); + static Status validate(const ITensorInfo *input, + const ITensorInfo *output, + const Coordinates &starts, + const Coordinates &ends, + const BiStrides &strides, + int32_t begin_mask, + int32_t end_mask, + int32_t shrink_axis_mask); // Inherited methods overridden: void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; diff --git a/src/core/CL/kernels/CLTileKernel.cpp b/src/core/CL/kernels/CLTileKernel.cpp index 3e7015cfd2..fa996c4008 100644 --- a/src/core/CL/kernels/CLTileKernel.cpp +++ b/src/core/CL/kernels/CLTileKernel.cpp @@ -22,9 +22,11 @@ * SOFTWARE. */ #include "src/core/CL/kernels/CLTileKernel.h" + #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" #include "support/StringSupport.h" @@ -39,15 +41,13 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON(multiples.size() > 4); ARM_COMPUTE_RETURN_ERROR_ON(multiples.empty()); - ARM_COMPUTE_RETURN_ERROR_ON(std::any_of(multiples.begin(), multiples.end(), [](uint32_t e) - { - return e == 0; - })); + ARM_COMPUTE_RETURN_ERROR_ON(std::any_of(multiples.begin(), multiples.end(), [](uint32_t e) { return e == 0; })); // Validate output if initialized - if(output->total_size() != 0) + if (output->total_size() != 0) { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(misc::shape_calculator::compute_tiled_shape(input->tensor_shape(), multiples), output->tensor_shape()); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS( + misc::shape_calculator::compute_tiled_shape(input->tensor_shape(), multiples), output->tensor_shape()); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); } @@ -55,8 +55,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c } } // namespace -CLTileKernel::CLTileKernel() - : _input(nullptr), _output(nullptr) +CLTileKernel::CLTileKernel() : _input(nullptr), _output(nullptr) { _type = CLKernelType::ELEMENTWISE; } @@ -66,7 +65,10 @@ void CLTileKernel::configure(const ICLTensor *input, ICLTensor *output, const Mu configure(CLKernelLibrary::get().get_compile_context(), input, output, multiples); } -void CLTileKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Multiples &multiples) +void CLTileKernel::configure(const CLCompileContext &compile_context, + const ICLTensor *input, + ICLTensor *output, + const Multiples &multiples) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -104,15 +106,14 @@ void CLTileKernel::configure(const CLCompileContext &compile_context, const ICLT // Configure window without padding Window win = calculate_max_window(*output->info()); - if(multi_access_x) + if (multi_access_x) { // If multi-access is enabled, no thread should cross the tile boundaries. This means we need // as many threads as those to cover a single tile times multiples[0]. Note that if threads // do not cross the boundaries of the tiles, they won't cross the boundaries of the last tile, and // we don't need to pad the output const unsigned int size_win_x = ceil_to_multiple(input->info()->dimension(0), vec_size_x) * multiples[0]; - win.set(Window::DimX, - Window::Dimension(win.x().start(), size_win_x, vec_size_x)); + win.set(Window::DimX, Window::Dimension(win.x().start(), size_win_x, vec_size_x)); } ICLKernel::configure_internal(win); @@ -121,7 +122,7 @@ void CLTileKernel::configure(const CLCompileContext &compile_context, const ICLT _config_id = "tile"; _config_id += "_"; _config_id += lower_string(string_from_data_type(input->info()->data_type())); - for(unsigned int i = 0; i < multiples.size(); ++i) + for (unsigned int i = 0; i < multiples.size(); ++i) { _config_id += "_"; _config_id += support::cpp11::to_string(input->info()->dimension(i)); @@ -150,7 +151,6 @@ void CLTileKernel::run(const Window &window, cl::CommandQueue &queue) add_4D_tensor_argument(idx, _input, slice); add_4D_tensor_argument(idx, _output, slice); enqueue(queue, *this, slice, lws_hint()); - } - while(collapsed.slide_window_slice_4D(slice)); + } while (collapsed.slide_window_slice_4D(slice)); } } // namespace arm_compute diff --git a/src/core/CL/kernels/CLTileKernel.h b/src/core/CL/kernels/CLTileKernel.h index 41752ca90b..c3486aecef 100644 --- a/src/core/CL/kernels/CLTileKernel.h +++ b/src/core/CL/kernels/CLTileKernel.h @@ -64,7 +64,10 @@ public: * @param[out] output Destination tensor. Same as @p input * */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Multiples &multiples); + void configure(const CLCompileContext &compile_context, + const ICLTensor *input, + ICLTensor *output, + const Multiples &multiples); /** Static function to check if given info will lead to a valid configuration of @ref CLTileKernel * * @param[in] input Source tensor info. Data type supported: All. -- cgit v1.2.1