about | summary | refs | log | tree | commit | diff
path: root/src/core/CL/kernels
diff options
context:
space:
mode:
author: Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com> 2023-09-27 17:46:17 +0100
committer: felixjohnny.thomasmathibalan <felixjohnny.thomasmathibalan@arm.com> 2023-09-28 12:08:05 +0000
commit: afd38f0c617d6f89b2b4532c6c44f116617e2b6f (patch)
tree: 03bc7d5a762099989b16a656fa8d397b490ed70e /src/core/CL/kernels
parent: bdcb4c148ee2fdeaaddf4cf1e57bbb0de02bb894 (diff)
download: ComputeLibrary-afd38f0c617d6f89b2b4532c6c44f116617e2b6f.tar.gz
Apply clang-format on repository
Code is formatted as per a revised clang format configuration file (not part of this delivery). Version 14.0.6 is used.

Exclusion List:
- files with .cl extension
- files that are not strictly C/C++ (e.g. Android.bp, Sconscript ...)

And the following directories:
- compute_kernel_writer/validation/
- tests/
- include/
- src/core/NEON/kernels/convolution/
- src/core/NEON/kernels/arm_gemm/
- src/core/NEON/kernels/arm_conv/
- data/

There will be a follow-up for formatting of .cl files and the files under tests/ and compute_kernel_writer/validation/.

Signed-off-by: Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>
Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Diffstat (limited to 'src/core/CL/kernels')
-rw-r--r--src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp80
-rw-r--r--src/core/CL/kernels/CLArgMinMaxLayerKernel.h10
-rw-r--r--src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp125
-rw-r--r--src/core/CL/kernels/CLBatchNormalizationLayerKernel.h32
-rw-r--r--src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp76
-rw-r--r--src/core/CL/kernels/CLBatchToSpaceLayerKernel.h25
-rw-r--r--src/core/CL/kernels/CLBitwiseKernel.cpp25
-rw-r--r--src/core/CL/kernels/CLBitwiseKernel.h6
-rw-r--r--src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp42
-rw-r--r--src/core/CL/kernels/CLBoundingBoxTransformKernel.h16
-rw-r--r--src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp64
-rw-r--r--src/core/CL/kernels/CLChannelShuffleLayerKernel.h5
-rw-r--r--src/core/CL/kernels/CLComparisonKernel.cpp75
-rw-r--r--src/core/CL/kernels/CLComparisonKernel.h14
-rw-r--r--src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp25
-rw-r--r--src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h5
-rw-r--r--src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp84
-rw-r--r--src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h23
-rw-r--r--src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp29
-rw-r--r--src/core/CL/kernels/CLDepthToSpaceLayerKernel.h4
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp199
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h45
-rw-r--r--src/core/CL/kernels/CLFFTDigitReverseKernel.cpp42
-rw-r--r--src/core/CL/kernels/CLFFTDigitReverseKernel.h18
-rw-r--r--src/core/CL/kernels/CLFFTRadixStageKernel.cpp46
-rw-r--r--src/core/CL/kernels/CLFFTRadixStageKernel.h9
-rw-r--r--src/core/CL/kernels/CLFFTScaleKernel.cpp26
-rw-r--r--src/core/CL/kernels/CLFFTScaleKernel.h9
-rw-r--r--src/core/CL/kernels/CLFillBorderKernel.cpp59
-rw-r--r--src/core/CL/kernels/CLFillBorderKernel.h18
-rw-r--r--src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp129
-rw-r--r--src/core/CL/kernels/CLFuseBatchNormalizationKernel.h41
-rw-r--r--src/core/CL/kernels/CLGatherKernel.cpp36
-rw-r--r--src/core/CL/kernels/CLGatherKernel.h10
-rw-r--r--src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp33
-rw-r--r--src/core/CL/kernels/CLGenerateProposalsLayerKernel.h7
-rw-r--r--src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp54
-rw-r--r--src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h16
-rw-r--r--src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp45
-rw-r--r--src/core/CL/kernels/CLL2NormalizeLayerKernel.h11
-rw-r--r--src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp45
-rw-r--r--src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h11
-rw-r--r--src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp19
-rw-r--r--src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h5
-rw-r--r--src/core/CL/kernels/CLNormalizationLayerKernel.cpp72
-rw-r--r--src/core/CL/kernels/CLNormalizationLayerKernel.h7
-rw-r--r--src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp56
-rw-r--r--src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h9
-rw-r--r--src/core/CL/kernels/CLPadLayerKernel.cpp95
-rw-r--r--src/core/CL/kernels/CLPadLayerKernel.h20
-rw-r--r--src/core/CL/kernels/CLPriorBoxLayerKernel.cpp83
-rw-r--r--src/core/CL/kernels/CLPriorBoxLayerKernel.h27
-rw-r--r--src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp47
-rw-r--r--src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h9
-rw-r--r--src/core/CL/kernels/CLROIAlignLayerKernel.cpp51
-rw-r--r--src/core/CL/kernels/CLROIAlignLayerKernel.h14
-rw-r--r--src/core/CL/kernels/CLROIPoolingLayerKernel.cpp38
-rw-r--r--src/core/CL/kernels/CLROIPoolingLayerKernel.h14
-rw-r--r--src/core/CL/kernels/CLRangeKernel.cpp38
-rw-r--r--src/core/CL/kernels/CLRangeKernel.h1
-rw-r--r--src/core/CL/kernels/CLReductionOperationKernel.cpp103
-rw-r--r--src/core/CL/kernels/CLReductionOperationKernel.h10
-rw-r--r--src/core/CL/kernels/CLReorgLayerKernel.cpp41
-rw-r--r--src/core/CL/kernels/CLReorgLayerKernel.h1
-rw-r--r--src/core/CL/kernels/CLReverseKernel.cpp16
-rw-r--r--src/core/CL/kernels/CLReverseKernel.h5
-rw-r--r--src/core/CL/kernels/CLSelectKernel.cpp33
-rw-r--r--src/core/CL/kernels/CLSelectKernel.h7
-rw-r--r--src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp100
-rw-r--r--src/core/CL/kernels/CLSpaceToBatchLayerKernel.h35
-rw-r--r--src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp23
-rw-r--r--src/core/CL/kernels/CLSpaceToDepthLayerKernel.h4
-rw-r--r--src/core/CL/kernels/CLStackLayerKernel.cpp38
-rw-r--r--src/core/CL/kernels/CLStackLayerKernel.h17
-rw-r--r--src/core/CL/kernels/CLStridedSliceKernel.cpp101
-rw-r--r--src/core/CL/kernels/CLStridedSliceKernel.h24
-rw-r--r--src/core/CL/kernels/CLTileKernel.cpp30
-rw-r--r--src/core/CL/kernels/CLTileKernel.h5
78 files changed, 1903 insertions, 969 deletions
diff --git a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp
index 2728958add..5b72354abe 100644
--- a/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp
@@ -31,6 +31,7 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/Validate.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
@@ -44,16 +45,20 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S32, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+ DataType::S32, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S32, DataType::S64);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX && op != ReductionOperation::ARG_IDX_MIN, "Only ARG_IDX_MAX and ARG_IDX_MIN are supported");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX && op != ReductionOperation::ARG_IDX_MIN,
+ "Only ARG_IDX_MAX and ARG_IDX_MIN are supported");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions,
+ "Reduction axis greater than max number of dimensions");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis");
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U32, DataType::S32, DataType::S64, DataType::U64);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U32, DataType::S32, DataType::S64,
+ DataType::U64);
}
return Status{};
@@ -66,22 +71,34 @@ CLArgMinMaxLayerKernel::CLArgMinMaxLayerKernel()
_type = CLKernelType::ELEMENTWISE;
}
-void CLArgMinMaxLayerKernel::configure(const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op)
+void CLArgMinMaxLayerKernel::configure(const ICLTensor *input,
+ ICLTensor *output,
+ unsigned int axis,
+ ReductionOperation op)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, axis, op);
}
-void CLArgMinMaxLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op)
+void CLArgMinMaxLayerKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ unsigned int axis,
+ ReductionOperation op)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- TensorShape output_shape{ input->info()->tensor_shape() };
+ TensorShape output_shape{input->info()->tensor_shape()};
output_shape.set(axis, 1);
- auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape).set_data_type(DataType::S32).reset_padding().set_is_resizable(true));
+ auto_init_if_empty(*output->info(), input->info()
+ ->clone()
+ ->set_tensor_shape(output_shape)
+ .set_data_type(DataType::S32)
+ .reset_padding()
+ .set_is_resizable(true));
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), axis, op));
- auto padding_info = get_padding_info({ input, output });
+ auto padding_info = get_padding_info({input, output});
_input = input;
_output = output;
@@ -90,11 +107,14 @@ void CLArgMinMaxLayerKernel::configure(const CLCompileContext &compile_context,
// Set build options
const auto adjusted_vector_size = adjust_vec_size(16U, input->info()->dimension(0));
- const auto vector_size = (adjusted_vector_size == 3U && axis == 0U) ? 2U : adjusted_vector_size; // the opencl kernel only supports sizes 2, 4, 8 and 16.
+ const auto vector_size = (adjusted_vector_size == 3U && axis == 0U)
+ ? 2U
+ : adjusted_vector_size; // the opencl kernel only supports sizes 2, 4, 8 and 16.
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->info()->dimension(0) % vector_size));
+ build_opts.add_option("-DVEC_SIZE_LEFTOVER=" +
+ support::cpp11::to_string(input->info()->dimension(0) % vector_size));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vector_size));
build_opts.add_option_if(is_data_type_float(input->info()->data_type()), "-DFLOAT_DATA_TYPE");
build_opts.add_option_if_else(op == ReductionOperation::ARG_IDX_MAX, "-DARG_MAX", "-DARG_MIN");
@@ -104,7 +124,7 @@ void CLArgMinMaxLayerKernel::configure(const CLCompileContext &compile_context,
// Create kernel
std::string kernel_axis_name;
- switch(axis)
+ switch (axis)
{
case 0:
build_opts.add_option("-DWIDTH=" + support::cpp11::to_string(input->info()->dimension(0)));
@@ -135,7 +155,10 @@ void CLArgMinMaxLayerKernel::configure(const CLCompileContext &compile_context,
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-Status CLArgMinMaxLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op)
+Status CLArgMinMaxLayerKernel::validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ unsigned int axis,
+ ReductionOperation op)
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, axis, op));
return Status{};
@@ -146,7 +169,7 @@ void CLArgMinMaxLayerKernel::run(const Window &window, cl::CommandQueue &queue)
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
- switch(_reduction_axis)
+ switch (_reduction_axis)
{
case 0:
{
@@ -154,7 +177,8 @@ void CLArgMinMaxLayerKernel::run(const Window &window, cl::CommandQueue &queue)
Window out_window(window);
Window in_window(window);
out_window.set(Window::DimX, Window::Dimension(0, 0, 0));
- in_window.set(Window::DimX, Window::Dimension(0, _input->info()->dimension(0), _input->info()->dimension(0)));
+ in_window.set(Window::DimX,
+ Window::Dimension(0, _input->info()->dimension(0), _input->info()->dimension(0)));
in_window.set(Window::DimY, Window::Dimension(0, _input->info()->dimension(1), 1u));
// Get first input and output slices
@@ -166,15 +190,15 @@ void CLArgMinMaxLayerKernel::run(const Window &window, cl::CommandQueue &queue)
add_2D_tensor_argument(idx, _input, in_slice);
add_2D_tensor_argument(idx, _output, out_slice);
enqueue(queue, *this, in_slice, lws_hint());
- }
- while(in_window.slide_window_slice_2D(in_slice) && out_window.slide_window_slice_2D(out_slice));
+ } while (in_window.slide_window_slice_2D(in_slice) && out_window.slide_window_slice_2D(out_slice));
}
break;
case 1:
{
// Get first input and output slices
- Window window_in{ window };
- window_in.set(Window::DimY, Window::Dimension(0, _input->info()->dimension(1), _input->info()->dimension(1)));
+ Window window_in{window};
+ window_in.set(Window::DimY,
+ Window::Dimension(0, _input->info()->dimension(1), _input->info()->dimension(1)));
Window in_slice = window_in.first_slice_window_2D();
Window out_slice = window.first_slice_window_2D();
@@ -184,15 +208,15 @@ void CLArgMinMaxLayerKernel::run(const Window &window, cl::CommandQueue &queue)
add_2D_tensor_argument(idx, _input, in_slice);
add_2D_tensor_argument(idx, _output, out_slice);
enqueue(queue, *this, in_slice, lws_hint());
- }
- while(window_in.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(out_slice));
+ } while (window_in.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(out_slice));
}
break;
case 2:
{
// Get first input and output slices
- Window window_in{ window };
- window_in.set(Window::DimZ, Window::Dimension(0, _input->info()->dimension(2), _input->info()->dimension(2)));
+ Window window_in{window};
+ window_in.set(Window::DimZ,
+ Window::Dimension(0, _input->info()->dimension(2), _input->info()->dimension(2)));
Window in_slice = window_in.first_slice_window_3D();
Window out_slice = window.first_slice_window_3D();
@@ -202,14 +226,13 @@ void CLArgMinMaxLayerKernel::run(const Window &window, cl::CommandQueue &queue)
add_3D_tensor_argument(idx, _input, in_slice);
add_3D_tensor_argument(idx, _output, out_slice);
enqueue(queue, *this, in_slice, lws_hint());
- }
- while(window_in.slide_window_slice_3D(in_slice) && window.slide_window_slice_3D(out_slice));
+ } while (window_in.slide_window_slice_3D(in_slice) && window.slide_window_slice_3D(out_slice));
}
break;
case 3:
{
// Get first input and output slices
- Window window_in{ window };
+ Window window_in{window};
window_in.set(3, Window::Dimension(0, 1, 1));
Window in_slice = window_in.first_slice_window_4D();
Window out_slice = window.first_slice_window_4D();
@@ -220,8 +243,7 @@ void CLArgMinMaxLayerKernel::run(const Window &window, cl::CommandQueue &queue)
add_4D_tensor_argument(idx, _input, in_slice);
add_4D_tensor_argument(idx, _output, out_slice);
enqueue(queue, *this, in_slice, lws_hint());
- }
- while(window_in.slide_window_slice_4D(in_slice) && window.slide_window_slice_4D(out_slice));
+ } while (window_in.slide_window_slice_4D(in_slice) && window.slide_window_slice_4D(out_slice));
}
break;
default:
diff --git a/src/core/CL/kernels/CLArgMinMaxLayerKernel.h b/src/core/CL/kernels/CLArgMinMaxLayerKernel.h
index 5f36bdf113..fb3b41b0de 100644
--- a/src/core/CL/kernels/CLArgMinMaxLayerKernel.h
+++ b/src/core/CL/kernels/CLArgMinMaxLayerKernel.h
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_CLARGMINMAXLAYERKERNEL_H
#include "arm_compute/core/Types.h"
+
#include "src/core/CL/ICLKernel.h"
namespace arm_compute
@@ -72,7 +73,11 @@ public:
* @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3
* @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported.
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ unsigned int axis,
+ ReductionOperation op);
/** Static function to check if given info will lead to a valid configuration of @ref CLArgMinMaxLayerKernel.
*
@@ -84,7 +89,8 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op);
+ static Status
+ validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
index 3fa8a8edaa..c88a852a44 100644
--- a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp
@@ -23,58 +23,64 @@
*/
#include "src/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
-#include "arm_compute/core/utils/ActivationFunctionUtils.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/utils/ActivationFunctionUtils.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
-
#include "support/StringSupport.h"
using namespace arm_compute;
namespace
{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
- const ITensorInfo *mean, const ITensorInfo *var,
- const ITensorInfo *beta, const ITensorInfo *gamma,
- float epsilon, ActivationLayerInfo act_info)
+Status validate_arguments(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const ITensorInfo *mean,
+ const ITensorInfo *var,
+ const ITensorInfo *beta,
+ const ITensorInfo *gamma,
+ float epsilon,
+ ActivationLayerInfo act_info)
{
ARM_COMPUTE_UNUSED(epsilon);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, var);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, mean, var);
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL)) != mean->dimension(0));
- if(beta != nullptr)
+ ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(get_data_layout_dimension_index(
+ input->data_layout(), DataLayoutDimension::CHANNEL)) != mean->dimension(0));
+ if (beta != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, beta);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, beta);
}
- if(gamma != nullptr)
+ if (gamma != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, gamma);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, gamma);
}
- if(act_info.enabled())
+ if (act_info.enabled())
{
ActivationLayerInfo::ActivationFunction act = act_info.activation();
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() != DataType::F32 && input->data_type() != DataType::F16);
- ARM_COMPUTE_RETURN_ERROR_ON(act != ActivationLayerInfo::ActivationLayerInfo::ActivationFunction::RELU
- && act != ActivationLayerInfo::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU
- && act != ActivationLayerInfo::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU);
+ ARM_COMPUTE_RETURN_ERROR_ON(act != ActivationLayerInfo::ActivationLayerInfo::ActivationFunction::RELU &&
+ act != ActivationLayerInfo::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU &&
+ act !=
+ ActivationLayerInfo::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU);
ARM_COMPUTE_RETURN_ERROR_ON(act_info.b() > act_info.a());
}
- if(output != nullptr && output->total_size() != 0)
+ if (output != nullptr && output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
@@ -86,14 +92,15 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
std::pair<Status, Window> validate_and_configure_window_nchw(ITensorInfo *input, ITensorInfo *output)
{
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / input->element_size(), input->dimension(0));
+ const unsigned int num_elems_processed_per_iteration =
+ adjust_vec_size(16 / input->element_size(), input->dimension(0));
// Configure kernel window
Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
bool window_changed = false;
- if(output != nullptr)
+ if (output != nullptr)
{
AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
window_changed = update_window_and_padding(win, input_access, output_access);
@@ -104,30 +111,50 @@ std::pair<Status, Window> validate_and_configure_window_nchw(ITensorInfo *input,
window_changed = update_window_and_padding(win, input_access);
}
- Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+ Status err =
+ (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
return std::make_pair(err, win);
}
} // namespace
CLBatchNormalizationLayerKernel::CLBatchNormalizationLayerKernel()
- : _input(nullptr), _output(nullptr), _mean(nullptr), _var(nullptr), _beta(nullptr), _gamma(nullptr), _epsilon(0), _run_in_place(false)
+ : _input(nullptr),
+ _output(nullptr),
+ _mean(nullptr),
+ _var(nullptr),
+ _beta(nullptr),
+ _gamma(nullptr),
+ _epsilon(0),
+ _run_in_place(false)
{
_type = CLKernelType::ELEMENTWISE;
}
-void CLBatchNormalizationLayerKernel::configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma,
- float epsilon, ActivationLayerInfo act_info)
+void CLBatchNormalizationLayerKernel::configure(ICLTensor *input,
+ ICLTensor *output,
+ const ICLTensor *mean,
+ const ICLTensor *var,
+ const ICLTensor *beta,
+ const ICLTensor *gamma,
+ float epsilon,
+ ActivationLayerInfo act_info)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, mean, var, beta, gamma, epsilon, act_info);
}
-void CLBatchNormalizationLayerKernel::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta,
- const ICLTensor *gamma,
- float epsilon, ActivationLayerInfo act_info)
+void CLBatchNormalizationLayerKernel::configure(const CLCompileContext &compile_context,
+ ICLTensor *input,
+ ICLTensor *output,
+ const ICLTensor *mean,
+ const ICLTensor *var,
+ const ICLTensor *beta,
+ const ICLTensor *gamma,
+ float epsilon,
+ ActivationLayerInfo act_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, mean, var);
- auto padding_info = get_padding_info({ input, output, mean, var, beta, gamma });
+ auto padding_info = get_padding_info({input, output, mean, var, beta, gamma});
_input = input;
_output = output;
_mean = mean;
@@ -142,13 +169,15 @@ void CLBatchNormalizationLayerKernel::configure(const CLCompileContext &compile_
mean->info(), var->info(), (beta != nullptr) ? beta->info() : nullptr,
(gamma != nullptr) ? gamma->info() : nullptr, epsilon, act_info));
- unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / input->info()->element_size(), input->info()->dimension(0));
+ unsigned int num_elems_processed_per_iteration =
+ adjust_vec_size(16 / input->info()->element_size(), input->info()->dimension(0));
// Set build options
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
- build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->info()->dimension(0) % num_elems_processed_per_iteration));
+ build_opts.add_option("-DVEC_SIZE_LEFTOVER=" +
+ support::cpp11::to_string(input->info()->dimension(0) % num_elems_processed_per_iteration));
build_opts.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_info.activation())));
build_opts.add_option_if(act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(act_info.a()));
build_opts.add_option_if(act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(act_info.b()));
@@ -157,29 +186,33 @@ void CLBatchNormalizationLayerKernel::configure(const CLCompileContext &compile_
build_opts.add_option_if(gamma == nullptr, "-DUSE_DEFAULT_GAMMA");
// Create kernel
- _kernel = create_kernel(compile_context, "batchnormalization_layer_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options());
+ _kernel =
+ create_kernel(compile_context,
+ "batchnormalization_layer_" + lower_string(string_from_data_layout(input->info()->data_layout())),
+ build_opts.options());
// Set kernel static arguments
unsigned int include_output = (!_run_in_place) ? 1 : 0;
- unsigned int idx = (1 + include_output) * num_arguments_per_3D_tensor() + 2 * num_arguments_per_1D_tensor(); // Skip the input and output parameters
- if(_beta != nullptr)
+ unsigned int idx = (1 + include_output) * num_arguments_per_3D_tensor() +
+ 2 * num_arguments_per_1D_tensor(); // Skip the input and output parameters
+ if (_beta != nullptr)
{
idx += num_arguments_per_1D_tensor(); // Skip beta parameter
}
- if(_gamma != nullptr)
+ if (_gamma != nullptr)
{
idx += num_arguments_per_1D_tensor(); // Skip gamma parameter
}
_kernel.setArg<cl_float>(idx++, _epsilon);
- if(output != nullptr)
+ if (output != nullptr)
{
// Output tensor auto initialization if not yet initialized
auto_init_if_empty(*output->info(), *input->info()->clone());
}
// Configure kernel window
- if(input->info()->data_layout() == DataLayout::NHWC)
+ if (input->info()->data_layout() == DataLayout::NHWC)
{
Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
ICLKernel::configure_internal(win);
@@ -205,18 +238,23 @@ void CLBatchNormalizationLayerKernel::configure(const CLCompileContext &compile_
_config_id += lower_string(string_from_data_layout(input->info()->data_layout()));
}
-Status CLBatchNormalizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
- const ITensorInfo *mean, const ITensorInfo *var,
- const ITensorInfo *beta, const ITensorInfo *gamma,
- float epsilon, ActivationLayerInfo act_info)
+Status CLBatchNormalizationLayerKernel::validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const ITensorInfo *mean,
+ const ITensorInfo *var,
+ const ITensorInfo *beta,
+ const ITensorInfo *gamma,
+ float epsilon,
+ ActivationLayerInfo act_info)
{
const bool run_in_place = (output == nullptr) || (output == input);
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, mean, var, beta, gamma, epsilon, act_info));
- if(input->data_layout() != DataLayout::NHWC)
+ if (input->data_layout() != DataLayout::NHWC)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window_nchw(input->clone().get(), (run_in_place) ? nullptr : output->clone().get())
- .first);
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_and_configure_window_nchw(input->clone().get(), (run_in_place) ? nullptr : output->clone().get())
+ .first);
}
return Status{};
@@ -236,11 +274,11 @@ void CLBatchNormalizationLayerKernel::run(const Window &window, cl::CommandQueue
unsigned int idx = (1 + include_output) * num_arguments_per_3D_tensor();
add_1D_tensor_argument(idx, _mean, vector_slice);
add_1D_tensor_argument(idx, _var, vector_slice);
- if(_beta != nullptr)
+ if (_beta != nullptr)
{
add_1D_tensor_argument(idx, _beta, vector_slice);
}
- if(_gamma != nullptr)
+ if (_gamma != nullptr)
{
add_1D_tensor_argument(idx, _gamma, vector_slice);
}
@@ -249,11 +287,10 @@ void CLBatchNormalizationLayerKernel::run(const Window &window, cl::CommandQueue
{
idx = 0;
add_3D_tensor_argument(idx, _input, slice);
- if(!_run_in_place)
+ if (!_run_in_place)
{
add_3D_tensor_argument(idx, _output, slice);
}
enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_3D(slice));
+ } while (window.slide_window_slice_3D(slice));
}
diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.h
index acbe0f2a26..1a88d2a8c5 100644
--- a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.h
+++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.h
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H
#include "arm_compute/function_info/ActivationLayerInfo.h"
+
#include "src/core/CL/ICLKernel.h"
namespace arm_compute
@@ -64,7 +65,13 @@ public:
* @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
*/
- void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr, const ICLTensor *gamma = nullptr, float epsilon = 0.001f,
+ void configure(ICLTensor *input,
+ ICLTensor *output,
+ const ICLTensor *mean,
+ const ICLTensor *var,
+ const ICLTensor *beta = nullptr,
+ const ICLTensor *gamma = nullptr,
+ float epsilon = 0.001f,
ActivationLayerInfo act_info = ActivationLayerInfo());
/** Set the input and output tensors.
*
@@ -82,8 +89,15 @@ public:
* @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
*/
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr,
- const ICLTensor *gamma = nullptr, float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo());
+ void configure(const CLCompileContext &compile_context,
+ ICLTensor *input,
+ ICLTensor *output,
+ const ICLTensor *mean,
+ const ICLTensor *var,
+ const ICLTensor *beta = nullptr,
+ const ICLTensor *gamma = nullptr,
+ float epsilon = 0.001f,
+ ActivationLayerInfo act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref CLBatchNormalizationLayerKernel
*
* @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result.
@@ -99,10 +113,14 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const ITensorInfo *mean, const ITensorInfo *var,
- const ITensorInfo *beta = nullptr, const ITensorInfo *gamma = nullptr,
- float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo());
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const ITensorInfo *mean,
+ const ITensorInfo *var,
+ const ITensorInfo *beta = nullptr,
+ const ITensorInfo *gamma = nullptr,
+ float epsilon = 0.001f,
+ ActivationLayerInfo act_info = ActivationLayerInfo());
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp
index 143a842d02..c640b5a8d6 100644
--- a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp
+++ b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp
@@ -25,13 +25,14 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
-#include "arm_compute/core/TensorInfo.h"
using namespace arm_compute::misc::shape_calculator;
namespace arm_compute
@@ -46,7 +47,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *block_inf
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
// Validate output if initialized
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 4);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
@@ -54,7 +55,11 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *block_inf
return Status{};
}
-Status validate_arguments_static(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const ITensorInfo *output, const CropInfo &crop_info)
+Status validate_arguments_static(const ITensorInfo *input,
+ const int block_shape_x,
+ const int block_shape_y,
+ const ITensorInfo *output,
+ const CropInfo &crop_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4);
@@ -66,10 +71,11 @@ Status validate_arguments_static(const ITensorInfo *input, const int block_shape
ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_batch] % (block_shape_x * block_shape_y) != 0);
// Validate output if initialized
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
- const TensorShape expected_output_shape = compute_batch_to_space_shape(input->data_layout(), input->tensor_shape(), block_shape_x, block_shape_y, crop_info);
- const TensorInfo expected_output = output->clone()->set_tensor_shape(expected_output_shape);
+ const TensorShape expected_output_shape = compute_batch_to_space_shape(
+ input->data_layout(), input->tensor_shape(), block_shape_x, block_shape_y, crop_info);
+ const TensorInfo expected_output = output->clone()->set_tensor_shape(expected_output_shape);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &expected_output);
ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 4);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
@@ -79,8 +85,7 @@ Status validate_arguments_static(const ITensorInfo *input, const int block_shape
}
} // namespace
-CLBatchToSpaceLayerKernel::CLBatchToSpaceLayerKernel()
- : _input(nullptr), _block_shape(nullptr), _output(nullptr)
+CLBatchToSpaceLayerKernel::CLBatchToSpaceLayerKernel() : _input(nullptr), _block_shape(nullptr), _output(nullptr)
{
_type = CLKernelType::ELEMENTWISE;
}
@@ -90,11 +95,14 @@ void CLBatchToSpaceLayerKernel::configure(const ICLTensor *input, const ICLTenso
configure(CLKernelLibrary::get().get_compile_context(), input, block_shape, output);
}
-void CLBatchToSpaceLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output)
+void CLBatchToSpaceLayerKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ const ICLTensor *block_shape,
+ ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- auto padding_info = get_padding_info({ input, block_shape, output });
+ auto padding_info = get_padding_info({input, block_shape, output});
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), block_shape->info(), output->info()));
@@ -106,8 +114,9 @@ void CLBatchToSpaceLayerKernel::configure(const CLCompileContext &compile_contex
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type()));
build_opts.add_option("-DBATCH_SIZE=" + support::cpp11::to_string(output->info()->dimension(3)));
- _kernel = create_kernel(compile_context, "batch_to_space_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options());
-
+ _kernel = create_kernel(compile_context,
+ "batch_to_space_" + lower_string(string_from_data_layout(input->info()->data_layout())),
+ build_opts.options());
// Configure kernel window
Window win = calculate_max_window(*output->info(), Steps());
@@ -116,47 +125,65 @@ void CLBatchToSpaceLayerKernel::configure(const CLCompileContext &compile_contex
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-void CLBatchToSpaceLayerKernel::configure(const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output, const CropInfo &crop_info)
+void CLBatchToSpaceLayerKernel::configure(const ICLTensor *input,
+ const int32_t block_shape_x,
+ const int32_t block_shape_y,
+ ICLTensor *output,
+ const CropInfo &crop_info)
{
configure(CLKernelLibrary::get().get_compile_context(), input, block_shape_x, block_shape_y, output, crop_info);
}
-void CLBatchToSpaceLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output,
- const CropInfo &crop_info)
+void CLBatchToSpaceLayerKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ const int32_t block_shape_x,
+ const int32_t block_shape_y,
+ ICLTensor *output,
+ const CropInfo &crop_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- const TensorShape output_shape = compute_batch_to_space_shape(input->info()->data_layout(), input->info()->tensor_shape(), block_shape_x, block_shape_y);
+ const TensorShape output_shape = compute_batch_to_space_shape(
+ input->info()->data_layout(), input->info()->tensor_shape(), block_shape_x, block_shape_y);
auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_static(input->info(), block_shape_x, block_shape_y, output->info(), crop_info));
+ ARM_COMPUTE_ERROR_THROW_ON(
+ validate_arguments_static(input->info(), block_shape_x, block_shape_y, output->info(), crop_info));
_input = input;
_output = output;
// Create kernel
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(data_size_from_type(input->info()->data_type())));
+ build_opts.add_option("-DDATA_TYPE=" +
+ get_cl_unsigned_type_from_element_size(data_size_from_type(input->info()->data_type())));
build_opts.add_option("-DBATCH_SIZE=" + support::cpp11::to_string(output->info()->dimension(3)));
build_opts.add_option("-DBLOCK_SHAPE_X=" + support::cpp11::to_string(block_shape_x));
build_opts.add_option("-DBLOCK_SHAPE_Y=" + support::cpp11::to_string(block_shape_y));
build_opts.add_option("-DCROP_LEFT=" + support::cpp11::to_string(crop_info.left));
build_opts.add_option("-DCROP_TOP=" + support::cpp11::to_string(crop_info.top));
- _kernel = create_kernel(compile_context, "batch_to_space_static_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options());
+ _kernel = create_kernel(
+ compile_context, "batch_to_space_static_" + lower_string(string_from_data_layout(input->info()->data_layout())),
+ build_opts.options());
// Configure kernel window
Window win = calculate_max_window(*output->info(), Steps());
ICLKernel::configure_internal(win);
}
-Status CLBatchToSpaceLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output)
+Status
+CLBatchToSpaceLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, block_shape, output);
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, block_shape, output));
return Status{};
}
-Status CLBatchToSpaceLayerKernel::validate(const ITensorInfo *input, const int32_t block_shape_x, const int32_t block_shape_y, const ITensorInfo *output, const CropInfo &crop_info)
+Status CLBatchToSpaceLayerKernel::validate(const ITensorInfo *input,
+ const int32_t block_shape_x,
+ const int32_t block_shape_y,
+ const ITensorInfo *output,
+ const CropInfo &crop_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_static(input, block_shape_x, block_shape_y, output, crop_info));
@@ -185,7 +212,7 @@ void CLBatchToSpaceLayerKernel::run(const Window &window, cl::CommandQueue &queu
unsigned int idx = 0;
add_4D_tensor_argument(idx, _input, slice_in);
add_argument(idx, batch_id);
- if(_block_shape != nullptr)
+ if (_block_shape != nullptr)
{
add_1D_tensor_argument(idx, _block_shape, vector_slice);
}
@@ -193,7 +220,6 @@ void CLBatchToSpaceLayerKernel::run(const Window &window, cl::CommandQueue &queu
enqueue(queue, *this, slice_out, lws_hint());
++batch_id;
- }
- while(window.slide_window_slice_3D(slice_out));
+ } while (window.slide_window_slice_3D(slice_out));
}
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.h b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.h
index a05184cd5b..b9d3e66fe2 100644
--- a/src/core/CL/kernels/CLBatchToSpaceLayerKernel.h
+++ b/src/core/CL/kernels/CLBatchToSpaceLayerKernel.h
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H
#include "arm_compute/core/Types.h"
+
#include "src/core/CL/ICLKernel.h"
namespace arm_compute
@@ -65,7 +66,10 @@ public:
*
* @deprecated This method for dynamic block shape is not fully mature and will be removed in 23.08 release
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ const ICLTensor *block_shape,
+ ICLTensor *output);
/** Initialise the kernel's inputs and output (Static block shape).
*
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
@@ -74,7 +78,11 @@ public:
* @param[out] output Tensor output. Data types supported: same as @p input
* @param[in] crop_info Specifies how the output shape is cropped after batch to space is performed
*/
- void configure(const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output, const CropInfo &crop_info);
+ void configure(const ICLTensor *input,
+ const int32_t block_shape_x,
+ const int32_t block_shape_y,
+ ICLTensor *output,
+ const CropInfo &crop_info);
/** Initialise the kernel's inputs and output (Static block shape).
*
* @param[in] compile_context The compile context to be used.
@@ -84,7 +92,12 @@ public:
* @param[out] output Tensor output. Data types supported: same as @p input
* @param[in] crop_info Specifies how the output shape is cropped after batch to space is performed
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output, const CropInfo &crop_info);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ const int32_t block_shape_x,
+ const int32_t block_shape_y,
+ ICLTensor *output,
+ const CropInfo &crop_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayerKernel
*
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
@@ -106,7 +119,11 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const int32_t block_shape_x, const int32_t block_shape_y, const ITensorInfo *output, const CropInfo &crop_info);
+ static Status validate(const ITensorInfo *input,
+ const int32_t block_shape_x,
+ const int32_t block_shape_y,
+ const ITensorInfo *output,
+ const CropInfo &crop_info);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/src/core/CL/kernels/CLBitwiseKernel.cpp b/src/core/CL/kernels/CLBitwiseKernel.cpp
index 11e6d021a5..de3fb43de8 100644
--- a/src/core/CL/kernels/CLBitwiseKernel.cpp
+++ b/src/core/CL/kernels/CLBitwiseKernel.cpp
@@ -28,25 +28,29 @@
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/Validate.h"
+
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
namespace arm_compute
{
-CLBitwiseKernel::CLBitwiseKernel()
- : _input1(nullptr), _input2(nullptr), _output(nullptr)
+CLBitwiseKernel::CLBitwiseKernel() : _input1(nullptr), _input2(nullptr), _output(nullptr)
{
_type = CLKernelType::ELEMENTWISE;
}
-void CLBitwiseKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, BitwiseOperation op)
+void CLBitwiseKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input1,
+ const ICLTensor *input2,
+ ICLTensor *output,
+ BitwiseOperation op)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input1);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8);
- if(op != BitwiseOperation::NOT)
+ if (op != BitwiseOperation::NOT)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input2);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8);
@@ -56,7 +60,7 @@ void CLBitwiseKernel::configure(const CLCompileContext &compile_context, const I
// Output auto inizialitation if not yet initialized
auto_init_if_empty(*(output->info()), *(input1->info()));
- auto padding_info = get_padding_info({ input1, input2, output });
+ auto padding_info = get_padding_info({input1, input2, output});
// Configure kernel window
const unsigned int vec_size_x = adjust_vec_size(16 / output->info()->element_size(), output->info()->dimension(0));
@@ -68,7 +72,7 @@ void CLBitwiseKernel::configure(const CLCompileContext &compile_context, const I
// Create kernel
std::string kernel_name = "";
- switch(op)
+ switch (op)
{
case BitwiseOperation::AND:
kernel_name = "bitwise_and";
@@ -107,13 +111,12 @@ void CLBitwiseKernel::run(const Window &window, cl::CommandQueue &queue)
{
unsigned int idx = 0;
add_2D_tensor_argument(idx, _input1, slice);
- if(_input2 != nullptr)
+ if (_input2 != nullptr)
{
add_2D_tensor_argument(idx, _input2, slice);
}
add_2D_tensor_argument(idx, _output, slice);
enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
+ } while (window.slide_window_slice_2D(slice));
}
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLBitwiseKernel.h b/src/core/CL/kernels/CLBitwiseKernel.h
index c5a999643d..2c74955ae4 100644
--- a/src/core/CL/kernels/CLBitwiseKernel.h
+++ b/src/core/CL/kernels/CLBitwiseKernel.h
@@ -59,7 +59,11 @@ public:
* @param[out] output Destination tensor. Data types supported: U8.
* @param[in] op Bitwise operation to perform. Supported: AND, OR, NOT, XOR.
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, BitwiseOperation op);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input1,
+ const ICLTensor *input2,
+ ICLTensor *output,
+ BitwiseOperation op);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp
index 72de854afb..f32c518e29 100644
--- a/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp
+++ b/src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp
@@ -31,6 +31,7 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
@@ -40,7 +41,10 @@ namespace arm_compute
{
namespace
{
-Status validate_arguments(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info)
+Status validate_arguments(const ITensorInfo *boxes,
+ const ITensorInfo *pred_boxes,
+ const ITensorInfo *deltas,
+ const BoundingBoxTransformInfo &info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(boxes, pred_boxes, deltas);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(boxes);
@@ -53,7 +57,7 @@ Status validate_arguments(const ITensorInfo *boxes, const ITensorInfo *pred_boxe
ARM_COMPUTE_RETURN_ERROR_ON(boxes->num_dimensions() > 2);
const bool is_qasymm16 = boxes->data_type() == DataType::QASYMM16;
- if(is_qasymm16)
+ if (is_qasymm16)
{
const UniformQuantizationInfo boxes_qinfo = boxes->quantization_info().uniform();
ARM_COMPUTE_RETURN_ERROR_ON(boxes_qinfo.scale != 0.125f);
@@ -65,12 +69,12 @@ Status validate_arguments(const ITensorInfo *boxes, const ITensorInfo *pred_boxe
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(boxes, deltas);
}
- if(pred_boxes->total_size() > 0)
+ if (pred_boxes->total_size() > 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(pred_boxes->tensor_shape(), deltas->tensor_shape());
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(pred_boxes, boxes);
ARM_COMPUTE_RETURN_ERROR_ON(pred_boxes->num_dimensions() > 2);
- if(is_qasymm16)
+ if (is_qasymm16)
{
const UniformQuantizationInfo pred_boxes_qinfo = pred_boxes->quantization_info().uniform();
ARM_COMPUTE_RETURN_ERROR_ON(pred_boxes_qinfo.scale != 0.125f);
@@ -83,22 +87,31 @@ Status validate_arguments(const ITensorInfo *boxes, const ITensorInfo *pred_boxe
}
} // namespace
-CLBoundingBoxTransformKernel::CLBoundingBoxTransformKernel()
- : _boxes(nullptr), _pred_boxes(nullptr), _deltas(nullptr)
+CLBoundingBoxTransformKernel::CLBoundingBoxTransformKernel() : _boxes(nullptr), _pred_boxes(nullptr), _deltas(nullptr)
{
_type = CLKernelType::ELEMENTWISE;
}
-void CLBoundingBoxTransformKernel::configure(const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info)
+void CLBoundingBoxTransformKernel::configure(const ICLTensor *boxes,
+ ICLTensor *pred_boxes,
+ const ICLTensor *deltas,
+ const BoundingBoxTransformInfo &info)
{
configure(CLKernelLibrary::get().get_compile_context(), boxes, pred_boxes, deltas, info);
}
-void CLBoundingBoxTransformKernel::configure(const CLCompileContext &compile_context, const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info)
+void CLBoundingBoxTransformKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *boxes,
+ ICLTensor *pred_boxes,
+ const ICLTensor *deltas,
+ const BoundingBoxTransformInfo &info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(boxes, pred_boxes, deltas);
- auto padding_info = get_padding_info({ boxes, pred_boxes, deltas });
- auto_init_if_empty(*pred_boxes->info(), deltas->info()->clone()->set_data_type(boxes->info()->data_type()).set_quantization_info(boxes->info()->quantization_info()));
+ auto padding_info = get_padding_info({boxes, pred_boxes, deltas});
+ auto_init_if_empty(*pred_boxes->info(), deltas->info()
+ ->clone()
+ ->set_data_type(boxes->info()->data_type())
+ .set_quantization_info(boxes->info()->quantization_info()));
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(boxes->info(), pred_boxes->info(), deltas->info(), info));
@@ -128,7 +141,7 @@ void CLBoundingBoxTransformKernel::configure(const CLCompileContext &compile_con
build_opts.add_option_if(info.apply_scale(), "-DSCALE_AFTER=" + float_to_string_with_full_precision(info.scale()));
build_opts.add_option_if(info.correct_transform_coords(), "-DOFFSET=1");
- if(is_quantized)
+ if (is_quantized)
{
build_opts.add_option("-DDATA_TYPE_DELTAS=" + get_cl_type_from_data_type(deltas->info()->data_type()));
const UniformQuantizationInfo boxes_qinfo = boxes->info()->quantization_info().uniform();
@@ -148,12 +161,15 @@ void CLBoundingBoxTransformKernel::configure(const CLCompileContext &compile_con
// Since the number of columns is a multiple of 4 by definition, we don't need to pad the tensor
const unsigned int num_elems_processed_per_iteration = 4;
- Window win = calculate_max_window(*deltas->info(), Steps(num_elems_processed_per_iteration));
+ Window win = calculate_max_window(*deltas->info(), Steps(num_elems_processed_per_iteration));
ICLKernel::configure_internal(win);
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-Status CLBoundingBoxTransformKernel::validate(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info)
+Status CLBoundingBoxTransformKernel::validate(const ITensorInfo *boxes,
+ const ITensorInfo *pred_boxes,
+ const ITensorInfo *deltas,
+ const BoundingBoxTransformInfo &info)
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(boxes, pred_boxes, deltas, info));
return Status{};
diff --git a/src/core/CL/kernels/CLBoundingBoxTransformKernel.h b/src/core/CL/kernels/CLBoundingBoxTransformKernel.h
index 08f350e86a..9a1bb49bb9 100644
--- a/src/core/CL/kernels/CLBoundingBoxTransformKernel.h
+++ b/src/core/CL/kernels/CLBoundingBoxTransformKernel.h
@@ -58,7 +58,10 @@ public:
* @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct.
*
*/
- void configure(const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info);
+ void configure(const ICLTensor *boxes,
+ ICLTensor *pred_boxes,
+ const ICLTensor *deltas,
+ const BoundingBoxTransformInfo &info);
/** Set the input and output tensors.
*
* @param[in] compile_context The compile context to be used.
@@ -71,7 +74,11 @@ public:
* @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct.
*
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *boxes,
+ ICLTensor *pred_boxes,
+ const ICLTensor *deltas,
+ const BoundingBoxTransformInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref CLBoundingBoxTransform
*
@@ -85,7 +92,10 @@ public:
*
* @return a Status
*/
- static Status validate(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info);
+ static Status validate(const ITensorInfo *boxes,
+ const ITensorInfo *pred_boxes,
+ const ITensorInfo *deltas,
+ const BoundingBoxTransformInfo &info);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp
index a2a0bc4fb4..ec58bf9e7a 100644
--- a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp
+++ b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp
@@ -31,6 +31,7 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
@@ -46,15 +47,19 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups < 2, "Channel shuffling with less than 2 groups would be inefficient");
- const unsigned int channels = input->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL));
+ const unsigned int channels =
+ input->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL));
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups == channels, "Channel shuffling with same number of groups as number of channels would be inefficient");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(
+ num_groups == channels,
+ "Channel shuffling with same number of groups as number of channels would be inefficient");
// There cannot be more groups than channels
ARM_COMPUTE_RETURN_ERROR_ON(num_groups > channels);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((channels % num_groups) != 0, "The number of channels must be a multiple of the number of groups");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG((channels % num_groups) != 0,
+ "The number of channels must be a multiple of the number of groups");
// Checks performed when output is configured
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
@@ -70,11 +75,12 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
auto_init_if_empty(*output, *input->clone());
const bool is_nhwc = input->data_layout() == DataLayout::NHWC;
- if(is_nhwc)
+ if (is_nhwc)
{
- unsigned int num_elems_processed_per_iteration_x = adjust_vec_size(max_cl_vector_width / input->element_size(), input->dimension(0));
- Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration_x));
- Window win_collapsed = win.collapse(win, Window::DimZ);
+ unsigned int num_elems_processed_per_iteration_x =
+ adjust_vec_size(max_cl_vector_width / input->element_size(), input->dimension(0));
+ Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration_x));
+ Window win_collapsed = win.collapse(win, Window::DimZ);
return std::make_pair(Status{}, win_collapsed);
}
else
@@ -83,22 +89,25 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
constexpr unsigned int num_elems_processed_per_iteration_y = 2;
// Configure kernel window
- Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
- AccessWindowRectangle input_access(input, 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y);
- AccessWindowRectangle output_access(output, 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y);
+ Window win = calculate_max_window(
+ *input, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
+ AccessWindowRectangle input_access(input, 0, 0, num_elems_processed_per_iteration_x,
+ num_elems_processed_per_iteration_y);
+ AccessWindowRectangle output_access(output, 0, 0, num_elems_processed_per_iteration_x,
+ num_elems_processed_per_iteration_y);
const bool window_changed = update_window_and_padding(win, input_access, output_access);
Window win_collapsed = win.collapse(win, Window::DimZ);
- Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+ Status err =
+ (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
return std::make_pair(err, win_collapsed);
}
}
} // namespace
-CLChannelShuffleLayerKernel::CLChannelShuffleLayerKernel()
- : _input(nullptr), _output(nullptr)
+CLChannelShuffleLayerKernel::CLChannelShuffleLayerKernel() : _input(nullptr), _output(nullptr)
{
_type = CLKernelType::ELEMENTWISE;
}
@@ -108,23 +117,27 @@ void CLChannelShuffleLayerKernel::configure(const ICLTensor *input, ICLTensor *o
configure(CLKernelLibrary::get().get_compile_context(), input, output, num_groups);
}
-void CLChannelShuffleLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int num_groups)
+void CLChannelShuffleLayerKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ unsigned int num_groups)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), num_groups));
- auto padding_info = get_padding_info({ input, output });
+ auto padding_info = get_padding_info({input, output});
_input = input;
_output = output;
- const DataLayout data_layout = input->info()->data_layout();
- const bool is_nhwc = data_layout == DataLayout::NHWC;
- const unsigned int channels = input->info()->dimension(get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL));
- unsigned int vec_size_x = 0;
- unsigned int vec_size_x_leftovers = 0;
- if(is_nhwc)
+ const DataLayout data_layout = input->info()->data_layout();
+ const bool is_nhwc = data_layout == DataLayout::NHWC;
+ const unsigned int channels =
+ input->info()->dimension(get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL));
+ unsigned int vec_size_x = 0;
+ unsigned int vec_size_x_leftovers = 0;
+ if (is_nhwc)
{
- vec_size_x = adjust_vec_size(max_cl_vector_width / input->info()->element_size(), input->info()->dimension(0));
+ vec_size_x = adjust_vec_size(max_cl_vector_width / input->info()->element_size(), input->info()->dimension(0));
vec_size_x_leftovers = input->info()->dimension(0) % vec_size_x;
}
else
@@ -170,13 +183,14 @@ void CLChannelShuffleLayerKernel::configure(const CLCompileContext &compile_cont
_config_id += support::cpp11::to_string(output->info()->dimension(1));
_config_id += "_";
_config_id += support::cpp11::to_string(output->info()->dimension(2));
- if(data_layout == DataLayout::NHWC)
+ if (data_layout == DataLayout::NHWC)
{
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
}
-Status CLChannelShuffleLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups)
+Status
+CLChannelShuffleLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups)
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, num_groups));
ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get()).first);
diff --git a/src/core/CL/kernels/CLChannelShuffleLayerKernel.h b/src/core/CL/kernels/CLChannelShuffleLayerKernel.h
index 31c007f17e..43c939ebd8 100644
--- a/src/core/CL/kernels/CLChannelShuffleLayerKernel.h
+++ b/src/core/CL/kernels/CLChannelShuffleLayerKernel.h
@@ -60,7 +60,10 @@ public:
* @param[out] output Output tensor. Data type supported: Same as @p input
* @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int num_groups);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ unsigned int num_groups);
/** Static function to check if given info will lead to a valid configuration of @ref CLChannelShuffleLayerKernel
*
* @param[in] input Input tensor info. Data types supported: All.
diff --git a/src/core/CL/kernels/CLComparisonKernel.cpp b/src/core/CL/kernels/CLComparisonKernel.cpp
index f4d6316517..f27270733e 100644
--- a/src/core/CL/kernels/CLComparisonKernel.cpp
+++ b/src/core/CL/kernels/CLComparisonKernel.cpp
@@ -26,6 +26,7 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
@@ -38,14 +39,10 @@ namespace arm_compute
namespace
{
// Create supported comparisons map
-const std::map<ComparisonOperation, std::string> supported_comparison_ops =
-{
- { ComparisonOperation::Equal, "EQUAL" },
- { ComparisonOperation::NotEqual, "NOTEQUAL" },
- { ComparisonOperation::Greater, "GREATER" },
- { ComparisonOperation::GreaterEqual, "GREATEREQUAL" },
- { ComparisonOperation::Less, "LESS" },
- { ComparisonOperation::LessEqual, "LESSEQUAL" },
+const std::map<ComparisonOperation, std::string> supported_comparison_ops = {
+ {ComparisonOperation::Equal, "EQUAL"}, {ComparisonOperation::NotEqual, "NOTEQUAL"},
+ {ComparisonOperation::Greater, "GREATER"}, {ComparisonOperation::GreaterEqual, "GREATEREQUAL"},
+ {ComparisonOperation::Less, "LESS"}, {ComparisonOperation::LessEqual, "LESSEQUAL"},
};
int calculate_num_elems_processed_per_iteration(const ITensorInfo &input)
@@ -53,7 +50,10 @@ int calculate_num_elems_processed_per_iteration(const ITensorInfo &input)
return 16 / input.element_size();
}
-Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output, ComparisonOperation operation)
+Status validate_arguments(const ITensorInfo &input1,
+ const ITensorInfo &input2,
+ const ITensorInfo &output,
+ ComparisonOperation operation)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&input1);
ARM_COMPUTE_RETURN_ERROR_ON(input1.data_type() == DataType::UNKNOWN);
@@ -64,7 +64,7 @@ Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2,
ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible");
// Validate in case of configured output
- if(output.total_size() > 0)
+ if (output.total_size() > 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, output.tensor_shape(), 0),
@@ -76,7 +76,7 @@ Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2,
std::pair<Status, Window> validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output)
{
- const TensorShape &out_shape = TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape());
+ const TensorShape &out_shape = TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape());
const unsigned int num_elems_processed_per_iteration = calculate_num_elems_processed_per_iteration(input1);
// Auto initialize output if not initialized
@@ -90,27 +90,34 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo &input1, ITe
AccessWindowHorizontal input2_access(&input2, 0, num_elems_processed_per_iteration);
AccessWindowHorizontal output_access(&output, 0, num_elems_processed_per_iteration);
- bool window_changed = update_window_and_padding(win_input1, input1_access)
- || update_window_and_padding(win_input2, input2_access)
- || update_window_and_padding(win, output_access);
+ bool window_changed = update_window_and_padding(win_input1, input1_access) ||
+ update_window_and_padding(win_input2, input2_access) ||
+ update_window_and_padding(win, output_access);
- Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+ Status err =
+ (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
return std::make_pair(err, win);
}
} // namespace
-CLComparisonKernel::CLComparisonKernel()
- : _input1(nullptr), _input2(nullptr), _output(nullptr)
+CLComparisonKernel::CLComparisonKernel() : _input1(nullptr), _input2(nullptr), _output(nullptr)
{
_type = CLKernelType::ELEMENTWISE;
}
-void CLComparisonKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation)
+void CLComparisonKernel::configure(const ICLTensor *input1,
+ const ICLTensor *input2,
+ ICLTensor *output,
+ ComparisonOperation operation)
{
configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, operation);
}
-void CLComparisonKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation)
+void CLComparisonKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input1,
+ const ICLTensor *input2,
+ ICLTensor *output,
+ ComparisonOperation operation)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1->info(), *input2->info(), *output->info(), operation));
@@ -129,10 +136,11 @@ void CLComparisonKernel::configure(const CLCompileContext &compile_context, cons
// Set kernel build options
std::set<std::string> build_opts;
build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input1->info()->data_type()));
- build_opts.emplace("-DVEC_SIZE=" + support::cpp11::to_string(calculate_num_elems_processed_per_iteration(*input1->info())));
+ build_opts.emplace("-DVEC_SIZE=" +
+ support::cpp11::to_string(calculate_num_elems_processed_per_iteration(*input1->info())));
build_opts.emplace("-DOP=" + operation_name);
build_opts.emplace("-DOP_NAME=" + lower_string(operation_name));
- if(is_data_type_quantized(input1->info()->data_type()))
+ if (is_data_type_quantized(input1->info()->data_type()))
{
const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform();
const UniformQuantizationInfo iq2_info = input2->info()->quantization_info().uniform();
@@ -160,12 +168,16 @@ void CLComparisonKernel::configure(const CLCompileContext &compile_context, cons
_config_id += lower_string(string_from_data_layout(input1->info()->data_layout()));
}
-Status CLComparisonKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation operation)
+Status CLComparisonKernel::validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ ComparisonOperation operation)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output);
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input1, *input2, *output, operation));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(*input1->clone(), *input2->clone(), *output->clone()).first);
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_and_configure_window(*input1->clone(), *input2->clone(), *output->clone()).first);
return Status{};
}
@@ -181,17 +193,18 @@ void CLComparisonKernel::run(const Window &window, cl::CommandQueue &queue)
bool can_collapse = true;
const bool is_vector = in_shape1.num_dimensions() == 1 || in_shape2.num_dimensions() == 1;
- if(std::min(in_shape1.total_size(), in_shape2.total_size()) > 1 && !is_vector)
+ if (std::min(in_shape1.total_size(), in_shape2.total_size()) > 1 && !is_vector)
{
can_collapse = (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ);
- for(size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); d++)
+ for (size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); d++)
{
can_collapse = (in_shape1[d] == in_shape2[d]);
}
}
bool has_collapsed = false;
- Window collapsed = can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed) : window;
+ Window collapsed =
+ can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed) : window;
const TensorShape &in_shape1_collapsed = has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1;
const TensorShape &in_shape2_collapsed = has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2;
@@ -212,16 +225,16 @@ void CLComparisonKernel::run(const Window &window, cl::CommandQueue &queue)
ARM_COMPUTE_UNUSED(collapsed.slide_window_slice_3D(slice_input1));
ARM_COMPUTE_UNUSED(collapsed.slide_window_slice_3D(slice_input2));
- }
- while(collapsed.slide_window_slice_3D(slice));
+ } while (collapsed.slide_window_slice_3D(slice));
}
BorderSize CLComparisonKernel::border_size() const
{
const int num_elems_processed_per_iteration = calculate_num_elems_processed_per_iteration(*_input1->info());
- const unsigned int replicateSize = _output->info()->dimension(0) - std::min(_input1->info()->dimension(0), _input2->info()->dimension(0));
- const unsigned int border = std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize);
- return BorderSize{ 0, border, 0, 0 };
+ const unsigned int replicateSize =
+ _output->info()->dimension(0) - std::min(_input1->info()->dimension(0), _input2->info()->dimension(0));
+ const unsigned int border = std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize);
+ return BorderSize{0, border, 0, 0};
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLComparisonKernel.h b/src/core/CL/kernels/CLComparisonKernel.h
index 0b94190183..174a6c9bf9 100644
--- a/src/core/CL/kernels/CLComparisonKernel.h
+++ b/src/core/CL/kernels/CLComparisonKernel.h
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_CLCOMPARISONKERNEL_H
#include "arm_compute/core/Types.h"
+
#include "src/core/CL/ICLKernel.h"
namespace arm_compute
@@ -64,7 +65,11 @@ public:
* @param[out] output Destination tensor. Data types supported: U8.
* @param[in] operation Comparison operation to use.
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input1,
+ const ICLTensor *input2,
+ ICLTensor *output,
+ ComparisonOperation operation);
/** Static function to check if given info will lead to a valid configuration of @ref CLComparisonKernel
*
* @param[in] input1 Source tensor. Data types supported: All.
@@ -74,10 +79,13 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation operation);
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ ComparisonOperation operation);
// Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
+ void run(const Window &window, cl::CommandQueue &queue) override;
BorderSize border_size() const override;
private:
diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
index 76af5d564a..f8ecc4c098 100644
--- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
+++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
@@ -29,6 +29,7 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/WindowHelpers.h"
@@ -40,7 +41,8 @@ CLDeconvolutionLayerUpsampleKernel::CLDeconvolutionLayerUpsampleKernel()
_type = CLKernelType::ELEMENTWISE;
}
-Status CLDeconvolutionLayerUpsampleKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
+Status CLDeconvolutionLayerUpsampleKernel::validate(const ITensorInfo *input,
+ const ITensorInfo *output,
const PadStrideInfo &info)
{
ARM_COMPUTE_UNUSED(info);
@@ -60,7 +62,7 @@ Status CLDeconvolutionLayerUpsampleKernel::validate(const ITensorInfo *input, co
ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_h) == 0);
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_c) != output->dimension(idx_c));
- for(size_t i = 3; i < Coordinates::num_max_dimensions; ++i)
+ for (size_t i = 3; i < Coordinates::num_max_dimensions; ++i)
{
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != output->dimension(i));
}
@@ -68,20 +70,21 @@ Status CLDeconvolutionLayerUpsampleKernel::validate(const ITensorInfo *input, co
return Status{};
}
-void CLDeconvolutionLayerUpsampleKernel::configure(const ICLTensor *input, ICLTensor *output,
- const PadStrideInfo &info)
+void CLDeconvolutionLayerUpsampleKernel::configure(const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, info);
}
-void CLDeconvolutionLayerUpsampleKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output,
- const PadStrideInfo &info)
+void CLDeconvolutionLayerUpsampleKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ const PadStrideInfo &info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
// Perform validation step
ARM_COMPUTE_ERROR_THROW_ON(CLDeconvolutionLayerUpsampleKernel::validate(input->info(), output->info(), info));
- auto padding_info = get_padding_info({ input, output });
+ auto padding_info = get_padding_info({input, output});
_input = input;
_output = output;
@@ -119,7 +122,7 @@ void CLDeconvolutionLayerUpsampleKernel::run(const Window &window, cl::CommandQu
const int out_end_y = _output->info()->dimension(idx_h) - _info.pad_bottom() + _info.stride().second - 1;
const int out_step_y = _info.stride().second;
- switch(_data_layout)
+ switch (_data_layout)
{
case DataLayout::NCHW:
{
@@ -137,8 +140,7 @@ void CLDeconvolutionLayerUpsampleKernel::run(const Window &window, cl::CommandQu
add_3D_tensor_argument(idx, _input, slice_in);
add_3D_tensor_argument(idx, _output, slice_out);
enqueue(queue, *this, slice_out, lws_hint());
- }
- while(collapsed.slide_window_slice_3D(slice_in) && collapsed.slide_window_slice_3D(slice_out));
+ } while (collapsed.slide_window_slice_3D(slice_in) && collapsed.slide_window_slice_3D(slice_out));
break;
}
case DataLayout::NHWC:
@@ -156,8 +158,7 @@ void CLDeconvolutionLayerUpsampleKernel::run(const Window &window, cl::CommandQu
add_3D_tensor_argument(idx, _input, slice_in);
add_3D_tensor_argument(idx, _output, slice_out);
enqueue(queue, *this, slice_out, lws_hint());
- }
- while(window.slide_window_slice_3D(slice_in) && window.slide_window_slice_3D(slice_out));
+ } while (window.slide_window_slice_3D(slice_in) && window.slide_window_slice_3D(slice_out));
break;
}
default:
diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
index e0d1322341..762989a836 100644
--- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
+++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h
@@ -62,7 +62,10 @@ public:
* @param[out] output Destination tensor. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
* @param[in] info Contains padding and stride information described in @ref PadStrideInfo.
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ const PadStrideInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample
*
* @param[in] input Source tensor info. Data types supported: All.
diff --git a/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp
index 0fc0ff8168..b33e0a8b6f 100644
--- a/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp
+++ b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp
@@ -27,9 +27,10 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/StringUtils.h"
+#include "arm_compute/core/Validate.h"
+
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
@@ -38,7 +39,11 @@ namespace arm_compute
{
namespace
{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const ITensorInfo *input_info, const ITensorInfo *weights_info,
+Status validate_arguments(const ITensorInfo *input,
+ const ITensorInfo *bias,
+ const ITensorInfo *output,
+ const ITensorInfo *input_info,
+ const ITensorInfo *weights_info,
const PadStrideInfo &deconv_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output, input_info, weights_info);
@@ -53,19 +58,21 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con
ARM_COMPUTE_RETURN_ERROR_ON(weights_info->dimension(idx_w) != deconv_info.stride().first);
ARM_COMPUTE_RETURN_ERROR_ON(weights_info->dimension(idx_h) != deconv_info.stride().second);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S32);
- if(!is_qasymm)
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16, DataType::QASYMM8,
+ DataType::QASYMM8_SIGNED, DataType::S32);
+ if (!is_qasymm)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, input_info, weights_info);
}
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_info->dimension(idx_w) * weights_info->dimension(idx_h) * weights_info->dimension(idx_b));
+ ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_info->dimension(idx_w) * weights_info->dimension(idx_h) *
+ weights_info->dimension(idx_b));
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(1) != input_info->dimension(idx_w));
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(2) != input_info->dimension(idx_h));
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(3) != input_info->dimension(idx_b));
- if(bias != nullptr)
+ if (bias != nullptr)
{
- if(is_qasymm)
+ if (is_qasymm)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
}
@@ -76,19 +83,26 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con
ARM_COMPUTE_RETURN_ERROR_ON(bias->dimension(0) != weights_info->dimension(idx_b));
}
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
const PadStrideInfo stride_info(deconv_info.stride().first, deconv_info.stride().second);
- auto out_dims = deconvolution_output_dimensions(input_info->dimension(idx_w), input_info->dimension(idx_h), weights_info->dimension(idx_w), weights_info->dimension(idx_h), stride_info);
+ auto out_dims = deconvolution_output_dimensions(input_info->dimension(idx_w), input_info->dimension(idx_h),
+ weights_info->dimension(idx_w), weights_info->dimension(idx_h),
+ stride_info);
- const TensorShape output_shape = misc::shape_calculator::compute_deconvolution_output_shape(out_dims, *input_info, *weights_info);
+ const TensorShape output_shape =
+ misc::shape_calculator::compute_deconvolution_output_shape(out_dims, *input_info, *weights_info);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
}
return Status{};
}
-std::pair<Status, Window> validate_and_configure_window(const ITensorInfo *input, ITensorInfo *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info)
+std::pair<Status, Window> validate_and_configure_window(const ITensorInfo *input,
+ ITensorInfo *output,
+ const ITensorInfo *input_info,
+ const ITensorInfo *weights_info,
+ const PadStrideInfo &deconv_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
@@ -97,11 +111,17 @@ std::pair<Status, Window> validate_and_configure_window(const ITensorInfo *input
const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
const PadStrideInfo stride_info(deconv_info.stride().first, deconv_info.stride().second);
- auto out_dims = deconvolution_output_dimensions(input_info->dimension(idx_w), input_info->dimension(idx_h), weights_info->dimension(idx_w), weights_info->dimension(idx_h), stride_info);
+ auto out_dims =
+ deconvolution_output_dimensions(input_info->dimension(idx_w), input_info->dimension(idx_h),
+ weights_info->dimension(idx_w), weights_info->dimension(idx_h), stride_info);
- const TensorShape output_shape = misc::shape_calculator::compute_deconvolution_output_shape(out_dims, *input_info, *weights_info);
+ const TensorShape output_shape =
+ misc::shape_calculator::compute_deconvolution_output_shape(out_dims, *input_info, *weights_info);
- auto_init_if_empty(*output, input->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout).set_quantization_info(input->quantization_info()));
+ auto_init_if_empty(*output, input->clone()
+ ->set_tensor_shape(output_shape)
+ .set_data_layout(data_layout)
+ .set_quantization_info(input->quantization_info()));
Window win = calculate_max_window(*input);
@@ -109,29 +129,37 @@ std::pair<Status, Window> validate_and_configure_window(const ITensorInfo *input
}
} // namespace
-CLDeconvolutionReshapeOutputKernel::CLDeconvolutionReshapeOutputKernel()
- : _add_bias(false),
- _bias(nullptr)
+CLDeconvolutionReshapeOutputKernel::CLDeconvolutionReshapeOutputKernel() : _add_bias(false), _bias(nullptr)
{
_type = CLKernelType::ELEMENTWISE;
}
-void CLDeconvolutionReshapeOutputKernel::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info,
+void CLDeconvolutionReshapeOutputKernel::configure(const ICLTensor *input,
+ const ICLTensor *bias,
+ ICLTensor *output,
+ const ITensorInfo *input_info,
+ const ITensorInfo *weights_info,
const PadStrideInfo &deconv_info)
{
configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, input_info, weights_info, deconv_info);
}
-void CLDeconvolutionReshapeOutputKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info,
- const ITensorInfo *weights_info,
- const PadStrideInfo &deconv_info)
+void CLDeconvolutionReshapeOutputKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ const ICLTensor *bias,
+ ICLTensor *output,
+ const ITensorInfo *input_info,
+ const ITensorInfo *weights_info,
+ const PadStrideInfo &deconv_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, input_info, weights_info);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (bias != nullptr ? bias->info() : nullptr), output->info(), input_info, weights_info, deconv_info));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (bias != nullptr ? bias->info() : nullptr),
+ output->info(), input_info, weights_info, deconv_info));
- auto padding_info = get_padding_info({ input, bias, output });
+ auto padding_info = get_padding_info({input, bias, output});
// Configure kernel window
- auto win_config = validate_and_configure_window(input->info(), output->info(), input_info, weights_info, deconv_info);
+ auto win_config =
+ validate_and_configure_window(input->info(), output->info(), input_info, weights_info, deconv_info);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
const DataLayout data_layout = input_info->data_layout();
@@ -178,7 +206,11 @@ void CLDeconvolutionReshapeOutputKernel::configure(const CLCompileContext &compi
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-Status CLDeconvolutionReshapeOutputKernel::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const ITensorInfo *input_info, const ITensorInfo *weights_info,
+Status CLDeconvolutionReshapeOutputKernel::validate(const ITensorInfo *input,
+ const ITensorInfo *bias,
+ const ITensorInfo *output,
+ const ITensorInfo *input_info,
+ const ITensorInfo *weights_info,
const PadStrideInfo &deconv_info)
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, bias, output, input_info, weights_info, deconv_info));
@@ -194,7 +226,7 @@ void CLDeconvolutionReshapeOutputKernel::run(const Window &window, cl::CommandQu
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, collapsed);
add_3D_tensor_argument(idx, _output, collapsed);
- if(_add_bias)
+ if (_add_bias)
{
add_1D_tensor_argument(idx, _bias, collapsed);
}
diff --git a/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h
index ce354fa86f..8f436b07e3 100644
--- a/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h
+++ b/src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h
@@ -67,7 +67,12 @@ public:
* @param[in] weights_info Deconvolution weights tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input.
* @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported.
*/
- void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info);
+ void configure(const ICLTensor *input,
+ const ICLTensor *bias,
+ ICLTensor *output,
+ const ITensorInfo *input_info,
+ const ITensorInfo *weights_info,
+ const PadStrideInfo &deconv_info);
/** Initialise the kernel's source and destination.
*
* @param[in] compile_context The compile context to be used.
@@ -79,8 +84,13 @@ public:
* @param[in] weights_info Deconvolution weights tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input.
* @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported.
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info,
- const PadStrideInfo &deconv_info);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ const ICLTensor *bias,
+ ICLTensor *output,
+ const ITensorInfo *input_info,
+ const ITensorInfo *weights_info,
+ const PadStrideInfo &deconv_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionReshapeOutputKernel.
*
@@ -93,7 +103,12 @@ public:
*
* @return a Status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *bias,
+ const ITensorInfo *output,
+ const ITensorInfo *input_info,
+ const ITensorInfo *weights_info,
+ const PadStrideInfo &deconv_info);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp
index 5c1dc4fbf6..cdf19ab2e1 100644
--- a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp
@@ -27,6 +27,7 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
@@ -49,12 +50,14 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, i
ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_channel] % (block_shape * block_shape) != 0);
// Validate output if initialized
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[idx_width] != (block_shape * input->tensor_shape()[idx_width]));
- ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[idx_height] != (block_shape * input->tensor_shape()[idx_height]));
+ ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[idx_width] !=
+ (block_shape * input->tensor_shape()[idx_width]));
+ ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[idx_height] !=
+ (block_shape * input->tensor_shape()[idx_height]));
ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 4);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
}
@@ -63,8 +66,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, i
}
} // namespace
-CLDepthToSpaceLayerKernel::CLDepthToSpaceLayerKernel()
- : _input(nullptr), _output(nullptr), _block_shape()
+CLDepthToSpaceLayerKernel::CLDepthToSpaceLayerKernel() : _input(nullptr), _output(nullptr), _block_shape()
{
_type = CLKernelType::ELEMENTWISE;
}
@@ -74,14 +76,18 @@ void CLDepthToSpaceLayerKernel::configure(const ICLTensor *input, ICLTensor *out
configure(CLKernelLibrary::get().get_compile_context(), input, output, block_shape);
}
-void CLDepthToSpaceLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape)
+void CLDepthToSpaceLayerKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ int32_t block_shape)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- TensorShape output_shape = compute_depth_to_space_shape(input->info()->tensor_shape(), input->info()->data_layout(), block_shape);
+ TensorShape output_shape =
+ compute_depth_to_space_shape(input->info()->tensor_shape(), input->info()->data_layout(), block_shape);
auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
- auto padding_info = get_padding_info({ input, output });
+ auto padding_info = get_padding_info({input, output});
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), block_shape));
@@ -98,7 +104,9 @@ void CLDepthToSpaceLayerKernel::configure(const CLCompileContext &compile_contex
build_opts.add_option("-DCHANNEL_SIZE=" + support::cpp11::to_string(input->info()->dimension(idx_channel)));
build_opts.add_option("-DBLOCK_SHAPE=" + support::cpp11::to_string(block_shape));
build_opts.add_option("-DWIDTH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_width)));
- _kernel = create_kernel(compile_context, "depth_to_space_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options());
+ _kernel = create_kernel(compile_context,
+ "depth_to_space_" + lower_string(string_from_data_layout(input->info()->data_layout())),
+ build_opts.options());
// Configure kernel window
Window win = calculate_max_window(*input->info(), Steps());
@@ -137,7 +145,6 @@ void CLDepthToSpaceLayerKernel::run(const Window &window, cl::CommandQueue &queu
enqueue(queue, *this, slice_in, lws_hint());
++batch_id;
- }
- while(window.slide_window_slice_3D(slice_in));
+ } while (window.slide_window_slice_3D(slice_in));
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.h b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.h
index 1f7f77b569..cef70c4dda 100644
--- a/src/core/CL/kernels/CLDepthToSpaceLayerKernel.h
+++ b/src/core/CL/kernels/CLDepthToSpaceLayerKernel.h
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_CLDEPTHTOSPACELAYERKERNEL_H
#include "arm_compute/core/Types.h"
+
#include "src/core/CL/ICLKernel.h"
namespace arm_compute
@@ -61,7 +62,8 @@ public:
* @param[out] output Tensor output. Data types supported: same as @p input
* @param[in] block_shape Block shape value.
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape);
+ void
+ configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape);
/** Static function to check if given info will lead to a valid configuration of @ref CLDepthToSpaceLayerKernel.
*
* @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All.
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
index e34b6929e7..b95abe795f 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
@@ -23,16 +23,17 @@
*/
#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
-#include "arm_compute/core/utils/ActivationFunctionUtils.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/utils/ActivationFunctionUtils.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/CL/CLUtils.h"
#include "src/core/CL/CLValidate.h"
#include "src/core/CL/ICLKernel.h"
@@ -45,12 +46,18 @@ namespace arm_compute
{
namespace
{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const DWCComputeKernelInfo &dwc_info,
- const ConvolutionInfo &conv_info, const ITensorInfo *output_multipliers, const ITensorInfo *output_shifts)
+Status validate_arguments(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const DWCComputeKernelInfo &dwc_info,
+ const ConvolutionInfo &conv_info,
+ const ITensorInfo *output_multipliers,
+ const ITensorInfo *output_shifts)
{
ARM_COMPUTE_UNUSED(dwc_info);
bool in_place = false;
- if(output == nullptr || output == input)
+ if (output == nullptr || output == input)
{
in_place = true;
output = input;
@@ -58,11 +65,14 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(input, DataLayout::NHWC);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+ DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_stride_info.stride().first > 1 && dwc_info.m0 != 1);
ARM_COMPUTE_RETURN_ERROR_ON(conv_info.dilation.x() > 1 && dwc_info.m0 != 1);
ARM_COMPUTE_RETURN_ERROR_ON((dwc_info.export_input_to_cl_image == true));
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((dwc_info.export_weights_to_cl_image == true) && (export_to_cl_image(weights) == false), "Weights cannot be exported to cl_image!");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG((dwc_info.export_weights_to_cl_image == true) &&
+ (export_to_cl_image(weights) == false),
+ "Weights cannot be exported to cl_image!");
ARM_COMPUTE_RETURN_ERROR_ON((dwc_info.export_weights_to_cl_image == true) && ((dwc_info.n0 % 4) != 0));
ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_stride_info.stride().first < 1);
ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_stride_info.stride().second < 1);
@@ -72,33 +82,40 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_c) != (input->dimension(idx_c) * conv_info.depth_multiplier));
// In place restrictions
- if(in_place)
+ if (in_place)
{
- const int weights_width_idx = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH);
- const int weights_height_idx = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::HEIGHT);
- ARM_COMPUTE_RETURN_ERROR_ON(weights->tensor_shape()[weights_width_idx] != 1U || weights->tensor_shape()[weights_height_idx] != 1U);
+ const int weights_width_idx =
+ get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH);
+ const int weights_height_idx =
+ get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::HEIGHT);
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->tensor_shape()[weights_width_idx] != 1U ||
+ weights->tensor_shape()[weights_height_idx] != 1U);
ARM_COMPUTE_RETURN_ERROR_ON(conv_info.depth_multiplier != 1U);
ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_stride_info.stride() != std::make_pair(1U, 1U));
ARM_COMPUTE_RETURN_ERROR_ON(conv_info.dilation != Size2D(1U, 1U));
- ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_stride_info.has_padding()); // Note that in princple padding can be supported with in_place but we choose not to support it
+ ARM_COMPUTE_RETURN_ERROR_ON(
+ conv_info.pad_stride_info
+ .has_padding()); // Note that in princple padding can be supported with in_place but we choose not to support it
}
- const ConvolutionInfo info{ conv_info.pad_stride_info, conv_info.depth_multiplier, ActivationLayerInfo(), conv_info.dilation };
- const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info);
+ const ConvolutionInfo info{conv_info.pad_stride_info, conv_info.depth_multiplier, ActivationLayerInfo(),
+ conv_info.dilation};
+ const TensorShape output_shape =
+ arm_compute::misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info);
- if(conv_info.depth_multiplier > 1 && dwc_info.n0 > 1)
+ if (conv_info.depth_multiplier > 1 && dwc_info.n0 > 1)
{
ARM_COMPUTE_RETURN_ERROR_ON((conv_info.depth_multiplier % dwc_info.n0) != 0);
}
const bool is_quantized = is_data_type_quantized(input->data_type());
- if(biases != nullptr)
+ if (biases != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != output_shape[idx_c]);
ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
- if(is_quantized)
+ if (is_quantized)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32);
}
@@ -108,7 +125,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
}
}
- if(is_quantized)
+ if (is_quantized)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output_multipliers, output_shifts);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_multipliers, 1, DataType::S32);
@@ -116,7 +133,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
ARM_COMPUTE_RETURN_ERROR_ON(output_multipliers->num_dimensions() > 1);
ARM_COMPUTE_RETURN_ERROR_ON(output_shifts->num_dimensions() > 1);
- if(is_data_type_quantized_per_channel(weights->data_type()))
+ if (is_data_type_quantized_per_channel(weights->data_type()))
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QSYMM8_PER_CHANNEL);
ARM_COMPUTE_RETURN_ERROR_ON(output_shape[idx_c] != output_multipliers->dimension(0));
@@ -134,22 +151,24 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
}
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
}
- if(is_data_type_quantized(input->data_type()))
+ if (is_data_type_quantized(input->data_type()))
{
const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
const UniformQuantizationInfo wq_info = weights->quantization_info().uniform();
- const UniformQuantizationInfo oq_info = (output->total_size() != 0) ? output->quantization_info().uniform() : iq_info;
+ const UniformQuantizationInfo oq_info =
+ (output->total_size() != 0) ? output->quantization_info().uniform() : iq_info;
float multiplier = iq_info.scale * wq_info.scale / oq_info.scale;
int output_multiplier = 0;
int output_shift = 0;
- ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift));
}
return Status{};
@@ -171,30 +190,48 @@ CLDepthwiseConvolutionLayerNativeKernel::CLDepthwiseConvolutionLayerNativeKernel
_type = CLKernelType::DEPTHWISE;
}
-void CLDepthwiseConvolutionLayerNativeKernel::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
- const DWCComputeKernelInfo &dwc_info, const ConvolutionInfo &conv_info,
- const ICLTensor *output_multipliers, const ICLTensor *output_shifts)
+void CLDepthwiseConvolutionLayerNativeKernel::configure(ICLTensor *input,
+ const ICLTensor *weights,
+ const ICLTensor *biases,
+ ICLTensor *output,
+ const DWCComputeKernelInfo &dwc_info,
+ const ConvolutionInfo &conv_info,
+ const ICLTensor *output_multipliers,
+ const ICLTensor *output_shifts)
{
- configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, dwc_info, conv_info, output_multipliers, output_shifts);
+ configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, dwc_info, conv_info,
+ output_multipliers, output_shifts);
}
-void CLDepthwiseConvolutionLayerNativeKernel::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
- const DWCComputeKernelInfo &dwc_info, const ConvolutionInfo &conv_info,
- const ICLTensor *output_multipliers, const ICLTensor *output_shifts)
+void CLDepthwiseConvolutionLayerNativeKernel::configure(const CLCompileContext &compile_context,
+ ICLTensor *input,
+ const ICLTensor *weights,
+ const ICLTensor *biases,
+ ICLTensor *output,
+ const DWCComputeKernelInfo &dwc_info,
+ const ConvolutionInfo &conv_info,
+ const ICLTensor *output_multipliers,
+ const ICLTensor *output_shifts)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights);
- if(output == nullptr)
+ if (output == nullptr)
{
// In-place
output = input;
}
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(),
- dwc_info, conv_info, (output_multipliers != nullptr) ? output_multipliers->info() : nullptr, (output_shifts != nullptr) ? output_shifts->info() : nullptr));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(
+ input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), dwc_info,
+ conv_info, (output_multipliers != nullptr) ? output_multipliers->info() : nullptr,
+ (output_shifts != nullptr) ? output_shifts->info() : nullptr));
- auto padding_info = get_padding_info({ input, output });
+ auto padding_info = get_padding_info({input, output});
- const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_depthwise_convolution_shape(*(input->info()), *(weights->info()), conv_info);
- auto_init_if_empty(*(output->info()), input->info()->clone()->set_tensor_shape(output_shape).set_quantization_info(output->info()->quantization_info()));
+ const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_depthwise_convolution_shape(
+ *(input->info()), *(weights->info()), conv_info);
+ auto_init_if_empty(*(output->info()), input->info()
+ ->clone()
+ ->set_tensor_shape(output_shape)
+ .set_quantization_info(output->info()->quantization_info()));
_input = input;
_output = output;
@@ -214,12 +251,12 @@ void CLDepthwiseConvolutionLayerNativeKernel::configure(const CLCompileContext &
CLBuildOptions build_opts;
// Update the padding for the input/weights tensor if we can export to cl_image
- if(_export_input_to_cl_image)
+ if (_export_input_to_cl_image)
{
arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(input->info());
}
- if(_export_weights_to_cl_image)
+ if (_export_weights_to_cl_image)
{
arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(weights->info());
}
@@ -229,9 +266,10 @@ void CLDepthwiseConvolutionLayerNativeKernel::configure(const CLCompileContext &
const auto act_function = conv_info.act_info.activation();
const auto dst_data_type = _output->info()->data_type();
- if((gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST)
- && (act_function == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU || act_function == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU)
- && (dst_data_type == DataType::F32 || dst_data_type == DataType::F16))
+ if ((gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST) &&
+ (act_function == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU ||
+ act_function == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) &&
+ (dst_data_type == DataType::F32 || dst_data_type == DataType::F16))
{
// -cl-fast-relaxed-math also sets -cl-finite-math-only and -cl-unsafe-math-optimizations
// to disable -cl-finite-math-only, we only include -cl-unsafe-math-optimizations
@@ -268,23 +306,24 @@ void CLDepthwiseConvolutionLayerNativeKernel::configure(const CLCompileContext &
build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
build_opts.add_option("-DM0_A=" + support::cpp11::to_string(_weights->info()->dimension(1) + m0 - 1));
- build_opts.add_option_if_else(conv_info.depth_multiplier > 1, "-DN0_A=1", "-DN0_A=" + support::cpp11::to_string(n0));
+ build_opts.add_option_if_else(conv_info.depth_multiplier > 1, "-DN0_A=1",
+ "-DN0_A=" + support::cpp11::to_string(n0));
build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(_output->info()->dimension(0) % n0));
build_opts.add_option_if(_input->info()->num_dimensions() > 3, "-DBATCHED_EXECUTION");
// Force unroll with pragma when any of the following values exceed the maximum number of manual unroll
- set_unroll_with_pragma(build_opts, { static_cast<int>(_weights->info()->dimension(1) + m0 - 1),
- static_cast<int>(_weights->info()->dimension(1)),
- static_cast<int>(_weights->info()->dimension(2))
- });
+ set_unroll_with_pragma(build_opts, {static_cast<int>(_weights->info()->dimension(1) + m0 - 1),
+ static_cast<int>(_weights->info()->dimension(1)),
+ static_cast<int>(_weights->info()->dimension(2))});
- if(biases != nullptr)
+ if (biases != nullptr)
{
build_opts.add_option(std::string("-DHAS_BIAS"));
- build_opts.add_option(std::string("-DBIA_DATA_TYPE=" + get_cl_type_from_data_type(biases->info()->data_type())));
+ build_opts.add_option(
+ std::string("-DBIA_DATA_TYPE=" + get_cl_type_from_data_type(biases->info()->data_type())));
}
- if(_is_quantized)
+ if (_is_quantized)
{
kernel_name = "dwc_native_quantized_nhwc";
const UniformQuantizationInfo iqinfo = input->info()->quantization_info().uniform();
@@ -306,13 +345,17 @@ void CLDepthwiseConvolutionLayerNativeKernel::configure(const CLCompileContext &
build_opts.add_option("-DDST_OFFSET=" + support::cpp11::to_string(oqinfo.offset));
build_opts.add_option("-DZERO_VALUE=" + support::cpp11::to_string(zero_value_s32));
build_opts.add_option("-DACC_DATA_TYPE=" + get_cl_type_from_data_type(DataType::S32));
- build_opts.add_option("-DDST_MULTIPLIERS_DATA_TYPE=" + get_cl_type_from_data_type(_output_multipliers->info()->data_type()));
- build_opts.add_option("-DDST_SHIFTS_DATA_TYPE=" + get_cl_type_from_data_type(_output_shifts->info()->data_type()));
- build_opts.add_option_if_else(weights->info()->data_type() == DataType::QSYMM8_PER_CHANNEL, "-DQUANTIZATION_TYPE=PER_CHANNEL", "-DQUANTIZATION_TYPE=PER_TENSOR");
+ build_opts.add_option("-DDST_MULTIPLIERS_DATA_TYPE=" +
+ get_cl_type_from_data_type(_output_multipliers->info()->data_type()));
+ build_opts.add_option("-DDST_SHIFTS_DATA_TYPE=" +
+ get_cl_type_from_data_type(_output_shifts->info()->data_type()));
+ build_opts.add_option_if_else(weights->info()->data_type() == DataType::QSYMM8_PER_CHANNEL,
+ "-DQUANTIZATION_TYPE=PER_CHANNEL", "-DQUANTIZATION_TYPE=PER_TENSOR");
// Note: We expect the input and output tensors to always adopt a per-tensor quantization approach
int a_val{};
int b_val{};
- std::tie(b_val, a_val) = get_quantized_activation_min_max(conv_info.act_info, input->info()->data_type(), oqinfo);
+ std::tie(b_val, a_val) =
+ get_quantized_activation_min_max(conv_info.act_info, input->info()->data_type(), oqinfo);
build_opts.add_option_if(conv_info.act_info.enabled(), "-DA_VAL=" + support::cpp11::to_string(a_val));
build_opts.add_option_if(conv_info.act_info.enabled(), "-DB_VAL=" + support::cpp11::to_string(b_val));
@@ -321,8 +364,10 @@ void CLDepthwiseConvolutionLayerNativeKernel::configure(const CLCompileContext &
{
kernel_name = "dwc_native_fp_nhwc";
build_opts.add_option("-DACC_DATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.add_option_if(conv_info.act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(conv_info.act_info.a()));
- build_opts.add_option_if(conv_info.act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(conv_info.act_info.b()));
+ build_opts.add_option_if(conv_info.act_info.enabled(),
+ "-DA_VAL=" + float_to_string_with_full_precision(conv_info.act_info.a()));
+ build_opts.add_option_if(conv_info.act_info.enabled(),
+ "-DB_VAL=" + float_to_string_with_full_precision(conv_info.act_info.b()));
}
Window win = calculate_max_window(*(output->info()), Steps(n0, m0));
@@ -350,10 +395,17 @@ void CLDepthwiseConvolutionLayerNativeKernel::configure(const CLCompileContext &
_config_id += string_from_data_type(input->info()->data_type());
}
-Status CLDepthwiseConvolutionLayerNativeKernel::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
- const DWCComputeKernelInfo &dwc_info, const ConvolutionInfo &conv_info, const ITensorInfo *output_multipliers, const ITensorInfo *output_shifts)
+Status CLDepthwiseConvolutionLayerNativeKernel::validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const DWCComputeKernelInfo &dwc_info,
+ const ConvolutionInfo &conv_info,
+ const ITensorInfo *output_multipliers,
+ const ITensorInfo *output_shifts)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, biases, output, dwc_info, conv_info, output_multipliers, output_shifts));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_arguments(input, weights, biases, output, dwc_info, conv_info, output_multipliers, output_shifts));
return Status{};
}
@@ -370,47 +422,52 @@ void CLDepthwiseConvolutionLayerNativeKernel::run(const Window &window, cl::Comm
cl::Image2D input_cl_image;
cl::Image2D weights_cl_image;
- if(_export_input_to_cl_image || _export_weights_to_cl_image)
+ if (_export_input_to_cl_image || _export_weights_to_cl_image)
{
// Export cl_buffer to cl_image
- if(_export_input_to_cl_image)
+ if (_export_input_to_cl_image)
{
- const size_t image_w = _input->info()->dimension(0) / 4;
- const size_t image_h = _input->info()->dimension(1) * _input->info()->dimension(2) * _input->info()->dimension(3);
+ const size_t image_w = _input->info()->dimension(0) / 4;
+ const size_t image_h =
+ _input->info()->dimension(1) * _input->info()->dimension(2) * _input->info()->dimension(3);
const TensorShape shape2d(image_w, image_h);
const size_t image_row_pitch = _input->info()->strides_in_bytes()[1];
- input_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), _input->cl_buffer(), shape2d, _input->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
+ input_cl_image =
+ create_image2d_from_buffer(CLKernelLibrary::get().context(), _input->cl_buffer(), shape2d,
+ _input->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
}
- if(_export_weights_to_cl_image)
+ if (_export_weights_to_cl_image)
{
- const size_t image_w = _weights->info()->dimension(0) / 4;
- const size_t image_h = _weights->info()->dimension(1) * _weights->info()->dimension(2) * _weights->info()->dimension(3);
+ const size_t image_w = _weights->info()->dimension(0) / 4;
+ const size_t image_h =
+ _weights->info()->dimension(1) * _weights->info()->dimension(2) * _weights->info()->dimension(3);
const TensorShape shape2d(image_w, image_h);
const size_t image_row_pitch = _weights->info()->strides_in_bytes()[1];
- weights_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), _weights->cl_buffer(), shape2d, _weights->info()->data_type(), image_row_pitch,
- CLImage2DType::ReadOnly);
+ weights_cl_image =
+ create_image2d_from_buffer(CLKernelLibrary::get().context(), _weights->cl_buffer(), shape2d,
+ _weights->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
}
}
unsigned int idx = 0;
- if(_export_input_to_cl_image)
+ if (_export_input_to_cl_image)
{
_kernel.setArg(idx++, input_cl_image);
}
add_4d_tensor_nhwc_argument(idx, _input);
add_4d_tensor_nhwc_argument(idx, _output);
- if(_export_weights_to_cl_image)
+ if (_export_weights_to_cl_image)
{
_kernel.setArg(idx++, weights_cl_image);
}
add_4d_tensor_nhwc_argument(idx, _weights);
- if(_is_quantized)
+ if (_is_quantized)
{
add_1D_tensor_argument(idx, _output_multipliers, slice);
add_1D_tensor_argument(idx, _output_shifts, slice);
}
- if(_biases != nullptr)
+ if (_biases != nullptr)
{
add_1D_tensor_argument(idx, _biases, slice);
}
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
index 8eee7b2500..d34a662966 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h
@@ -24,11 +24,11 @@
#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H
#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H
-#include "src/core/CL/ICLKernel.h"
-
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/function_info/ConvolutionInfo.h"
+#include "src/core/CL/ICLKernel.h"
+
namespace arm_compute
{
class ICLTensor;
@@ -74,15 +74,28 @@ public:
* * no padding
* * no change of data layout after configure
*/
- void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCComputeKernelInfo &dwc_info,
- const ConvolutionInfo &conv_info, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
+ void configure(const CLCompileContext &compile_context,
+ ICLTensor *input,
+ const ICLTensor *weights,
+ const ICLTensor *biases,
+ ICLTensor *output,
+ const DWCComputeKernelInfo &dwc_info,
+ const ConvolutionInfo &conv_info,
+ const ICLTensor *output_multipliers = nullptr,
+ const ICLTensor *output_shifts = nullptr);
/** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerNativeKernel
*
* Similar to @ref CLDepthwiseConvolutionLayerNativeKernel::configure()
*/
- void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCComputeKernelInfo &dwc_info,
- const ConvolutionInfo &conv_info, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
+ void configure(ICLTensor *input,
+ const ICLTensor *weights,
+ const ICLTensor *biases,
+ ICLTensor *output,
+ const DWCComputeKernelInfo &dwc_info,
+ const ConvolutionInfo &conv_info,
+ const ICLTensor *output_multipliers = nullptr,
+ const ICLTensor *output_shifts = nullptr);
/** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerNativeKernel
*
@@ -90,23 +103,29 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const DWCComputeKernelInfo &dwc_info,
- const ConvolutionInfo &conv_info, const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const DWCComputeKernelInfo &dwc_info,
+ const ConvolutionInfo &conv_info,
+ const ITensorInfo *output_multipliers = nullptr,
+ const ITensorInfo *output_shifts = nullptr);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
private:
- const ICLTensor *_input {};
+ const ICLTensor *_input{};
const ICLTensor *_weights{};
const ICLTensor *_biases{};
ICLTensor *_output{};
- unsigned int _depth_multiplier{ 0 };
+ unsigned int _depth_multiplier{0};
const ICLTensor *_output_multipliers{};
const ICLTensor *_output_shifts{};
- bool _export_input_to_cl_image{ false };
- bool _export_weights_to_cl_image{ true };
- bool _is_quantized{ false };
+ bool _export_input_to_cl_image{false};
+ bool _export_weights_to_cl_image{true};
+ bool _is_quantized{false};
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H */
diff --git a/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp b/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp
index 9b514ed705..3d8f875ef7 100644
--- a/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp
+++ b/src/core/CL/kernels/CLFFTDigitReverseKernel.cpp
@@ -28,6 +28,7 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
@@ -37,17 +38,20 @@ namespace arm_compute
{
namespace
{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config)
+Status validate_arguments(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const ITensorInfo *idx,
+ const FFTDigitReverseKernelInfo &config)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(input, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(input->num_channels() != 1 && input->num_channels() != 2);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(idx, 1, DataType::U32);
- ARM_COMPUTE_RETURN_ERROR_ON(std::set<unsigned int>({ 0, 1 }).count(config.axis) == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(std::set<unsigned int>({0, 1}).count(config.axis) == 0);
ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[config.axis] != idx->tensor_shape().x());
// Checks performed when output is configured
- if((output != nullptr) && (output->total_size() != 0))
+ if ((output != nullptr) && (output->total_size() != 0))
{
ARM_COMPUTE_RETURN_ERROR_ON(output->num_channels() != 2);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
@@ -57,7 +61,10 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
return Status{};
}
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, ITensorInfo *idx, const FFTDigitReverseKernelInfo &config)
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input,
+ ITensorInfo *output,
+ ITensorInfo *idx,
+ const FFTDigitReverseKernelInfo &config)
{
ARM_COMPUTE_UNUSED(idx, config);
@@ -69,21 +76,27 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
}
} // namespace
-CLFFTDigitReverseKernel::CLFFTDigitReverseKernel()
- : _input(nullptr), _output(nullptr), _idx(nullptr)
+CLFFTDigitReverseKernel::CLFFTDigitReverseKernel() : _input(nullptr), _output(nullptr), _idx(nullptr)
{
_type = CLKernelType::ELEMENTWISE;
}
-void CLFFTDigitReverseKernel::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config)
+void CLFFTDigitReverseKernel::configure(const ICLTensor *input,
+ ICLTensor *output,
+ const ICLTensor *idx,
+ const FFTDigitReverseKernelInfo &config)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, idx, config);
}
-void CLFFTDigitReverseKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config)
+void CLFFTDigitReverseKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ const ICLTensor *idx,
+ const FFTDigitReverseKernelInfo &config)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, idx);
- auto padding_info = get_padding_info({ input, output, idx });
+ auto padding_info = get_padding_info({input, output, idx});
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), idx->info(), config));
_input = input;
@@ -114,10 +127,14 @@ void CLFFTDigitReverseKernel::configure(const CLCompileContext &compile_context,
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-Status CLFFTDigitReverseKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config)
+Status CLFFTDigitReverseKernel::validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const ITensorInfo *idx,
+ const FFTDigitReverseKernelInfo &config)
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, idx, config));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), idx->clone().get(), config).first);
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_and_configure_window(input->clone().get(), output->clone().get(), idx->clone().get(), config).first);
return Status{};
}
@@ -137,7 +154,6 @@ void CLFFTDigitReverseKernel::run(const Window &window, cl::CommandQueue &queue)
add_3D_tensor_argument(idx, _output, slice);
add_1D_tensor_argument(idx, _idx, slice);
enqueue(queue, *this, slice, lws_hint());
- }
- while(collapsed.slide_window_slice_3D(slice));
+ } while (collapsed.slide_window_slice_3D(slice));
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLFFTDigitReverseKernel.h b/src/core/CL/kernels/CLFFTDigitReverseKernel.h
index e5583a4c22..fdd1bcc3d3 100644
--- a/src/core/CL/kernels/CLFFTDigitReverseKernel.h
+++ b/src/core/CL/kernels/CLFFTDigitReverseKernel.h
@@ -24,10 +24,10 @@
#ifndef ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H
#define ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H
-#include "src/core/CL/ICLKernel.h"
-
#include "arm_compute/core/KernelDescriptors.h"
+#include "src/core/CL/ICLKernel.h"
+
namespace arm_compute
{
// Forward declarations
@@ -56,7 +56,8 @@ public:
* @param[in] idx Digit reverse index tensor. Data type supported: U32
* @param[in] config Kernel configuration.
*/
- void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config);
+ void
+ configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config);
/** Set the input and output tensors.
*
* @param[in] compile_context The compile context to be used.
@@ -65,7 +66,11 @@ public:
* @param[in] idx Digit reverse index tensor. Data type supported: U32
* @param[in] config Kernel configuration.
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ const ICLTensor *idx,
+ const FFTDigitReverseKernelInfo &config);
/** Static function to check if given info will lead to a valid configuration of @ref CLFFTDigitReverseKernel
*
* @param[in] input Source tensor info. Data types supported: F16/F32.
@@ -75,7 +80,10 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const ITensorInfo *idx,
+ const FFTDigitReverseKernelInfo &config);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/src/core/CL/kernels/CLFFTRadixStageKernel.cpp b/src/core/CL/kernels/CLFFTRadixStageKernel.cpp
index 95f4b640bd..3729e6b77d 100644
--- a/src/core/CL/kernels/CLFFTRadixStageKernel.cpp
+++ b/src/core/CL/kernels/CLFFTRadixStageKernel.cpp
@@ -29,6 +29,7 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
@@ -46,11 +47,11 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 2, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(CLFFTRadixStageKernel::supported_radix().count(config.radix) == 0);
- ARM_COMPUTE_RETURN_ERROR_ON(std::set<unsigned int>({ 0, 1 }).count(config.axis) == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(std::set<unsigned int>({0, 1}).count(config.axis) == 0);
ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[config.axis] % config.radix);
// Checks performed when output is configured
- if((output != nullptr) && (output->total_size() != 0))
+ if ((output != nullptr) && (output->total_size() != 0))
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
@@ -59,9 +60,10 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
return Status{};
}
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const FFTRadixStageKernelInfo &config)
+std::pair<Status, Window>
+validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const FFTRadixStageKernelInfo &config)
{
- if(output != nullptr)
+ if (output != nullptr)
{
auto_init_if_empty(*output, *input);
}
@@ -76,8 +78,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
}
} // namespace
-CLFFTRadixStageKernel::CLFFTRadixStageKernel()
- : _input(nullptr), _output(nullptr), _run_in_place(false)
+CLFFTRadixStageKernel::CLFFTRadixStageKernel() : _input(nullptr), _output(nullptr), _run_in_place(false)
{
_type = CLKernelType::ELEMENTWISE;
}
@@ -87,11 +88,15 @@ void CLFFTRadixStageKernel::configure(ICLTensor *input, ICLTensor *output, const
configure(CLKernelLibrary::get().get_compile_context(), input, output, config);
}
-void CLFFTRadixStageKernel::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config)
+void CLFFTRadixStageKernel::configure(const CLCompileContext &compile_context,
+ ICLTensor *input,
+ ICLTensor *output,
+ const FFTRadixStageKernelInfo &config)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (output != nullptr) ? output->info() : nullptr, config));
- auto padding_info = get_padding_info({ input, output });
+ ARM_COMPUTE_ERROR_THROW_ON(
+ validate_arguments(input->info(), (output != nullptr) ? output->info() : nullptr, config));
+ auto padding_info = get_padding_info({input, output});
_input = input;
_output = output;
@@ -110,11 +115,12 @@ void CLFFTRadixStageKernel::configure(const CLCompileContext &compile_context, I
_kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Set static arguments if not the first stage
- if(!config.is_first_stage)
+ if (!config.is_first_stage)
{
const unsigned int Ni = config.Nx * config.radix;
const float exp_const = (-2.0 * M_PI) / static_cast<float>(Ni);
- unsigned int idx = (1 + (_run_in_place ? 0 : 1)) * num_arguments_per_3D_tensor(); // Skip the input and output parameters
+ unsigned int idx =
+ (1 + (_run_in_place ? 0 : 1)) * num_arguments_per_3D_tensor(); // Skip the input and output parameters
_kernel.setArg<cl_uint>(idx++, config.Nx);
_kernel.setArg<cl_uint>(idx++, Ni);
_kernel.setArg<cl_float>(idx, exp_const);
@@ -136,21 +142,22 @@ void CLFFTRadixStageKernel::configure(const CLCompileContext &compile_context, I
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-Status CLFFTRadixStageKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const FFTRadixStageKernelInfo &config)
+Status CLFFTRadixStageKernel::validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const FFTRadixStageKernelInfo &config)
{
const bool run_in_place = (output == nullptr) || (output == input);
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, config));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(),
- (run_in_place) ? nullptr : output->clone().get(),
- config)
- .first);
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_and_configure_window(input->clone().get(), (run_in_place) ? nullptr : output->clone().get(), config)
+ .first);
return Status{};
}
std::set<unsigned int> CLFFTRadixStageKernel::supported_radix()
{
- return std::set<unsigned int> { 2, 3, 4, 5, 7, 8 };
+ return std::set<unsigned int>{2, 3, 4, 5, 7, 8};
}
void CLFFTRadixStageKernel::run(const Window &window, cl::CommandQueue &queue)
@@ -165,12 +172,11 @@ void CLFFTRadixStageKernel::run(const Window &window, cl::CommandQueue &queue)
{
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice);
- if(!_run_in_place)
+ if (!_run_in_place)
{
add_3D_tensor_argument(idx, _output, slice);
}
enqueue(queue, *this, slice, lws_hint());
- }
- while(collapsed.slide_window_slice_3D(slice));
+ } while (collapsed.slide_window_slice_3D(slice));
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLFFTRadixStageKernel.h b/src/core/CL/kernels/CLFFTRadixStageKernel.h
index 9bb310db83..de80bfced3 100644
--- a/src/core/CL/kernels/CLFFTRadixStageKernel.h
+++ b/src/core/CL/kernels/CLFFTRadixStageKernel.h
@@ -24,10 +24,10 @@
#ifndef ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H
#define ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H
-#include "src/core/CL/ICLKernel.h"
-
#include "arm_compute/core/KernelDescriptors.h"
+#include "src/core/CL/ICLKernel.h"
+
#include <set>
namespace arm_compute
@@ -69,7 +69,10 @@ public:
* @param[out] output Destination tensor. Can be nullptr. Data type supported: same as @p input
* @param[in] config FFT descriptor metadata.
*/
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config);
+ void configure(const CLCompileContext &compile_context,
+ ICLTensor *input,
+ ICLTensor *output,
+ const FFTRadixStageKernelInfo &config);
/** Static function to check if given info will lead to a valid configuration of @ref CLFFTRadixStageKernel
*
* @param[in] input Source tensor info. Data types supported: F16/F32.
diff --git a/src/core/CL/kernels/CLFFTScaleKernel.cpp b/src/core/CL/kernels/CLFFTScaleKernel.cpp
index 8a714d71bf..be6e16b074 100644
--- a/src/core/CL/kernels/CLFFTScaleKernel.cpp
+++ b/src/core/CL/kernels/CLFFTScaleKernel.cpp
@@ -28,6 +28,7 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
@@ -43,7 +44,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 2, DataType::F16, DataType::F32);
// Checks performed when output is configured
- if((output != nullptr) && (output->total_size() != 0))
+ if ((output != nullptr) && (output->total_size() != 0))
{
ARM_COMPUTE_RETURN_ERROR_ON(output->num_channels() != 1 && output->num_channels() != 2);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
@@ -54,8 +55,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
}
} // namespace
-CLFFTScaleKernel::CLFFTScaleKernel()
- : _input(nullptr), _output(nullptr), _run_in_place(false)
+CLFFTScaleKernel::CLFFTScaleKernel() : _input(nullptr), _output(nullptr), _run_in_place(false)
{
_type = CLKernelType::ELEMENTWISE;
}
@@ -65,11 +65,14 @@ void CLFFTScaleKernel::configure(ICLTensor *input, ICLTensor *output, const FFTS
configure(CLKernelLibrary::get().get_compile_context(), input, output, config);
}
-void CLFFTScaleKernel::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config)
+void CLFFTScaleKernel::configure(const CLCompileContext &compile_context,
+ ICLTensor *input,
+ ICLTensor *output,
+ const FFTScaleKernelInfo &config)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (output != nullptr) ? output->info() : nullptr));
- auto padding_info = get_padding_info({ input, output });
+ auto padding_info = get_padding_info({input, output});
_input = input;
_output = output;
@@ -78,20 +81,22 @@ void CLFFTScaleKernel::configure(const CLCompileContext &compile_context, ICLTen
// Create kernel
CLBuildOptions build_opts;
build_opts.add_option_if(_run_in_place, "-DIN_PLACE");
- build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(output != nullptr ? output->info()->num_channels() : input->info()->num_channels()));
+ build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(output != nullptr ? output->info()->num_channels()
+ : input->info()->num_channels()));
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
build_opts.add_option_if(config.conjugate, "-DCONJ");
std::string kernel_name = "fft_scale_conj";
_kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Set static arguments
- unsigned int idx = (1 + (_run_in_place ? 0 : 1)) * num_arguments_per_3D_tensor(); // Skip the input and output parameters
+ unsigned int idx =
+ (1 + (_run_in_place ? 0 : 1)) * num_arguments_per_3D_tensor(); // Skip the input and output parameters
_kernel.setArg<cl_float>(idx, config.scale);
// Configure kernel window
Window win = calculate_max_window(*input->info(), Steps());
- if(output != nullptr)
+ if (output != nullptr)
{
// Output auto inizialitation if not yet initialized
auto_init_if_empty(*output->info(), *input->info()->clone());
@@ -130,12 +135,11 @@ void CLFFTScaleKernel::run(const Window &window, cl::CommandQueue &queue)
{
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice);
- if(!_run_in_place)
+ if (!_run_in_place)
{
add_3D_tensor_argument(idx, _output, slice);
}
enqueue(queue, *this, slice, lws_hint());
- }
- while(collapsed.slide_window_slice_3D(slice));
+ } while (collapsed.slide_window_slice_3D(slice));
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLFFTScaleKernel.h b/src/core/CL/kernels/CLFFTScaleKernel.h
index cc518be193..b995282e02 100644
--- a/src/core/CL/kernels/CLFFTScaleKernel.h
+++ b/src/core/CL/kernels/CLFFTScaleKernel.h
@@ -24,10 +24,10 @@
#ifndef ARM_COMPUTE_CLFFTSCALEKERNEL_H
#define ARM_COMPUTE_CLFFTSCALEKERNEL_H
-#include "src/core/CL/ICLKernel.h"
-
#include "arm_compute/core/KernelDescriptors.h"
+#include "src/core/CL/ICLKernel.h"
+
namespace arm_compute
{
// Forward declarations
@@ -63,7 +63,10 @@ public:
* @param[out] output Destination tensor. Data type supported: same as @p input
* @param[in] config Kernel configuration
*/
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config);
+ void configure(const CLCompileContext &compile_context,
+ ICLTensor *input,
+ ICLTensor *output,
+ const FFTScaleKernelInfo &config);
/** Static function to check if given info will lead to a valid configuration of @ref CLFFTScaleKernel
*
* @param[in] input Source tensor info. Data types supported: F16/F32.
diff --git a/src/core/CL/kernels/CLFillBorderKernel.cpp b/src/core/CL/kernels/CLFillBorderKernel.cpp
index fcd99a4ed9..86bb502da3 100644
--- a/src/core/CL/kernels/CLFillBorderKernel.cpp
+++ b/src/core/CL/kernels/CLFillBorderKernel.cpp
@@ -31,14 +31,14 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/StringUtils.h"
#include "arm_compute/core/Validate.h"
+
#include "src/core/helpers/WindowHelpers.h"
#include "support/Cast.h"
#include "support/StringSupport.h"
namespace arm_compute
{
-CLFillBorderKernel::CLFillBorderKernel()
- : ICLKernel(), _tensor(nullptr)
+CLFillBorderKernel::CLFillBorderKernel() : ICLKernel(), _tensor(nullptr)
{
_type = CLKernelType::ELEMENTWISE;
}
@@ -56,27 +56,38 @@ void CLFillBorderKernel::set_constant_border(unsigned int idx, const PixelValue
ICLKernel::add_argument<T>(idx, static_cast<T>(value));
}
-void CLFillBorderKernel::configure(ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value)
+void CLFillBorderKernel::configure(ICLTensor *tensor,
+ BorderSize border_size,
+ BorderMode border_mode,
+ const PixelValue &constant_border_value)
{
configure(CLKernelLibrary::get().get_compile_context(), tensor, border_size, border_mode, constant_border_value);
}
-void CLFillBorderKernel::configure(const CLCompileContext &compile_context, ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value)
+void CLFillBorderKernel::configure(const CLCompileContext &compile_context,
+ ICLTensor *tensor,
+ BorderSize border_size,
+ BorderMode border_mode,
+ const PixelValue &constant_border_value)
{
_tensor = tensor;
configure(compile_context, tensor->info(), border_size, border_mode, constant_border_value);
}
-void CLFillBorderKernel::configure(const CLCompileContext &compile_context, ITensorInfo *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value)
+void CLFillBorderKernel::configure(const CLCompileContext &compile_context,
+ ITensorInfo *tensor,
+ BorderSize border_size,
+ BorderMode border_mode,
+ const PixelValue &constant_border_value)
{
ARM_COMPUTE_ERROR_ON(tensor == nullptr);
ARM_COMPUTE_ERROR_ON(tensor->num_channels() != 1);
- auto padding_info = get_padding_info({ tensor });
+ auto padding_info = get_padding_info({tensor});
border_size.limit(tensor->padding());
// If there is no border: early exit
- if(border_size.empty() || border_mode == BorderMode::UNDEFINED)
+ if (border_size.empty() || border_mode == BorderMode::UNDEFINED)
{
return;
}
@@ -98,25 +109,22 @@ void CLFillBorderKernel::configure(const CLCompileContext &compile_context, ITen
_kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Create static kernel arguments
- const unsigned int valid_width = tensor->valid_region().shape[0];
- const unsigned int valid_height = tensor->valid_region().shape[1];
- const cl_int2 valid_region_coords =
- {
- {
- static_cast<cl_int>(tensor->valid_region().anchor[0]),
- static_cast<cl_int>(tensor->valid_region().anchor[1]),
- }
- };
- const unsigned int total_valid_width = border_size.left + valid_width + border_size.right;
+ const unsigned int valid_width = tensor->valid_region().shape[0];
+ const unsigned int valid_height = tensor->valid_region().shape[1];
+ const cl_int2 valid_region_coords = {{
+ static_cast<cl_int>(tensor->valid_region().anchor[0]),
+ static_cast<cl_int>(tensor->valid_region().anchor[1]),
+ }};
+ const unsigned int total_valid_width = border_size.left + valid_width + border_size.right;
// Set static kernel arguments
unsigned int idx = num_arguments_per_3D_tensor(); //Skip the tensor parameters
ICLKernel::add_argument<cl_uint>(idx, valid_width);
ICLKernel::add_argument<cl_uint>(idx, valid_height);
ICLKernel::add_argument<cl_int2>(idx, valid_region_coords);
- if(BorderMode::CONSTANT == border_mode)
+ if (BorderMode::CONSTANT == border_mode)
{
- switch(dt)
+ switch (dt)
{
case DataType::U8:
case DataType::QASYMM8:
@@ -175,12 +183,13 @@ void CLFillBorderKernel::configure(const CLCompileContext &compile_context, ITen
void CLFillBorderKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
{
// Border mode undefined or border width == 0
- if(_kernel() == nullptr)
+ if (_kernel() == nullptr)
{
return;
}
- const auto tensor = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
+ const auto tensor =
+ utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
@@ -193,14 +202,13 @@ void CLFillBorderKernel::run_op(ITensorPack &tensors, const Window &window, cl::
unsigned int idx = 0;
add_3D_tensor_argument(idx, tensor, slice);
enqueue(queue, *this, slice, lws_hint());
- }
- while(collapsed.slide_window_slice_3D(slice));
+ } while (collapsed.slide_window_slice_3D(slice));
}
void CLFillBorderKernel::run(const Window &window, cl::CommandQueue &queue)
{
// Border mode undefined or border width == 0
- if(_kernel() == nullptr)
+ if (_kernel() == nullptr)
{
return;
}
@@ -216,7 +224,6 @@ void CLFillBorderKernel::run(const Window &window, cl::CommandQueue &queue)
unsigned int idx = 0;
add_3D_tensor_argument(idx, _tensor, slice);
enqueue(queue, *this, slice, lws_hint());
- }
- while(collapsed.slide_window_slice_3D(slice));
+ } while (collapsed.slide_window_slice_3D(slice));
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLFillBorderKernel.h b/src/core/CL/kernels/CLFillBorderKernel.h
index 7951f48171..5782143cf9 100644
--- a/src/core/CL/kernels/CLFillBorderKernel.h
+++ b/src/core/CL/kernels/CLFillBorderKernel.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Types.h"
+
#include "src/core/CL/ICLKernel.h"
namespace arm_compute
@@ -57,7 +58,11 @@ public:
* @param[in] border_mode Border mode to use for the convolution.
* @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
*/
- void configure(const CLCompileContext &compile_context, ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
+ void configure(const CLCompileContext &compile_context,
+ ICLTensor *tensor,
+ BorderSize border_size,
+ BorderMode border_mode,
+ const PixelValue &constant_border_value = PixelValue());
/** Initialise the kernel's input, output and border mode.
*
* @param[in,out] tensor Tensor to process Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32.
@@ -65,7 +70,10 @@ public:
* @param[in] border_mode Border mode to use for the convolution.
* @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
*/
- void configure(ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
+ void configure(ICLTensor *tensor,
+ BorderSize border_size,
+ BorderMode border_mode,
+ const PixelValue &constant_border_value = PixelValue());
/** Initialise the kernel's input, output and border mode.
*
* @param[in] compile_context The compile context to be used.
@@ -74,7 +82,11 @@ public:
* @param[in] border_mode Border mode to use for the convolution.
* @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
*/
- void configure(const CLCompileContext &compile_context, ITensorInfo *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
+ void configure(const CLCompileContext &compile_context,
+ ITensorInfo *tensor,
+ BorderSize border_size,
+ BorderMode border_mode,
+ const PixelValue &constant_border_value = PixelValue());
/** Function to set the constant value on fill border kernel depending on type.
*
diff --git a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp
index 68fe324df6..7da0679ae4 100644
--- a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp
+++ b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp
@@ -30,20 +30,26 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
-
#include "support/StringSupport.h"
namespace arm_compute
{
namespace
{
-Status validate_arguments(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var,
- const ITensorInfo *fused_weights, const ITensorInfo *fused_bias,
- const ITensorInfo *input_bias, const ITensorInfo *bn_beta, const ITensorInfo *bn_gamma,
- float epsilon, FuseBatchNormalizationType fbn_type)
+Status validate_arguments(const ITensorInfo *input_weights,
+ const ITensorInfo *bn_mean,
+ const ITensorInfo *bn_var,
+ const ITensorInfo *fused_weights,
+ const ITensorInfo *fused_bias,
+ const ITensorInfo *input_bias,
+ const ITensorInfo *bn_beta,
+ const ITensorInfo *bn_gamma,
+ float epsilon,
+ FuseBatchNormalizationType fbn_type)
{
ARM_COMPUTE_UNUSED(epsilon);
ARM_COMPUTE_ERROR_ON_NULLPTR(input_weights, bn_mean, bn_var);
@@ -54,43 +60,44 @@ Status validate_arguments(const ITensorInfo *input_weights, const ITensorInfo *b
ARM_COMPUTE_RETURN_ERROR_ON(input_bias == nullptr && fused_bias == nullptr);
ARM_COMPUTE_RETURN_ERROR_ON(bn_mean->num_dimensions() > 1);
- if(fbn_type == FuseBatchNormalizationType::CONVOLUTION)
+ if (fbn_type == FuseBatchNormalizationType::CONVOLUTION)
{
ARM_COMPUTE_RETURN_ERROR_ON(input_weights->dimension(3) != bn_mean->dimension(0));
}
else
{
- const size_t channel_idx = get_data_layout_dimension_index(input_weights->data_layout(), DataLayoutDimension::CHANNEL);
+ const size_t channel_idx =
+ get_data_layout_dimension_index(input_weights->data_layout(), DataLayoutDimension::CHANNEL);
ARM_COMPUTE_RETURN_ERROR_ON(input_weights->dimension(channel_idx) != bn_mean->dimension(0));
}
// Validate bias
- if(input_bias != nullptr)
+ if (input_bias != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(bn_mean, input_bias);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_weights, input_bias);
}
// Validate beta
- if(bn_beta != nullptr)
+ if (bn_beta != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(bn_mean, bn_beta);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_weights, bn_beta);
}
// Validate gamma
- if(bn_gamma != nullptr)
+ if (bn_gamma != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(bn_mean, bn_gamma);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_weights, bn_gamma);
}
// Validate output weights
- if(fused_weights != nullptr && fused_weights->total_size() != 0)
+ if (fused_weights != nullptr && fused_weights->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input_weights, fused_weights);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input_weights, fused_weights);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_weights, fused_weights);
}
// Validate output bias
- if(fused_bias != nullptr && fused_bias->total_size() != 0)
+ if (fused_bias != nullptr && fused_bias->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(bn_mean, fused_bias);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_weights, fused_bias);
@@ -101,28 +108,52 @@ Status validate_arguments(const ITensorInfo *input_weights, const ITensorInfo *b
} // namespace
CLFuseBatchNormalizationKernel::CLFuseBatchNormalizationKernel()
- : _input_weights(nullptr), _input_bias(nullptr), _bn_mean(nullptr), _bn_var(nullptr), _bn_gamma(nullptr), _bn_beta(nullptr), _fused_weights(nullptr), _fused_bias(nullptr), _epsilon(),
- _run_in_place_weights(false), _run_in_place_bias(false)
+ : _input_weights(nullptr),
+ _input_bias(nullptr),
+ _bn_mean(nullptr),
+ _bn_var(nullptr),
+ _bn_gamma(nullptr),
+ _bn_beta(nullptr),
+ _fused_weights(nullptr),
+ _fused_bias(nullptr),
+ _epsilon(),
+ _run_in_place_weights(false),
+ _run_in_place_bias(false)
{
_type = CLKernelType::ELEMENTWISE;
}
-void CLFuseBatchNormalizationKernel::configure(const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var,
- ICLTensor *fused_weights, ICLTensor *fused_bias,
- const ICLTensor *input_bias, const ICLTensor *bn_beta, const ICLTensor *bn_gamma,
- float epsilon, FuseBatchNormalizationType fbn_type)
+void CLFuseBatchNormalizationKernel::configure(const ICLTensor *input_weights,
+ const ICLTensor *bn_mean,
+ const ICLTensor *bn_var,
+ ICLTensor *fused_weights,
+ ICLTensor *fused_bias,
+ const ICLTensor *input_bias,
+ const ICLTensor *bn_beta,
+ const ICLTensor *bn_gamma,
+ float epsilon,
+ FuseBatchNormalizationType fbn_type)
{
- configure(CLKernelLibrary::get().get_compile_context(), input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type);
+ configure(CLKernelLibrary::get().get_compile_context(), input_weights, bn_mean, bn_var, fused_weights, fused_bias,
+ input_bias, bn_beta, bn_gamma, epsilon, fbn_type);
}
-void CLFuseBatchNormalizationKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var,
- ICLTensor *fused_weights, ICLTensor *fused_bias,
- const ICLTensor *input_bias, const ICLTensor *bn_beta, const ICLTensor *bn_gamma,
- float epsilon, FuseBatchNormalizationType fbn_type)
+void CLFuseBatchNormalizationKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input_weights,
+ const ICLTensor *bn_mean,
+ const ICLTensor *bn_var,
+ ICLTensor *fused_weights,
+ ICLTensor *fused_bias,
+ const ICLTensor *input_bias,
+ const ICLTensor *bn_beta,
+ const ICLTensor *bn_gamma,
+ float epsilon,
+ FuseBatchNormalizationType fbn_type)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input_weights, bn_mean, bn_var);
- auto padding_info = get_padding_info({ input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma });
+ auto padding_info =
+ get_padding_info({input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma});
_input_weights = input_weights;
_input_bias = input_bias;
@@ -135,28 +166,28 @@ void CLFuseBatchNormalizationKernel::configure(const CLCompileContext &compile_c
_epsilon = epsilon;
_run_in_place_weights = (fused_weights == nullptr) || (fused_weights == input_weights);
- _run_in_place_bias = (input_bias != nullptr && fused_bias == nullptr) || (input_bias != nullptr && fused_bias == input_bias);
+ _run_in_place_bias =
+ (input_bias != nullptr && fused_bias == nullptr) || (input_bias != nullptr && fused_bias == input_bias);
// Auto initialize outputs
- if(_fused_weights != nullptr)
+ if (_fused_weights != nullptr)
{
// Output tensor auto initialization if not yet initialized
auto_init_if_empty(*_fused_weights->info(), *_input_weights->info()->clone());
}
- if(_fused_bias != nullptr)
+ if (_fused_bias != nullptr)
{
// Output tensor auto initialization if not yet initialized
auto_init_if_empty(*_fused_bias->info(), *_bn_mean->info()->clone());
}
// Validate arguments
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input_weights->info(), bn_mean->info(), bn_var->info(),
- (fused_weights != nullptr) ? fused_weights->info() : nullptr,
- (fused_bias != nullptr) ? fused_bias->info() : nullptr,
- (input_bias != nullptr) ? input_bias->info() : nullptr,
- (bn_beta != nullptr) ? bn_beta->info() : nullptr,
- (bn_gamma != nullptr) ? bn_gamma->info() : nullptr,
- epsilon, fbn_type));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(
+ input_weights->info(), bn_mean->info(), bn_var->info(),
+ (fused_weights != nullptr) ? fused_weights->info() : nullptr,
+ (fused_bias != nullptr) ? fused_bias->info() : nullptr, (input_bias != nullptr) ? input_bias->info() : nullptr,
+ (bn_beta != nullptr) ? bn_beta->info() : nullptr, (bn_gamma != nullptr) ? bn_gamma->info() : nullptr, epsilon,
+ fbn_type));
// Configure kernel window
Window win = calculate_max_window(*input_weights->info());
@@ -165,7 +196,8 @@ void CLFuseBatchNormalizationKernel::configure(const CLCompileContext &compile_c
// Set build options
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input_weights->info()->data_type()));
- build_opts.add_option_if(fbn_type == FuseBatchNormalizationType::CONVOLUTION, "-DDIM2=" + support::cpp11::to_string(input_weights->info()->dimension(2)));
+ build_opts.add_option_if(fbn_type == FuseBatchNormalizationType::CONVOLUTION,
+ "-DDIM2=" + support::cpp11::to_string(input_weights->info()->dimension(2)));
build_opts.add_option("-DEPSILON=" + float_to_string_with_full_precision(epsilon));
build_opts.add_option_if(_input_weights->info()->data_layout() == DataLayout::NHWC, "-DNHWC");
build_opts.add_option_if(_run_in_place_weights, "-DIN_PLACE_W");
@@ -180,12 +212,19 @@ void CLFuseBatchNormalizationKernel::configure(const CLCompileContext &compile_c
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-Status CLFuseBatchNormalizationKernel::validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var,
- const ITensorInfo *fused_weights, const ITensorInfo *fused_bias,
- const ITensorInfo *input_bias, const ITensorInfo *bn_beta, const ITensorInfo *bn_gamma,
- float epsilon, FuseBatchNormalizationType fbn_type)
+Status CLFuseBatchNormalizationKernel::validate(const ITensorInfo *input_weights,
+ const ITensorInfo *bn_mean,
+ const ITensorInfo *bn_var,
+ const ITensorInfo *fused_weights,
+ const ITensorInfo *fused_bias,
+ const ITensorInfo *input_bias,
+ const ITensorInfo *bn_beta,
+ const ITensorInfo *bn_gamma,
+ float epsilon,
+ FuseBatchNormalizationType fbn_type)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input_weights, bn_mean, bn_var, fused_weights, fused_bias,
+ input_bias, bn_beta, bn_gamma, epsilon, fbn_type));
return Status{};
}
@@ -202,25 +241,25 @@ void CLFuseBatchNormalizationKernel::run(const arm_compute::Window &window, cl::
// Add kernel arguments
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input_weights, slice_3d);
- if(_input_bias != nullptr)
+ if (_input_bias != nullptr)
{
add_1D_tensor_argument(idx, _input_bias, slice_1d);
}
add_1D_tensor_argument(idx, _bn_mean, slice_1d);
add_1D_tensor_argument(idx, _bn_var, slice_1d);
- if(!_run_in_place_weights)
+ if (!_run_in_place_weights)
{
add_3D_tensor_argument(idx, _fused_weights, slice_3d);
}
- if(!_run_in_place_bias)
+ if (!_run_in_place_bias)
{
add_1D_tensor_argument(idx, _fused_bias, slice_1d);
}
- if(_bn_beta != nullptr)
+ if (_bn_beta != nullptr)
{
add_1D_tensor_argument(idx, _bn_beta, slice_1d);
}
- if(_bn_gamma != nullptr)
+ if (_bn_gamma != nullptr)
{
add_1D_tensor_argument(idx, _bn_gamma, slice_1d);
}
diff --git a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.h b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.h
index 78b1e74cab..76ec7a759f 100644
--- a/src/core/CL/kernels/CLFuseBatchNormalizationKernel.h
+++ b/src/core/CL/kernels/CLFuseBatchNormalizationKernel.h
@@ -62,9 +62,16 @@ public:
* @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
* @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION.
*/
- void configure(const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias,
- const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr,
- float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
+ void configure(const ICLTensor *input_weights,
+ const ICLTensor *bn_mean,
+ const ICLTensor *bn_var,
+ ICLTensor *fused_weights,
+ ICLTensor *fused_bias,
+ const ICLTensor *input_bias = nullptr,
+ const ICLTensor *bn_beta = nullptr,
+ const ICLTensor *bn_gamma = nullptr,
+ float epsilon = 0.001f,
+ FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
/** Set the source, destination of the kernel
*
* @param[in] compile_context The compile context to be used.
@@ -81,9 +88,17 @@ public:
* @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
* @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION.
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias,
- const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr,
- float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input_weights,
+ const ICLTensor *bn_mean,
+ const ICLTensor *bn_var,
+ ICLTensor *fused_weights,
+ ICLTensor *fused_bias,
+ const ICLTensor *input_bias = nullptr,
+ const ICLTensor *bn_beta = nullptr,
+ const ICLTensor *bn_gamma = nullptr,
+ float epsilon = 0.001f,
+ FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
/** Static function to check if given info will lead to a valid configuration of @ref CLFuseBatchNormalizationKernel
*
* @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
@@ -101,10 +116,16 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var,
- const ITensorInfo *fused_weights, const ITensorInfo *fused_bias,
- const ITensorInfo *input_bias = nullptr, const ITensorInfo *bn_beta = nullptr, const ITensorInfo *bn_gamma = nullptr,
- float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
+ static Status validate(const ITensorInfo *input_weights,
+ const ITensorInfo *bn_mean,
+ const ITensorInfo *bn_var,
+ const ITensorInfo *fused_weights,
+ const ITensorInfo *fused_bias,
+ const ITensorInfo *input_bias = nullptr,
+ const ITensorInfo *bn_beta = nullptr,
+ const ITensorInfo *bn_gamma = nullptr,
+ float epsilon = 0.001f,
+ FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/src/core/CL/kernels/CLGatherKernel.cpp b/src/core/CL/kernels/CLGatherKernel.cpp
index 5495023b80..c11a18940a 100644
--- a/src/core/CL/kernels/CLGatherKernel.cpp
+++ b/src/core/CL/kernels/CLGatherKernel.cpp
@@ -22,8 +22,10 @@
* SOFTWARE.
*/
#include "src/core/CL/kernels/CLGatherKernel.h"
+
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
@@ -34,7 +36,8 @@ namespace arm_compute
{
namespace
{
-inline Status validate_arguments(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis)
+inline Status
+validate_arguments(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, indices, output);
const uint32_t actual_axis = wrap_around(axis, static_cast<int>(input->num_dimensions()));
@@ -43,11 +46,12 @@ inline Status validate_arguments(const ITensorInfo *input, const ITensorInfo *in
ARM_COMPUTE_RETURN_ERROR_ON(actual_axis >= input->num_dimensions());
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
- TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape(input->tensor_shape(), indices->tensor_shape(), actual_axis);
+ TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape(
+ input->tensor_shape(), indices->tensor_shape(), actual_axis);
ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size());
}
@@ -56,12 +60,14 @@ inline Status validate_arguments(const ITensorInfo *input, const ITensorInfo *in
return Status{};
}
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *indices, ITensorInfo *output, int axis)
+std::pair<Status, Window>
+validate_and_configure_window(ITensorInfo *input, ITensorInfo *indices, ITensorInfo *output, int axis)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, indices);
const uint32_t actual_axis = wrap_around(axis, static_cast<int>(input->num_dimensions()));
// Output auto initialization if not yet initialized
- TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape(input->tensor_shape(), indices->tensor_shape(), actual_axis);
+ TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape(
+ input->tensor_shape(), indices->tensor_shape(), actual_axis);
auto_init_if_empty((*output), output_shape, 1, input->data_type());
// Create window
@@ -72,8 +78,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
} // namespace
-CLGatherKernel::CLGatherKernel()
- : _input(nullptr), _indices(nullptr), _output(nullptr), _axis(0)
+CLGatherKernel::CLGatherKernel() : _input(nullptr), _indices(nullptr), _output(nullptr), _axis(0)
{
_type = CLKernelType::ELEMENTWISE;
}
@@ -83,10 +88,14 @@ void CLGatherKernel::configure(const ICLTensor *input, const ICLTensor *indices,
configure(CLKernelLibrary::get().get_compile_context(), input, indices, output, axis);
}
-void CLGatherKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis)
+void CLGatherKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ const ICLTensor *indices,
+ ICLTensor *output,
+ int axis)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, indices);
- auto padding_info = get_padding_info({ input, output, indices });
+ auto padding_info = get_padding_info({input, output, indices});
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), indices->info(), output->info(), axis));
// Configure kernel window
@@ -100,7 +109,8 @@ void CLGatherKernel::configure(const CLCompileContext &compile_context, const IC
// Set build options
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(data_size_from_type(input->info()->data_type())));
+ build_opts.add_option("-DDATA_TYPE=" +
+ get_cl_unsigned_type_from_element_size(data_size_from_type(input->info()->data_type())));
build_opts.add_option("-DOUTPUT_DIM_Z=" + support::cpp11::to_string(output->info()->dimension(2)));
build_opts.add_option("-DINDICES_DIM_Z=" + support::cpp11::to_string(indices->info()->dimension(2)));
build_opts.add_option("-DINPUT_DIM_Z=" + support::cpp11::to_string(input->info()->dimension(2)));
@@ -114,10 +124,12 @@ void CLGatherKernel::configure(const CLCompileContext &compile_context, const IC
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-Status CLGatherKernel::validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis)
+Status
+CLGatherKernel::validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis)
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, indices, output, axis));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), indices->clone().get(), output->clone().get(), axis).first);
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_and_configure_window(input->clone().get(), indices->clone().get(), output->clone().get(), axis).first);
return Status{};
}
diff --git a/src/core/CL/kernels/CLGatherKernel.h b/src/core/CL/kernels/CLGatherKernel.h
index 8f472a4696..db4b49d2f5 100644
--- a/src/core/CL/kernels/CLGatherKernel.h
+++ b/src/core/CL/kernels/CLGatherKernel.h
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_CLGATHERKERNEL_H
#include "arm_compute/core/Types.h"
+
#include "src/core/CL/ICLKernel.h"
namespace arm_compute
@@ -63,7 +64,11 @@ public:
* @param[out] output Destination tensor. Data type supported: Same as @p input
* @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ const ICLTensor *indices,
+ ICLTensor *output,
+ int axis = 0);
/** Static function to check if given info will lead to a valid configuration of @ref CLGatherKernel
*
@@ -74,7 +79,8 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis = 0);
+ static Status
+ validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis = 0);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp
index 088c454f3c..b9ff72b928 100644
--- a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp
+++ b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp
@@ -31,6 +31,7 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
@@ -47,7 +48,7 @@ Status validate_arguments(const ITensorInfo *anchors, const ITensorInfo *all_anc
ARM_COMPUTE_RETURN_ERROR_ON(anchors->dimension(0) != info.values_per_roi());
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(anchors, DataType::QSYMM16, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(anchors->num_dimensions() > 2);
- if(all_anchors->total_size() > 0)
+ if (all_anchors->total_size() > 0)
{
size_t feature_height = info.feat_height();
size_t feature_width = info.feat_width();
@@ -57,7 +58,7 @@ Status validate_arguments(const ITensorInfo *anchors, const ITensorInfo *all_anc
ARM_COMPUTE_RETURN_ERROR_ON(all_anchors->dimension(0) != info.values_per_roi());
ARM_COMPUTE_RETURN_ERROR_ON(all_anchors->dimension(1) != feature_height * feature_width * num_anchors);
- if(is_data_type_quantized(anchors->data_type()))
+ if (is_data_type_quantized(anchors->data_type()))
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(anchors, all_anchors);
}
@@ -66,21 +67,25 @@ Status validate_arguments(const ITensorInfo *anchors, const ITensorInfo *all_anc
}
} // namespace
-CLComputeAllAnchorsKernel::CLComputeAllAnchorsKernel()
- : _anchors(nullptr), _all_anchors(nullptr)
+CLComputeAllAnchorsKernel::CLComputeAllAnchorsKernel() : _anchors(nullptr), _all_anchors(nullptr)
{
_type = CLKernelType::ELEMENTWISE;
}
-void CLComputeAllAnchorsKernel::configure(const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info)
+void CLComputeAllAnchorsKernel::configure(const ICLTensor *anchors,
+ ICLTensor *all_anchors,
+ const ComputeAnchorsInfo &info)
{
configure(CLKernelLibrary::get().get_compile_context(), anchors, all_anchors, info);
}
-void CLComputeAllAnchorsKernel::configure(const CLCompileContext &compile_context, const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info)
+void CLComputeAllAnchorsKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *anchors,
+ ICLTensor *all_anchors,
+ const ComputeAnchorsInfo &info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(anchors, all_anchors);
- auto padding_info = get_padding_info({ anchors, all_anchors });
+ auto padding_info = get_padding_info({anchors, all_anchors});
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(anchors->info(), all_anchors->info(), info));
// Metadata
@@ -91,7 +96,8 @@ void CLComputeAllAnchorsKernel::configure(const CLCompileContext &compile_contex
// Initialize the output if empty
const TensorShape output_shape(info.values_per_roi(), width * height * num_anchors);
- auto_init_if_empty(*all_anchors->info(), TensorInfo(output_shape, 1, data_type, anchors->info()->quantization_info()));
+ auto_init_if_empty(*all_anchors->info(),
+ TensorInfo(output_shape, 1, data_type, anchors->info()->quantization_info()));
// Set instance variables
_anchors = anchors;
@@ -108,7 +114,7 @@ void CLComputeAllAnchorsKernel::configure(const CLCompileContext &compile_contex
build_opts.add_option("-DNUM_ANCHORS=" + support::cpp11::to_string(num_anchors));
build_opts.add_option("-DNUM_ROI_FIELDS=" + support::cpp11::to_string(info.values_per_roi()));
- if(is_quantized)
+ if (is_quantized)
{
const UniformQuantizationInfo qinfo = anchors->info()->quantization_info().uniform();
build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(qinfo.scale));
@@ -116,8 +122,9 @@ void CLComputeAllAnchorsKernel::configure(const CLCompileContext &compile_contex
}
// Create kernel
- const std::string kernel_name = (is_quantized) ? "generate_proposals_compute_all_anchors_quantized" : "generate_proposals_compute_all_anchors";
- _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
+ const std::string kernel_name =
+ (is_quantized) ? "generate_proposals_compute_all_anchors_quantized" : "generate_proposals_compute_all_anchors";
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// The tensor all_anchors can be interpreted as an array of structs (each structs has values_per_roi fields).
// This means we don't need to pad on the X dimension, as we know in advance how many fields
@@ -127,7 +134,9 @@ void CLComputeAllAnchorsKernel::configure(const CLCompileContext &compile_contex
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-Status CLComputeAllAnchorsKernel::validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info)
+Status CLComputeAllAnchorsKernel::validate(const ITensorInfo *anchors,
+ const ITensorInfo *all_anchors,
+ const ComputeAnchorsInfo &info)
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(anchors, all_anchors, info));
return Status{};
diff --git a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.h b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.h
index d26795ac7d..e08f281d6c 100644
--- a/src/core/CL/kernels/CLGenerateProposalsLayerKernel.h
+++ b/src/core/CL/kernels/CLGenerateProposalsLayerKernel.h
@@ -62,7 +62,10 @@ public:
* @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
*
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *anchors,
+ ICLTensor *all_anchors,
+ const ComputeAnchorsInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref CLComputeAllAnchorsKernel
*
@@ -81,5 +84,5 @@ private:
const ICLTensor *_anchors;
ICLTensor *_all_anchors;
};
-} // arm_compute
+} // namespace arm_compute
#endif // ARM_COMPUTE_CLGENERATEPROSPOSALSLAYERKERNEL_H
diff --git a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp
index 7ed323c950..b13eb16556 100644
--- a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp
@@ -30,6 +30,7 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
@@ -39,17 +40,20 @@ namespace arm_compute
{
namespace
{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info)
+Status validate_arguments(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const InstanceNormalizationLayerKernelInfo &info)
{
ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.epsilon == 0.f, "Epsilon must be different than 0");
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(input, DataType::F16, DataType::F32);
- if(output != nullptr && output->total_size() != 0)
+ if (output != nullptr && output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_channels() != output->num_channels(), "Input and output have different number of channels");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_channels() != output->num_channels(),
+ "Input and output have different number of channels");
}
return Status{};
@@ -59,27 +63,30 @@ Status validate_arguments_meanvar(const ITensorInfo *input, const ITensorInfo *o
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(input, DataType::F16, DataType::F32);
- if(output != nullptr && output->total_size() != 0)
+ if (output != nullptr && output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_channels() != output->num_channels(), "Input and output have different number of channels");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_channels() != output->num_channels(),
+ "Input and output have different number of channels");
}
return Status{};
}
} // namespace
-CLComputeMeanVariance::CLComputeMeanVariance()
- : _input(nullptr), _output(nullptr)
+CLComputeMeanVariance::CLComputeMeanVariance() : _input(nullptr), _output(nullptr)
{
_type = CLKernelType::ELEMENTWISE;
}
-void CLComputeMeanVariance::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, bool use_mixed_precision)
+void CLComputeMeanVariance::configure(const CLCompileContext &compile_context,
+ ICLTensor *input,
+ ICLTensor *output,
+ bool use_mixed_precision)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input);
- auto padding_info = get_padding_info({ input, output });
+ auto padding_info = get_padding_info({input, output});
_input = input;
_output = output == nullptr ? input : output;
@@ -88,7 +95,8 @@ void CLComputeMeanVariance::configure(const CLCompileContext &compile_context, I
const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size();
CLBuildOptions build_opts;
- build_opts.add_option("-DINTERNAL_DATA_TYPE=" + (use_mixed_precision ? "float" : get_cl_type_from_data_type(input->info()->data_type())));
+ build_opts.add_option("-DINTERNAL_DATA_TYPE=" +
+ (use_mixed_precision ? "float" : get_cl_type_from_data_type(input->info()->data_type())));
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
build_opts.add_option("-DDIM_X=" + support::cpp11::to_string(input->info()->dimension(0)));
@@ -108,7 +116,7 @@ void CLComputeMeanVariance::configure(const CLCompileContext &compile_context, I
const TensorShape out_shape(input_channel, 2u, input_batches);
// Output auto initialization if not yet initialized
- if(use_mixed_precision)
+ if (use_mixed_precision)
{
auto_init_if_empty(*_output->info(), out_shape, 1, DataType::F32);
}
@@ -134,7 +142,7 @@ void CLComputeMeanVariance::run(const Window &window, cl::CommandQueue &queue)
Window collapsed_window = window.collapse(window, Window::DimZ);
// We will process the planes together
- if(_input->info()->data_layout() == DataLayout::NCHW)
+ if (_input->info()->data_layout() == DataLayout::NCHW)
{
collapsed_window.set(Window::DimX, Window::Dimension(0, 1, 1));
collapsed_window.set(Window::DimY, Window::Dimension(0, 1, 1));
@@ -157,10 +165,14 @@ CLInstanceNormalizationLayerKernel::CLInstanceNormalizationLayerKernel()
_type = CLKernelType::ELEMENTWISE;
}
-void CLInstanceNormalizationLayerKernel::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *mean_var, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info)
+void CLInstanceNormalizationLayerKernel::configure(const CLCompileContext &compile_context,
+ ICLTensor *input,
+ ICLTensor *mean_var,
+ ICLTensor *output,
+ const InstanceNormalizationLayerKernelInfo &info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input);
- auto padding_info = get_padding_info({ input, output });
+ auto padding_info = get_padding_info({input, output});
_input = input;
_output = output == nullptr ? input : output;
@@ -172,7 +184,9 @@ void CLInstanceNormalizationLayerKernel::configure(const CLCompileContext &compi
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.add_option("-DINTERNAL_DATA_TYPE=" + (info.use_mixed_precision ? "float" : get_cl_type_from_data_type(input->info()->data_type())));
+ build_opts.add_option("-DINTERNAL_DATA_TYPE=" + (info.use_mixed_precision
+ ? "float"
+ : get_cl_type_from_data_type(input->info()->data_type())));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
build_opts.add_option("-DDIM_X=" + support::cpp11::to_string(input->info()->dimension(0)));
build_opts.add_option("-DDIM_Y=" + support::cpp11::to_string(input->info()->dimension(1)));
@@ -188,7 +202,7 @@ void CLInstanceNormalizationLayerKernel::configure(const CLCompileContext &compi
// Configure kernel window
Window win = calculate_max_window(*input->info(), Steps(1));
- if(output != nullptr)
+ if (output != nullptr)
{
auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type());
}
@@ -197,7 +211,9 @@ void CLInstanceNormalizationLayerKernel::configure(const CLCompileContext &compi
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-Status CLInstanceNormalizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info)
+Status CLInstanceNormalizationLayerKernel::validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const InstanceNormalizationLayerKernelInfo &info)
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, info));
return Status{};
@@ -211,7 +227,7 @@ void CLInstanceNormalizationLayerKernel::run(const Window &window, cl::CommandQu
Window collapsed_window = window.collapse(window, Window::DimZ);
// We will process the planes together
- if(_input->info()->data_layout() == DataLayout::NCHW)
+ if (_input->info()->data_layout() == DataLayout::NCHW)
{
collapsed_window.set(Window::DimX, Window::Dimension(0, 1, 1));
collapsed_window.set(Window::DimY, Window::Dimension(0, 1, 1));
@@ -226,7 +242,7 @@ void CLInstanceNormalizationLayerKernel::run(const Window &window, cl::CommandQu
add_4D_tensor_argument(idx, _input, collapsed_window);
add_3D_tensor_argument(idx, _mean, collapsed_window);
- if(!_run_in_place)
+ if (!_run_in_place)
{
add_4D_tensor_argument(idx, _output, collapsed_window);
}
diff --git a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h
index 2f9014a651..9f436da7f6 100644
--- a/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h
+++ b/src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h
@@ -24,10 +24,10 @@
#ifndef ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H
#define ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H
-#include "src/core/CL/ICLKernel.h"
-
#include "arm_compute/core/KernelDescriptors.h"
+#include "src/core/CL/ICLKernel.h"
+
namespace arm_compute
{
// Forward declarations
@@ -59,7 +59,11 @@ public:
* @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
* @param[in] info Kernel meta-data descriptor
*/
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *mean_var, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info);
+ void configure(const CLCompileContext &compile_context,
+ ICLTensor *input,
+ ICLTensor *mean_var,
+ ICLTensor *output,
+ const InstanceNormalizationLayerKernelInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref CLInstanceNormalizationLayer.
*
@@ -69,7 +73,8 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info);
+ static Status
+ validate(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
@@ -106,7 +111,8 @@ public:
* @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
* @param[in] use_mixed_precision Use mixed precision in case of FP16 execution
*/
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, bool use_mixed_precision);
+ void
+ configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, bool use_mixed_precision);
/** Static function to check if given info will lead to a valid configuration of @ref CLInstanceNormalizationLayer.
*
diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
index 542d380e4a..9ed9d7c5b0 100644
--- a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
+++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
@@ -31,10 +31,10 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/Validate.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
-
#include "support/StringSupport.h"
namespace arm_compute
@@ -43,7 +43,8 @@ namespace
{
constexpr int max_input_tensor_dim = 3;
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon)
+Status
+validate_arguments(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon)
{
ARM_COMPUTE_UNUSED(epsilon);
@@ -53,14 +54,15 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *sum, cons
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(actual_axis > 2, "Actual axis greater than 2 is not supported");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(actual_axis >= TensorShape::num_max_dimensions, "Actual normalization axis greater than max number of dimensions");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(actual_axis >= TensorShape::num_max_dimensions,
+ "Actual normalization axis greater than max number of dimensions");
// Reduce shape on axis
TensorShape sum_shape = input->tensor_shape();
sum_shape.set(actual_axis, 1);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(sum->tensor_shape(), sum_shape);
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
@@ -78,16 +80,22 @@ CLL2NormalizeLayerKernel::CLL2NormalizeLayerKernel()
_type = CLKernelType::ELEMENTWISE;
}
-void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon)
+void CLL2NormalizeLayerKernel::configure(
+ const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon)
{
configure(CLKernelLibrary::get().get_compile_context(), input, sum, output, axis, epsilon);
}
-void CLL2NormalizeLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon)
+void CLL2NormalizeLayerKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ const ICLTensor *sum,
+ ICLTensor *output,
+ int axis,
+ float epsilon)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, sum, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), sum->info(), output->info(), axis, epsilon));
- auto padding_info = get_padding_info({ input, sum, output });
+ auto padding_info = get_padding_info({input, sum, output});
_input = input;
_sum = sum;
@@ -95,8 +103,9 @@ void CLL2NormalizeLayerKernel::configure(const CLCompileContext &compile_context
_actual_axis = wrap_around(axis, max_input_tensor_dim);
_epsilon = epsilon;
- const unsigned int vec_size_x = adjust_vec_size(max_cl_vector_width / input->info()->element_size(), input->info()->dimension(0));
- const int vec_size_x_leftovers = input->info()->dimension(0) % vec_size_x;
+ const unsigned int vec_size_x =
+ adjust_vec_size(max_cl_vector_width / input->info()->element_size(), input->info()->dimension(0));
+ const int vec_size_x_leftovers = input->info()->dimension(0) % vec_size_x;
// Set build options
CLBuildOptions build_opts;
@@ -107,7 +116,7 @@ void CLL2NormalizeLayerKernel::configure(const CLCompileContext &compile_context
// Create kernel
std::string kernel_name;
unsigned int idx = 0;
- switch(_actual_axis)
+ switch (_actual_axis)
{
case 0:
kernel_name = "l2_normalize_x";
@@ -127,7 +136,7 @@ void CLL2NormalizeLayerKernel::configure(const CLCompileContext &compile_context
_kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Set epsilon argument
- if(input->info()->data_type() == DataType::F32)
+ if (input->info()->data_type() == DataType::F32)
{
_kernel.setArg<cl_float>(idx, _epsilon);
}
@@ -146,7 +155,8 @@ void CLL2NormalizeLayerKernel::configure(const CLCompileContext &compile_context
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-Status CLL2NormalizeLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon)
+Status CLL2NormalizeLayerKernel::validate(
+ const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon)
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, sum, output, axis, epsilon));
return Status{};
@@ -159,7 +169,7 @@ void CLL2NormalizeLayerKernel::run(const Window &window, cl::CommandQueue &queue
Window window_sum(window);
- switch(_actual_axis)
+ switch (_actual_axis)
{
case 0:
{
@@ -173,8 +183,7 @@ void CLL2NormalizeLayerKernel::run(const Window &window, cl::CommandQueue &queue
add_2D_tensor_argument(idx, _sum, sum_slice);
add_2D_tensor_argument(idx, _output, in_slice);
enqueue(queue, *this, in_slice, lws_hint());
- }
- while(window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(sum_slice));
+ } while (window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(sum_slice));
}
break;
case 1:
@@ -189,8 +198,7 @@ void CLL2NormalizeLayerKernel::run(const Window &window, cl::CommandQueue &queue
add_2D_tensor_argument(idx, _sum, sum_slice);
add_2D_tensor_argument(idx, _output, in_slice);
enqueue(queue, *this, in_slice, lws_hint());
- }
- while(window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(sum_slice));
+ } while (window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(sum_slice));
}
break;
case 2:
@@ -205,8 +213,7 @@ void CLL2NormalizeLayerKernel::run(const Window &window, cl::CommandQueue &queue
add_3D_tensor_argument(idx, _sum, sum_slice);
add_3D_tensor_argument(idx, _output, in_slice);
enqueue(queue, *this, in_slice, lws_hint());
- }
- while(window.slide_window_slice_3D(in_slice) && window.slide_window_slice_3D(sum_slice));
+ } while (window.slide_window_slice_3D(in_slice) && window.slide_window_slice_3D(sum_slice));
}
break;
default:
diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.h b/src/core/CL/kernels/CLL2NormalizeLayerKernel.h
index edc0585217..5c9ab94ce5 100644
--- a/src/core/CL/kernels/CLL2NormalizeLayerKernel.h
+++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.h
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H
#include "arm_compute/core/Types.h"
+
#include "src/core/CL/ICLKernel.h"
namespace arm_compute
@@ -70,7 +71,12 @@ public:
* @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2
* @param[in] epsilon Lower bound value for the normalization.
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ const ICLTensor *sum,
+ ICLTensor *output,
+ int axis,
+ float epsilon);
/** Static function to check if given info will lead to a valid configuration of @ref CLL2NormalizeLayerKernel.
*
@@ -84,7 +90,8 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon);
+ static Status
+ validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp
index dc9d68626d..e560f1de4a 100644
--- a/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp
@@ -31,6 +31,7 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
@@ -42,26 +43,31 @@ using namespace misc::shape_calculator;
namespace
{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
+Status validate_arguments(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const PoolingLayerInfo &pool_info,
+ const ITensorInfo *indices)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output, indices);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+ DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(indices, 1, DataType::U32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, indices);
- int pool_stride_x = 0;
- int pool_stride_y = 0;
- PoolingType pool_type = pool_info.pool_type;
- const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
+ int pool_stride_x = 0;
+ int pool_stride_y = 0;
+ PoolingType pool_type = pool_info.pool_type;
+ const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
- const int pool_size_x = pool_info.pool_size.width;
- const int pool_size_y = pool_info.pool_size.height;
+ const int pool_size_x = pool_info.pool_size.width;
+ const int pool_size_y = pool_info.pool_size.height;
const Size2D pool_size(pool_size_x, pool_size_y);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(pool_type != PoolingType::MAX, "Pooling indices only supported for MAX pooling method");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(pool_type != PoolingType::MAX,
+ "Pooling indices only supported for MAX pooling method");
ARM_COMPUTE_RETURN_ERROR_ON_MSG((pool_size != Size2D(2, 2)), "Pooling indices only supported for pool size 2x2");
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
@@ -71,17 +77,20 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
}
} // namespace
-CLMaxUnpoolingLayerKernel::CLMaxUnpoolingLayerKernel()
- : _input(nullptr), _output(nullptr), _indices(nullptr)
+CLMaxUnpoolingLayerKernel::CLMaxUnpoolingLayerKernel() : _input(nullptr), _output(nullptr), _indices(nullptr)
{
_type = CLKernelType::POOL;
}
-void CLMaxUnpoolingLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, const PoolingLayerInfo &pool_info)
+void CLMaxUnpoolingLayerKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ const ICLTensor *indices,
+ ICLTensor *output,
+ const PoolingLayerInfo &pool_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info, indices->info()));
- auto padding_info = get_padding_info({ input, indices, output });
+ auto padding_info = get_padding_info({input, indices, output});
_input = input;
_output = output;
@@ -119,7 +128,10 @@ void CLMaxUnpoolingLayerKernel::configure(const CLCompileContext &compile_contex
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-Status CLMaxUnpoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
+Status CLMaxUnpoolingLayerKernel::validate(const ITensorInfo *input,
+ const ITensorInfo *indices,
+ const ITensorInfo *output,
+ const PoolingLayerInfo &pool_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, indices, output);
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, pool_info, indices));
@@ -140,7 +152,6 @@ void CLMaxUnpoolingLayerKernel::run(const Window &window, cl::CommandQueue &queu
add_3D_tensor_argument(idx, _output, slice);
add_3D_tensor_argument(idx, _indices, slice);
enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_3D(slice));
+ } while (window.slide_window_slice_3D(slice));
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h
index 45481d0507..eb18a46784 100644
--- a/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h
+++ b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h
@@ -59,7 +59,11 @@ public:
* @param[out] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, const PoolingLayerInfo &pool_info);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ const ICLTensor *indices,
+ ICLTensor *output,
+ const PoolingLayerInfo &pool_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLMaxUnpoolingLayerKernel
*
* @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
@@ -72,7 +76,10 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *indices,
+ const ITensorInfo *output,
+ const PoolingLayerInfo &pool_info);
// Inherited methods overridden
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp
index ac33468ad8..8632bdf623 100644
--- a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp
+++ b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp
@@ -31,6 +31,7 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
@@ -49,7 +50,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, f
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
// Checks performed when output is configured
- if((output != nullptr) && (output->total_size() != 0))
+ if ((output != nullptr) && (output->total_size() != 0))
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
@@ -69,15 +70,19 @@ void CLMeanStdDevNormalizationKernel::configure(ICLTensor *input, ICLTensor *out
configure(CLKernelLibrary::get().get_compile_context(), input, output, epsilon);
}
-void CLMeanStdDevNormalizationKernel::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, float epsilon)
+void CLMeanStdDevNormalizationKernel::configure(const CLCompileContext &compile_context,
+ ICLTensor *input,
+ ICLTensor *output,
+ float epsilon)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input);
_run_in_place = (output == nullptr) || (output == input);
- ARM_COMPUTE_ERROR_THROW_ON(CLMeanStdDevNormalizationKernel::validate(input->info(), (output != nullptr) ? output->info() : nullptr, epsilon));
+ ARM_COMPUTE_ERROR_THROW_ON(CLMeanStdDevNormalizationKernel::validate(
+ input->info(), (output != nullptr) ? output->info() : nullptr, epsilon));
- if(output != nullptr)
+ if (output != nullptr)
{
auto_init_if_empty(*output->info(), *input->info());
}
@@ -85,7 +90,8 @@ void CLMeanStdDevNormalizationKernel::configure(const CLCompileContext &compile_
_input = input;
_output = output;
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / input->info()->element_size(), input->info()->dimension(0));
+ const unsigned int num_elems_processed_per_iteration =
+ adjust_vec_size(16 / input->info()->element_size(), input->info()->dimension(0));
// Set build options
CLBuildOptions build_opts;
@@ -134,7 +140,6 @@ void CLMeanStdDevNormalizationKernel::run(const Window &window, cl::CommandQueue
add_2D_tensor_argument_if((!_run_in_place), idx, _output, slice);
enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
+ } while (window.slide_window_slice_2D(slice));
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h
index a1ba2b905e..e02a3c58a3 100644
--- a/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h
+++ b/src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h
@@ -66,7 +66,10 @@ public:
* @param[out] output (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input
* @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8.
*/
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f);
+ void configure(const CLCompileContext &compile_context,
+ ICLTensor *input,
+ ICLTensor *output = nullptr,
+ float epsilon = 1e-8f);
/** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevNormalizationKernel
*
* @param[in] input Source tensor info with 2 dimensions. In case of @p output tensor info = nullptr,
diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
index c6c4229c00..b636c485e7 100644
--- a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
@@ -32,6 +32,7 @@
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/StringUtils.h"
#include "arm_compute/core/Window.h"
+
#include "src/core/AccessWindowStatic.h"
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
@@ -53,7 +54,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, N
ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(norm_info.norm_size() % 2), "Normalization size should be odd");
// Checks performed when output is configured
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
@@ -63,7 +64,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, N
return Status{};
}
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, NormalizationLayerInfo norm_info)
+std::pair<Status, Window>
+validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, NormalizationLayerInfo norm_info)
{
// Output tensor auto initialization if not yet initialized
auto_init_if_empty(*output, *input->clone());
@@ -71,9 +73,10 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
bool window_changed = false;
Window win;
const DataLayout data_layout = input->data_layout();
- if(data_layout == DataLayout::NCHW)
+ if (data_layout == DataLayout::NCHW)
{
- const unsigned int vec_size_x = adjust_vec_size(max_cl_vector_width / input->element_size(), input->dimension(0));
+ const unsigned int vec_size_x =
+ adjust_vec_size(max_cl_vector_width / input->element_size(), input->dimension(0));
const unsigned int norm_idx = get_normalization_dimension_index(input->data_layout(), norm_info);
const bool is_norm_across_width = norm_idx == 0;
@@ -87,15 +90,16 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
// The output has 1 right padding because of the vec_size_x.
// The input has 1 left padding because radius = 1.
// The input has 2 right padding because of radius = 1 AND because of the extra output padding
- const unsigned int border_width_left = is_norm_across_width ? norm_radius : 0;
- const unsigned int border_width_right = is_norm_across_width ? norm_radius + (vec_size_x - input->dimension(0) % vec_size_x) : 0;
- const BorderSize border_size = BorderSize(0, border_width_right, 0, border_width_left);
+ const unsigned int border_width_left = is_norm_across_width ? norm_radius : 0;
+ const unsigned int border_width_right =
+ is_norm_across_width ? norm_radius + (vec_size_x - input->dimension(0) % vec_size_x) : 0;
+ const BorderSize border_size = BorderSize(0, border_width_right, 0, border_width_left);
win = calculate_max_window(*input, Steps(vec_size_x));
// We do not use a Rectangle window for IN_MAP_2D as we clamp the top and bottom accesses inside the kernel, avoiding padding
// Reads can occur within the valid region of the input
- if(is_norm_across_width)
+ if (is_norm_across_width)
{
AccessWindowStatic input_access(input, -border_size.left, 0, input->dimension(0) + border_size.right, 0);
window_changed = window_changed || update_window_and_padding(win, input_access);
@@ -112,13 +116,14 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
else
{
unsigned int vec_size_x = adjust_vec_size(max_cl_vector_width / input->element_size(), input->dimension(0));
- if(norm_info.is_cross_map())
+ if (norm_info.is_cross_map())
{
vec_size_x = 1;
}
win = calculate_max_window(*input, Steps(vec_size_x));
}
- Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+ Status err =
+ (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
return std::make_pair(err, win);
}
} // namespace
@@ -139,10 +144,13 @@ void CLNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *ou
configure(CLKernelLibrary::get().get_compile_context(), input, output, norm_info);
}
-void CLNormalizationLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info)
+void CLNormalizationLayerKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ NormalizationLayerInfo norm_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- auto padding_info = get_padding_info({ input, output });
+ auto padding_info = get_padding_info({input, output});
// Perform validation step
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), norm_info));
@@ -152,16 +160,17 @@ void CLNormalizationLayerKernel::configure(const CLCompileContext &compile_conte
_input = input;
_output = output;
- const DataLayout data_layout = input->info()->data_layout();
- unsigned int vec_size_x = adjust_vec_size(max_cl_vector_width / input->info()->element_size(), input->info()->dimension(0));
- int vec_size_x_leftovers = input->info()->dimension(0) % vec_size_x;
- if(norm_info.is_cross_map() && data_layout == DataLayout::NHWC)
+ const DataLayout data_layout = input->info()->data_layout();
+ unsigned int vec_size_x =
+ adjust_vec_size(max_cl_vector_width / input->info()->element_size(), input->info()->dimension(0));
+ int vec_size_x_leftovers = input->info()->dimension(0) % vec_size_x;
+ if (norm_info.is_cross_map() && data_layout == DataLayout::NHWC)
{
vec_size_x = 1;
vec_size_x_leftovers = 0;
}
- if(data_layout == DataLayout::NCHW)
+ if (data_layout == DataLayout::NCHW)
{
const unsigned int norm_idx = get_normalization_dimension_index(data_layout, norm_info);
_is_norm_across_width = norm_idx == 0;
@@ -175,9 +184,10 @@ void CLNormalizationLayerKernel::configure(const CLCompileContext &compile_conte
// The output has 1 right padding because of the vec_size_x.
// The input has 1 left padding because radius = 1.
// The input has 2 right padding because of radius = 1 AND the extra output padding
- const unsigned int border_width_left = _is_norm_across_width ? norm_radius : 0;
- const unsigned int border_width_right = _is_norm_across_width ? norm_radius + (vec_size_x - input->info()->dimension(0) % vec_size_x) : 0;
- _border_size = BorderSize(0, border_width_right, 0, border_width_left);
+ const unsigned int border_width_left = _is_norm_across_width ? norm_radius : 0;
+ const unsigned int border_width_right =
+ _is_norm_across_width ? norm_radius + (vec_size_x - input->info()->dimension(0) % vec_size_x) : 0;
+ _border_size = BorderSize(0, border_width_right, 0, border_width_left);
}
const bool is_in_map_2D = (norm_info.type() == NormType::IN_MAP_2D);
@@ -193,12 +203,14 @@ void CLNormalizationLayerKernel::configure(const CLCompileContext &compile_conte
build_opts.add_option(("-DRADIUS=" + support::cpp11::to_string(norm_info.norm_size() / 2)));
build_opts.add_option(("-DNUM_SLICES=" + support::cpp11::to_string(input->info()->dimension(2))));
build_opts.add_option_if(is_in_map_2D, "-DIN_MAP_2D");
- build_opts.add_option_if(norm_info.is_in_map() || (data_layout == DataLayout::NHWC && norm_info.is_cross_map()), "-DWIDTH_SIZE=" + support::cpp11::to_string(input->info()->dimension(0)));
- build_opts.add_option_if(norm_info.is_in_map() && data_layout == DataLayout::NHWC, "-DDIM1_SIZE=" + support::cpp11::to_string(input->info()->dimension(1)));
+ build_opts.add_option_if(norm_info.is_in_map() || (data_layout == DataLayout::NHWC && norm_info.is_cross_map()),
+ "-DWIDTH_SIZE=" + support::cpp11::to_string(input->info()->dimension(0)));
+ build_opts.add_option_if(norm_info.is_in_map() && data_layout == DataLayout::NHWC,
+ "-DDIM1_SIZE=" + support::cpp11::to_string(input->info()->dimension(1)));
// Create kernel
std::string kernel_name;
- if(norm_info.is_in_map())
+ if (norm_info.is_in_map())
{
kernel_name = "normalization_layer_in_map_" + lower_string(string_from_data_layout(data_layout));
}
@@ -222,16 +234,19 @@ void CLNormalizationLayerKernel::configure(const CLCompileContext &compile_conte
_config_id += support::cpp11::to_string(input->info()->dimension(0));
_config_id += "_";
_config_id += support::cpp11::to_string(input->info()->dimension(1));
- if(data_layout == DataLayout::NHWC)
+ if (data_layout == DataLayout::NHWC)
{
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
}
-Status CLNormalizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, NormalizationLayerInfo norm_info)
+Status CLNormalizationLayerKernel::validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ NormalizationLayerInfo norm_info)
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, norm_info));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), norm_info).first);
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_and_configure_window(input->clone().get(), output->clone().get(), norm_info).first);
return Status{};
}
@@ -251,7 +266,6 @@ void CLNormalizationLayerKernel::run(const Window &window, cl::CommandQueue &que
add_3D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx, _output, slice);
enqueue(queue, *this, slice, lws_hint());
- }
- while(window_collapsed.slide_window_slice_3D(slice));
+ } while (window_collapsed.slide_window_slice_3D(slice));
}
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.h b/src/core/CL/kernels/CLNormalizationLayerKernel.h
index 739a2ae9f1..5517ba6904 100644
--- a/src/core/CL/kernels/CLNormalizationLayerKernel.h
+++ b/src/core/CL/kernels/CLNormalizationLayerKernel.h
@@ -63,7 +63,10 @@ public:
* Data layouts supported: same as @p input.
* @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ NormalizationLayerInfo norm_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLNormalizationLayerKernel
*
* @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
@@ -77,7 +80,7 @@ public:
static Status validate(const ITensorInfo *input, const ITensorInfo *output, NormalizationLayerInfo norm_info);
// Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
+ void run(const Window &window, cl::CommandQueue &queue) override;
BorderSize border_size() const override;
private:
diff --git a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp
index 6b0400d50e..59352a8fb7 100644
--- a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp
+++ b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp
@@ -31,32 +31,35 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/AccessWindowStatic.h"
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
-
#include "support/StringSupport.h"
namespace arm_compute
{
namespace
{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std)
+Status
+validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+ DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, mean, std);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(mean, std);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(mean->num_dimensions() > 1, "mean and std must be vectors");
- const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
+ const unsigned int channel_idx =
+ get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(channel_idx) != mean->dimension(0));
// Checks performed when output is configured
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
@@ -77,7 +80,8 @@ std::pair<Status, Window> validate_and_configure_window_nchw(ITensorInfo *input,
bool window_changed = update_window_and_padding(win, input_access, output_access);
- Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+ Status err =
+ (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
return std::make_pair(err, win);
}
} // namespace
@@ -88,12 +92,19 @@ CLNormalizePlanarYUVLayerKernel::CLNormalizePlanarYUVLayerKernel()
_type = CLKernelType::ELEMENTWISE;
}
-void CLNormalizePlanarYUVLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std)
+void CLNormalizePlanarYUVLayerKernel::configure(const ICLTensor *input,
+ ICLTensor *output,
+ const ICLTensor *mean,
+ const ICLTensor *std)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, mean, std);
}
-void CLNormalizePlanarYUVLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std)
+void CLNormalizePlanarYUVLayerKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ const ICLTensor *mean,
+ const ICLTensor *std)
{
// Perform validation step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, mean, std);
@@ -102,7 +113,7 @@ void CLNormalizePlanarYUVLayerKernel::configure(const CLCompileContext &compile_
// Output tensor auto initialization if not yet initialized
auto_init_if_empty(*output->info(), *input->info()->clone());
- auto padding_info = get_padding_info({ input, output });
+ auto padding_info = get_padding_info({input, output});
_input = input;
_output = output;
@@ -112,9 +123,10 @@ void CLNormalizePlanarYUVLayerKernel::configure(const CLCompileContext &compile_
const DataLayout data_layout = input->info()->data_layout();
// Get number of elements to process per iterations
- const unsigned int num_elems_processed_per_iteration = (data_layout == DataLayout::NHWC) ? adjust_vec_size(16 / input->info()->element_size(),
- input->info()->dimension(0)) :
- (16 / input->info()->element_size());
+ const unsigned int num_elems_processed_per_iteration =
+ (data_layout == DataLayout::NHWC)
+ ? adjust_vec_size(16 / input->info()->element_size(), input->info()->dimension(0))
+ : (16 / input->info()->element_size());
const unsigned int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
const DataType dt = input->info()->data_type();
@@ -122,11 +134,12 @@ void CLNormalizePlanarYUVLayerKernel::configure(const CLCompileContext &compile_
CLBuildOptions build_opts;
build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(dt)));
build_opts.add_option(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
- build_opts.add_option(("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->info()->dimension(0) % num_elems_processed_per_iteration)));
+ build_opts.add_option(("-DVEC_SIZE_LEFTOVER=" +
+ support::cpp11::to_string(input->info()->dimension(0) % num_elems_processed_per_iteration)));
build_opts.add_option(("-DNUM_CHANNELS=" + support::cpp11::to_string(input->info()->dimension(channel_idx))));
std::string kernel_name = "normalize_planar_yuv_layer_";
- if(is_data_type_quantized(dt))
+ if (is_data_type_quantized(dt))
{
const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
build_opts.add_option(("-DOFFSET=" + support::cpp11::to_string(qinfo.offset)));
@@ -139,7 +152,7 @@ void CLNormalizePlanarYUVLayerKernel::configure(const CLCompileContext &compile_
_kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Configure kernel window
- if(data_layout == DataLayout::NHWC)
+ if (data_layout == DataLayout::NHWC)
{
Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
ICLKernel::configure_internal(win);
@@ -165,12 +178,16 @@ void CLNormalizePlanarYUVLayerKernel::configure(const CLCompileContext &compile_
_config_id += support::cpp11::to_string(input->info()->dimension(2));
}
-Status CLNormalizePlanarYUVLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std)
+Status CLNormalizePlanarYUVLayerKernel::validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const ITensorInfo *mean,
+ const ITensorInfo *std)
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, mean, std));
- if(input->data_layout() == DataLayout::NCHW)
+ if (input->data_layout() == DataLayout::NCHW)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window_nchw(input->clone().get(), output->clone().get()).first);
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_and_configure_window_nchw(input->clone().get(), output->clone().get()).first);
}
return Status{};
}
@@ -196,7 +213,6 @@ void CLNormalizePlanarYUVLayerKernel::run(const Window &window, cl::CommandQueue
add_3D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx, _output, slice);
enqueue(queue, *this, slice, lws_hint());
- }
- while(collapsed.slide_window_slice_3D(slice));
+ } while (collapsed.slide_window_slice_3D(slice));
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h
index 6db4433e78..341b404e3d 100644
--- a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h
+++ b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h
@@ -67,7 +67,11 @@ public:
* @param[in] std Standard deviation values tensor. 1 dimension with size equal to the number of input channels.
* Data types supported: same as @p input
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ const ICLTensor *mean,
+ const ICLTensor *std);
/** Static function to check if given info will lead to a valid configuration of @ref CLNormalizePlanarYUVLayerKernel
*
* @param[in] input Source tensor info. 3 lower dimensions represent a single input with dimensions [width, height, channels].
@@ -79,7 +83,8 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std);
+ static Status
+ validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/src/core/CL/kernels/CLPadLayerKernel.cpp b/src/core/CL/kernels/CLPadLayerKernel.cpp
index 53f313c0d3..0ac285038e 100644
--- a/src/core/CL/kernels/CLPadLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPadLayerKernel.cpp
@@ -27,6 +27,7 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
@@ -35,25 +36,29 @@ namespace arm_compute
{
namespace
{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode)
+Status validate_arguments(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const PaddingList &padding,
+ PixelValue constant_value,
+ PaddingMode mode)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_UNUSED(constant_value);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON((padding.size() < 1) || (padding.size() > input->num_dimensions()));
- if(mode == PaddingMode::REFLECT || mode == PaddingMode::SYMMETRIC)
+ if (mode == PaddingMode::REFLECT || mode == PaddingMode::SYMMETRIC)
{
ARM_COMPUTE_RETURN_ERROR_ON(padding.size() > 3);
const auto is_reflect = static_cast<unsigned int>(mode == PaddingMode::REFLECT);
- for(size_t i = 0; i < padding.size(); ++i)
+ for (size_t i = 0; i < padding.size(); ++i)
{
ARM_COMPUTE_RETURN_ERROR_ON(padding.at(i).first > (input->dimension(i) - is_reflect));
ARM_COMPUTE_RETURN_ERROR_ON(padding.at(i).second > (input->dimension(i) - is_reflect));
}
}
- if(output->total_size() > 0)
+ if (output->total_size() > 0)
{
TensorShape padded_shape = misc::shape_calculator::compute_padded_shape(input->tensor_shape(), padding);
@@ -65,41 +70,51 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
}
} // namespace
-CLPadLayerKernel::CLPadLayerKernel()
- : _input(nullptr), _output(nullptr), _4d_enabled(false)
+CLPadLayerKernel::CLPadLayerKernel() : _input(nullptr), _output(nullptr), _4d_enabled(false)
{
_type = CLKernelType::ELEMENTWISE;
}
-void CLPadLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode)
+void CLPadLayerKernel::configure(
+ const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, padding, constant_value, mode);
}
-void CLPadLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode)
+void CLPadLayerKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ const PaddingList &padding,
+ PixelValue constant_value,
+ PaddingMode mode)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(misc::shape_calculator::compute_padded_shape(input->info()->tensor_shape(), padding)));
+ auto_init_if_empty(*output->info(),
+ input->info()->clone()->set_tensor_shape(
+ misc::shape_calculator::compute_padded_shape(input->info()->tensor_shape(), padding)));
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), padding, constant_value, mode));
- auto padding_info = get_padding_info({ input, output });
+ auto padding_info = get_padding_info({input, output});
_input = input;
_output = output;
_4d_enabled = (mode == PaddingMode::CONSTANT) && (padding.size() > 3);
// Set build options
- const DataType &data_type = input->info()->data_type();
- const unsigned int input_width = input->info()->dimension(0);
- const unsigned int input_height = input->info()->dimension(1);
- const unsigned int input_depth = input->info()->dimension(2);
- const unsigned int pad_x_before = padding.at(0).first;
- const unsigned int pad_y_before = padding.size() > 1 ? padding.at(1).first : 0;
- const unsigned int pad_z_before = padding.size() > 2 ? padding.at(2).first : 0;
- const unsigned int vec_size = adjust_vec_size(std::min(16U, 32U / static_cast<unsigned int>(element_size_from_data_type(input->info()->data_type()))), input_width);
- const unsigned int pad_right_start = input_width + pad_x_before;
- const unsigned int pad_x_before_remainder = pad_x_before % vec_size;
- const unsigned int vec_size_leftover_write = vec_size - (ceil_to_multiple(output->info()->dimension(0), vec_size) - output->info()->dimension(0));
+ const DataType &data_type = input->info()->data_type();
+ const unsigned int input_width = input->info()->dimension(0);
+ const unsigned int input_height = input->info()->dimension(1);
+ const unsigned int input_depth = input->info()->dimension(2);
+ const unsigned int pad_x_before = padding.at(0).first;
+ const unsigned int pad_y_before = padding.size() > 1 ? padding.at(1).first : 0;
+ const unsigned int pad_z_before = padding.size() > 2 ? padding.at(2).first : 0;
+ const unsigned int vec_size = adjust_vec_size(
+ std::min(16U, 32U / static_cast<unsigned int>(element_size_from_data_type(input->info()->data_type()))),
+ input_width);
+ const unsigned int pad_right_start = input_width + pad_x_before;
+ const unsigned int pad_x_before_remainder = pad_x_before % vec_size;
+ const unsigned int vec_size_leftover_write =
+ vec_size - (ceil_to_multiple(output->info()->dimension(0), vec_size) - output->info()->dimension(0));
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
@@ -108,12 +123,12 @@ void CLPadLayerKernel::configure(const CLCompileContext &compile_context, const
build_opts.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(input_width));
build_opts.add_option("-DPAD_X_BEFORE_REMAINDER=" + support::cpp11::to_string(pad_x_before_remainder));
build_opts.add_option("-DVEC_SIZE_LEFTOVER_WRITE=" + support::cpp11::to_string(vec_size_leftover_write));
- if(padding.size() > 1)
+ if (padding.size() > 1)
{
build_opts.add_option("-DPAD_Y_BEFORE=" + support::cpp11::to_string(pad_y_before));
build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(input_height));
- if(padding.size() > 2)
+ if (padding.size() > 2)
{
build_opts.add_option("-DPAD_Z_BEFORE=" + support::cpp11::to_string(pad_z_before));
build_opts.add_option("-DSRC_DEPTH=" + support::cpp11::to_string(input_depth));
@@ -121,23 +136,25 @@ void CLPadLayerKernel::configure(const CLCompileContext &compile_context, const
}
std::string kernel_name = "pad_layer_";
- switch(mode)
+ switch (mode)
{
case PaddingMode::CONSTANT:
{
kernel_name += "constant";
- const unsigned int vec_size_leftover_read = vec_size - (ceil_to_multiple(pad_right_start, vec_size) - pad_right_start);
+ const unsigned int vec_size_leftover_read =
+ vec_size - (ceil_to_multiple(pad_right_start, vec_size) - pad_right_start);
build_opts.add_option("-DCONST_VAL=" + string_from_pixel_value(constant_value, data_type));
build_opts.add_option("-DVEC_SIZE_LEFTOVER_READ=" + support::cpp11::to_string(vec_size_leftover_read));
- if(pad_x_before >= vec_size)
+ if (pad_x_before >= vec_size)
{
build_opts.add_option("-DTHREADS_TO_SKIP_BEFORE=" + support::cpp11::to_string(pad_x_before / vec_size));
- build_opts.add_option("-DTHREADS_TO_SKIP_AFTER=" + support::cpp11::to_string(pad_right_start / vec_size));
+ build_opts.add_option("-DTHREADS_TO_SKIP_AFTER=" +
+ support::cpp11::to_string(pad_right_start / vec_size));
}
- if(_4d_enabled)
+ if (_4d_enabled)
{
build_opts.add_option("-DPAD_W_BEFORE=" + support::cpp11::to_string(padding.at(3).first));
build_opts.add_option("-DSRC_BATCH=" + support::cpp11::to_string(input->info()->dimension(3)));
@@ -154,14 +171,17 @@ void CLPadLayerKernel::configure(const CLCompileContext &compile_context, const
const unsigned int pad_x_after_remainder = pad_right_start % vec_size;
const unsigned int after_pad_fact_x = (2 * input_width + pad_x_before) - is_reflect;
- const unsigned int output_last_x = ceil_to_multiple(pad_right_start + padding.at(0).second, vec_size);
+ const unsigned int output_last_x = ceil_to_multiple(pad_right_start + padding.at(0).second, vec_size);
build_opts.add_option("-DIS_REFLECT=" + support::cpp11::to_string(is_reflect));
build_opts.add_option("-DPAD_X_AFTER_REMAINDER=" + support::cpp11::to_string(pad_x_after_remainder));
- build_opts.add_option("-DPAD_X_BEFORE_REMAINDER_REFL=" + support::cpp11::to_string((pad_x_before_remainder + is_reflect) % vec_size));
- build_opts.add_option("-DPAD_X_AFTER_REMAINDER_REFL=" + support::cpp11::to_string((pad_x_after_remainder - is_reflect) % vec_size));
+ build_opts.add_option("-DPAD_X_BEFORE_REMAINDER_REFL=" +
+ support::cpp11::to_string((pad_x_before_remainder + is_reflect) % vec_size));
+ build_opts.add_option("-DPAD_X_AFTER_REMAINDER_REFL=" +
+ support::cpp11::to_string((pad_x_after_remainder - is_reflect) % vec_size));
build_opts.add_option("-DAFTER_PAD_FACT_X=" + support::cpp11::to_string(after_pad_fact_x));
- build_opts.add_option_if(after_pad_fact_x < output_last_x, "-DAFTER_PAD_REM=" + support::cpp11::to_string(after_pad_fact_x % vec_size));
+ build_opts.add_option_if(after_pad_fact_x < output_last_x,
+ "-DAFTER_PAD_REM=" + support::cpp11::to_string(after_pad_fact_x % vec_size));
break;
}
@@ -179,7 +199,11 @@ void CLPadLayerKernel::configure(const CLCompileContext &compile_context, const
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-Status CLPadLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode)
+Status CLPadLayerKernel::validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const PaddingList &padding,
+ PixelValue constant_value,
+ PaddingMode mode)
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, padding, constant_value, mode));
return Status{};
@@ -197,13 +221,12 @@ void CLPadLayerKernel::run(const Window &window, cl::CommandQueue &queue)
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx, _output, slice);
- if(_4d_enabled)
+ if (_4d_enabled)
{
add_argument<unsigned int>(idx, batch++);
}
enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_3D(slice));
+ } while (window.slide_window_slice_3D(slice));
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLPadLayerKernel.h b/src/core/CL/kernels/CLPadLayerKernel.h
index 90af337f94..dca121b6a1 100644
--- a/src/core/CL/kernels/CLPadLayerKernel.h
+++ b/src/core/CL/kernels/CLPadLayerKernel.h
@@ -56,7 +56,11 @@ public:
* @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT,
* or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT).
*/
- void configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), PaddingMode mode = PaddingMode::CONSTANT);
+ void configure(const ICLTensor *input,
+ ICLTensor *output,
+ const PaddingList &padding,
+ PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
/** Set the input and output tensor.
*
* @param[in] compile_context The compile context to be used.
@@ -68,8 +72,12 @@ public:
* @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT,
* or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT).
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(),
- PaddingMode mode = PaddingMode::CONSTANT);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ const PaddingList &padding,
+ PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
/** Static function to check if given info will lead to a valid configuration of @ref CLPadLayerKernel
*
* @param[in] input Source tensor info. Data types supported: All.
@@ -80,7 +88,11 @@ public:
* @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT,
* or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT).
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), PaddingMode mode = PaddingMode::CONSTANT);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const PaddingList &padding,
+ PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp b/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp
index bf1b874dd0..7dcdf1de6f 100644
--- a/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPriorBoxLayerKernel.cpp
@@ -30,10 +30,10 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
-
#include "support/StringSupport.h"
using namespace arm_compute::misc::shape_calculator;
@@ -42,7 +42,10 @@ namespace arm_compute
{
namespace
{
-Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info)
+Status validate_arguments(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ const PriorBoxLayerInfo &info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::F32);
@@ -51,10 +54,10 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2,
// Check variances
const int var_size = info.variances().size();
- if(var_size > 1)
+ if (var_size > 1)
{
ARM_COMPUTE_RETURN_ERROR_ON_MSG(var_size != 4, "Must provide 4 variance values");
- for(int i = 0; i < var_size; ++i)
+ for (int i = 0; i < var_size; ++i)
{
ARM_COMPUTE_RETURN_ERROR_ON_MSG(var_size <= 0, "Must be greater than 0");
}
@@ -62,17 +65,19 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2,
ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.steps()[0] < 0.f, "Step x should be greater or equal to 0");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.steps()[1] < 0.f, "Step y should be greater or equal to 0");
- if(!info.max_sizes().empty())
+ if (!info.max_sizes().empty())
{
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.max_sizes().size() != info.min_sizes().size(), "Max and min sizes dimensions should match");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.max_sizes().size() != info.min_sizes().size(),
+ "Max and min sizes dimensions should match");
}
- for(unsigned int i = 0; i < info.max_sizes().size(); ++i)
+ for (unsigned int i = 0; i < info.max_sizes().size(); ++i)
{
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.max_sizes()[i] < info.min_sizes()[i], "Max size should be greater than min size");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.max_sizes()[i] < info.min_sizes()[i],
+ "Max size should be greater than min size");
}
- if(output != nullptr && output->total_size() != 0)
+ if (output != nullptr && output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != 2);
}
@@ -80,7 +85,11 @@ Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2,
return Status{};
}
-std::pair<Status, Window> validate_and_configure_window(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, const PriorBoxLayerInfo &info, int num_priors)
+std::pair<Status, Window> validate_and_configure_window(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ ITensorInfo *output,
+ const PriorBoxLayerInfo &info,
+ int num_priors)
{
ARM_COMPUTE_UNUSED(input2);
// Output tensor auto initialization if not yet initialized
@@ -88,10 +97,11 @@ std::pair<Status, Window> validate_and_configure_window(const ITensorInfo *input
auto_init_if_empty(*output, output_shape, 1, input1->data_type());
const unsigned int num_elems_processed_per_iteration = 4 * num_priors;
- Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
+ Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
bool window_changed = update_window_and_padding(win, output_access);
- Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+ Status err =
+ (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
return std::make_pair(err, win);
}
} // namespace
@@ -102,13 +112,25 @@ CLPriorBoxLayerKernel::CLPriorBoxLayerKernel()
_type = CLKernelType::ELEMENTWISE;
}
-void CLPriorBoxLayerKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max, cl::Buffer *aspect_ratios)
+void CLPriorBoxLayerKernel::configure(const ICLTensor *input1,
+ const ICLTensor *input2,
+ ICLTensor *output,
+ const PriorBoxLayerInfo &info,
+ cl::Buffer *min,
+ cl::Buffer *max,
+ cl::Buffer *aspect_ratios)
{
configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, info, min, max, aspect_ratios);
}
-void CLPriorBoxLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min,
- cl::Buffer *max, cl::Buffer *aspect_ratios)
+void CLPriorBoxLayerKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input1,
+ const ICLTensor *input2,
+ ICLTensor *output,
+ const PriorBoxLayerInfo &info,
+ cl::Buffer *min,
+ cl::Buffer *max,
+ cl::Buffer *aspect_ratios)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
@@ -135,7 +157,7 @@ void CLPriorBoxLayerKernel::configure(const CLCompileContext &compile_context, c
int img_width = info.img_size().x;
int img_height = info.img_size().y;
- if(img_width == 0 || img_height == 0)
+ if (img_width == 0 || img_height == 0)
{
img_width = input2->info()->dimension(width_idx);
img_height = input2->info()->dimension(height_idx);
@@ -143,7 +165,7 @@ void CLPriorBoxLayerKernel::configure(const CLCompileContext &compile_context, c
float step_x = info.steps()[0];
float step_y = info.steps()[0];
- if(step_x == 0.f || step_y == 0.f)
+ if (step_x == 0.f || step_y == 0.f)
{
step_x = static_cast<float>(img_width) / layer_width;
step_y = static_cast<float>(img_height) / layer_height;
@@ -162,18 +184,20 @@ void CLPriorBoxLayerKernel::configure(const CLCompileContext &compile_context, c
build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(info.offset()));
build_opts.add_option_if(info.clip(), "-DIN_PLACE");
- if(info.variances().size() > 1)
+ if (info.variances().size() > 1)
{
- for(unsigned int i = 0; i < info.variances().size(); ++i)
+ for (unsigned int i = 0; i < info.variances().size(); ++i)
{
- build_opts.add_option("-DVARIANCE_" + support::cpp11::to_string(i) + "=" + support::cpp11::to_string(info.variances().at(i)));
+ build_opts.add_option("-DVARIANCE_" + support::cpp11::to_string(i) + "=" +
+ support::cpp11::to_string(info.variances().at(i)));
}
}
else
{
- for(unsigned int i = 0; i < 4; ++i)
+ for (unsigned int i = 0; i < 4; ++i)
{
- build_opts.add_option("-DVARIANCE_" + support::cpp11::to_string(i) + "=" + support::cpp11::to_string(info.variances().at(0)));
+ build_opts.add_option("-DVARIANCE_" + support::cpp11::to_string(i) + "=" +
+ support::cpp11::to_string(info.variances().at(0)));
}
}
@@ -194,13 +218,17 @@ void CLPriorBoxLayerKernel::configure(const CLCompileContext &compile_context, c
ICLKernel::configure_internal(win_config.second);
}
-Status CLPriorBoxLayerKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info)
+Status CLPriorBoxLayerKernel::validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ const PriorBoxLayerInfo &info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, output, info));
const int num_priors = info.aspect_ratios().size() * info.min_sizes().size() + info.max_sizes().size();
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input1->clone().get(), input2->clone().get(), output->clone().get(), info, num_priors)
- .first);
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input1->clone().get(), input2->clone().get(),
+ output->clone().get(), info, num_priors)
+ .first);
return Status{};
}
@@ -211,8 +239,9 @@ void CLPriorBoxLayerKernel::run(const Window &window, cl::CommandQueue &queue)
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
queue.enqueueWriteBuffer(*_min, CL_TRUE, 0, _info.min_sizes().size() * sizeof(float), _info.min_sizes().data());
- queue.enqueueWriteBuffer(*_aspect_ratios, CL_TRUE, 0, _info.aspect_ratios().size() * sizeof(float), _info.aspect_ratios().data());
- if(!_info.max_sizes().empty())
+ queue.enqueueWriteBuffer(*_aspect_ratios, CL_TRUE, 0, _info.aspect_ratios().size() * sizeof(float),
+ _info.aspect_ratios().data());
+ if (!_info.max_sizes().empty())
{
queue.enqueueWriteBuffer(*_max, CL_TRUE, 0, _info.max_sizes().size() * sizeof(float), _info.max_sizes().data());
}
diff --git a/src/core/CL/kernels/CLPriorBoxLayerKernel.h b/src/core/CL/kernels/CLPriorBoxLayerKernel.h
index 6c369a7a4e..a50e0c5ff5 100644
--- a/src/core/CL/kernels/CLPriorBoxLayerKernel.h
+++ b/src/core/CL/kernels/CLPriorBoxLayerKernel.h
@@ -57,7 +57,13 @@ public:
* @param[in] max Maximum prior box values
* @param[in] aspect_ratios Aspect ratio values
*/
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max, cl::Buffer *aspect_ratios);
+ void configure(const ICLTensor *input1,
+ const ICLTensor *input2,
+ ICLTensor *output,
+ const PriorBoxLayerInfo &info,
+ cl::Buffer *min,
+ cl::Buffer *max,
+ cl::Buffer *aspect_ratios);
/** Set the input and output tensors.
*
* @param[in] compile_context The compile context to be used.
@@ -69,8 +75,14 @@ public:
* @param[in] max Maximum prior box values
* @param[in] aspect_ratios Aspect ratio values
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max,
- cl::Buffer *aspect_ratios);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input1,
+ const ICLTensor *input2,
+ ICLTensor *output,
+ const PriorBoxLayerInfo &info,
+ cl::Buffer *min,
+ cl::Buffer *max,
+ cl::Buffer *aspect_ratios);
/** Static function to check if given info will lead to a valid configuration of @ref CLPriorBoxLayerKernel
*
* @param[in] input1 First source tensor info. Data types supported: F32. Data layouts supported: NCHW/NHWC.
@@ -80,14 +92,17 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info);
+ static Status validate(const ITensorInfo *input1,
+ const ITensorInfo *input2,
+ const ITensorInfo *output,
+ const PriorBoxLayerInfo &info);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
private:
- const ICLTensor *_input1;
- const ICLTensor *_input2;
+ const ICLTensor *_input1;
+ const ICLTensor *_input2;
ICLTensor *_output;
PriorBoxLayerInfo _info;
int _num_priors;
diff --git a/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp b/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp
index bd573e54c8..731fcb8e04 100644
--- a/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp
+++ b/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp
@@ -22,10 +22,12 @@
* SOFTWARE.
*/
#include "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
+
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
@@ -49,14 +51,19 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
const uint32_t temp_num_elems_processed_per_iteration = max_cl_vector_width / input->element_size();
/* If width is less then step, then make step same as width to avoid global size being step instead of actual width. */
/* Or we should fix in arm_compute::enqueue() or arm_compute::calculate_max_window(). */
- const uint32_t num_elems_processed_per_iteration = (input->dimension(0) < temp_num_elems_processed_per_iteration) ? input->dimension(0) : temp_num_elems_processed_per_iteration;
+ const uint32_t num_elems_processed_per_iteration = (input->dimension(0) < temp_num_elems_processed_per_iteration)
+ ? input->dimension(0)
+ : temp_num_elems_processed_per_iteration;
// This kernel doesn't need padding
Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
return std::make_pair(Status{}, win);
}
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias)
+Status validate_arguments(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const ITensorInfo *weight,
+ const ITensorInfo *bias)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weight, bias, output);
@@ -72,7 +79,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(weight, bias);
// Checks performed when output is configured
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
@@ -87,10 +94,14 @@ CLQLSTMLayerNormalizationKernel::CLQLSTMLayerNormalizationKernel()
_type = CLKernelType::ELEMENTWISE;
}
-void CLQLSTMLayerNormalizationKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *weight, const ICLTensor *bias)
+void CLQLSTMLayerNormalizationKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ const ICLTensor *weight,
+ const ICLTensor *bias)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weight, bias, output);
- auto padding_info = get_padding_info({ input, weight, bias, output });
+ auto padding_info = get_padding_info({input, weight, bias, output});
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), weight->info(), bias->info()));
@@ -104,7 +115,8 @@ void CLQLSTMLayerNormalizationKernel::configure(const CLCompileContext &compile_
int32_t output_multiplier{};
int32_t output_shift{};
const UniformQuantizationInfo quan_info = _weight->info()->quantization_info().uniform();
- const Status status = quantization::calculate_quantized_multiplier(quan_info.scale, &output_multiplier, &output_shift);
+ const Status status =
+ quantization::calculate_quantized_multiplier(quan_info.scale, &output_multiplier, &output_shift);
output_shift *= -1;
// Set build options
@@ -114,8 +126,12 @@ void CLQLSTMLayerNormalizationKernel::configure(const CLCompileContext &compile_
build_opts.add_option("-DWIDTH=" + support::cpp11::to_string(input->info()->dimension(0)));
build_opts.add_option("-DOUTPUT_MULTIPLIER=" + support::cpp11::to_string(output_multiplier));
build_opts.add_option("-DOUTPUT_SHIFT=" + support::cpp11::to_string(output_shift));
- build_opts.add_option("-DMIN_BOUND=" + support::cpp11::to_string(std::get<0>(quantization::get_min_max_values_from_quantized_data_type(input->info()->data_type()))));
- build_opts.add_option("-DMAX_BOUND=" + support::cpp11::to_string(std::get<1>(quantization::get_min_max_values_from_quantized_data_type(input->info()->data_type()))));
+ build_opts.add_option("-DMIN_BOUND=" +
+ support::cpp11::to_string(std::get<0>(
+ quantization::get_min_max_values_from_quantized_data_type(input->info()->data_type()))));
+ build_opts.add_option("-DMAX_BOUND=" +
+ support::cpp11::to_string(std::get<1>(
+ quantization::get_min_max_values_from_quantized_data_type(input->info()->data_type()))));
// Create kernel
_kernel = create_kernel(compile_context, "qlstm_layer_normalization", build_opts.options());
@@ -135,12 +151,18 @@ void CLQLSTMLayerNormalizationKernel::configure(const CLCompileContext &compile_
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-void CLQLSTMLayerNormalizationKernel::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *weight, const ICLTensor *bias)
+void CLQLSTMLayerNormalizationKernel::configure(const ICLTensor *input,
+ ICLTensor *output,
+ const ICLTensor *weight,
+ const ICLTensor *bias)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, weight, bias);
}
-Status CLQLSTMLayerNormalizationKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias)
+Status CLQLSTMLayerNormalizationKernel::validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const ITensorInfo *weight,
+ const ITensorInfo *bias)
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, weight, bias));
ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get()).first);
@@ -171,7 +193,6 @@ void CLQLSTMLayerNormalizationKernel::run(const Window &window, cl::CommandQueue
add_2D_tensor_argument(idx, _output, slice);
enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_2D(slice));
+ } while (window.slide_window_slice_2D(slice));
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h b/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h
index 31085c37ba..ba912e1d2d 100644
--- a/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h
+++ b/src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h
@@ -63,7 +63,11 @@ public:
* @param[in] weight Weight tensor. Data types supported: Same as @p input.
* @param[in] bias Bias tensor. Data types supported: S32.
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *weight, const ICLTensor *bias);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ const ICLTensor *weight,
+ const ICLTensor *bias);
/** Static function to check if given info will lead to a valid configuration of @ref CLQLSTMLayerNormalizationKernel
*
* @param[in] input Source tensor info with 2 dimensions. Data types supported: QSYMM16.
@@ -73,7 +77,8 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias);
+ static Status
+ validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp
index 69a6fa5fa0..c97910ef79 100644
--- a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp
+++ b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp
@@ -31,6 +31,7 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
@@ -42,24 +43,29 @@ namespace arm_compute
{
namespace
{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info)
+Status validate_arguments(const ITensorInfo *input,
+ const ITensorInfo *rois,
+ ITensorInfo *output,
+ const ROIPoolingLayerInfo &pool_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, rois, output);
ARM_COMPUTE_RETURN_ERROR_ON(rois->dimension(0) != 5);
ARM_COMPUTE_RETURN_ERROR_ON(rois->num_dimensions() > 2);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F32, DataType::F16);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+ DataType::F32, DataType::F16);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(input, DataLayout::NHWC, DataLayout::NCHW);
ARM_COMPUTE_RETURN_ERROR_ON((pool_info.pooled_width() == 0) || (pool_info.pooled_height() == 0));
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(compute_roi_align_shape(*input, *rois, pool_info), output->tensor_shape());
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(compute_roi_align_shape(*input, *rois, pool_info),
+ output->tensor_shape());
}
- if(is_data_type_quantized_asymmetric(input->data_type()))
+ if (is_data_type_quantized_asymmetric(input->data_type()))
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(rois, 1, DataType::QASYMM16);
@@ -82,12 +88,19 @@ CLROIAlignLayerKernel::CLROIAlignLayerKernel()
_type = CLKernelType::ELEMENTWISE;
}
-void CLROIAlignLayerKernel::configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info)
+void CLROIAlignLayerKernel::configure(const ICLTensor *input,
+ const ICLTensor *rois,
+ ICLTensor *output,
+ const ROIPoolingLayerInfo &pool_info)
{
configure(CLKernelLibrary::get().get_compile_context(), input, rois, output, pool_info);
}
-void CLROIAlignLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info)
+void CLROIAlignLayerKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ const ICLTensor *rois,
+ ICLTensor *output,
+ const ROIPoolingLayerInfo &pool_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, rois);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), rois->info(), output->info(), pool_info));
@@ -97,7 +110,7 @@ void CLROIAlignLayerKernel::configure(const CLCompileContext &compile_context, c
auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
output->info()->set_data_layout(input->info()->data_layout());
- auto padding_info = get_padding_info({ input, rois, output });
+ auto padding_info = get_padding_info({input, rois, output});
_input = input;
_output = output;
@@ -111,16 +124,23 @@ void CLROIAlignLayerKernel::configure(const CLCompileContext &compile_context, c
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
build_opts.add_option("-DDATA_SIZE=" + get_data_size_from_data_type(input->info()->data_type()));
- build_opts.add_option("-DMAX_DIM_X=" + support::cpp11::to_string(_input->info()->dimension(get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::WIDTH))));
- build_opts.add_option("-DMAX_DIM_Y=" + support::cpp11::to_string(_input->info()->dimension(get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT))));
- build_opts.add_option("-DMAX_DIM_Z=" + support::cpp11::to_string(_input->info()->dimension(get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::CHANNEL))));
+ build_opts.add_option("-DMAX_DIM_X=" +
+ support::cpp11::to_string(_input->info()->dimension(get_data_layout_dimension_index(
+ input->info()->data_layout(), DataLayoutDimension::WIDTH))));
+ build_opts.add_option("-DMAX_DIM_Y=" +
+ support::cpp11::to_string(_input->info()->dimension(get_data_layout_dimension_index(
+ input->info()->data_layout(), DataLayoutDimension::HEIGHT))));
+ build_opts.add_option("-DMAX_DIM_Z=" +
+ support::cpp11::to_string(_input->info()->dimension(get_data_layout_dimension_index(
+ input->info()->data_layout(), DataLayoutDimension::CHANNEL))));
build_opts.add_option("-DPOOLED_DIM_X=" + support::cpp11::to_string(pool_info.pooled_width()));
build_opts.add_option("-DPOOLED_DIM_Y=" + support::cpp11::to_string(pool_info.pooled_height()));
build_opts.add_option("-DSPATIAL_SCALE=" + float_to_string_with_full_precision(pool_info.spatial_scale()));
build_opts.add_option_if(input->info()->data_layout() == DataLayout::NHWC, "-DNHWC");
- build_opts.add_option_if(pool_info.sampling_ratio() > 0, "-DSAMPLING_RATIO=" + support::cpp11::to_string(pool_info.sampling_ratio()));
+ build_opts.add_option_if(pool_info.sampling_ratio() > 0,
+ "-DSAMPLING_RATIO=" + support::cpp11::to_string(pool_info.sampling_ratio()));
- if(is_qasymm)
+ if (is_qasymm)
{
const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
const UniformQuantizationInfo roisq_info = rois->info()->quantization_info().uniform();
@@ -144,7 +164,10 @@ void CLROIAlignLayerKernel::configure(const CLCompileContext &compile_context, c
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-Status CLROIAlignLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info)
+Status CLROIAlignLayerKernel::validate(const ITensorInfo *input,
+ const ITensorInfo *rois,
+ ITensorInfo *output,
+ const ROIPoolingLayerInfo &pool_info)
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, rois, output, pool_info));
return Status{};
diff --git a/src/core/CL/kernels/CLROIAlignLayerKernel.h b/src/core/CL/kernels/CLROIAlignLayerKernel.h
index 5284a5913f..2e84e5d303 100644
--- a/src/core/CL/kernels/CLROIAlignLayerKernel.h
+++ b/src/core/CL/kernels/CLROIAlignLayerKernel.h
@@ -61,7 +61,8 @@ public:
* @note The z dimensions of @p output tensor and @p input tensor must be the same.
* @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
*/
- void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
+ void
+ configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
/** Set the input and output tensors.
*
* @param[in] compile_context The compile context to be used.
@@ -77,7 +78,11 @@ public:
* @note The z dimensions of @p output tensor and @p input tensor must be the same.
* @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ const ICLTensor *rois,
+ ICLTensor *output,
+ const ROIPoolingLayerInfo &pool_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLROIAlignLayerKernel
*
* @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
@@ -93,7 +98,10 @@ public:
*
* @return a Status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *rois,
+ ITensorInfo *output,
+ const ROIPoolingLayerInfo &pool_info);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue);
diff --git a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
index f6933c6cfd..1b2c414a49 100644
--- a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
@@ -31,6 +31,7 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
@@ -48,7 +49,10 @@ CLROIPoolingLayerKernel::CLROIPoolingLayerKernel()
_type = CLKernelType::ELEMENTWISE;
}
-Status CLROIPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *rois, const ITensorInfo *output, const ROIPoolingLayerInfo &pool_info)
+Status CLROIPoolingLayerKernel::validate(const ITensorInfo *input,
+ const ITensorInfo *rois,
+ const ITensorInfo *output,
+ const ROIPoolingLayerInfo &pool_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, rois, output);
@@ -61,10 +65,11 @@ Status CLROIPoolingLayerKernel::validate(const ITensorInfo *input, const ITensor
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16, DataType::QASYMM8);
ARM_COMPUTE_RETURN_ERROR_ON((pool_info.pooled_width() == 0) || (pool_info.pooled_height() == 0));
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON((output->dimension(0) != pool_info.pooled_width()) || (output->dimension(1) != pool_info.pooled_height()));
+ ARM_COMPUTE_RETURN_ERROR_ON((output->dimension(0) != pool_info.pooled_width()) ||
+ (output->dimension(1) != pool_info.pooled_height()));
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(2) != output->dimension(2));
ARM_COMPUTE_RETURN_ERROR_ON(rois->dimension(1) != output->dimension(3));
}
@@ -72,20 +77,30 @@ Status CLROIPoolingLayerKernel::validate(const ITensorInfo *input, const ITensor
return Status{};
}
-void CLROIPoolingLayerKernel::configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info)
+void CLROIPoolingLayerKernel::configure(const ICLTensor *input,
+ const ICLTensor *rois,
+ ICLTensor *output,
+ const ROIPoolingLayerInfo &pool_info)
{
configure(CLKernelLibrary::get().get_compile_context(), input, rois, output, pool_info);
}
-void CLROIPoolingLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, const ICLTensor *output, const ROIPoolingLayerInfo &pool_info)
+void CLROIPoolingLayerKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ const ICLTensor *rois,
+ const ICLTensor *output,
+ const ROIPoolingLayerInfo &pool_info)
{
- ARM_COMPUTE_ERROR_THROW_ON(CLROIPoolingLayerKernel::validate(input->info(), rois->info(), output->info(), pool_info));
+ ARM_COMPUTE_ERROR_THROW_ON(
+ CLROIPoolingLayerKernel::validate(input->info(), rois->info(), output->info(), pool_info));
- auto padding_info = get_padding_info({ input, rois, output });
+ auto padding_info = get_padding_info({input, rois, output});
// Output auto initialization if not yet initialized
- TensorShape output_shape(pool_info.pooled_width(), pool_info.pooled_height(), input->info()->dimension(2), rois->info()->dimension(1));
- auto_init_if_empty(*(output->info()), output_shape, 1, input->info()->data_type(), output->info()->quantization_info());
+ TensorShape output_shape(pool_info.pooled_width(), pool_info.pooled_height(), input->info()->dimension(2),
+ rois->info()->dimension(1));
+ auto_init_if_empty(*(output->info()), output_shape, 1, input->info()->data_type(),
+ output->info()->quantization_info());
// Set instance variables
_input = input;
@@ -107,11 +122,12 @@ void CLROIPoolingLayerKernel::configure(const CLCompileContext &compile_context,
build_opts.add_option("-DPOOLED_DIM_Y=" + support::cpp11::to_string(pool_info.pooled_height()));
build_opts.add_option("-DSPATIAL_SCALE=" + support::cpp11::to_string(pool_info.spatial_scale()));
- if(is_qasymm)
+ if (is_qasymm)
{
// Determine quantization info scale, offset
UniformQuantizationInfo uqinfo = UniformQuantizationInfo();
- uqinfo = compute_requantization_scale_offset(_input->info()->quantization_info().uniform(), _output->info()->quantization_info().uniform());
+ uqinfo = compute_requantization_scale_offset(_input->info()->quantization_info().uniform(),
+ _output->info()->quantization_info().uniform());
build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(uqinfo.offset));
build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(uqinfo.scale));
diff --git a/src/core/CL/kernels/CLROIPoolingLayerKernel.h b/src/core/CL/kernels/CLROIPoolingLayerKernel.h
index 7b7b457632..80bfb63092 100644
--- a/src/core/CL/kernels/CLROIPoolingLayerKernel.h
+++ b/src/core/CL/kernels/CLROIPoolingLayerKernel.h
@@ -59,7 +59,8 @@ public:
* @note The z dimensions of @p output tensor and @p input tensor must be the same.
* @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
*/
- void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
+ void
+ configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
/** Set the input and output tensors.
*
* @param[in] compile_context The compile context to be used.
@@ -74,7 +75,11 @@ public:
* @note The z dimensions of @p output tensor and @p input tensor must be the same.
* @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, const ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ const ICLTensor *rois,
+ const ICLTensor *output,
+ const ROIPoolingLayerInfo &pool_info);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
@@ -92,7 +97,10 @@ public:
* @note The z dimensions of @p output tensor and @p input tensor must be the same.
* @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *rois, const ITensorInfo *output, const ROIPoolingLayerInfo &pool_info);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *rois,
+ const ITensorInfo *output,
+ const ROIPoolingLayerInfo &pool_info);
private:
const ICLTensor *_input;
diff --git a/src/core/CL/kernels/CLRangeKernel.cpp b/src/core/CL/kernels/CLRangeKernel.cpp
index a06c2eed75..622f6210b9 100644
--- a/src/core/CL/kernels/CLRangeKernel.cpp
+++ b/src/core/CL/kernels/CLRangeKernel.cpp
@@ -28,6 +28,7 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
@@ -42,11 +43,8 @@ constexpr unsigned int vector_size_byte_opencl = 16;
Status validate_arguments(const ITensorInfo *output, const float start, const float end, const float step)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output,
- 1,
- DataType::U8, DataType::S8, DataType::QASYMM8,
- DataType::U16, DataType::S16,
- DataType::U32, DataType::S32,
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S8, DataType::QASYMM8,
+ DataType::U16, DataType::S16, DataType::U32, DataType::S32,
DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(output);
@@ -56,19 +54,22 @@ Status validate_arguments(const ITensorInfo *output, const float start, const fl
ARM_COMPUTE_RETURN_ERROR_ON_MSG((start == end), "start of the requested sequence must not be equal to the end");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(!check_value_range(start, output->data_type(), output->quantization_info()), "start value is outside the range of the data type");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(!check_value_range(end, output->data_type(), output->quantization_info()), "end value is outside the range of the data type");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(!check_value_range(step, output->data_type(), output->quantization_info()), "step value is outside the range of the data type");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(!check_value_range(start, output->data_type(), output->quantization_info()),
+ "start value is outside the range of the data type");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(!check_value_range(end, output->data_type(), output->quantization_info()),
+ "end value is outside the range of the data type");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(!check_value_range(step, output->data_type(), output->quantization_info()),
+ "step value is outside the range of the data type");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->num_dimensions() != 1, "Output has to be a 1-D tensor");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->tensor_shape().total_size() < num_of_elements_in_range(start, end, step), "Output tensor size is incorrect");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->tensor_shape().total_size() < num_of_elements_in_range(start, end, step),
+ "Output tensor size is incorrect");
return Status{};
}
} // namespace
-CLRangeKernel::CLRangeKernel()
- : _start(0), _end(1), _step(1), _output(nullptr)
+CLRangeKernel::CLRangeKernel() : _start(0), _end(1), _step(1), _output(nullptr)
{
_type = CLKernelType::ELEMENTWISE;
}
@@ -78,16 +79,18 @@ void CLRangeKernel::configure(ICLTensor *output, const float start, const float
configure(CLKernelLibrary::get().get_compile_context(), output, start, end, step);
}
-void CLRangeKernel::configure(const CLCompileContext &compile_context, ICLTensor *output, const float start, const float end, const float step)
+void CLRangeKernel::configure(
+ const CLCompileContext &compile_context, ICLTensor *output, const float start, const float end, const float step)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(output->info(), start, end, step));
// Configure kernel window
- unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / output->info()->element_size(), output->info()->dimension(0));
- Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
+ unsigned int num_elems_processed_per_iteration =
+ adjust_vec_size(vector_size_byte_opencl / output->info()->element_size(), output->info()->dimension(0));
+ Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
- auto padding_info = get_padding_info({ output });
+ auto padding_info = get_padding_info({output});
_start = start;
_end = end;
@@ -100,10 +103,11 @@ void CLRangeKernel::configure(const CLCompileContext &compile_context, ICLTensor
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type()));
build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
- build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(output->info()->dimension(0) % num_elems_processed_per_iteration));
+ build_opts.add_option("-DVEC_SIZE_LEFTOVER=" +
+ support::cpp11::to_string(output->info()->dimension(0) % num_elems_processed_per_iteration));
build_opts.add_option("-DSTART=" + support::cpp11::to_string(start));
build_opts.add_option("-DSTEP=" + support::cpp11::to_string(step));
- if(is_data_type_quantized_asymmetric(output->info()->data_type()))
+ if (is_data_type_quantized_asymmetric(output->info()->data_type()))
{
const UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform();
build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(qinfo.offset));
diff --git a/src/core/CL/kernels/CLRangeKernel.h b/src/core/CL/kernels/CLRangeKernel.h
index 1b94a099ed..65251a11e5 100644
--- a/src/core/CL/kernels/CLRangeKernel.h
+++ b/src/core/CL/kernels/CLRangeKernel.h
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_CLRANGEKERNEL_H
#include "arm_compute/core/Types.h"
+
#include "src/core/CL/ICLKernel.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp
index e5cfb997ca..70875a2d40 100644
--- a/src/core/CL/kernels/CLReductionOperationKernel.cpp
+++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp
@@ -28,15 +28,15 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/StringUtils.h"
+#include "arm_compute/core/Validate.h"
+
#include "src/core/AccessWindowStatic.h"
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
-
#include "support/StringSupport.h"
namespace arm_compute
@@ -47,23 +47,28 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- if(input->num_channels() == 1)
+ if (input->num_channels() == 1)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S32, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+ DataType::S32, DataType::F16, DataType::F32);
}
else
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 2, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(axis == 0);
}
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(op == ReductionOperation::SUM_SQUARE && input->data_type() == DataType::QASYMM8, "Not supported reduction operation for QASYMM8");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(op == ReductionOperation::SUM_SQUARE && input->data_type() == DataType::QASYMM8,
+ "Not supported reduction operation for QASYMM8");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions,
+ "Reduction axis greater than max number of dimensions");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis");
- ARM_COMPUTE_RETURN_ERROR_ON((op == ReductionOperation::MEAN_SUM) && (axis == 0) && (input->dimension(0) == 0) && (input->data_type() != DataType::QASYMM8)
- && (input->data_type() != DataType::QASYMM8_SIGNED));
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((op == ReductionOperation::ARG_IDX_MAX) || (op == ReductionOperation::ARG_IDX_MIN), "Not supported reduction operation, use CLArgMinMaxLayer");
+ ARM_COMPUTE_RETURN_ERROR_ON((op == ReductionOperation::MEAN_SUM) && (axis == 0) && (input->dimension(0) == 0) &&
+ (input->data_type() != DataType::QASYMM8) &&
+ (input->data_type() != DataType::QASYMM8_SIGNED));
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG((op == ReductionOperation::ARG_IDX_MAX) || (op == ReductionOperation::ARG_IDX_MIN),
+ "Not supported reduction operation, use CLArgMinMaxLayer");
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
@@ -79,33 +84,42 @@ CLReductionOperationKernel::CLReductionOperationKernel()
_type = CLKernelType::ELEMENTWISE;
}
-void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op)
+void CLReductionOperationKernel::configure(const ICLTensor *input,
+ ICLTensor *output,
+ unsigned int axis,
+ ReductionOperation op)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, axis, op);
}
-void CLReductionOperationKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op)
+void CLReductionOperationKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ unsigned int axis,
+ ReductionOperation op)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), axis, op));
- auto padding_info = get_padding_info({ input, output });
+ auto padding_info = get_padding_info({input, output});
_input = input;
_output = output;
_reduction_axis = axis;
_op = op;
- const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(input->info()->tensor_shape(), axis, true);
- auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape).reset_padding().set_is_resizable(true));
+ const TensorShape output_shape =
+ arm_compute::misc::shape_calculator::compute_reduced_shape(input->info()->tensor_shape(), axis, true);
+ auto_init_if_empty(*output->info(),
+ input->info()->clone()->set_tensor_shape(output_shape).reset_padding().set_is_resizable(true));
// Set build options
CLBuildOptions build_opts;
DataType data_type = input->info()->data_type();
std::string data_type_promoted{};
- if(is_data_type_quantized(data_type))
+ if (is_data_type_quantized(data_type))
{
data_type_promoted = "int";
}
@@ -130,10 +144,14 @@ void CLReductionOperationKernel::configure(const CLCompileContext &compile_conte
build_opts.add_option_if(op == ReductionOperation::PROD, "-DPROD");
build_opts.add_option_if(op == ReductionOperation::MIN, "-DMIN");
build_opts.add_option_if(op == ReductionOperation::MAX, "-DMAX");
- build_opts.add_option_if(is_data_type_quantized(data_type), "-DOFFSET=" + support::cpp11::to_string(input->info()->quantization_info().uniform().offset));
- build_opts.add_option_if(is_data_type_quantized(data_type), "-DSCALE=" + float_to_string_with_full_precision(input->info()->quantization_info().uniform().scale));
-
- switch(op)
+ build_opts.add_option_if(is_data_type_quantized(data_type),
+ "-DOFFSET=" +
+ support::cpp11::to_string(input->info()->quantization_info().uniform().offset));
+ build_opts.add_option_if(
+ is_data_type_quantized(data_type),
+ "-DSCALE=" + float_to_string_with_full_precision(input->info()->quantization_info().uniform().scale));
+
+ switch (op)
{
case ReductionOperation::SUM_SQUARE:
build_opts.add_option(("-DOPERATION=square_sum"));
@@ -159,7 +177,7 @@ void CLReductionOperationKernel::configure(const CLCompileContext &compile_conte
std::string kernel_axis_name;
const bool is_serial_op = needs_serialized_reduction(_op, _input->info()->data_type(), _reduction_axis);
- switch(axis)
+ switch (axis)
{
case 0:
{
@@ -187,13 +205,17 @@ void CLReductionOperationKernel::configure(const CLCompileContext &compile_conte
// Configure kernel window
Window win = calculate_max_window(*input->info(), Steps(vec_size));
- win.set(Window::DimX, Window::Dimension(win.x().start(), win.x().end() * _input->info()->num_channels(), win.x().step()));
+ win.set(Window::DimX,
+ Window::Dimension(win.x().start(), win.x().end() * _input->info()->num_channels(), win.x().step()));
ICLKernel::configure_internal(win);
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-Status CLReductionOperationKernel::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op)
+Status CLReductionOperationKernel::validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ unsigned int axis,
+ ReductionOperation op)
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, axis, op));
return Status{};
@@ -205,18 +227,19 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
const bool is_serial_op = needs_serialized_reduction(_op, _input->info()->data_type(), _reduction_axis);
- switch(_reduction_axis)
+ switch (_reduction_axis)
{
case 0:
{
// We use parallel reduction only in non quantized types
- if(is_serial_op)
+ if (is_serial_op)
{
// Get first input and output slices
- Window window_in{ window };
- window_in.set(Window::DimX, Window::Dimension(0, _input->info()->dimension(0), _input->info()->dimension(0)));
+ Window window_in{window};
+ window_in.set(Window::DimX,
+ Window::Dimension(0, _input->info()->dimension(0), _input->info()->dimension(0)));
- Window out_window{ window };
+ Window out_window{window};
out_window.set(Window::DimX, Window::Dimension(0, 0, 0));
Window in_slice = window_in.first_slice_window_1D();
@@ -228,8 +251,7 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que
add_1D_tensor_argument(idx, _input, in_slice);
add_1D_tensor_argument(idx, _output, out_slice);
enqueue(queue, *this, in_slice);
- }
- while(window_in.slide_window_slice_1D(in_slice) && out_window.slide_window_slice_1D(out_slice));
+ } while (window_in.slide_window_slice_1D(in_slice) && out_window.slide_window_slice_1D(out_slice));
}
else
{
@@ -251,8 +273,9 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que
case 1:
{
// Get first input and output slices
- Window window_in{ window };
- window_in.set(Window::DimY, Window::Dimension(0, _input->info()->dimension(1), _input->info()->dimension(1)));
+ Window window_in{window};
+ window_in.set(Window::DimY,
+ Window::Dimension(0, _input->info()->dimension(1), _input->info()->dimension(1)));
Window in_slice = window_in.first_slice_window_2D();
Window out_slice = window.first_slice_window_2D();
@@ -262,15 +285,15 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que
add_2D_tensor_argument(idx, _input, in_slice);
add_2D_tensor_argument(idx, _output, out_slice);
enqueue(queue, *this, in_slice);
- }
- while(window_in.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(out_slice));
+ } while (window_in.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(out_slice));
}
break;
case 2:
{
// Get first input and output slices
- Window window_in{ window };
- window_in.set(Window::DimZ, Window::Dimension(0, _input->info()->dimension(2), _input->info()->dimension(2)));
+ Window window_in{window};
+ window_in.set(Window::DimZ,
+ Window::Dimension(0, _input->info()->dimension(2), _input->info()->dimension(2)));
Window in_slice = window_in.first_slice_window_3D();
Window out_slice = window.first_slice_window_3D();
@@ -280,14 +303,13 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que
add_3D_tensor_argument(idx, _input, in_slice);
add_3D_tensor_argument(idx, _output, out_slice);
enqueue(queue, *this, in_slice);
- }
- while(window_in.slide_window_slice_3D(in_slice) && window.slide_window_slice_3D(out_slice));
+ } while (window_in.slide_window_slice_3D(in_slice) && window.slide_window_slice_3D(out_slice));
}
break;
case 3:
{
// Get first input and output slices
- Window window_in{ window };
+ Window window_in{window};
window_in.set(3, Window::Dimension(0, 1, 1));
Window in_slice = window_in.first_slice_window_4D();
Window out_slice = window.first_slice_window_4D();
@@ -298,8 +320,7 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que
add_4D_tensor_argument(idx, _input, in_slice);
add_4D_tensor_argument(idx, _output, out_slice);
enqueue(queue, *this, in_slice);
- }
- while(window_in.slide_window_slice_4D(in_slice) && window.slide_window_slice_4D(out_slice));
+ } while (window_in.slide_window_slice_4D(in_slice) && window.slide_window_slice_4D(out_slice));
}
break;
default:
diff --git a/src/core/CL/kernels/CLReductionOperationKernel.h b/src/core/CL/kernels/CLReductionOperationKernel.h
index b456378746..2f94b2add3 100644
--- a/src/core/CL/kernels/CLReductionOperationKernel.h
+++ b/src/core/CL/kernels/CLReductionOperationKernel.h
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_CLREDUCTIONOPERATIONKERNEL_H
#include "arm_compute/core/Types.h"
+
#include "src/core/CL/ICLKernel.h"
namespace arm_compute
@@ -67,7 +68,11 @@ public:
* @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3
* @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ unsigned int axis,
+ ReductionOperation op);
/** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperationKernel.
*
@@ -79,7 +84,8 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op);
+ static Status
+ validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/src/core/CL/kernels/CLReorgLayerKernel.cpp b/src/core/CL/kernels/CLReorgLayerKernel.cpp
index 3c74e80d33..9fd21943e8 100644
--- a/src/core/CL/kernels/CLReorgLayerKernel.cpp
+++ b/src/core/CL/kernels/CLReorgLayerKernel.cpp
@@ -28,9 +28,10 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/StringUtils.h"
+#include "arm_compute/core/Validate.h"
+
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
@@ -51,13 +52,16 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, i
const size_t idx_height = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
ARM_COMPUTE_RETURN_ERROR_ON(stride <= 0);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((input->tensor_shape()[idx_width] % stride) != 0, "The width of the input tensor must be a multiple of stride");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((input->tensor_shape()[idx_height] % stride) != 0, "The height of the input tensor must be a multiple of stride");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG((input->tensor_shape()[idx_width] % stride) != 0,
+ "The width of the input tensor must be a multiple of stride");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG((input->tensor_shape()[idx_height] % stride) != 0,
+ "The height of the input tensor must be a multiple of stride");
// Validate output if initialized
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
- const TensorInfo tensor_info_output = output->clone()->set_tensor_shape(misc::shape_calculator::compute_reorg_output_shape(*input, stride));
+ const TensorInfo tensor_info_output =
+ output->clone()->set_tensor_shape(misc::shape_calculator::compute_reorg_output_shape(*input, stride));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info_output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
}
@@ -66,8 +70,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, i
}
} // namespace
-CLReorgLayerKernel::CLReorgLayerKernel()
- : _input(nullptr), _output(nullptr)
+CLReorgLayerKernel::CLReorgLayerKernel() : _input(nullptr), _output(nullptr)
{
_type = CLKernelType::ELEMENTWISE;
}
@@ -77,17 +80,22 @@ void CLReorgLayerKernel::configure(const ICLTensor *input, ICLTensor *output, in
configure(CLKernelLibrary::get().get_compile_context(), input, output, stride);
}
-void CLReorgLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t stride)
+void CLReorgLayerKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ int32_t stride)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), stride));
- auto padding_info = get_padding_info({ input, output });
+ auto padding_info = get_padding_info({input, output});
_input = input;
_output = output;
- std::string kernel_name = std::string("reorg_layer_") + lower_string(string_from_data_layout(input->info()->data_layout()));
- const size_t idx_channel = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::CHANNEL);
+ std::string kernel_name =
+ std::string("reorg_layer_") + lower_string(string_from_data_layout(input->info()->data_layout()));
+ const size_t idx_channel =
+ get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::CHANNEL);
// Create kernel
CLBuildOptions build_opts;
@@ -98,7 +106,9 @@ void CLReorgLayerKernel::configure(const CLCompileContext &compile_context, cons
// Configure window
// auto inizialize the output tensor if not yet initialized
- auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(misc::shape_calculator::compute_reorg_output_shape(*input->info(), stride)));
+ auto_init_if_empty(*output->info(),
+ input->info()->clone()->set_tensor_shape(
+ misc::shape_calculator::compute_reorg_output_shape(*input->info(), stride)));
Window win = calculate_max_window(*output->info(), Steps());
@@ -119,7 +129,9 @@ void CLReorgLayerKernel::configure(const CLCompileContext &compile_context, cons
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-Status CLReorgLayerKernel::validate(const arm_compute::ITensorInfo *input, const arm_compute::ITensorInfo *output, int32_t stride)
+Status CLReorgLayerKernel::validate(const arm_compute::ITensorInfo *input,
+ const arm_compute::ITensorInfo *output,
+ int32_t stride)
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, stride));
@@ -139,7 +151,6 @@ void CLReorgLayerKernel::run(const Window &window, cl::CommandQueue &queue)
add_3D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx, _output, slice);
enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_3D(slice));
+ } while (window.slide_window_slice_3D(slice));
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLReorgLayerKernel.h b/src/core/CL/kernels/CLReorgLayerKernel.h
index 455a6170c6..f335071e9f 100644
--- a/src/core/CL/kernels/CLReorgLayerKernel.h
+++ b/src/core/CL/kernels/CLReorgLayerKernel.h
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_CLREORGLAYERKERNEL_H
#include "arm_compute/core/Types.h"
+
#include "src/core/CL/ICLKernel.h"
namespace arm_compute
diff --git a/src/core/CL/kernels/CLReverseKernel.cpp b/src/core/CL/kernels/CLReverseKernel.cpp
index 0d70ff4f3c..79a0f03b1e 100644
--- a/src/core/CL/kernels/CLReverseKernel.cpp
+++ b/src/core/CL/kernels/CLReverseKernel.cpp
@@ -30,6 +30,7 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
@@ -49,7 +50,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis->dimension(0) > 4, "Only up to 4 dimensions can be reversed");
// Checks performed when output is configured
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
@@ -60,8 +61,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
}
} // namespace
-CLReverseKernel::CLReverseKernel()
- : _input(nullptr), _output(nullptr), _axis(nullptr)
+CLReverseKernel::CLReverseKernel() : _input(nullptr), _output(nullptr), _axis(nullptr)
{
_type = CLKernelType::ELEMENTWISE;
}
@@ -71,10 +71,13 @@ void CLReverseKernel::configure(const ICLTensor *input, ICLTensor *output, const
configure(CLKernelLibrary::get().get_compile_context(), input, output, axis);
}
-void CLReverseKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *axis)
+void CLReverseKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ const ICLTensor *axis)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, axis);
- auto padding_info = get_padding_info({ input, output, axis });
+ auto padding_info = get_padding_info({input, output, axis});
_input = input;
_output = output;
@@ -138,7 +141,6 @@ void CLReverseKernel::run(const Window &window, cl::CommandQueue &queue)
add_1D_tensor_argument(idx, _axis, axis_slice);
add_4D_tensor_argument(idx, _output, slice);
enqueue(queue, *this, slice, lws_hint());
- }
- while(collapsed.slide_window_slice_4D(slice));
+ } while (collapsed.slide_window_slice_4D(slice));
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLReverseKernel.h b/src/core/CL/kernels/CLReverseKernel.h
index 4a21e4f802..fbd99dc883 100644
--- a/src/core/CL/kernels/CLReverseKernel.h
+++ b/src/core/CL/kernels/CLReverseKernel.h
@@ -60,7 +60,10 @@ public:
* @param[out] output Output tensor. Data type supported: Same as @p input
* @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *axis);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ const ICLTensor *axis);
/** Static function to check if given info will lead to a valid configuration of @ref CLReverseKernel
*
diff --git a/src/core/CL/kernels/CLSelectKernel.cpp b/src/core/CL/kernels/CLSelectKernel.cpp
index c0e014e8b8..703c64d8d3 100644
--- a/src/core/CL/kernels/CLSelectKernel.cpp
+++ b/src/core/CL/kernels/CLSelectKernel.cpp
@@ -30,10 +30,10 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
-
#include "support/StringSupport.h"
namespace arm_compute
@@ -51,9 +51,11 @@ Status validate_arguments(const ITensorInfo *c, const ITensorInfo *x, const ITen
const bool is_same_rank = (c->tensor_shape().num_dimensions() == x->tensor_shape().num_dimensions());
ARM_COMPUTE_RETURN_ERROR_ON(is_same_rank && (x->tensor_shape() != c->tensor_shape()));
- ARM_COMPUTE_RETURN_ERROR_ON(!is_same_rank && ((c->tensor_shape().num_dimensions() > 1) || (c->tensor_shape().x() != x->tensor_shape()[x->tensor_shape().num_dimensions() - 1])));
+ ARM_COMPUTE_RETURN_ERROR_ON(!is_same_rank &&
+ ((c->tensor_shape().num_dimensions() > 1) ||
+ (c->tensor_shape().x() != x->tensor_shape()[x->tensor_shape().num_dimensions() - 1])));
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(x, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(x, output);
@@ -63,13 +65,16 @@ Status validate_arguments(const ITensorInfo *c, const ITensorInfo *x, const ITen
}
} // namespace
-CLSelectKernel::CLSelectKernel()
- : _c(nullptr), _x(nullptr), _y(nullptr), _output(nullptr), _has_same_rank(false)
+CLSelectKernel::CLSelectKernel() : _c(nullptr), _x(nullptr), _y(nullptr), _output(nullptr), _has_same_rank(false)
{
_type = CLKernelType::ELEMENTWISE;
}
-void CLSelectKernel::configure(const CLCompileContext &compile_context, const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output)
+void CLSelectKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *c,
+ const ICLTensor *x,
+ const ICLTensor *y,
+ ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(c, x, y, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(c->info(), x->info(), y->info(), output->info()));
@@ -80,7 +85,7 @@ void CLSelectKernel::configure(const CLCompileContext &compile_context, const IC
_output = output;
_has_same_rank = (c->info()->tensor_shape().num_dimensions() == x->info()->tensor_shape().num_dimensions());
- auto padding_info = get_padding_info({ c, x, y, output });
+ auto padding_info = get_padding_info({c, x, y, output});
const unsigned int vec_size_x = adjust_vec_size(16 / x->info()->element_size(), x->info()->dimension(0));
const int vec_size_x_leftovers = output->info()->dimension(0) % vec_size_x;
@@ -92,14 +97,14 @@ void CLSelectKernel::configure(const CLCompileContext &compile_context, const IC
// Create kernel
std::string kernel_name = "select";
- if(_has_same_rank)
+ if (_has_same_rank)
{
kernel_name += "_same_rank";
}
else
{
const bool is_input_rank_greater_than_two = x->info()->tensor_shape().num_dimensions() > 2;
- if(is_input_rank_greater_than_two)
+ if (is_input_rank_greater_than_two)
{
const size_t width = x->info()->tensor_shape().x();
const size_t height = x->info()->tensor_shape().y();
@@ -128,7 +133,8 @@ void CLSelectKernel::configure(const CLCompileContext &compile_context, const IC
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-Status CLSelectKernel::validate(const ITensorInfo *c, const ITensorInfo *x, const ITensorInfo *y, const ITensorInfo *output)
+Status
+CLSelectKernel::validate(const ITensorInfo *c, const ITensorInfo *x, const ITensorInfo *y, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(c, x, y, output));
return Status{};
@@ -142,7 +148,7 @@ void CLSelectKernel::run(const arm_compute::Window &window, cl::CommandQueue &qu
Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
Window slice = collapsed.first_slice_window_3D();
- if(!_has_same_rank)
+ if (!_has_same_rank)
{
Window vector_slice = window.first_slice_window_1D();
vector_slice.set(Window::DimX, Window::Dimension(0, 0, 0));
@@ -153,7 +159,7 @@ void CLSelectKernel::run(const arm_compute::Window &window, cl::CommandQueue &qu
do
{
unsigned int idx = _has_same_rank ? 0 : num_arguments_per_1D_tensor();
- if(_has_same_rank)
+ if (_has_same_rank)
{
add_3D_tensor_argument(idx, _c, slice);
}
@@ -162,7 +168,6 @@ void CLSelectKernel::run(const arm_compute::Window &window, cl::CommandQueue &qu
add_3D_tensor_argument(idx, _output, slice);
enqueue(queue, *this, slice, lws_hint());
- }
- while(collapsed.slide_window_slice_3D(slice));
+ } while (collapsed.slide_window_slice_3D(slice));
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLSelectKernel.h b/src/core/CL/kernels/CLSelectKernel.h
index b8c10cd7cf..c4256fd743 100644
--- a/src/core/CL/kernels/CLSelectKernel.h
+++ b/src/core/CL/kernels/CLSelectKernel.h
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_CLSELECTKERNEL_H
#include "arm_compute/core/Types.h"
+
#include "src/core/CL/ICLKernel.h"
namespace arm_compute
@@ -60,7 +61,11 @@ public:
* @param[out] y Second input tensor. Data types supported: Same as @p x
* @param[in] output Output tensor. Data types supported: Same as @p x.
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *c,
+ const ICLTensor *x,
+ const ICLTensor *y,
+ ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLSelectKernel
*
* @param[in] c Condition input tensor. Data types supported: U8.
diff --git a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp
index 3632ae2b03..f4c0839ad2 100644
--- a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp
@@ -27,6 +27,7 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
@@ -38,19 +39,22 @@ namespace arm_compute
{
namespace
{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *block_info, const ITensorInfo *paddings, const ITensorInfo *output)
+Status validate_arguments(const ITensorInfo *input,
+ const ITensorInfo *block_info,
+ const ITensorInfo *paddings,
+ const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, block_info, paddings, output);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(block_info, 1, DataType::S32);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4);
ARM_COMPUTE_RETURN_ERROR_ON(block_info->num_dimensions() > 1);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(block_info->tensor_shape(), TensorShape{ 2 });
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(block_info->tensor_shape(), TensorShape{2});
ARM_COMPUTE_RETURN_ERROR_ON(paddings->num_dimensions() > 2);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(paddings->tensor_shape(), TensorShape{ 2, 2 });
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(paddings->tensor_shape(), TensorShape{2, 2});
// Validate output if initialized
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
const DataLayout data_layout = input->data_layout();
const int idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
@@ -61,7 +65,11 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *block_inf
return Status{};
}
-Status validate_arguments_static(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right,
+Status validate_arguments_static(const ITensorInfo *input,
+ const int block_shape_x,
+ const int block_shape_y,
+ const Size2D &padding_left,
+ const Size2D &padding_right,
const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
@@ -70,9 +78,10 @@ Status validate_arguments_static(const ITensorInfo *input, const int block_shape
ARM_COMPUTE_RETURN_ERROR_ON(block_shape_x < 1 || block_shape_y < 1);
// Validate output if initialized
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
- TensorShape expected_output_shape = misc::shape_calculator::compute_space_to_batch_shape(input, block_shape_x, block_shape_y, padding_left, padding_right);
+ TensorShape expected_output_shape = misc::shape_calculator::compute_space_to_batch_shape(
+ input, block_shape_x, block_shape_y, padding_left, padding_right);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), expected_output_shape);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
@@ -88,16 +97,24 @@ CLSpaceToBatchLayerKernel::CLSpaceToBatchLayerKernel()
_type = CLKernelType::ELEMENTWISE;
}
-void CLSpaceToBatchLayerKernel::configure(const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output)
+void CLSpaceToBatchLayerKernel::configure(const ICLTensor *input,
+ const ICLTensor *block_shape,
+ const ICLTensor *paddings,
+ ICLTensor *output)
{
configure(CLKernelLibrary::get().get_compile_context(), input, block_shape, paddings, output);
}
-void CLSpaceToBatchLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output)
+void CLSpaceToBatchLayerKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ const ICLTensor *block_shape,
+ const ICLTensor *paddings,
+ ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, block_shape, paddings, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), block_shape->info(), paddings->info(), output->info()));
- auto padding_info = get_padding_info({ input, block_shape, paddings, output });
+ ARM_COMPUTE_ERROR_THROW_ON(
+ validate_arguments(input->info(), block_shape->info(), paddings->info(), output->info()));
+ auto padding_info = get_padding_info({input, block_shape, paddings, output});
_input = input;
_block_shape = block_shape;
@@ -111,14 +128,17 @@ void CLSpaceToBatchLayerKernel::configure(const CLCompileContext &compile_contex
// Create kernel
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(data_size_from_type(input->info()->data_type())));
+ build_opts.add_option("-DDATA_TYPE=" +
+ get_cl_unsigned_type_from_element_size(data_size_from_type(input->info()->data_type())));
build_opts.add_option("-DWIDTH_OUT=" + support::cpp11::to_string(output->info()->dimension(idx_width)));
build_opts.add_option("-DHEIGHT_OUT=" + support::cpp11::to_string(output->info()->dimension(idx_height)));
build_opts.add_option("-DBATCH_SIZE=" + support::cpp11::to_string(output->info()->dimension(idx_batch)));
build_opts.add_option("-DWIDTH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_width)));
build_opts.add_option("-DHEIGHT_IN=" + support::cpp11::to_string(input->info()->dimension(idx_height)));
build_opts.add_option("-DBATCH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_batch)));
- _kernel = create_kernel(compile_context, "space_to_batch_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options());
+ _kernel = create_kernel(compile_context,
+ "space_to_batch_" + lower_string(string_from_data_layout(input->info()->data_layout())),
+ build_opts.options());
// Configure kernel window
Window win = calculate_max_window(*output->info(), Steps());
@@ -126,22 +146,34 @@ void CLSpaceToBatchLayerKernel::configure(const CLCompileContext &compile_contex
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-void CLSpaceToBatchLayerKernel::configure(const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right,
- ICLTensor *output)
+void CLSpaceToBatchLayerKernel::configure(const ICLTensor *input,
+ const int block_shape_x,
+ const int block_shape_y,
+ const Size2D &padding_left,
+ const Size2D &padding_right,
+ ICLTensor *output)
{
- configure(CLKernelLibrary::get().get_compile_context(), input, block_shape_x, block_shape_y, padding_left, padding_right, output);
+ configure(CLKernelLibrary::get().get_compile_context(), input, block_shape_x, block_shape_y, padding_left,
+ padding_right, output);
}
-void CLSpaceToBatchLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left,
- const Size2D &padding_right,
- ICLTensor *output)
+void CLSpaceToBatchLayerKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ const int block_shape_x,
+ const int block_shape_y,
+ const Size2D &padding_left,
+ const Size2D &padding_right,
+ ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- TensorShape output_shape = misc::shape_calculator::compute_space_to_batch_shape(input->info(), block_shape_x, block_shape_y, padding_left, padding_right);
- auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->quantization_info());
+ TensorShape output_shape = misc::shape_calculator::compute_space_to_batch_shape(
+ input->info(), block_shape_x, block_shape_y, padding_left, padding_right);
+ auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(),
+ input->info()->quantization_info());
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_static(input->info(), block_shape_x, block_shape_y, padding_left, padding_right, output->info()));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_static(input->info(), block_shape_x, block_shape_y, padding_left,
+ padding_right, output->info()));
_input = input;
_output = output;
@@ -153,7 +185,8 @@ void CLSpaceToBatchLayerKernel::configure(const CLCompileContext &compile_contex
// Create kernel
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(data_size_from_type(input->info()->data_type())));
+ build_opts.add_option("-DDATA_TYPE=" +
+ get_cl_unsigned_type_from_element_size(data_size_from_type(input->info()->data_type())));
build_opts.add_option("-DWIDTH_OUT=" + support::cpp11::to_string(output->info()->dimension(idx_width)));
build_opts.add_option("-DHEIGHT_OUT=" + support::cpp11::to_string(output->info()->dimension(idx_height)));
build_opts.add_option("-DBATCH_SIZE=" + support::cpp11::to_string(output->info()->dimension(idx_batch)));
@@ -166,22 +199,32 @@ void CLSpaceToBatchLayerKernel::configure(const CLCompileContext &compile_contex
build_opts.add_option("-DPAD_RIGHT_X=" + support::cpp11::to_string(padding_right.x()));
build_opts.add_option("-DPAD_LEFT_Y=" + support::cpp11::to_string(padding_left.y()));
build_opts.add_option("-DPAD_RIGHT_Y=" + support::cpp11::to_string(padding_right.y()));
- _kernel = create_kernel(compile_context, "space_to_batch_static_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options());
+ _kernel = create_kernel(
+ compile_context, "space_to_batch_static_" + lower_string(string_from_data_layout(input->info()->data_layout())),
+ build_opts.options());
// Configure kernel window
Window win = calculate_max_window(*output->info(), Steps());
ICLKernel::configure_internal(win);
}
-Status CLSpaceToBatchLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output)
+Status CLSpaceToBatchLayerKernel::validate(const ITensorInfo *input,
+ const ITensorInfo *block_shape,
+ const ITensorInfo *paddings,
+ const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, block_shape, paddings, output));
return Status{};
}
-Status CLSpaceToBatchLayerKernel::validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right,
+Status CLSpaceToBatchLayerKernel::validate(const ITensorInfo *input,
+ const int block_shape_x,
+ const int block_shape_y,
+ const Size2D &padding_left,
+ const Size2D &padding_right,
const ITensorInfo *output)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_static(input, block_shape_x, block_shape_y, padding_left, padding_right, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_arguments_static(input, block_shape_x, block_shape_y, padding_left, padding_right, output));
return Status{};
}
@@ -218,7 +261,6 @@ void CLSpaceToBatchLayerKernel::run(const Window &window, cl::CommandQueue &queu
add_3D_tensor_argument(idx, _output, slice_out);
enqueue(queue, *this, slice_out, lws_hint());
++batch_id;
- }
- while(window.slide_window_slice_3D(slice_out));
+ } while (window.slide_window_slice_3D(slice_out));
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.h b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.h
index 4817cfeef2..f9dce9db47 100644
--- a/src/core/CL/kernels/CLSpaceToBatchLayerKernel.h
+++ b/src/core/CL/kernels/CLSpaceToBatchLayerKernel.h
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_CLSPACETOBATCHLAYERKERNEL_H
#include "arm_compute/core/Types.h"
+
#include "src/core/CL/ICLKernel.h"
namespace arm_compute
@@ -63,7 +64,11 @@ public:
* @param[in] paddings 2-D tensor with shape [2, M] (First dimension is the fastest-changing dimension). Supported M: 2. Data types supported: S32
* @param[out] output Tensor output. Data types supported: same as @p input
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ const ICLTensor *block_shape,
+ const ICLTensor *paddings,
+ ICLTensor *output);
/** Initialise the kernel's input and output. (Static block shape and paddings)
*
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
@@ -73,7 +78,12 @@ public:
* @param[in] padding_right The padding at the end of every dimension of the output tensor.
* @param[out] output Tensor output. Data types supported: same as @p input
*/
- void configure(const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output);
+ void configure(const ICLTensor *input,
+ const int block_shape_x,
+ const int block_shape_y,
+ const Size2D &padding_left,
+ const Size2D &padding_right,
+ ICLTensor *output);
/** Initialise the kernel's input and output. (Static block shape and paddings)
*
* @param[in] compile_context The compile context to be used.
@@ -84,8 +94,13 @@ public:
* @param[in] padding_right The padding at the end of every dimension of the output tensor.
* @param[out] output Tensor output. Data types supported: same as @p input
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right,
- ICLTensor *output);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ const int block_shape_x,
+ const int block_shape_y,
+ const Size2D &padding_left,
+ const Size2D &padding_right,
+ ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToBatchLayerKernel
*
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
@@ -95,7 +110,10 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *block_shape,
+ const ITensorInfo *paddings,
+ const ITensorInfo *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToBatchLayerKernel (Static block shape and paddings)
*
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
@@ -107,7 +125,12 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, const ITensorInfo *output);
+ static Status validate(const ITensorInfo *input,
+ const int block_shape_x,
+ const int block_shape_y,
+ const Size2D &padding_left,
+ const Size2D &padding_right,
+ const ITensorInfo *output);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp
index c5ffdb588b..25662b5c62 100644
--- a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp
@@ -27,6 +27,7 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
@@ -45,7 +46,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, i
ARM_COMPUTE_RETURN_ERROR_ON(block_shape < 1);
// Validate output if initialized
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
const DataLayout data_layout = input->data_layout();
const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
@@ -64,8 +65,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, i
}
} // namespace
-CLSpaceToDepthLayerKernel::CLSpaceToDepthLayerKernel()
- : _input(nullptr), _output(nullptr), _block_shape()
+CLSpaceToDepthLayerKernel::CLSpaceToDepthLayerKernel() : _input(nullptr), _output(nullptr), _block_shape()
{
_type = CLKernelType::ELEMENTWISE;
}
@@ -75,10 +75,13 @@ void CLSpaceToDepthLayerKernel::configure(const ICLTensor *input, ICLTensor *out
configure(CLKernelLibrary::get().get_compile_context(), input, output, block_shape);
}
-void CLSpaceToDepthLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape)
+void CLSpaceToDepthLayerKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ int32_t block_shape)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- auto padding_info = get_padding_info({ input, output });
+ auto padding_info = get_padding_info({input, output});
TensorShape output_shape = compute_space_to_depth_shape(input->info(), block_shape);
auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type());
@@ -94,11 +97,14 @@ void CLSpaceToDepthLayerKernel::configure(const CLCompileContext &compile_contex
// Create kernel
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(data_size_from_type(output->info()->data_type())));
+ build_opts.add_option("-DDATA_TYPE=" +
+ get_cl_unsigned_type_from_element_size(data_size_from_type(output->info()->data_type())));
build_opts.add_option("-DCHANNEL_SIZE=" + support::cpp11::to_string(output->info()->dimension(idx_channel)));
build_opts.add_option("-DBLOCK_SHAPE=" + support::cpp11::to_string(block_shape));
build_opts.add_option("-DWIDTH_IN=" + support::cpp11::to_string(output->info()->dimension(idx_width)));
- _kernel = create_kernel(compile_context, "space_to_depth_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options());
+ _kernel = create_kernel(compile_context,
+ "space_to_depth_" + lower_string(string_from_data_layout(input->info()->data_layout())),
+ build_opts.options());
// Configure kernel window
Window win = calculate_max_window(*output->info(), Steps());
@@ -136,7 +142,6 @@ void CLSpaceToDepthLayerKernel::run(const Window &window, cl::CommandQueue &queu
enqueue(queue, *this, slice_out, lws_hint());
++batch_id;
- }
- while(window.slide_window_slice_3D(slice_out));
+ } while (window.slide_window_slice_3D(slice_out));
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.h b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.h
index bb1ac5f9a6..d0932919e0 100644
--- a/src/core/CL/kernels/CLSpaceToDepthLayerKernel.h
+++ b/src/core/CL/kernels/CLSpaceToDepthLayerKernel.h
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_CLSPACETODEPTHLAYERKERNEL_H
#include "arm_compute/core/Types.h"
+
#include "src/core/CL/ICLKernel.h"
namespace arm_compute
@@ -61,7 +62,8 @@ public:
* @param[out] output Tensor output. Data types supported: same as @p input
* @param[in] block_shape Block shape value.
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape);
+ void
+ configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape);
/** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToDepthLayerKernel.
*
* @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All.
diff --git a/src/core/CL/kernels/CLStackLayerKernel.cpp b/src/core/CL/kernels/CLStackLayerKernel.cpp
index 075c93ab60..23e26716e7 100644
--- a/src/core/CL/kernels/CLStackLayerKernel.cpp
+++ b/src/core/CL/kernels/CLStackLayerKernel.cpp
@@ -30,10 +30,10 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
-
#include "support/StringSupport.h"
using namespace arm_compute::misc::shape_calculator;
@@ -42,7 +42,11 @@ namespace arm_compute
{
namespace
{
-Status validate_arguments(const ITensorInfo *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, const ITensorInfo *output)
+Status validate_arguments(const ITensorInfo *input,
+ unsigned int axis,
+ unsigned int idx_input,
+ unsigned int num_tensors,
+ const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
@@ -51,9 +55,10 @@ Status validate_arguments(const ITensorInfo *input, unsigned int axis, unsigned
ARM_COMPUTE_RETURN_ERROR_ON(axis > input->num_dimensions());
ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4);
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), compute_stack_shape(*input, axis, num_tensors));
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(),
+ compute_stack_shape(*input, axis, num_tensors));
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
}
@@ -61,7 +66,8 @@ Status validate_arguments(const ITensorInfo *input, unsigned int axis, unsigned
return Status{};
}
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, unsigned int axis, unsigned int num_tensors, ITensorInfo *output)
+std::pair<Status, Window>
+validate_and_configure_window(ITensorInfo *input, unsigned int axis, unsigned int num_tensors, ITensorInfo *output)
{
// Output auto initialization if not yet initialized
auto_init_if_empty(*output, input->clone()->set_tensor_shape(compute_stack_shape(*input, axis, num_tensors)));
@@ -73,18 +79,23 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, unsi
}
} // namespace
-CLStackLayerKernel::CLStackLayerKernel()
- : _input(nullptr), _output(nullptr)
+CLStackLayerKernel::CLStackLayerKernel() : _input(nullptr), _output(nullptr)
{
_type = CLKernelType::ELEMENTWISE;
}
-void CLStackLayerKernel::configure(const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output)
+void CLStackLayerKernel::configure(
+ const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output)
{
configure(CLKernelLibrary::get().get_compile_context(), input, axis, idx_input, num_tensors, output);
}
-void CLStackLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output)
+void CLStackLayerKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ unsigned int axis,
+ unsigned int idx_input,
+ unsigned int num_tensors,
+ ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), axis, idx_input, num_tensors, output->info()));
@@ -112,10 +123,15 @@ void CLStackLayerKernel::configure(const CLCompileContext &compile_context, cons
_kernel.setArg<cl_uint>(idx, idx_input);
}
-Status CLStackLayerKernel::validate(const ITensorInfo *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, const ITensorInfo *output)
+Status CLStackLayerKernel::validate(const ITensorInfo *input,
+ unsigned int axis,
+ unsigned int idx_input,
+ unsigned int num_tensors,
+ const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, axis, idx_input, num_tensors, output));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), axis, num_tensors, output->clone().get()).first);
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_and_configure_window(input->clone().get(), axis, num_tensors, output->clone().get()).first);
return Status{};
}
diff --git a/src/core/CL/kernels/CLStackLayerKernel.h b/src/core/CL/kernels/CLStackLayerKernel.h
index 2865127a90..d3c17f529c 100644
--- a/src/core/CL/kernels/CLStackLayerKernel.h
+++ b/src/core/CL/kernels/CLStackLayerKernel.h
@@ -26,6 +26,7 @@
#define ARM_COMPUTE_CLSTACKLAYERKERNEL_H
#include "arm_compute/core/Types.h"
+
#include "src/core/CL/ICLKernel.h"
namespace arm_compute
@@ -60,7 +61,8 @@ public:
* @param[out] output Output tensor. Data types supported: Same as @p input.
*
*/
- void configure(const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output);
+ void configure(
+ const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output);
/** Initialise the kernel's inputs and output
*
* @note Supported input tensor rank: up to 4
@@ -74,7 +76,12 @@ public:
* @param[out] output Output tensor. Data types supported: Same as @p input.
*
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ unsigned int axis,
+ unsigned int idx_input,
+ unsigned int num_tensors,
+ ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLStackLayerKernel
*
* @note Supported input tensor rank: up to 4
@@ -88,7 +95,11 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, const ITensorInfo *output);
+ static Status validate(const ITensorInfo *input,
+ unsigned int axis,
+ unsigned int idx_input,
+ unsigned int num_tensors,
+ const ITensorInfo *output);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/src/core/CL/kernels/CLStridedSliceKernel.cpp b/src/core/CL/kernels/CLStridedSliceKernel.cpp
index 9acbafdb19..a8f6112820 100644
--- a/src/core/CL/kernels/CLStridedSliceKernel.cpp
+++ b/src/core/CL/kernels/CLStridedSliceKernel.cpp
@@ -22,11 +22,13 @@
* SOFTWARE.
*/
#include "src/core/CL/kernels/CLStridedSliceKernel.h"
+
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/utils/helpers/tensor_transform.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "src/core/utils/helpers/bit_ops.h"
@@ -37,9 +39,14 @@ namespace arm_compute
{
namespace
{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
- const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
- int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask)
+Status validate_arguments(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const Coordinates &starts,
+ const Coordinates &ends,
+ const BiStrides &strides,
+ int32_t begin_mask,
+ int32_t end_mask,
+ int32_t shrink_axis_mask)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
@@ -48,19 +55,16 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
ARM_COMPUTE_RETURN_ERROR_ON(starts.num_dimensions() > input->num_dimensions());
ARM_COMPUTE_RETURN_ERROR_ON(ends.num_dimensions() > input->num_dimensions());
ARM_COMPUTE_RETURN_ERROR_ON(strides.num_dimensions() > input->num_dimensions());
- ARM_COMPUTE_RETURN_ERROR_ON(std::any_of(strides.cbegin(), strides.cbegin() + strides.num_dimensions(), [](int i)
- {
- return i == 0;
- }));
+ ARM_COMPUTE_RETURN_ERROR_ON(
+ std::any_of(strides.cbegin(), strides.cbegin() + strides.num_dimensions(), [](int i) { return i == 0; }));
// Get expected output shape
- const TensorShape exp_output_shape = arm_compute::misc::shape_calculator::compute_strided_slice_shape(*input,
- starts, ends, strides,
- begin_mask, end_mask, shrink_axis_mask);
+ const TensorShape exp_output_shape = arm_compute::misc::shape_calculator::compute_strided_slice_shape(
+ *input, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask);
ARM_COMPUTE_RETURN_ERROR_ON(exp_output_shape.total_size() == 0);
// Checks output if configured
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
const TensorInfo exp_output_info = output->clone()->set_tensor_shape(exp_output_shape);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &exp_output_info);
@@ -76,28 +80,33 @@ CLStridedSliceKernel::CLStridedSliceKernel()
_type = CLKernelType::ELEMENTWISE;
}
-void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output,
- const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
- int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask)
+void CLStridedSliceKernel::configure(const CLCompileContext &compile_context,
+ const ITensorInfo *input,
+ ITensorInfo *output,
+ const Coordinates &starts,
+ const Coordinates &ends,
+ const BiStrides &strides,
+ int32_t begin_mask,
+ int32_t end_mask,
+ int32_t shrink_axis_mask)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- auto padding_info = get_padding_info({ input, output });
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask));
+ auto padding_info = get_padding_info({input, output});
+ ARM_COMPUTE_ERROR_THROW_ON(
+ validate_arguments(input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask));
const TensorShape &input_shape = input->tensor_shape();
Coordinates starts_abs;
Coordinates ends_abs;
Coordinates final_strides;
- std::tie(starts_abs, ends_abs, final_strides) = arm_compute::helpers::tensor_transform::calculate_strided_slice_coords(
- input_shape,
- starts, ends, strides,
- begin_mask, end_mask, shrink_axis_mask);
+ std::tie(starts_abs, ends_abs, final_strides) =
+ arm_compute::helpers::tensor_transform::calculate_strided_slice_coords(input_shape, starts, ends, strides,
+ begin_mask, end_mask, shrink_axis_mask);
// Configure kernel window
- const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_strided_slice_shape(*input,
- starts, ends, strides,
- begin_mask, end_mask, shrink_axis_mask);
+ const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_strided_slice_shape(
+ *input, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask);
auto_init_if_empty(*output, input->clone()->set_tensor_shape(output_shape));
Window win = calculate_max_window(*output, Steps());
@@ -108,29 +117,33 @@ void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, co
const bool multi_access_x = !is_shrink_on_x && (final_strides.x() == 1) && (output_width_x / vec_size_x > 0);
// Update window if needed
- if(multi_access_x)
+ if (multi_access_x)
{
Window &updated_window = win;
updated_window.set(Window::DimX,
- Window::Dimension(updated_window.x().start(), ceil_to_multiple(updated_window.x().end(), vec_size_x), vec_size_x));
+ Window::Dimension(updated_window.x().start(),
+ ceil_to_multiple(updated_window.x().end(), vec_size_x), vec_size_x));
}
ICLKernel::configure_internal(win);
// Create build options
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(data_size_from_type(input->data_type())));
- for(unsigned int i = 0; i < input_shape.num_dimensions(); ++i)
+ build_opts.add_option("-DDATA_TYPE=" +
+ get_cl_unsigned_type_from_element_size(data_size_from_type(input->data_type())));
+ for (unsigned int i = 0; i < input_shape.num_dimensions(); ++i)
{
const bool is_shrink = arm_compute::helpers::bit_ops::is_bit_set(shrink_axis_mask, i);
- build_opts.add_option("-DSTART_" + support::cpp11::to_string(i) + "=" + support::cpp11::to_string(starts_abs[i]));
- build_opts.add_option("-DSTRIDE_" + support::cpp11::to_string(i) + "=" + support::cpp11::to_string(final_strides[i]));
+ build_opts.add_option("-DSTART_" + support::cpp11::to_string(i) + "=" +
+ support::cpp11::to_string(starts_abs[i]));
+ build_opts.add_option("-DSTRIDE_" + support::cpp11::to_string(i) + "=" +
+ support::cpp11::to_string(final_strides[i]));
build_opts.add_option_if(is_shrink, "-DSHRINK_" + support::cpp11::to_string(i));
}
- build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(output_width_x - vec_size_x, 0)));
+ build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(
+ std::max<int>(output_width_x - vec_size_x, 0)));
build_opts.add_option_if(multi_access_x, "-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
build_opts.add_option_if_else(input_shape.num_dimensions() > 2,
- "-DSRC_DEPTH=" + support::cpp11::to_string(input_shape.z()),
- "-DSRC_DEPTH=1");
+ "-DSRC_DEPTH=" + support::cpp11::to_string(input_shape.z()), "-DSRC_DEPTH=1");
build_opts.add_option_if_else(output->num_dimensions() > 2,
"-DDST_DEPTH=" + support::cpp11::to_string(output->tensor_shape().z()),
"-DDST_DEPTH=1");
@@ -142,7 +155,7 @@ void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, co
_config_id = "strided_slice";
_config_id += "_";
_config_id += lower_string(string_from_data_type(input->data_type()));
- for(unsigned int i = 0; i < input_shape.num_dimensions(); ++i)
+ for (unsigned int i = 0; i < input_shape.num_dimensions(); ++i)
{
_config_id += "_";
_config_id += support::cpp11::to_string(input->dimension(i));
@@ -156,11 +169,17 @@ void CLStridedSliceKernel::configure(const CLCompileContext &compile_context, co
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}
-Status CLStridedSliceKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
- const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
- int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask)
+Status CLStridedSliceKernel::validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const Coordinates &starts,
+ const Coordinates &ends,
+ const BiStrides &strides,
+ int32_t begin_mask,
+ int32_t end_mask,
+ int32_t shrink_axis_mask)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_arguments(input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask));
return Status{};
}
@@ -170,8 +189,9 @@ void CLStridedSliceKernel::run_op(ITensorPack &tensors, const Window &window, cl
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
- const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
- auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
+ const auto src =
+ utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
+ auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
Window slice = window_collapsed.first_slice_window_4D();
@@ -182,7 +202,6 @@ void CLStridedSliceKernel::run_op(ITensorPack &tensors, const Window &window, cl
add_4D_tensor_argument(idx, src, slice);
add_4D_tensor_argument(idx, dst, slice);
enqueue(queue, *this, slice, lws_hint());
- }
- while(window_collapsed.slide_window_slice_4D(slice));
+ } while (window_collapsed.slide_window_slice_4D(slice));
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLStridedSliceKernel.h b/src/core/CL/kernels/CLStridedSliceKernel.h
index 4c201504f5..1cf5bcacec 100644
--- a/src/core/CL/kernels/CLStridedSliceKernel.h
+++ b/src/core/CL/kernels/CLStridedSliceKernel.h
@@ -25,6 +25,7 @@
#define ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H
#include "arm_compute/core/Types.h"
+
#include "src/core/CL/ICLKernel.h"
#include <cstdint>
@@ -53,9 +54,15 @@ public:
* @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
* A slice of size 1 starting from starts[i] in the dimension must be preserved.
*/
- void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output,
- const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
- int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask);
+ void configure(const CLCompileContext &compile_context,
+ const ITensorInfo *input,
+ ITensorInfo *output,
+ const Coordinates &starts,
+ const Coordinates &ends,
+ const BiStrides &strides,
+ int32_t begin_mask,
+ int32_t end_mask,
+ int32_t shrink_axis_mask);
/** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel
*
@@ -71,9 +78,14 @@ public:
* @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
* A slice of size 1 starting from starts[i] in the dimension must be preserved.
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
- int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask);
+ static Status validate(const ITensorInfo *input,
+ const ITensorInfo *output,
+ const Coordinates &starts,
+ const Coordinates &ends,
+ const BiStrides &strides,
+ int32_t begin_mask,
+ int32_t end_mask,
+ int32_t shrink_axis_mask);
// Inherited methods overridden:
void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
diff --git a/src/core/CL/kernels/CLTileKernel.cpp b/src/core/CL/kernels/CLTileKernel.cpp
index 3e7015cfd2..fa996c4008 100644
--- a/src/core/CL/kernels/CLTileKernel.cpp
+++ b/src/core/CL/kernels/CLTileKernel.cpp
@@ -22,9 +22,11 @@
* SOFTWARE.
*/
#include "src/core/CL/kernels/CLTileKernel.h"
+
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
@@ -39,15 +41,13 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON(multiples.size() > 4);
ARM_COMPUTE_RETURN_ERROR_ON(multiples.empty());
- ARM_COMPUTE_RETURN_ERROR_ON(std::any_of(multiples.begin(), multiples.end(), [](uint32_t e)
- {
- return e == 0;
- }));
+ ARM_COMPUTE_RETURN_ERROR_ON(std::any_of(multiples.begin(), multiples.end(), [](uint32_t e) { return e == 0; }));
// Validate output if initialized
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(misc::shape_calculator::compute_tiled_shape(input->tensor_shape(), multiples), output->tensor_shape());
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(
+ misc::shape_calculator::compute_tiled_shape(input->tensor_shape(), multiples), output->tensor_shape());
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
}
@@ -55,8 +55,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
}
} // namespace
-CLTileKernel::CLTileKernel()
- : _input(nullptr), _output(nullptr)
+CLTileKernel::CLTileKernel() : _input(nullptr), _output(nullptr)
{
_type = CLKernelType::ELEMENTWISE;
}
@@ -66,7 +65,10 @@ void CLTileKernel::configure(const ICLTensor *input, ICLTensor *output, const Mu
configure(CLKernelLibrary::get().get_compile_context(), input, output, multiples);
}
-void CLTileKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Multiples &multiples)
+void CLTileKernel::configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ const Multiples &multiples)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
@@ -104,15 +106,14 @@ void CLTileKernel::configure(const CLCompileContext &compile_context, const ICLT
// Configure window without padding
Window win = calculate_max_window(*output->info());
- if(multi_access_x)
+ if (multi_access_x)
{
// If multi-access is enabled, no thread should cross the tile boundaries. This means we need
// as many threads as those to cover a single tile times multiples[0]. Note that if threads
// do not cross the boundaries of the tiles, they won't cross the boundaries of the last tile, and
// we don't need to pad the output
const unsigned int size_win_x = ceil_to_multiple(input->info()->dimension(0), vec_size_x) * multiples[0];
- win.set(Window::DimX,
- Window::Dimension(win.x().start(), size_win_x, vec_size_x));
+ win.set(Window::DimX, Window::Dimension(win.x().start(), size_win_x, vec_size_x));
}
ICLKernel::configure_internal(win);
@@ -121,7 +122,7 @@ void CLTileKernel::configure(const CLCompileContext &compile_context, const ICLT
_config_id = "tile";
_config_id += "_";
_config_id += lower_string(string_from_data_type(input->info()->data_type()));
- for(unsigned int i = 0; i < multiples.size(); ++i)
+ for (unsigned int i = 0; i < multiples.size(); ++i)
{
_config_id += "_";
_config_id += support::cpp11::to_string(input->info()->dimension(i));
@@ -150,7 +151,6 @@ void CLTileKernel::run(const Window &window, cl::CommandQueue &queue)
add_4D_tensor_argument(idx, _input, slice);
add_4D_tensor_argument(idx, _output, slice);
enqueue(queue, *this, slice, lws_hint());
- }
- while(collapsed.slide_window_slice_4D(slice));
+ } while (collapsed.slide_window_slice_4D(slice));
}
} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLTileKernel.h b/src/core/CL/kernels/CLTileKernel.h
index 41752ca90b..c3486aecef 100644
--- a/src/core/CL/kernels/CLTileKernel.h
+++ b/src/core/CL/kernels/CLTileKernel.h
@@ -64,7 +64,10 @@ public:
* @param[out] output Destination tensor. Same as @p input
*
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Multiples &multiples);
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *input,
+ ICLTensor *output,
+ const Multiples &multiples);
/** Static function to check if given info will lead to a valid configuration of @ref CLTileKernel
*
* @param[in] input Source tensor info. Data type supported: All.