diff options
Diffstat (limited to 'src/core/CL')
-rw-r--r-- | src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp | 77 | ||||
-rw-r--r-- | src/core/CL/kernels/CLReductionOperationKernel.cpp | 70 |
2 files changed, 119 insertions, 28 deletions
diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp index 36e351e048..3d30350c59 100644 --- a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp +++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -42,18 +42,60 @@ CLL2NormalizeLayerKernel::CLL2NormalizeLayerKernel() { } -void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, unsigned int axis, float epsilon) +namespace +{ +Status validate_arguments(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, unsigned int axis, float epsilon) +{ + ARM_COMPUTE_UNUSED(epsilon); + + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, sum, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, sum); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() != DataLayout::NCHW); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 0, "Unsupported reduction axis, Supported axis is 0"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions"); + + // Reduce shape on axis + TensorShape sum_shape = input->tensor_shape(); + sum_shape.set(axis, 1); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(sum->tensor_shape(), sum_shape); + + if(output->total_size() != 0) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(input->tensor_shape(), output->tensor_shape()); + ARM_COMPUTE_RETURN_ERROR_ON(output->data_layout() != DataLayout::NCHW); + } + + return Status{}; +} + +std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_NULLPTR(output); + const unsigned int num_elems_processed_per_iteration = 16; + + Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); + + // Output tensor auto initialization if not yet initialized + auto_init_if_empty(*output, input->tensor_shape(), 1, input->data_type(), input->fixed_point_position()); - // Sum and output tensor auto initialization if not yet initialized - auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position()); + AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions"); - ARM_COMPUTE_ERROR_ON_MSG(axis > 0, "Unsupported reduction axis, Supported axis is 0"); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); + bool window_changed = update_window_and_padding(win, input_access, output_access); + output_access.set_valid_region(win, input->valid_region()); + + Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; + + return std::make_tuple(err, win); +} +} // namespace + +void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, unsigned int axis, float epsilon) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input, sum, output); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), sum->info(), output->info(), axis, epsilon)); _input = input; _sum = sum; @@ -76,15 +118,18 @@ void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor _kernel.setArg<cl_uint>(idx, _epsilon); // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + auto win_config = validate_and_configure_window(_input->info(), _output->info()); + ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + ICLKernel::configure(std::get<1>(win_config)); +} - update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, input->info()->valid_region()); +Status CLL2NormalizeLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, unsigned int axis, float epsilon) +{ + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, sum, output, axis, epsilon)); + ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get()))); - ICLKernel::configure(win); + return Status{}; } void CLL2NormalizeLayerKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp index 1dd5eb97ec..1347a9bc94 100644 --- a/src/core/CL/kernels/CLReductionOperationKernel.cpp +++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp @@ -38,6 +38,52 @@ using namespace arm_compute; +namespace +{ +Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op) +{ + ARM_COMPUTE_UNUSED(op); + + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() != DataLayout::NCHW); + + ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 0, "Unsupported reduction axis, Supported axis is 0"); + + if(output->total_size() != 0) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON(output->data_layout() != DataLayout::NCHW); + } + + return Status{}; +} + +std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, unsigned int axis) +{ + // Output tensor auto initialization if not yet initialized + TensorShape output_shape{ input->tensor_shape() }; + output_shape.set(axis, 1); + auto_init_if_empty(*output, output_shape, 1, input->data_type(), input->fixed_point_position()); + + const unsigned int num_elems_processed_per_iteration = 16; + + Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); + const unsigned int border_width = ((input->dimension(0) % 128) != 0) ? 128 - input->dimension(0) % 128 : 0; // TODO (COMPMID-1143): Fix padding (possible value 127!) + + AccessWindowStatic input_access(input, 0, 0, input->dimension(0) + border_width, 1); + AccessWindowHorizontal output_access(output, 0, 1); + + bool window_changed = update_window_and_padding(win, input_access, output_access); + output_access.set_valid_region(win, output->valid_region()); + + Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; + + return std::make_tuple(err, win); +} +} // namespace + CLReductionOperationKernel::CLReductionOperationKernel() : _input(nullptr), _output(nullptr), _reduction_axis(0), _op(ReductionOperation::SUM_SQUARE), _border_size() { @@ -50,17 +96,13 @@ BorderSize CLReductionOperationKernel::border_size() const void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_NULLPTR(output); + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); // Output tensor auto initialization if not yet initialized TensorShape output_shape{ input->info()->tensor_shape() }; output_shape.set(axis, 1); - auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position()); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions"); - ARM_COMPUTE_ERROR_ON_MSG(axis > 0, "Unsupported reduction axis, Supported axis is 0"); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), axis, op)); const unsigned int num_elems_processed_per_iteration = 16; const unsigned int border_width = ((input->info()->dimension(0) % 128) != 0) ? 128 - input->info()->dimension(0) % 128 : 0; @@ -97,15 +139,19 @@ void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *ou _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("reduction_operation", build_opts)); // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + auto win_config = validate_and_configure_window(_input->info(), _output->info(), axis); - AccessWindowStatic input_access(input->info(), 0, 0, input->info()->dimension(0) + border_width, 1); - AccessWindowHorizontal output_access(output->info(), 0, 1); + ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, output->info()->valid_region()); + ICLKernel::configure(std::get<1>(win_config)); +} + +Status CLReductionOperationKernel::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op) +{ + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, axis, op)); + ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get(), axis))); - ICLKernel::configure(win); + return Status{}; } void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &queue) |