From 473cb01e84cef6cab057e9492bfa3b68f708e5d7 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Tue, 23 Feb 2021 11:48:12 +0000 Subject: Remove Compute Vision CL support Resolves COMPMID-4151 Change-Id: I46f541efe8c4087f27794d2e158b6c1547d459ba Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5160 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio --- src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp | 104 ---- src/core/CL/kernels/CLAbsoluteDifferenceKernel.h | 79 --- src/core/CL/kernels/CLAccumulateKernel.cpp | 101 ---- src/core/CL/kernels/CLAccumulateKernel.h | 114 ----- src/core/CL/kernels/CLBox3x3Kernel.cpp | 81 --- src/core/CL/kernels/CLBox3x3Kernel.h | 59 --- src/core/CL/kernels/CLCannyEdgeKernel.cpp | 310 ------------ src/core/CL/kernels/CLCannyEdgeKernel.h | 185 ------- src/core/CL/kernels/CLChannelCombineKernel.cpp | 296 ----------- src/core/CL/kernels/CLChannelCombineKernel.h | 102 ---- src/core/CL/kernels/CLChannelExtractKernel.cpp | 196 -------- src/core/CL/kernels/CLChannelExtractKernel.h | 95 ---- src/core/CL/kernels/CLColorConvertKernel.cpp | 558 --------------------- src/core/CL/kernels/CLColorConvertKernel.h | 121 ----- src/core/CL/kernels/CLConvolutionKernel.cpp | 392 --------------- src/core/CL/kernels/CLConvolutionKernel.h | 224 --------- src/core/CL/kernels/CLDerivativeKernel.cpp | 155 ------ src/core/CL/kernels/CLDerivativeKernel.h | 83 --- src/core/CL/kernels/CLDilateKernel.cpp | 70 --- src/core/CL/kernels/CLDilateKernel.h | 59 --- src/core/CL/kernels/CLErodeKernel.cpp | 70 --- src/core/CL/kernels/CLErodeKernel.h | 59 --- src/core/CL/kernels/CLFastCornersKernel.cpp | 209 -------- src/core/CL/kernels/CLFastCornersKernel.h | 133 ----- src/core/CL/kernels/CLGaussian3x3Kernel.cpp | 81 --- src/core/CL/kernels/CLGaussian3x3Kernel.h | 59 --- src/core/CL/kernels/CLGaussian5x5Kernel.cpp | 55 -- src/core/CL/kernels/CLGaussian5x5Kernel.h | 83 --- src/core/CL/kernels/CLGaussianPyramidKernel.cpp | 247 --------- src/core/CL/kernels/CLGaussianPyramidKernel.h | 111 ---- src/core/CL/kernels/CLHOGDescriptorKernel.cpp | 237 --------- src/core/CL/kernels/CLHOGDescriptorKernel.h | 122 ----- src/core/CL/kernels/CLHOGDetectorKernel.cpp | 146 ------ src/core/CL/kernels/CLHOGDetectorKernel.h | 96 ---- src/core/CL/kernels/CLHarrisCornersKernel.cpp | 149 ------ src/core/CL/kernels/CLHarrisCornersKernel.h | 100 ---- src/core/CL/kernels/CLHistogramKernel.cpp | 253 ---------- src/core/CL/kernels/CLHistogramKernel.h | 111 ---- src/core/CL/kernels/CLIntegralImageKernel.cpp | 146 ------ src/core/CL/kernels/CLIntegralImageKernel.h | 86 ---- src/core/CL/kernels/CLMagnitudePhaseKernel.cpp | 176 ------- src/core/CL/kernels/CLMagnitudePhaseKernel.h | 90 ---- src/core/CL/kernels/CLMeanStdDevKernel.cpp | 156 ------ src/core/CL/kernels/CLMeanStdDevKernel.h | 98 ---- src/core/CL/kernels/CLMedian3x3Kernel.cpp | 88 ---- src/core/CL/kernels/CLMedian3x3Kernel.h | 59 --- src/core/CL/kernels/CLMinMaxLocationKernel.cpp | 246 --------- src/core/CL/kernels/CLMinMaxLocationKernel.h | 124 ----- src/core/CL/kernels/CLNonLinearFilterKernel.cpp | 104 ---- src/core/CL/kernels/CLNonLinearFilterKernel.h | 77 --- .../CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp | 78 --- .../CL/kernels/CLNonMaximaSuppression3x3Kernel.h | 60 --- src/core/CL/kernels/CLScharr3x3Kernel.cpp | 127 ----- src/core/CL/kernels/CLScharr3x3Kernel.h | 97 ---- src/core/CL/kernels/CLSobel3x3Kernel.cpp | 141 ------ src/core/CL/kernels/CLSobel3x3Kernel.h | 83 --- src/core/CL/kernels/CLSobel5x5Kernel.cpp | 251 --------- src/core/CL/kernels/CLSobel5x5Kernel.h | 139 ----- src/core/CL/kernels/CLSobel7x7Kernel.cpp | 255 ---------- src/core/CL/kernels/CLSobel7x7Kernel.h | 139 ----- src/core/CL/kernels/CLTableLookupKernel.cpp | 68 --- src/core/CL/kernels/CLTableLookupKernel.h | 55 -- src/core/CL/kernels/CLThresholdKernel.cpp | 81 --- src/core/CL/kernels/CLThresholdKernel.h | 57 --- src/core/CL/kernels/CLWarpAffineKernel.cpp | 133 ----- src/core/CL/kernels/CLWarpAffineKernel.h | 62 --- src/core/CL/kernels/CLWarpPerspectiveKernel.cpp | 105 ---- src/core/CL/kernels/CLWarpPerspectiveKernel.h | 59 --- 68 files changed, 9145 deletions(-) delete mode 100644 src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp delete mode 100644 src/core/CL/kernels/CLAbsoluteDifferenceKernel.h delete mode 100644 src/core/CL/kernels/CLAccumulateKernel.cpp delete mode 100644 src/core/CL/kernels/CLAccumulateKernel.h delete mode 100644 src/core/CL/kernels/CLBox3x3Kernel.cpp delete mode 100644 src/core/CL/kernels/CLBox3x3Kernel.h delete mode 100644 src/core/CL/kernels/CLCannyEdgeKernel.cpp delete mode 100644 src/core/CL/kernels/CLCannyEdgeKernel.h delete mode 100644 src/core/CL/kernels/CLChannelCombineKernel.cpp delete mode 100644 src/core/CL/kernels/CLChannelCombineKernel.h delete mode 100644 src/core/CL/kernels/CLChannelExtractKernel.cpp delete mode 100644 src/core/CL/kernels/CLChannelExtractKernel.h delete mode 100644 src/core/CL/kernels/CLColorConvertKernel.cpp delete mode 100644 src/core/CL/kernels/CLColorConvertKernel.h delete mode 100644 src/core/CL/kernels/CLConvolutionKernel.cpp delete mode 100644 src/core/CL/kernels/CLConvolutionKernel.h delete mode 100644 src/core/CL/kernels/CLDerivativeKernel.cpp delete mode 100644 src/core/CL/kernels/CLDerivativeKernel.h delete mode 100644 src/core/CL/kernels/CLDilateKernel.cpp delete mode 100644 src/core/CL/kernels/CLDilateKernel.h delete mode 100644 src/core/CL/kernels/CLErodeKernel.cpp delete mode 100644 src/core/CL/kernels/CLErodeKernel.h delete mode 100644 src/core/CL/kernels/CLFastCornersKernel.cpp delete mode 100644 src/core/CL/kernels/CLFastCornersKernel.h delete mode 100644 src/core/CL/kernels/CLGaussian3x3Kernel.cpp delete mode 100644 src/core/CL/kernels/CLGaussian3x3Kernel.h delete mode 100644 src/core/CL/kernels/CLGaussian5x5Kernel.cpp delete mode 100644 src/core/CL/kernels/CLGaussian5x5Kernel.h delete mode 100644 src/core/CL/kernels/CLGaussianPyramidKernel.cpp delete mode 100644 src/core/CL/kernels/CLGaussianPyramidKernel.h delete mode 100644 src/core/CL/kernels/CLHOGDescriptorKernel.cpp delete mode 100644 src/core/CL/kernels/CLHOGDescriptorKernel.h delete mode 100644 src/core/CL/kernels/CLHOGDetectorKernel.cpp delete mode 100644 src/core/CL/kernels/CLHOGDetectorKernel.h delete mode 100644 src/core/CL/kernels/CLHarrisCornersKernel.cpp delete mode 100644 src/core/CL/kernels/CLHarrisCornersKernel.h delete mode 100644 src/core/CL/kernels/CLHistogramKernel.cpp delete mode 100644 src/core/CL/kernels/CLHistogramKernel.h delete mode 100644 src/core/CL/kernels/CLIntegralImageKernel.cpp delete mode 100644 src/core/CL/kernels/CLIntegralImageKernel.h delete mode 100644 src/core/CL/kernels/CLMagnitudePhaseKernel.cpp delete mode 100644 src/core/CL/kernels/CLMagnitudePhaseKernel.h delete mode 100644 src/core/CL/kernels/CLMeanStdDevKernel.cpp delete mode 100644 src/core/CL/kernels/CLMeanStdDevKernel.h delete mode 100644 src/core/CL/kernels/CLMedian3x3Kernel.cpp delete mode 100644 src/core/CL/kernels/CLMedian3x3Kernel.h delete mode 100644 src/core/CL/kernels/CLMinMaxLocationKernel.cpp delete mode 100644 src/core/CL/kernels/CLMinMaxLocationKernel.h delete mode 100644 src/core/CL/kernels/CLNonLinearFilterKernel.cpp delete mode 100644 src/core/CL/kernels/CLNonLinearFilterKernel.h delete mode 100644 src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp delete mode 100644 src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h delete mode 100644 src/core/CL/kernels/CLScharr3x3Kernel.cpp delete mode 100644 src/core/CL/kernels/CLScharr3x3Kernel.h delete mode 100644 src/core/CL/kernels/CLSobel3x3Kernel.cpp delete mode 100644 src/core/CL/kernels/CLSobel3x3Kernel.h delete mode 100644 src/core/CL/kernels/CLSobel5x5Kernel.cpp delete mode 100644 src/core/CL/kernels/CLSobel5x5Kernel.h delete mode 100644 src/core/CL/kernels/CLSobel7x7Kernel.cpp delete mode 100644 src/core/CL/kernels/CLSobel7x7Kernel.h delete mode 100644 src/core/CL/kernels/CLTableLookupKernel.cpp delete mode 100644 src/core/CL/kernels/CLTableLookupKernel.h delete mode 100644 src/core/CL/kernels/CLThresholdKernel.cpp delete mode 100644 src/core/CL/kernels/CLThresholdKernel.h delete mode 100644 src/core/CL/kernels/CLWarpAffineKernel.cpp delete mode 100644 src/core/CL/kernels/CLWarpAffineKernel.h delete mode 100644 src/core/CL/kernels/CLWarpPerspectiveKernel.cpp delete mode 100644 src/core/CL/kernels/CLWarpPerspectiveKernel.h (limited to 'src/core/CL/kernels') diff --git a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp deleted file mode 100644 index 76b60cb9f8..0000000000 --- a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Validate.h" - -#include "src/core/CL/kernels/CLAbsoluteDifferenceKernel.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include - -using namespace arm_compute; - -CLAbsoluteDifferenceKernel::CLAbsoluteDifferenceKernel() - : _input1(nullptr), _input2(nullptr), _output(nullptr) -{ -} - -void CLAbsoluteDifferenceKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output) -{ - configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output); -} - -void CLAbsoluteDifferenceKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16); - ARM_COMPUTE_ERROR_ON_MSG(output->info()->data_type() == DataType::U8 && (input1->info()->data_type() != DataType::U8 || input2->info()->data_type() != DataType::U8), - "The output image can only be U8 if both input images are U8"); - - _input1 = input1; - _input2 = input2; - _output = output; - - // Set kernel build options - std::set build_opts; - build_opts.insert("-DDATA_TYPE_IN1=" + get_cl_type_from_data_type(input1->info()->data_type())); - build_opts.insert("-DDATA_TYPE_IN2=" + get_cl_type_from_data_type(input2->info()->data_type())); - build_opts.insert("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type())); - - // Create kernel - _kernel = create_kernel(compile_context, "absdiff", build_opts); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 16; - - Window win = calculate_max_window(*input1->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal input1_access(input1->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal input2_access(input2->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, input1_access, input2_access, output_access); - - ValidRegion valid_region = intersect_valid_regions(input1->info()->valid_region(), - input2->info()->valid_region()); - - output_access.set_valid_region(win, valid_region); - - ICLKernel::configure_internal(win); -} - -void CLAbsoluteDifferenceKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window slice = window.first_slice_window_2D(); - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input1, slice); - add_2D_tensor_argument(idx, _input2, slice); - add_2D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); -} diff --git a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.h b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.h deleted file mode 100644 index 28f28fe44f..0000000000 --- a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H -#define ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H - -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the absolute difference kernel. - * - * Absolute difference is computed by: - * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f] - */ -class CLAbsoluteDifferenceKernel : public ICLKernel -{ -public: - /** Default constructor. */ - CLAbsoluteDifferenceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLAbsoluteDifferenceKernel(const CLAbsoluteDifferenceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLAbsoluteDifferenceKernel &operator=(const CLAbsoluteDifferenceKernel &) = delete; - /** Allow instances of this class to be moved */ - CLAbsoluteDifferenceKernel(CLAbsoluteDifferenceKernel &&) = default; - /** Allow instances of this class to be moved */ - CLAbsoluteDifferenceKernel &operator=(CLAbsoluteDifferenceKernel &&) = default; - /** Default destructor */ - ~CLAbsoluteDifferenceKernel() = default; - - /** Set the inputs and output images. - * - * @param[in] input1 Source tensor. Data types supported: U8/S16. - * @param[in] input2 Source tensor. Data types supported: U8/S16. - * @param[out] output Destination tensor. Data types supported: U8/S16. - */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); - /** Set the inputs and output images. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 Source tensor. Data types supported: U8/S16. - * @param[in] input2 Source tensor. Data types supported: U8/S16. - * @param[out] output Destination tensor. Data types supported: U8/S16. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input1; /**< Source tensor 1. */ - const ICLTensor *_input2; /**< Source tensor 2. */ - ICLTensor *_output; /**< Destination tensor. */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H */ diff --git a/src/core/CL/kernels/CLAccumulateKernel.cpp b/src/core/CL/kernels/CLAccumulateKernel.cpp deleted file mode 100644 index b0a8eba644..0000000000 --- a/src/core/CL/kernels/CLAccumulateKernel.cpp +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLAccumulateKernel.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" - -namespace arm_compute -{ -namespace -{ -constexpr unsigned int num_elems_processed_per_iteration = 16; -} // namespace - -void CLAccumulateKernel::configure(const ICLTensor *input, ICLTensor *accum) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, accum); -} - -void CLAccumulateKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *accum) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::S16); - - // Create kernel - _kernel = create_kernel(compile_context, "accumulate"); - - // Make sure _kernel is initialized before calling the parent's configure - ICLSimple2DKernel::configure(input, accum, num_elems_processed_per_iteration); -} - -void CLAccumulateWeightedKernel::configure(const ICLTensor *input, float alpha, ICLTensor *accum) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, alpha, accum); -} - -void CLAccumulateWeightedKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, float alpha, ICLTensor *accum) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON(alpha < 0.0 || alpha > 1.0); - - // Create kernel - _kernel = create_kernel(compile_context, "accumulate_weighted"); - - // Set static kernel arguments - unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters - _kernel.setArg(idx++, alpha); - - // Configure kernel window - ICLSimple2DKernel::configure(input, accum, num_elems_processed_per_iteration); -} - -void CLAccumulateSquaredKernel::configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, shift, accum); -} - -void CLAccumulateSquaredKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, uint32_t shift, ICLTensor *accum) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::S16); - ARM_COMPUTE_ERROR_ON(shift > 15); - - // Create kernel - _kernel = create_kernel(compile_context, "accumulate_squared"); - - // Set static kernel arguments - unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters - _kernel.setArg(idx++, shift); - - // Configure kernel window - ICLSimple2DKernel::configure(input, accum, num_elems_processed_per_iteration); -} -} // namespace arm_compute diff --git a/src/core/CL/kernels/CLAccumulateKernel.h b/src/core/CL/kernels/CLAccumulateKernel.h deleted file mode 100644 index 16a715319d..0000000000 --- a/src/core/CL/kernels/CLAccumulateKernel.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLACCUMULATEKERNEL_H -#define ARM_COMPUTE_CLACCUMULATEKERNEL_H - -#include "src/core/CL/ICLSimple2DKernel.h" - -#include - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the accumulate kernel. - * - * Accumulation is computed by: - * @f[ accum(x,y) = accum(x,y) + input(x,y) @f] - */ -class CLAccumulateKernel : public ICLSimple2DKernel -{ -public: - /** Set the input and accumulation tensors. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] accum Destination tensor. Data types supported: S16. - */ - void configure(const ICLTensor *input, ICLTensor *accum); - /** Set the input and accumulation tensors. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] accum Destination tensor. Data types supported: S16. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *accum); -}; - -/** Interface for the accumulate weighted kernel. - * - * Weighted accumulation is computed: - * @f[ accum(x,y) = (1 - \alpha)*accum(x,y) + \alpha*input(x,y) @f] - * - * Where @f$ 0 \le \alpha \le 1 @f$ - * Conceptually, the rounding for this is defined as: - * @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f] -*/ -class CLAccumulateWeightedKernel : public ICLSimple2DKernel -{ -public: - /** Set the input and accumulation images, and the scale value. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[in] alpha Scalar value in the range [0, 1.0]. Data types supported: F32. - * @param[in,out] accum Accumulated tensor. Data types supported: U8. - */ - void configure(const ICLTensor *input, float alpha, ICLTensor *accum); - /** Set the input and accumulation images, and the scale value. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[in] alpha Scalar value in the range [0, 1.0]. Data types supported: F32. - * @param[in,out] accum Accumulated tensor. Data types supported: U8. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, float alpha, ICLTensor *accum); -}; - -/** Interface for the accumulate squared kernel. - * - * The accumulation of squares is computed: - * @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f] - * - * Where @f$ 0 \le shift \le 15 @f$ -*/ -class CLAccumulateSquaredKernel : public ICLSimple2DKernel -{ -public: - /** Set the input and accumulation tensors and the shift value. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[in] shift Shift value in the range of [0, 15]. Data types supported: U32. - * @param[in,out] accum Accumulated tensor. Data types supported: S16. - */ - void configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum); - /** Set the input and accumulation tensors and the shift value. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[in] shift Shift value in the range of [0, 15]. Data types supported: U32. - * @param[in,out] accum Accumulated tensor. Data types supported: S16. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, uint32_t shift, ICLTensor *accum); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLACCUMULATEKERNEL_H */ diff --git a/src/core/CL/kernels/CLBox3x3Kernel.cpp b/src/core/CL/kernels/CLBox3x3Kernel.cpp deleted file mode 100644 index 9f493b4fb8..0000000000 --- a/src/core/CL/kernels/CLBox3x3Kernel.cpp +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLBox3x3Kernel.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include - -using namespace arm_compute; - -BorderSize CLBox3x3Kernel::border_size() const -{ - return BorderSize(1); -} - -void CLBox3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined); -} - -void CLBox3x3Kernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - - _input = input; - _output = output; - - // Set build options - std::set build_opts = { "-DMAT0=1", "-DMAT1=1", "-DMAT2=1", - "-DMAT3=1", "-DMAT4=1", "-DMAT5=1", - "-DMAT6=1", "-DMAT7=1", "-DMAT8=1", - "-DSCALE=9", "-DDATA_TYPE_OUT=uchar" - }; - - // Create kernel - _kernel = create_kernel(compile_context, "convolution3x3_static", build_opts); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_elems_written_per_iteration = 8; - constexpr unsigned int num_rows_read_per_iteration = 3; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - - AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - ICLKernel::configure_internal(win); -} diff --git a/src/core/CL/kernels/CLBox3x3Kernel.h b/src/core/CL/kernels/CLBox3x3Kernel.h deleted file mode 100644 index 2373c4a928..0000000000 --- a/src/core/CL/kernels/CLBox3x3Kernel.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLBOX3X3KERNEL_H -#define ARM_COMPUTE_CLBOX3X3KERNEL_H - -#include "src/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the box 3x3 filter kernel. - * - */ -class CLBox3x3Kernel : public ICLSimple2DKernel -{ -public: - /**Initialise the kernel's input and output. - * - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); - /**Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); - - //Inherited methods overriden: - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLBOX3X3KERNEL_H */ diff --git a/src/core/CL/kernels/CLCannyEdgeKernel.cpp b/src/core/CL/kernels/CLCannyEdgeKernel.cpp deleted file mode 100644 index 1fe944c8a2..0000000000 --- a/src/core/CL/kernels/CLCannyEdgeKernel.cpp +++ /dev/null @@ -1,310 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLCannyEdgeKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -using namespace arm_compute; - -CLGradientKernel::CLGradientKernel() - : _gx(nullptr), _gy(nullptr), _magnitude(nullptr), _phase(nullptr) -{ -} - -void CLGradientKernel::configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type) -{ - configure(CLKernelLibrary::get().get_compile_context(), gx, gy, magnitude, phase, norm_type); -} - -void CLGradientKernel::configure(const CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gx, 1, DataType::S16, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gy, 1, DataType::S16, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(magnitude, 1, DataType::U16, DataType::U32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(phase, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_MSG(data_size_from_type(gx->info()->data_type()) != data_size_from_type(gy->info()->data_type()), - "Gx and Gy must have the same pixel size"); - ARM_COMPUTE_ERROR_ON_MSG(data_size_from_type(gx->info()->data_type()) != data_size_from_type(magnitude->info()->data_type()), - "Mag must have the same pixel size as Gx and Gy"); - - _gx = gx; - _gy = gy; - _magnitude = magnitude; - _phase = phase; - - // Create build opts - std::set built_opts; - built_opts.emplace("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(gx->info()->data_type())); - built_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(gx->info()->data_type())); - - // Create kernel - const std::string kernel_name = (norm_type == 1) ? std::string("combine_gradients_L1") : std::string("combine_gradients_L2"); - _kernel = create_kernel(compile_context, kernel_name, built_opts); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 4; - - Window win = calculate_max_window(*_gx->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal gx_access(_gx->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal gy_access(_gy->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal mag_access(_magnitude->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal phase_access(_phase->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, gx_access, gy_access, mag_access, phase_access); - - mag_access.set_valid_region(win, _gx->info()->valid_region()); - phase_access.set_valid_region(win, _gx->info()->valid_region()); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(gx->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(gx->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(gx->info()->dimension(1)); -} - -void CLGradientKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - Window slice = window.first_slice_window_2D(); - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _gx, slice); - add_2D_tensor_argument(idx, _gy, slice); - add_2D_tensor_argument(idx, _magnitude, slice); - add_2D_tensor_argument(idx, _phase, slice); - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); -} - -CLEdgeNonMaxSuppressionKernel::CLEdgeNonMaxSuppressionKernel() - : _magnitude(nullptr), _phase(nullptr), _output(nullptr) -{ -} - -BorderSize CLEdgeNonMaxSuppressionKernel::border_size() const -{ - return BorderSize(1); -} - -void CLEdgeNonMaxSuppressionKernel::configure(const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined) -{ - configure(CLKernelLibrary::get().get_compile_context(), magnitude, phase, output, lower_thr, border_undefined); -} - -void CLEdgeNonMaxSuppressionKernel::configure(const CLCompileContext &compile_context, const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(magnitude, 1, DataType::U16, DataType::U32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(phase, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U16, DataType::U32); - - _magnitude = magnitude; - _phase = phase; - _output = output; - - // Create build opts - std::set built_opts; - built_opts.emplace("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(magnitude->info()->data_type())); - built_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type())); - - // Create kernel - const std::string kernel_name = std::string("suppress_non_maximum"); - _kernel = create_kernel(compile_context, kernel_name, built_opts); - - // Set minimum threshold argument - unsigned int idx = 3 * num_arguments_per_2D_tensor(); //Skip the input and output parameters - _kernel.setArg(idx++, lower_thr); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 1; - constexpr unsigned int num_elems_read_written_per_iteration = 3; - - Window win = calculate_max_window(*_magnitude->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - - AccessWindowRectangle mag_access(_magnitude->info(), -border_size().left, -border_size().top, - num_elems_read_written_per_iteration, num_elems_read_written_per_iteration); - AccessWindowHorizontal phase_access(_phase->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(_output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, mag_access, phase_access, output_access); - - output_access.set_valid_region(win, _magnitude->info()->valid_region(), border_undefined, border_size()); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(output->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(border_undefined); -} - -void CLEdgeNonMaxSuppressionKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - Window slice = window.first_slice_window_2D(); - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _magnitude, slice); - add_2D_tensor_argument(idx, _phase, slice); - add_2D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); -} - -CLEdgeTraceKernel::CLEdgeTraceKernel() - : _input(nullptr), _output(nullptr), _lower_thr(0), _upper_thr(0), _visited(nullptr), _recorded(nullptr), _l1_stack(nullptr), _l1_stack_counter(nullptr) -{ -} - -void CLEdgeTraceKernel::configure(const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, - ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output, upper_thr, lower_thr, visited, recorded, l1_stack, l1_stack_counter); -} - -void CLEdgeTraceKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, - ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U16, DataType::U32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(visited, 1, DataType::U32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(recorded, 1, DataType::U32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(l1_stack, 1, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(l1_stack_counter, 1, DataType::U8); - - _input = input; - _output = output; - _lower_thr = lower_thr; - _upper_thr = upper_thr; - _visited = visited; - _recorded = recorded; - _l1_stack = l1_stack; - _l1_stack_counter = l1_stack_counter; - - // Create build opts - std::set built_opts; - built_opts.emplace("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type())); - built_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type())); - - // Create kernel - const std::string kernel_name = std::string("hysteresis"); - _kernel = create_kernel(compile_context, kernel_name, built_opts); - - // Set constant kernel args - unsigned int width = _input->info()->dimension(0); - unsigned int height = _input->info()->dimension(1); - unsigned int idx = 6 * num_arguments_per_2D_tensor(); //Skip the input and output parameters - _kernel.setArg(idx++, static_cast(_lower_thr)); - _kernel.setArg(idx++, static_cast(_upper_thr)); - _kernel.setArg(idx++, static_cast(width)); - _kernel.setArg(idx++, static_cast(height)); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 1; - Window win = calculate_max_window(*_input->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal output_access(_output->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal visited_access(_visited->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal recorded_access(_recorded->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal l1_stack_access(_l1_stack->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal l1_stack_counter_access(_l1_stack_counter->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, - AccessWindowHorizontal(_input->info(), 0, num_elems_processed_per_iteration), - output_access, - visited_access, - recorded_access, - l1_stack_access, - l1_stack_counter_access); - - output_access.set_valid_region(win, _input->info()->valid_region()); - visited_access.set_valid_region(win, _input->info()->valid_region()); - recorded_access.set_valid_region(win, _input->info()->valid_region()); - l1_stack_access.set_valid_region(win, _input->info()->valid_region()); - l1_stack_counter_access.set_valid_region(win, _input->info()->valid_region()); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); - _config_id += "_"; - _config_id += lower_string(string_from_format(output->info()->format())); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(1)); -} - -void CLEdgeTraceKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - Window slice = window.first_slice_window_2D(); - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, slice); - add_2D_tensor_argument(idx, _output, slice); - add_2D_tensor_argument(idx, _visited, slice); - add_2D_tensor_argument(idx, _recorded, slice); - add_2D_tensor_argument(idx, _l1_stack, slice); - add_2D_tensor_argument(idx, _l1_stack_counter, slice); - - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); -} diff --git a/src/core/CL/kernels/CLCannyEdgeKernel.h b/src/core/CL/kernels/CLCannyEdgeKernel.h deleted file mode 100644 index 7543822d8d..0000000000 --- a/src/core/CL/kernels/CLCannyEdgeKernel.h +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCANNYEDGEKERNEL_H -#define ARM_COMPUTE_CLCANNYEDGEKERNEL_H - -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform Gradient computation. - */ -class CLGradientKernel : public ICLKernel -{ -public: - /** Constructor */ - CLGradientKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGradientKernel(const CLGradientKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGradientKernel &operator=(const CLGradientKernel &) = delete; - /** Initialise the kernel's sources, destinations and border mode. - * - * @note gx, gy and mag must all be the same size (either 16 or 32). - * - * @param[in] gx Source tensor - Gx component. Data types supported: S16/S32. - * @param[in] gy Source tensor - Gy component. Data types supported: Same as gx. - * @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy. - * @param[out] phase Destination tensor - Quantized phase. Data types supported: U8. - * @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm. - */ - void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type); - /** Initialise the kernel's sources, destinations and border mode. - * - * @note gx, gy and mag must all be the same size (either 16 or 32). - * - * @param[in] compile_context The compile context to be used. - * @param[in] gx Source tensor - Gx component. Data types supported: S16/S32. - * @param[in] gy Source tensor - Gy component. Data types supported: Same as gx. - * @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy. - * @param[out] phase Destination tensor - Quantized phase. Data types supported: U8. - * @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_gx; /**< Source tensor - Gx component */ - const ICLTensor *_gy; /**< Source tensor - Gy component */ - ICLTensor *_magnitude; /**< Destination tensor - Magnitude */ - ICLTensor *_phase; /**< Destination tensor - Quantized phase */ -}; - -/** OpenCL kernel to perform Non-Maxima suppression for Canny Edge. - * - * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input - * to characterize points as possible edges. The output buffer needs to be cleared before this kernel is executed. - * - * @note Hysteresis is computed in @ref CLEdgeTraceKernel - */ -class CLEdgeNonMaxSuppressionKernel : public ICLKernel -{ -public: - /** Constructor */ - CLEdgeNonMaxSuppressionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLEdgeNonMaxSuppressionKernel(const CLEdgeNonMaxSuppressionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLEdgeNonMaxSuppressionKernel &operator=(const CLEdgeNonMaxSuppressionKernel &) = delete; - /** Initialise the kernel's sources, destination and border mode. - * - * @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32. - * @param[in] phase Source tensor - Quantized phase. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: U16/U32. - * @param[in] lower_thr Lower threshold. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined); - /** Initialise the kernel's sources, destination and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32. - * @param[in] phase Source tensor - Quantized phase. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: U16/U32. - * @param[in] lower_thr Lower threshold. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_magnitude; /**< Source tensor - Magnitude. */ - const ICLTensor *_phase; /**< Source tensor - Quantized phase. */ - ICLTensor *_output; /**< Destination tensor. */ -}; - -/** OpenCL kernel to perform Edge tracing. - */ -class CLEdgeTraceKernel : public ICLKernel -{ -public: - /** Constructor */ - CLEdgeTraceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLEdgeTraceKernel(const CLEdgeTraceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLEdgeTraceKernel &operator=(const CLEdgeTraceKernel &) = delete; - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input Source tensor. Data types supported: U16/U32. - * @param[out] output Destination tensor. Data types supported: U8. - * @param[in] upper_thr Upper threshold used for the hysteresis - * @param[in] lower_thr Lower threshold used for the hysteresis - * @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32. - * Expected to be initialized to 0 before each run. - * @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32 - * Expected to be initialized to 0 before each run. - * @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32. - * Expected to be initialized to 0 before each run. - * @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8. - * Expected to be initialized to 0 before each run. - */ - void configure(const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, - ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter); - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U16/U32. - * @param[out] output Destination tensor. Data types supported: U8. - * @param[in] upper_thr Upper threshold used for the hysteresis - * @param[in] lower_thr Lower threshold used for the hysteresis - * @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32. - * Expected to be initialized to 0 before each run. - * @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32 - * Expected to be initialized to 0 before each run. - * @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32. - * Expected to be initialized to 0 before each run. - * @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8. - * Expected to be initialized to 0 before each run. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, - ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor. */ - ICLTensor *_output; /**< Destination tensor. */ - int32_t _lower_thr; /**< Lower threshold used for the hysteresis. */ - int32_t _upper_thr; /**< Upper threshold used for the hysteresis. */ - ICLTensor *_visited; /**< Marks visited elements */ - ICLTensor *_recorded; /**< Marks recorded elements */ - ICLTensor *_l1_stack; /**< L1 hysteris stack */ - ICLTensor *_l1_stack_counter; /**< L1 hysteris stack counter */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLCANNYEDGEKERNEL_H */ diff --git a/src/core/CL/kernels/CLChannelCombineKernel.cpp b/src/core/CL/kernels/CLChannelCombineKernel.cpp deleted file mode 100644 index 52ba9dd065..0000000000 --- a/src/core/CL/kernels/CLChannelCombineKernel.cpp +++ /dev/null @@ -1,296 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLChannelCombineKernel.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLMultiImage.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/MultiImageInfo.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include - -namespace arm_compute -{ -namespace -{ -constexpr unsigned int num_elems_processed_per_iteration = 16; -} // namespace - -CLChannelCombineKernel::CLChannelCombineKernel() - : _planes{ { nullptr } }, _output(nullptr), _output_multi(nullptr), _x_subsampling{ { 1, 1, 1 } }, _y_subsampling{ { 1, 1, 1 } } -{ -} - -void CLChannelCombineKernel::configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output) -{ - configure(CLKernelLibrary::get().get_compile_context(), plane0, plane1, plane2, plane3, output); -} - -void CLChannelCombineKernel::configure(const CLCompileContext &compile_context, const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(plane0, plane1, plane2, output); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane0); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane1); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane2); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output); - - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane0, Format::U8); - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane1, Format::U8); - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane2, Format::U8); - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(output, Format::RGB888, Format::RGBA8888, Format::YUYV422, Format::UYVY422); - - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane0, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane1, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane2, 1, DataType::U8); - - const Format output_format = output->info()->format(); - - // Check if horizontal dimension of Y plane is even and validate horizontal sub-sampling dimensions for U and V planes - if(Format::YUYV422 == output_format || Format::UYVY422 == output_format) - { - // Validate Y plane of input and output - ARM_COMPUTE_ERROR_ON_TENSORS_NOT_EVEN(output_format, plane0, output); - - // Validate U and V plane of the input - ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(output_format, plane0->info()->tensor_shape(), plane1, plane2); - } - - _planes[0] = plane0; - _planes[1] = plane1; - _planes[2] = plane2; - _planes[3] = nullptr; - - // Validate the last input tensor only for RGBA format - if(Format::RGBA8888 == output_format) - { - ARM_COMPUTE_ERROR_ON_NULLPTR(plane3); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane3); - - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane3, Format::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane3, 1, DataType::U8); - - _planes[3] = plane3; - } - - _output = output; - _output_multi = nullptr; - - // Half the processed elements for U and V channels due to horizontal sub-sampling of 2 - if(Format::YUYV422 == output_format || Format::UYVY422 == output_format) - { - _x_subsampling[1] = 2; - _x_subsampling[2] = 2; - } - - // Create kernel - std::string kernel_name = "channel_combine_" + string_from_format(output_format); - _kernel = create_kernel(compile_context, kernel_name); - - // Configure window - Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal plane0_access(plane0->info(), 0, num_elems_processed_per_iteration); - AccessWindowRectangle plane1_access(plane1->info(), 0, 0, num_elems_processed_per_iteration, 1, 1.f / _x_subsampling[1], 1.f / _y_subsampling[1]); - AccessWindowRectangle plane2_access(plane2->info(), 0, 0, num_elems_processed_per_iteration, 1, 1.f / _x_subsampling[2], 1.f / _y_subsampling[2]); - AccessWindowHorizontal plane3_access(plane3 == nullptr ? nullptr : plane3->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, plane0_access, plane1_access, plane2_access, plane3_access, output_access); - - ValidRegion valid_region = intersect_valid_regions(plane0->info()->valid_region(), - plane1->info()->valid_region(), - plane2->info()->valid_region()); - if(plane3 != nullptr) - { - valid_region = intersect_valid_regions(plane3->info()->valid_region(), valid_region); - } - output_access.set_valid_region(win, ValidRegion(valid_region.anchor, output->info()->tensor_shape())); - - ICLKernel::configure_internal(win); -} - -void CLChannelCombineKernel::configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output) -{ - configure(CLKernelLibrary::get().get_compile_context(), plane0, plane1, plane2, output); -} - -void CLChannelCombineKernel::configure(const CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(plane0, plane1, plane2, output); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane0); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane1); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane2); - - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane0, Format::U8); - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane1, Format::U8); - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(plane2, Format::U8); - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(output, Format::NV12, Format::NV21, Format::IYUV, Format::YUV444); - - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane0, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane1, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(plane2, 1, DataType::U8); - - const Format output_format = output->info()->format(); - - // Validate shape of Y plane to be even and shape of sub-sampling dimensions for U and V planes - // Perform validation only for formats which require sub-sampling. - if(Format::YUV444 != output_format) - { - // Validate Y plane of input and output - ARM_COMPUTE_ERROR_ON_TENSORS_NOT_EVEN(output_format, plane0, output->plane(0)); - - // Validate U and V plane of the input - ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(output_format, plane0->info()->tensor_shape(), plane1, plane2); - - // Validate second plane U (NV12 and NV21 have a UV88 combined plane while IYUV has only the U plane) - // MultiImage generates the correct tensor shape but also check in case the tensor shape of planes was changed to a wrong size - ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(output_format, plane0->info()->tensor_shape(), output->plane(1)); - - // Validate the last plane V of format IYUV - if(Format::IYUV == output_format) - { - // Validate Y plane of the output - ARM_COMPUTE_ERROR_ON_TENSORS_NOT_SUBSAMPLED(output_format, plane0->info()->tensor_shape(), output->plane(2)); - } - } - - // Set input tensors - _planes[0] = plane0; - _planes[1] = plane1; - _planes[2] = plane2; - _planes[3] = nullptr; - - // Set output tensor - _output = nullptr; - _output_multi = output; - - bool has_two_planars = false; - - // Set sub-sampling parameters for each plane - std::string kernel_name; - std::set build_opts; - - if(Format::NV12 == output_format || Format::NV21 == output_format) - { - _x_subsampling = { { 1, 2, 2 } }; - _y_subsampling = { { 1, 2, 2 } }; - kernel_name = "channel_combine_NV"; - build_opts.emplace(Format::NV12 == output_format ? "-DNV12" : "-DNV21"); - has_two_planars = true; - } - else - { - if(Format::IYUV == output_format) - { - _x_subsampling = { { 1, 2, 2 } }; - _y_subsampling = { { 1, 2, 2 } }; - } - - kernel_name = "copy_planes_3p"; - build_opts.emplace(Format::IYUV == output_format ? "-DIYUV" : "-DYUV444"); - } - - // Create kernel - _kernel = create_kernel(compile_context, kernel_name, build_opts); - - // Configure window - Window win = calculate_max_window(*plane0->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowRectangle input_plane0_access(plane0->info(), 0, 0, num_elems_processed_per_iteration, 1.f); - AccessWindowRectangle input_plane1_access(plane1->info(), 0, 0, num_elems_processed_per_iteration, 1.f, 1.f / _x_subsampling[1], 1.f / _y_subsampling[1]); - AccessWindowRectangle input_plane2_access(plane2->info(), 0, 0, num_elems_processed_per_iteration, 1.f, 1.f / _x_subsampling[2], 1.f / _y_subsampling[2]); - AccessWindowRectangle output_plane0_access(output->plane(0)->info(), 0, 0, num_elems_processed_per_iteration, 1.f, 1.f, 1.f / _y_subsampling[1]); - AccessWindowRectangle output_plane1_access(output->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1.f, 1.f / _x_subsampling[1], 1.f / _y_subsampling[1]); - AccessWindowRectangle output_plane2_access(has_two_planars ? nullptr : output->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1.f, 1.f / _x_subsampling[2], 1.f / _y_subsampling[2]); - - update_window_and_padding(win, - input_plane0_access, input_plane1_access, input_plane2_access, - output_plane0_access, output_plane1_access, output_plane2_access); - - ValidRegion plane0_valid_region = plane0->info()->valid_region(); - ValidRegion output_plane1_region = has_two_planars ? intersect_valid_regions(plane1->info()->valid_region(), plane2->info()->valid_region()) : plane2->info()->valid_region(); - output_plane0_access.set_valid_region(win, ValidRegion(plane0_valid_region.anchor, output->plane(0)->info()->tensor_shape())); - output_plane1_access.set_valid_region(win, ValidRegion(output_plane1_region.anchor, output->plane(1)->info()->tensor_shape())); - output_plane2_access.set_valid_region(win, ValidRegion(plane2->info()->valid_region().anchor, output->plane(2)->info()->tensor_shape())); - - ICLKernel::configure_internal(win); -} - -void CLChannelCombineKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window slice = window.first_slice_window_2D(); - slice.set_dimension_step(Window::DimY, 1); - - do - { - // Subsampling in plane 1 - Window win_sub_plane1(slice); - win_sub_plane1.set(Window::DimX, Window::Dimension(win_sub_plane1.x().start() / _x_subsampling[1], win_sub_plane1.x().end() / _x_subsampling[1], win_sub_plane1.x().step() / _x_subsampling[1])); - win_sub_plane1.set(Window::DimY, Window::Dimension(win_sub_plane1.y().start() / _y_subsampling[1], win_sub_plane1.y().end() / _y_subsampling[1], 1)); - - // Subsampling in plane 2 - Window win_sub_plane2(slice); - win_sub_plane2.set(Window::DimX, Window::Dimension(win_sub_plane2.x().start() / _x_subsampling[2], win_sub_plane2.x().end() / _x_subsampling[2], win_sub_plane2.x().step() / _x_subsampling[2])); - win_sub_plane2.set(Window::DimY, Window::Dimension(win_sub_plane2.y().start() / _y_subsampling[2], win_sub_plane2.y().end() / _y_subsampling[2], 1)); - - unsigned int idx = 0; - - // Set inputs - add_2D_tensor_argument(idx, _planes[0], slice); - add_2D_tensor_argument(idx, _planes[1], win_sub_plane1); - add_2D_tensor_argument(idx, _planes[2], win_sub_plane2); - add_2D_tensor_argument_if((nullptr != _planes[3]), idx, _planes[3], slice); - - // Set outputs - if(nullptr != _output) // Single planar output - { - add_2D_tensor_argument(idx, _output, slice); - } - else // Multi-planar output - { - // Reduce slice in case of subsampling to avoid out-of bounds access - slice.set(Window::DimY, Window::Dimension(slice.y().start() / _y_subsampling[1], slice.y().end() / _y_subsampling[1], 1)); - - add_2D_tensor_argument(idx, _output_multi->cl_plane(0), slice); - add_2D_tensor_argument(idx, _output_multi->cl_plane(1), win_sub_plane1); - add_2D_tensor_argument_if((3 == num_planes_from_format(_output_multi->info()->format())), idx, _output_multi->cl_plane(2), win_sub_plane2); - - _kernel.setArg(idx++, slice.y().end()); - } - - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); -} -} // namespace arm_compute diff --git a/src/core/CL/kernels/CLChannelCombineKernel.h b/src/core/CL/kernels/CLChannelCombineKernel.h deleted file mode 100644 index f19995aa8e..0000000000 --- a/src/core/CL/kernels/CLChannelCombineKernel.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H -#define ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H - -#include "src/core/CL/ICLKernel.h" - -#include -#include - -namespace arm_compute -{ -class ICLMultiImage; -class ICLTensor; -using ICLImage = ICLTensor; - -/** Interface for the channel combine kernel */ -class CLChannelCombineKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLChannelCombineKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLChannelCombineKernel(const CLChannelCombineKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLChannelCombineKernel &operator=(const CLChannelCombineKernel &) = delete; - /** Allow instances of this class to be moved */ - CLChannelCombineKernel(CLChannelCombineKernel &&) = default; - /** Allow instances of this class to be moved */ - CLChannelCombineKernel &operator=(CLChannelCombineKernel &&) = default; - /** Default destructor */ - ~CLChannelCombineKernel() = default; - /** Configure function's inputs and outputs. - * - * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. - * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. - * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. - * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format. - * @param[out] output The single planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422. - */ - void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output); - /** Configure function's inputs and outputs. - * - * @param[in] compile_context The compile context to be used. - * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. - * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. - * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. - * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format. - * @param[out] output The single planar output tensor. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output); - /** Configure function's inputs and outputs. - * - * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. - * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. - * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. - * @param[out] output The multi planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422. - */ - void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output); - /** Configure function's inputs and outputs. - * - * @param[in] compile_context The compile context to be used. - * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format. - * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format. - * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format. - * @param[out] output The multi planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422. - */ - void configure(const CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - std::array _planes; - ICLTensor *_output; - ICLMultiImage *_output_multi; - std::array _x_subsampling; - std::array _y_subsampling; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H */ diff --git a/src/core/CL/kernels/CLChannelExtractKernel.cpp b/src/core/CL/kernels/CLChannelExtractKernel.cpp deleted file mode 100644 index cbf504b98b..0000000000 --- a/src/core/CL/kernels/CLChannelExtractKernel.cpp +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLChannelExtractKernel.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLMultiImage.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/MultiImageInfo.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include - -using namespace arm_compute; - -CLChannelExtractKernel::CLChannelExtractKernel() - : _input(nullptr), _output(nullptr), _num_elems_processed_per_iteration(8), _subsampling(1) -{ -} - -void CLChannelExtractKernel::configure(const ICLTensor *input, Channel channel, ICLTensor *output) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, channel, output); -} - -void CLChannelExtractKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, Channel channel, ICLTensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_ON(input == output); - - set_format_if_unknown(*output->info(), Format::U8); - - // Check if input tensor has a valid format - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(input, Format::RGB888, Format::RGBA8888, Format::YUYV422, Format::UYVY422); - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(output, Format::U8); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output); - - // Check if channel is valid for given format - const Format format = input->info()->format(); - ARM_COMPUTE_ERROR_ON_CHANNEL_NOT_IN_KNOWN_FORMAT(format, channel); - - // Half the processed elements for U,V channels due to sub-sampling of 2 - _subsampling = 1; - - if(format == Format::YUYV422 || format == Format::UYVY422) - { - // Check if the width of the tensor shape is even for formats with subsampled channels (UYVY422 and YUYV422) - ARM_COMPUTE_ERROR_ON_TENSORS_NOT_EVEN(format, input); - - if(channel != Channel::Y) - { - _subsampling = 2; - } - } - - // Calculate output tensor shape using subsampling - TensorShape output_shape = calculate_subsampled_shape(input->info()->tensor_shape(), format, channel); - set_shape_if_empty(*output->info(), output_shape); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); - - _input = input; - _output = output; - - // Create kernel - std::string kernel_name = "channel_extract_" + string_from_format(format); - std::set build_opts = { ("-DCHANNEL_" + string_from_channel(channel)) }; - _kernel = create_kernel(compile_context, kernel_name, build_opts); - - // Configure window - Window win = calculate_max_window(*input->info(), Steps(_num_elems_processed_per_iteration)); - AccessWindowHorizontal input_access(input->info(), 0, _num_elems_processed_per_iteration); - AccessWindowRectangle output_access(output->info(), 0, 0, _num_elems_processed_per_iteration, 1, 1.f / _subsampling, 1.f / _subsampling); - - update_window_and_padding(win, input_access, output_access); - - ValidRegion input_valid_region = input->info()->valid_region(); - output_access.set_valid_region(win, ValidRegion(input_valid_region.anchor, output->info()->tensor_shape())); - - ICLKernel::configure_internal(win); -} - -void CLChannelExtractKernel::configure(const ICLMultiImage *input, Channel channel, ICLImage *output) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, channel, output); -} - -void CLChannelExtractKernel::configure(const CLCompileContext &compile_context, const ICLMultiImage *input, Channel channel, ICLImage *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output); - - set_format_if_unknown(*output->info(), Format::U8); - - // Check if channel is valid for given format - const Format format = input->info()->format(); - ARM_COMPUTE_ERROR_ON_CHANNEL_NOT_IN_KNOWN_FORMAT(format, channel); - - // Get input plane from the given channel - const ICLImage *input_plane = input->cl_plane(plane_idx_from_channel(format, channel)); - ARM_COMPUTE_ERROR_ON_NULLPTR(input_plane); - - if(Channel::Y == channel && format != Format::YUV444) - { - // Check if the width of the tensor shape is even for formats with subsampled channels (UYVY422 and YUYV422) - ARM_COMPUTE_ERROR_ON_TENSORS_NOT_EVEN(format, input_plane); - } - - // Calculate 2x2 subsampled tensor shape - TensorShape output_shape = calculate_subsampled_shape(input->cl_plane(0)->info()->tensor_shape(), format, channel); - set_shape_if_empty(*output->info(), output_shape); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output_shape, output->info()->tensor_shape()); - - // Check if input tensor has a valid format - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(input, Format::NV12, Format::NV21, Format::IYUV, Format::YUV444); - ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(output, Format::U8); - - _output = output; - _input = input_plane; - _subsampling = 1; - - // Create kernel - std::string kernel_name; - std::set build_opts; - if(Channel::Y == channel || Format::IYUV == format || Format::YUV444 == format) - { - kernel_name = "copy_plane"; - } - else - { - kernel_name = "channel_extract_" + string_from_format(format); - build_opts.insert(("-DCHANNEL_" + string_from_channel(channel))); - } - _kernel = create_kernel(compile_context, kernel_name, build_opts); - - // Configure window - Window win = calculate_max_window(*input_plane->info(), Steps(_num_elems_processed_per_iteration)); - AccessWindowHorizontal input_access(input_plane->info(), 0, _num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, _num_elems_processed_per_iteration); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, input_plane->info()->valid_region()); - - ICLKernel::configure_internal(win); -} - -void CLChannelExtractKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window slice = window.first_slice_window_2D(); - - do - { - Window win_sub(slice); - win_sub.set(Window::DimX, Window::Dimension(win_sub.x().start() / _subsampling, win_sub.x().end() / _subsampling, win_sub.x().step() / _subsampling)); - win_sub.set(Window::DimY, Window::Dimension(win_sub.y().start() / _subsampling, win_sub.y().end() / _subsampling, 1)); - - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, slice); - add_2D_tensor_argument(idx, _output, win_sub); - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); -} diff --git a/src/core/CL/kernels/CLChannelExtractKernel.h b/src/core/CL/kernels/CLChannelExtractKernel.h deleted file mode 100644 index 37abde548c..0000000000 --- a/src/core/CL/kernels/CLChannelExtractKernel.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H -#define ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/CL/ICLKernel.h" - -#include - -namespace arm_compute -{ -class ICLMultiImage; -class ICLTensor; -using ICLImage = ICLTensor; - -/** Interface for the channel extract kernel */ -class CLChannelExtractKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLChannelExtractKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLChannelExtractKernel(const CLChannelExtractKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLChannelExtractKernel &operator=(const CLChannelExtractKernel &) = delete; - /** Allow instances of this class to be moved */ - CLChannelExtractKernel(CLChannelExtractKernel &&) = default; - /** Allow instances of this class to be moved */ - CLChannelExtractKernel &operator=(CLChannelExtractKernel &&) = default; - /** Default destructor */ - ~CLChannelExtractKernel() = default; - /** Set the input and output of the kernel - * - * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422 - * @param[in] channel Channel to extract. - * @param[out] output Destination tensor. Must be of U8 format. - */ - void configure(const ICLTensor *input, Channel channel, ICLTensor *output); - /** Set the input and output of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422 - * @param[in] channel Channel to extract. - * @param[out] output Destination tensor. Must be of U8 format. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, Channel channel, ICLTensor *output); - /** Set the input and output of the kernel - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444 - * @param[in] channel Channel to extract. - * @param[out] output Single-planar 2D destination image. Must be of U8 format. - */ - void configure(const ICLMultiImage *input, Channel channel, ICLImage *output); - /** Set the input and output of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444 - * @param[in] channel Channel to extract. - * @param[out] output Single-planar 2D destination image. Must be of U8 format. - */ - void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, Channel channel, ICLImage *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - uint32_t _num_elems_processed_per_iteration; - uint32_t _subsampling; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H */ diff --git a/src/core/CL/kernels/CLColorConvertKernel.cpp b/src/core/CL/kernels/CLColorConvertKernel.cpp deleted file mode 100644 index 6c61fec997..0000000000 --- a/src/core/CL/kernels/CLColorConvertKernel.cpp +++ /dev/null @@ -1,558 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLColorConvertKernel.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLMultiImage.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/MultiImageInfo.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include - -using namespace arm_compute; - -CLColorConvertKernel::CLColorConvertKernel() - : _input(nullptr), _output(nullptr), _multi_input(nullptr), _multi_output(nullptr) -{ -} - -void CLColorConvertKernel::configure(const ICLTensor *input, ICLTensor *output) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output); -} - -void CLColorConvertKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output) -{ - ARM_COMPUTE_ERROR_ON(input == nullptr); - ARM_COMPUTE_ERROR_ON(output == nullptr); - - unsigned int num_elems_processed_per_iteration = 0; - switch(input->info()->format()) - { - case Format::RGBA8888: - { - switch(output->info()->format()) - { - case Format::RGB888: - num_elems_processed_per_iteration = 16; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - break; - } - case Format::UYVY422: - case Format::YUYV422: - { - switch(output->info()->format()) - { - case Format::RGB888: - case Format::RGBA8888: - num_elems_processed_per_iteration = 8; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - break; - } - case Format::RGB888: - { - switch(output->info()->format()) - { - case Format::RGBA8888: - case Format::U8: - num_elems_processed_per_iteration = 16; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - break; - } - default: - break; - } - ARM_COMPUTE_ERROR_ON_MSG_VAR(num_elems_processed_per_iteration == 0, "Conversion from %s to %s not supported", - string_from_format(input->info()->format()).c_str(), - string_from_format(output->info()->format()).c_str()); - - std::stringstream kernel_name; - - kernel_name << string_from_format(input->info()->format()); - kernel_name << "_to_"; - kernel_name << string_from_format(output->info()->format()); - kernel_name << "_bt709"; - - _input = input; - _output = output; - - // Create kernel - _kernel = create_kernel(compile_context, kernel_name.str()); - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, input->info()->valid_region()); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name.str(); - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); -} - -void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLImage *output) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output); -} - -void CLColorConvertKernel::configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLImage *output) -{ - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output); - ARM_COMPUTE_ERROR_ON(output == nullptr); - - unsigned int num_elems_processed_per_iteration = 0; - - switch(input->info()->format()) - { - case Format::NV12: - case Format::NV21: - case Format::IYUV: - { - switch(output->info()->format()) - { - case Format::RGB888: - case Format::RGBA8888: - num_elems_processed_per_iteration = 4; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - break; - } - default: - break; - } - ARM_COMPUTE_ERROR_ON_MSG_VAR(num_elems_processed_per_iteration == 0, "Conversion from %s to %s not supported", - string_from_format(input->info()->format()).c_str(), - string_from_format(output->info()->format()).c_str()); - - std::stringstream kernel_name; - - kernel_name << string_from_format(input->info()->format()); - kernel_name << "_to_"; - kernel_name << string_from_format(output->info()->format()); - kernel_name << "_bt709"; - - _multi_input = input; - _output = output; - - // Create kernel - _kernel = create_kernel(compile_context, kernel_name.str()); - - // Configure kernel window - const bool has_two_planes = (input->info()->format() == Format::NV12) || (input->info()->format() == Format::NV21); - const float sub_sampling = (has_two_planes || (input->info()->format() == Format::IYUV)) ? 0.5f : 1; - - Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); - win.set_dimension_step(Window::DimY, 2); - - AccessWindowHorizontal plane0_access(input->plane(0)->info(), 0, num_elems_processed_per_iteration); - AccessWindowRectangle plane1_access(input->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1, - sub_sampling, sub_sampling); - AccessWindowRectangle plane2_access(has_two_planes ? nullptr : input->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1, - sub_sampling, sub_sampling); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, - plane0_access, plane1_access, plane2_access, - output_access); - - ValidRegion intersect_region = intersect_valid_regions(input->plane(0)->info()->valid_region(), input->plane(1)->info()->valid_region(), - input->plane(2)->info()->valid_region()); - output_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->info()->tensor_shape())); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name.str(); - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->plane(0)->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->plane(0)->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->plane(0)->info()->dimension(1)); - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->plane(1)->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->plane(1)->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->plane(1)->info()->dimension(1)); -} - -void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *output) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output); -} - -void CLColorConvertKernel::configure(const CLCompileContext &compile_context, const ICLImage *input, ICLMultiImage *output) -{ - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); - ARM_COMPUTE_ERROR_ON(output == nullptr); - - unsigned int num_elems_processed_per_iteration = 0; - unsigned int num_elems_read_per_iteration_x = 0; - - bool has_two_planes = (output->info()->format() == Format::NV12) || (output->info()->format() == Format::NV21); - float sub_sampling = (has_two_planes || (output->info()->format() == Format::IYUV)) ? 0.5f : 1; - - switch(input->info()->format()) - { - case Format::RGB888: - case Format::RGBA8888: - { - switch(output->info()->format()) - { - case Format::NV12: - case Format::IYUV: - num_elems_processed_per_iteration = 2; - num_elems_read_per_iteration_x = 8; - break; - case Format::YUV444: - num_elems_processed_per_iteration = 4; - num_elems_read_per_iteration_x = 16; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - break; - } - case Format::UYVY422: - case Format::YUYV422: - { - switch(output->info()->format()) - { - case Format::NV12: - case Format::IYUV: - num_elems_processed_per_iteration = 8; - num_elems_read_per_iteration_x = 8; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - break; - } - default: - break; - } - - ARM_COMPUTE_ERROR_ON_MSG_VAR(num_elems_processed_per_iteration == 0, "Conversion from %s to %s not supported", - string_from_format(input->info()->format()).c_str(), - string_from_format(output->info()->format()).c_str()); - - std::stringstream kernel_name; - - kernel_name << string_from_format(input->info()->format()); - kernel_name << "_to_"; - kernel_name << string_from_format(output->info()->format()); - kernel_name << "_bt709"; - _input = input; - _multi_output = output; - - // Create kernel - _kernel = create_kernel(compile_context, kernel_name.str()); - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - if((input->info()->format() != Format::RGB888 || output->info()->format() != Format::YUV444) && (input->info()->format() != Format::RGBA8888 || output->info()->format() != Format::YUV444)) - { - win.set_dimension_step(Window::DimY, 2); - } - - AccessWindowHorizontal output_plane0_access(output->plane(0)->info(), 0, num_elems_processed_per_iteration); - AccessWindowRectangle output_plane1_access(output->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1, sub_sampling, sub_sampling); - AccessWindowRectangle output_plane2_access(has_two_planes ? nullptr : output->plane(2)->info(), 0, 0, - num_elems_processed_per_iteration, 1, sub_sampling, sub_sampling); - - AccessWindowHorizontal input_access(input->info(), 0, num_elems_read_per_iteration_x); - - update_window_and_padding(win, - input_access, - output_plane0_access, - output_plane1_access, - output_plane2_access); - - ValidRegion input_region = input->info()->valid_region(); - - output_plane0_access.set_valid_region(win, ValidRegion(input_region.anchor, output->plane(0)->info()->tensor_shape())); - output_plane1_access.set_valid_region(win, ValidRegion(input_region.anchor, output->plane(1)->info()->tensor_shape())); - output_plane2_access.set_valid_region(win, ValidRegion(input_region.anchor, output->plane(2)->info()->tensor_shape())); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name.str(); - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); -} - -void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLMultiImage *output) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output); -} - -void CLColorConvertKernel::configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLMultiImage *output) -{ - unsigned int num_elems_processed_per_iteration = 0; - switch(input->info()->format()) - { - case Format::NV12: - case Format::NV21: - { - switch(output->info()->format()) - { - case Format::IYUV: - case Format::YUV444: - num_elems_processed_per_iteration = 16; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - break; - } - case Format::IYUV: - { - switch(output->info()->format()) - { - case Format::YUV444: - case Format::NV12: - num_elems_processed_per_iteration = 16; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - break; - } - break; - } - default: - break; - } - ARM_COMPUTE_ERROR_ON_MSG_VAR(num_elems_processed_per_iteration == 0, "Conversion from %s to %s not supported", - string_from_format(input->info()->format()).c_str(), - string_from_format(output->info()->format()).c_str()); - - std::stringstream kernel_name; - - kernel_name << string_from_format(input->info()->format()); - kernel_name << "_to_"; - kernel_name << string_from_format(output->info()->format()); - kernel_name << "_bt709"; - - _multi_input = input; - _multi_output = output; - - // Create kernel - bool has_two_input_planars = (input->info()->format() == Format::NV12) || (input->info()->format() == Format::NV21); - bool has_two_output_planars = (output->info()->format() == Format::NV12) || (output->info()->format() == Format::NV21); - - float sub_sampling_input = (has_two_input_planars || (input->info()->format() == Format::IYUV)) ? 0.5f : 1; - float sub_sampling_output = (has_two_output_planars || (output->info()->format() == Format::IYUV)) ? 0.5f : 1; - - _kernel = create_kernel(compile_context, kernel_name.str()); - - Window win = calculate_max_window(*input->cl_plane(0)->info(), Steps(num_elems_processed_per_iteration)); - win.set_dimension_step(Window::DimY, 2); - - AccessWindowHorizontal input_plane0_access(input->plane(0)->info(), 0, num_elems_processed_per_iteration); - AccessWindowRectangle input_plane1_access(input->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1, - sub_sampling_input, sub_sampling_input); - AccessWindowRectangle input_plane2_access(has_two_input_planars ? nullptr : input->plane(2)->info(), 0, 0, num_elems_processed_per_iteration, 1, - sub_sampling_input, sub_sampling_input); - AccessWindowHorizontal output_plane0_access(output->plane(0)->info(), 0, num_elems_processed_per_iteration); - AccessWindowRectangle output_plane1_access(output->plane(1)->info(), 0, 0, num_elems_processed_per_iteration, 1, sub_sampling_output, sub_sampling_output); - AccessWindowRectangle output_plane2_access(has_two_output_planars ? nullptr : output->plane(2)->info(), 0, 0, - num_elems_processed_per_iteration, 1, sub_sampling_output, sub_sampling_output); - - update_window_and_padding(win, - input_plane0_access, input_plane1_access, input_plane2_access, - output_plane0_access, output_plane1_access, output_plane2_access); - - ValidRegion intersect_region = intersect_valid_regions(input->plane(0)->info()->valid_region(), input->plane(1)->info()->valid_region(), - input->plane(2)->info()->valid_region()); - output_plane0_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->plane(0)->info()->tensor_shape())); - output_plane1_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->plane(1)->info()->tensor_shape())); - output_plane2_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->plane(2)->info()->tensor_shape())); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name.str(); - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->plane(0)->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->plane(0)->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->plane(0)->info()->dimension(1)); - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->plane(1)->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->plane(1)->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->plane(1)->info()->dimension(1)); -} - -void CLColorConvertKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window slice = window.first_slice_window_2D(); - - if(nullptr != _input && nullptr != _output) - { - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, slice); - add_2D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); - } - else if(nullptr != _input && nullptr != _multi_output) - { - Format format = _multi_output->info()->format(); - do - { - Window win_uv(slice); - - if((Format::NV12 == format) || (Format::NV21 == format) || (Format::IYUV == format)) - { - win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); - win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); - } - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, slice); - add_2D_tensor_argument(idx, _multi_output->cl_plane(0), slice); - for(int i = 1; i < 3 && (0 != _multi_output->cl_plane(i)->info()->num_dimensions()); ++i) - { - add_2D_tensor_argument(idx, _multi_output->cl_plane(i), win_uv); - } - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); - } - else if(nullptr != _multi_input && nullptr != _output) - { - Format format = _multi_input->info()->format(); - do - { - Window win_uv(slice); - - if((Format::NV12 == format) || (Format::NV21 == format) || (Format::IYUV == format)) - { - win_uv.set(Window::DimX, Window::Dimension(win_uv.x().start() / 2, win_uv.x().end() / 2, win_uv.x().step() / 2)); - win_uv.set(Window::DimY, Window::Dimension(win_uv.y().start() / 2, win_uv.y().end() / 2, 1)); - } - - unsigned int idx = 0; - add_2D_tensor_argument(idx, _multi_input->cl_plane(0), slice); - - for(int i = 1; i < 3 && (0 != _multi_input->cl_plane(i)->info()->num_dimensions()); ++i) - { - add_2D_tensor_argument(idx, _multi_input->cl_plane(i), win_uv); - } - add_2D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); - } - else if(nullptr != _multi_input && nullptr != _multi_output) - { - Format in_format = _multi_input->info()->format(); - Format out_format = _multi_output->info()->format(); - do - { - Window win_in_uv(slice); - if((Format::NV12 == in_format) || (Format::NV21 == in_format) || (Format::IYUV == in_format)) - { - win_in_uv.set(Window::DimX, Window::Dimension(win_in_uv.x().start() / 2, - win_in_uv.x().end() / 2, win_in_uv.x().step() / 2)); - win_in_uv.set(Window::DimY, Window::Dimension(win_in_uv.y().start() / 2, win_in_uv.y().end() / 2, 1)); - } - unsigned int idx = 0; - add_2D_tensor_argument(idx, _multi_input->cl_plane(0), slice); - for(int i = 1; i < 3 && (0 != _multi_input->cl_plane(i)->info()->num_dimensions()); ++i) - { - add_2D_tensor_argument(idx, _multi_input->cl_plane(i), win_in_uv); - } - - Window win_out_uv(slice); - if((Format::NV12 == out_format) || (Format::NV21 == out_format) || (Format::IYUV == out_format)) - { - win_out_uv.set(Window::DimX, Window::Dimension(win_out_uv.x().start() / 2, - win_out_uv.x().end() / 2, win_out_uv.x().step() / 2)); - win_out_uv.set(Window::DimY, Window::Dimension(win_out_uv.y().start() / 2, win_out_uv.y().end() / 2, 1)); - } - - add_2D_tensor_argument(idx, _multi_output->cl_plane(0), slice); - for(int i = 1; i < 3 && (0 != _multi_output->cl_plane(i)->info()->num_dimensions()); ++i) - { - add_2D_tensor_argument(idx, _multi_output->cl_plane(i), win_out_uv); - } - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); - } - else - { - ARM_COMPUTE_ERROR("Not supported"); - } -} diff --git a/src/core/CL/kernels/CLColorConvertKernel.h b/src/core/CL/kernels/CLColorConvertKernel.h deleted file mode 100644 index 0f082914cd..0000000000 --- a/src/core/CL/kernels/CLColorConvertKernel.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCOLORCONVERTKERNEL_H -#define ARM_COMPUTE_CLCOLORCONVERTKERNEL_H - -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLMultiImage; -class ICLTensor; -using ICLImage = ICLTensor; - -/** Interface for the color convert kernel. - * - */ -class CLColorConvertKernel : public ICLKernel -{ -public: - /** Default constructor. */ - CLColorConvertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLColorConvertKernel(const CLColorConvertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLColorConvertKernel &operator=(const CLColorConvertKernel &) = delete; - /** Allow instances of this class to be moved */ - CLColorConvertKernel(CLColorConvertKernel &&) = default; - /** Allow instances of this class to be moved */ - CLColorConvertKernel &operator=(CLColorConvertKernel &&) = default; - /** Default destructor. */ - ~CLColorConvertKernel() = default; - - /** Set the input and output of the kernel - * - * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888 - * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422), - * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/), - * U8 (if the formats of @p input is RGB888) - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Set the input and output of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888 - * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422), - * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/), - * U8 (if the formats of @p input is RGB888) - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); - /** Set the input and output of the kernel - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888 - */ - void configure(const ICLMultiImage *input, ICLImage *output); - /** Set the input and output of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888 - */ - void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLImage *output); - /** Set the input and output of the kernel - * - * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 - * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888) - */ - void configure(const ICLImage *input, ICLMultiImage *output); - /** Set the input and output of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 - * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888) - */ - void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLMultiImage *output); - /** Set the input and output of the kernel - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV) - */ - void configure(const ICLMultiImage *input, ICLMultiImage *output); - /** Set the input and output of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV) - */ - void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLMultiImage *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /*pointer to single planar tensor input */ - ICLTensor *_output; /*pointer to single planar tensor output */ - const ICLMultiImage *_multi_input; /*pointer to multi-planar input */ - ICLMultiImage *_multi_output; /*pointer to multi-planar output */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLCOLORCONVERTKERNEL_H */ diff --git a/src/core/CL/kernels/CLConvolutionKernel.cpp b/src/core/CL/kernels/CLConvolutionKernel.cpp deleted file mode 100644 index 21f1047cc6..0000000000 --- a/src/core/CL/kernels/CLConvolutionKernel.cpp +++ /dev/null @@ -1,392 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLConvolutionKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "src/core/CL/ICLKernel.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include -#include -#include - -namespace arm_compute -{ -namespace -{ -constexpr unsigned int max_matrix_size = 81; -} // namespace - -/****************************************************************************************\ - * Square Convolution * -\****************************************************************************************/ - -template -BorderSize CLConvolutionKernel::border_size() const -{ - return BorderSize(matrix_size / 2); -} - -template -void CLConvolutionKernel::configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output, conv, scale, border_undefined); -} - -template -void CLConvolutionKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16); - ARM_COMPUTE_ERROR_ON(conv == nullptr); - - _input = input; - _output = output; - - std::stringstream kernel_name; - CLBuildOptions build_opts; - kernel_name << "convolution" << matrix_size << "x" << matrix_size << "_static"; - - if(scale == 0) - { - scale = calculate_matrix_scale(conv, matrix_size); - } - - for(unsigned int i = 0; i < matrix_size * matrix_size; i++) - { - std::stringstream mat_str; - mat_str << "-DMAT" << i << "=" << conv[i]; - build_opts.add_option(mat_str.str()); - } - - build_opts.add_option("-DSCALE=" + support::cpp11::to_string(scale)); - - DataType data_type = data_type_for_convolution_matrix(conv, matrix_size * matrix_size); - build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type)); - - std::stringstream out_type; - out_type << "-DDATA_TYPE_OUT=" << get_cl_type_from_data_type(output->info()->data_type()); - build_opts.add_option(out_type.str()); - - _kernel = create_kernel(compile_context, kernel_name.str(), build_opts.options()); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_written_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_rows_read_per_iteration = matrix_size; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - - AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - ICLKernel::configure_internal(win); -} - -/****************************************************************************************\ - * Separable Convolution * -\****************************************************************************************/ -template -CLSeparableConvolutionHorKernel::CLSeparableConvolutionHorKernel() - : _border_size(0) -{ -} - -template -BorderSize CLSeparableConvolutionHorKernel::border_size() const -{ - return _border_size; -} - -template -void CLSeparableConvolutionHorKernel::configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output, conv, border_undefined); -} - -template -void CLSeparableConvolutionHorKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U16, DataType::S16, DataType::S32); - - ARM_COMPUTE_ERROR_ON((matrix_size != 5) && (matrix_size != 7) && (matrix_size != 9)); - - _input = input; - _output = output; - _border_size = BorderSize(border_undefined ? 0 : matrix_size / 2, matrix_size / 2); - - // Set build options - std::set build_opts; - - std::array mat = { 0 }; - memcpy(mat.data(), conv, matrix_size * sizeof(int16_t)); - - for(unsigned int j = 0; j < matrix_size * matrix_size; j++) - { - build_opts.insert("-DMAT" + support::cpp11::to_string(j) + "=" + support::cpp11::to_string(mat[j])); - } - - build_opts.insert("-DSCALE=0"); - - build_opts.insert("-DDATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type())); - - // Create kernel - const std::string kernel_name = "convolution_separable1x" + support::cpp11::to_string(matrix_size) + "_static"; - _kernel = create_kernel(compile_context, kernel_name, build_opts); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_elems_written_per_iteration = 8; - - Window win = calculate_max_window_horizontal(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - - AccessWindowHorizontal input_access(input->info(), -border_size().left, num_elems_read_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(border_undefined); -} - -template -BorderSize CLSeparableConvolutionVertKernel::border_size() const -{ - return BorderSize{ matrix_size / 2, 0 }; -} - -template -void CLSeparableConvolutionVertKernel::configure(const ICLTensor *input, ICLTensor *output, - const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output, conv, scale, border_undefined, data_type); -} - -template -void CLSeparableConvolutionVertKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, - const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U16, DataType::S16, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16); - ARM_COMPUTE_ERROR_ON((matrix_size != 5) && (matrix_size != 7) && (matrix_size != 9)); - ARM_COMPUTE_ERROR_ON(scale == 0); - - _input = input; - _output = output; - - std::set build_opts; - - std::array mat = { 0 }; - memcpy(mat.data() + matrix_size, conv, matrix_size * sizeof(int16_t)); - - for(unsigned int j = 0; j < matrix_size * matrix_size; j++) - { - build_opts.insert("-DMAT" + support::cpp11::to_string(j) + "=" + support::cpp11::to_string(mat[j])); - } - - build_opts.insert("-DSCALE=" + support::cpp11::to_string(scale)); - - build_opts.insert("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); - - build_opts.insert("-DCOMPUTE_TYPE=" + get_cl_type_from_data_type(data_type)); - - std::stringstream out_type; - out_type << "-DDATA_TYPE_OUT=" << get_cl_type_from_data_type(output->info()->data_type()); - build_opts.insert(out_type.str()); - - // Create kernel - const std::string kernel_name = "convolution_separable" + support::cpp11::to_string(matrix_size) + "x1_static"; - _kernel = create_kernel(compile_context, kernel_name, build_opts); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_written_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 8; - constexpr unsigned int num_rows_read_per_iteration = matrix_size; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - - AccessWindowRectangle input_access(input->info(), 0, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(data_type)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(border_undefined); -} - -/****************************************************************************************\ - * Rectangle Convolution * -\****************************************************************************************/ - -CLConvolutionRectangleKernel::CLConvolutionRectangleKernel() - : _border_size(0), _input(nullptr), _output(nullptr) -{ -} - -BorderSize CLConvolutionRectangleKernel::border_size() const -{ - return _border_size; -} - -void CLConvolutionRectangleKernel::configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output, conv, width, height, scale, border_undefined); -} - -void CLConvolutionRectangleKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, - bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16); - ARM_COMPUTE_ERROR_ON(nullptr == conv); - ARM_COMPUTE_ERROR_ON(3 != width && 5 != width && 7 != width && 9 != width); - ARM_COMPUTE_ERROR_ON(3 != height && 5 != height && 7 != height && 9 != height); - ARM_COMPUTE_ERROR_ON(0 == scale); - - _input = input; - _output = output; - _border_size = BorderSize(height / 2, width / 2); - - std::set options; - - std::stringstream output_type; - output_type << "-DDATA_TYPE_OUT=" << get_cl_type_from_data_type(output->info()->data_type()); - options.insert(output_type.str()); - - uint32_t matrix_size = width * height; - - std::array mat = { 0 }; - - memcpy(mat.data(), conv, matrix_size * sizeof(int16_t)); - - for(unsigned int j = 0; j < max_matrix_size; j++) - { - options.insert("-DMAT" + support::cpp11::to_string(j) + "=" + support::cpp11::to_string(mat[j])); - } - - options.insert("-DSCALE=" + support::cpp11::to_string(scale)); - - DataType data_type = data_type_for_convolution_matrix(conv, matrix_size); - options.insert("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type)); - - options.insert("-DMATRIX_WIDTH=" + support::cpp11::to_string(width)); - options.insert("-DMATRIX_HEIGHT=" + support::cpp11::to_string(height)); - - _kernel = create_kernel(compile_context, "convolution_rectangle", options); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_elems_written_per_iteration = 8; - const unsigned int num_rows_read_per_iteration = height; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - - AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - ICLKernel::configure_internal(win); -} - -void CLConvolutionRectangleKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - Window slice = window.first_slice_window_2D(); - - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, slice); - add_2D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); -} - -template class arm_compute::CLConvolutionKernel<3>; -template class arm_compute::CLConvolutionKernel<5>; -template class arm_compute::CLConvolutionKernel<7>; -template class arm_compute::CLConvolutionKernel<9>; -template class arm_compute::CLSeparableConvolutionVertKernel<5>; -template class arm_compute::CLSeparableConvolutionVertKernel<7>; -template class arm_compute::CLSeparableConvolutionVertKernel<9>; -template class arm_compute::CLSeparableConvolutionHorKernel<5>; -template class arm_compute::CLSeparableConvolutionHorKernel<7>; -template class arm_compute::CLSeparableConvolutionHorKernel<9>; -} // namespace arm_compute diff --git a/src/core/CL/kernels/CLConvolutionKernel.h b/src/core/CL/kernels/CLConvolutionKernel.h deleted file mode 100644 index 33e73caf11..0000000000 --- a/src/core/CL/kernels/CLConvolutionKernel.h +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLCONVOLUTIONKERNEL_H -#define ARM_COMPUTE_CLCONVOLUTIONKERNEL_H - -#include "src/core/CL/ICLSimple2DKernel.h" - -#include - -namespace arm_compute -{ -class ICLTensor; - -/****************************************************************************************\ - * Square Convolution * -\****************************************************************************************/ - -/** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9). - * The client can supply a convolution matrix \f$ C_{m,n} \f$. - * @f{eqnarray}{ - * k_0 &=& \frac{m}{2} \\ - * l_0 &=& \frac{n}{2} \\ - * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l} - * @f} - * - * @note The above equation for this function is similar to the default OpenCV Filter2D function, - * which actually computes a correlation and not a convolution. - * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically. - */ -template -class CLConvolutionKernel : public ICLSimple2DKernel -{ -public: - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U8, S16. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U8, S16. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; - -/** Interface for the kernel which applies a 3x3 convolution to a tensor. */ -using CLConvolution3x3Kernel = CLConvolutionKernel<3>; -/** Interface for the kernel which applies a 5x5 convolution to a tensor. */ -using CLConvolution5x5Kernel = CLConvolutionKernel<5>; -/** Interface for the kernel which applies a 7x7 convolution to a tensor. */ -using CLConvolution7x7Kernel = CLConvolutionKernel<7>; -/** Interface for the kernel which applies a 9x9 convolution to a tensor. */ -using CLConvolution9x9Kernel = CLConvolutionKernel<9>; - -/****************************************************************************************\ - * Separable Square Convolution * -\****************************************************************************************/ - -/** Kernel for the Horizontal pass of a Separable Convolution. Currently support 5x5, 7x7, 9x9 */ -template -class CLSeparableConvolutionHorKernel : public ICLSimple2DKernel -{ -public: - /** Default Constructor */ - CLSeparableConvolutionHorKernel(); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: S16. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U16/S16/S32. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined); - - // Inherited methods overridden: - BorderSize border_size() const override; - -private: - BorderSize _border_size; /**< Border size */ -}; - -/** Interface for the kernel which applies a horizontal pass of 5x5 convolution to a tensor. */ -using CLSeparableConvolution5x5HorKernel = CLSeparableConvolutionHorKernel<5>; -/** Interface for the kernel which applies a horizontal pass of 7x7 convolution to a tensor. */ -using CLSeparableConvolution7x7HorKernel = CLSeparableConvolutionHorKernel<7>; -/** Interface for the kernel which applies a horizontal pass of 9x9 convolution to a tensor. */ -using CLSeparableConvolution9x9HorKernel = CLSeparableConvolutionHorKernel<9>; - -/** Kernel for the Vertical pass of a Separable Convolution. Currently supports 5x5, 7x7, 9x9 */ -template -class CLSeparableConvolutionVertKernel : public ICLSimple2DKernel -{ -public: - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data types supported: U16/S16/S32. - * @param[out] output Destination tensor, Data types supported: U8, S16. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] scale Scale of the convolution matrix. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - * @param[in] data_type Data type to use for intermeidate result. @sa data_type_for_convolution - */ - void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U16/S16/S32. - * @param[out] output Destination tensor, Data types supported: U8, S16. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] scale Scale of the convolution matrix. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - * @param[in] data_type Data type to use for intermeidate result. @sa data_type_for_convolution - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; - -/** Interface for the kernel which applies a vertical pass of 5x5 convolution to a tensor. */ -using CLSeparableConvolution5x5VertKernel = CLSeparableConvolutionVertKernel<5>; -/** Interface for the kernel which applies a vertical pass of 7x7 convolution to a tensor. */ -using CLSeparableConvolution7x7VertKernel = CLSeparableConvolutionVertKernel<7>; -/** Interface for the kernel which applies a vertical pass of 9x9 convolution to a tensor. */ -using CLSeparableConvolution9x9VertKernel = CLSeparableConvolutionVertKernel<9>; - -/****************************************************************************************\ - * Rectangle Convolution * -\****************************************************************************************/ - -/** Kernel for the running convolution on a rectangle matrix. - * - * @note Supports combinations of 3,5,7 and 9. - */ -class CLConvolutionRectangleKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLConvolutionRectangleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLConvolutionRectangleKernel(const CLConvolutionRectangleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLConvolutionRectangleKernel &operator=(const CLConvolutionRectangleKernel &) = delete; - /** Allow instances of this class to be moved */ - CLConvolutionRectangleKernel(CLConvolutionRectangleKernel &&) = default; - /** Allow instances of this class to be moved */ - CLConvolutionRectangleKernel &operator=(CLConvolutionRectangleKernel &&) = default; - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U8, S16. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] width Width of convolution matrix (Number of columns) - * @param[in] height Height of convolution matrix (Number of rows) - * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U8, S16. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] width Width of convolution matrix (Number of columns) - * @param[in] height Height of convolution matrix (Number of rows) - * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - BorderSize _border_size; - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLCONVOLUTIONKERNEL_H */ diff --git a/src/core/CL/kernels/CLDerivativeKernel.cpp b/src/core/CL/kernels/CLDerivativeKernel.cpp deleted file mode 100644 index 5ff11362cc..0000000000 --- a/src/core/CL/kernels/CLDerivativeKernel.cpp +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLDerivativeKernel.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include -#include - -using namespace arm_compute; - -CLDerivativeKernel::CLDerivativeKernel() - : _input(nullptr), _output_x(nullptr), _output_y(nullptr), _run_derivative_x(false), _run_derivative_y(false) -{ -} - -BorderSize CLDerivativeKernel::border_size() const -{ - return BorderSize(1); -} - -void CLDerivativeKernel::configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_undefined); -} - -void CLDerivativeKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr)); - - _run_derivative_x = output_x != nullptr; - _run_derivative_y = output_y != nullptr; - - if(_run_derivative_x) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_x, 1, DataType::S16); - } - - if(_run_derivative_y) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_y, 1, DataType::S16); - } - - _input = input; - _output_x = output_x; - _output_y = output_y; - - // Set build options - std::set build_opts; - - if(_run_derivative_x) - { - build_opts.insert("-DGRAD_X"); - } - - if(_run_derivative_y) - { - build_opts.insert("-DGRAD_Y"); - } - - // Create kernel - const std::string kernel_name = std::string("derivative"); - _kernel = create_kernel(compile_context, kernel_name, build_opts); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 16; - constexpr unsigned int num_read_rows_per_iteration = 3; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - - AccessWindowRectangle input_access(input->info(), 0, 0, 0, 0); - AccessWindowHorizontal output_x_access(output_x == nullptr ? nullptr : output_x->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_y_access(output_y == nullptr ? nullptr : output_y->info(), 0, num_elems_processed_per_iteration); - if(_run_derivative_x && _run_derivative_y) - { - // TODO(COMPMID-415) Fix x-access input bug in CL kernel instead of '+2' - input_access = AccessWindowRectangle(input->info(), -border_size().left, -border_size().top, num_elems_processed_per_iteration + 2, num_read_rows_per_iteration); - } - else if(_run_derivative_x) - { - // TODO(COMPMID-415) Fix x-access input bug in CL kernel instead of '+2' - input_access = AccessWindowHorizontal(input->info(), -border_size().left, num_elems_processed_per_iteration + 2); - } - else if(_run_derivative_y) - { - input_access = AccessWindowRectangle(input->info(), 0, -border_size().top, num_elems_processed_per_iteration, num_read_rows_per_iteration); - } - - update_window_and_padding(win, - input_access, - output_x_access, - output_y_access); - - output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(border_undefined); -} - -void CLDerivativeKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window slice = window.first_slice_window_2D(); - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, slice); - add_2D_tensor_argument_if((_run_derivative_x), idx, _output_x, slice); - add_2D_tensor_argument_if((_run_derivative_y), idx, _output_y, slice); - - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); -} diff --git a/src/core/CL/kernels/CLDerivativeKernel.h b/src/core/CL/kernels/CLDerivativeKernel.h deleted file mode 100644 index 14dd05d084..0000000000 --- a/src/core/CL/kernels/CLDerivativeKernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLDERIVATIVEKERNEL_H -#define ARM_COMPUTE_CLDERIVATIVEKERNEL_H - -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the derivative kernel. */ -class CLDerivativeKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLDerivativeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDerivativeKernel(const CLDerivativeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDerivativeKernel &operator=(const CLDerivativeKernel &) = delete; - /** Allow instances of this class to be moved */ - CLDerivativeKernel(CLDerivativeKernel &&) = default; - /** Allow instances of this class to be moved */ - CLDerivativeKernel &operator=(CLDerivativeKernel &&) = default; - /** Default destructor */ - ~CLDerivativeKernel() = default; - /** Initialise the kernel's sources, destination and border - * - * @note At least one of output_x or output_y must be set - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - /** Initialise the kernel's sources, destination and border - * - * @note At least one of output_x or output_y must be set - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input; /**< Input tensor */ - ICLTensor *_output_x; /**< Output tensor - Derivate along the X direction */ - ICLTensor *_output_y; /**< Output tensor - Derivate along the Y direction */ - bool _run_derivative_x; /**< Do we need to run Derivative X ? */ - bool _run_derivative_y; /**< Do we need to run Derivative Y ? */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLDERIVATIVEKERNEL_H */ diff --git a/src/core/CL/kernels/CLDilateKernel.cpp b/src/core/CL/kernels/CLDilateKernel.cpp deleted file mode 100644 index cac5bc1c72..0000000000 --- a/src/core/CL/kernels/CLDilateKernel.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLDilateKernel.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/WindowHelpers.h" - -using namespace arm_compute; - -BorderSize CLDilateKernel::border_size() const -{ - return BorderSize(1); -} - -void CLDilateKernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined); -} - -void CLDilateKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - - // Create kernel - _kernel = create_kernel(compile_context, "dilate"); - - _input = input; - _output = output; - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_rows_read_per_iteration = 3; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - - AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - ICLKernel::configure_internal(win); -} diff --git a/src/core/CL/kernels/CLDilateKernel.h b/src/core/CL/kernels/CLDilateKernel.h deleted file mode 100644 index 591ec8ccfc..0000000000 --- a/src/core/CL/kernels/CLDilateKernel.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLDILATEKERNEL_H -#define ARM_COMPUTE_CLDILATEKERNEL_H - -#include "src/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the dilate kernel. - * - */ -class CLDilateKernel : public ICLSimple2DKernel -{ -public: - /**Initialise the kernel's input and output. - * - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); - /**Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLDILATEKERNEL_H */ diff --git a/src/core/CL/kernels/CLErodeKernel.cpp b/src/core/CL/kernels/CLErodeKernel.cpp deleted file mode 100644 index f6d98a5488..0000000000 --- a/src/core/CL/kernels/CLErodeKernel.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLErodeKernel.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/WindowHelpers.h" - -using namespace arm_compute; - -BorderSize CLErodeKernel::border_size() const -{ - return BorderSize(1); -} - -void CLErodeKernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined); -} - -void CLErodeKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - - // Create kernel - _kernel = create_kernel(compile_context, "erode"); - - _input = input; - _output = output; - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_rows_read_pes_iteration = 3; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - - AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_pes_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - ICLKernel::configure_internal(win); -} diff --git a/src/core/CL/kernels/CLErodeKernel.h b/src/core/CL/kernels/CLErodeKernel.h deleted file mode 100644 index 4da97ae358..0000000000 --- a/src/core/CL/kernels/CLErodeKernel.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLERODEKERNEL_H -#define ARM_COMPUTE_CLERODEKERNEL_H - -#include "src/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the erode kernel. - * - */ -class CLErodeKernel : public ICLSimple2DKernel -{ -public: - /**Initialise the kernel's input and output. - * - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); - /**Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLERODEKERNEL_H */ diff --git a/src/core/CL/kernels/CLFastCornersKernel.cpp b/src/core/CL/kernels/CLFastCornersKernel.cpp deleted file mode 100644 index 7481fd1c27..0000000000 --- a/src/core/CL/kernels/CLFastCornersKernel.cpp +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLFastCornersKernel.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include -#include - -using namespace arm_compute; - -CLFastCornersKernel::CLFastCornersKernel() - : ICLKernel(), _input(nullptr), _output(nullptr) -{ -} - -BorderSize CLFastCornersKernel::border_size() const -{ - return BorderSize(3); -} - -void CLFastCornersKernel::configure(const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output, threshold, non_max_suppression, border_mode); -} - -void CLFastCornersKernel::configure(const CLCompileContext &compile_context, const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode) -{ - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_MSG(border_mode != BorderMode::UNDEFINED, "Not implemented"); - - _input = input; - _output = output; - - // Create build options - std::set build_opts; - - if(non_max_suppression) - { - build_opts.emplace("-DUSE_MAXSUPPRESSION"); - } - - // Create kernel - const std::string kernel_name = std::string("fast_corners"); - _kernel = create_kernel(compile_context, kernel_name, build_opts); - - // Set static kernel arguments - unsigned int idx = 2 * num_arguments_per_2D_tensor(); // Skip the input and output parameters - _kernel.setArg(idx, static_cast(threshold)); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 1; - constexpr unsigned int num_elems_read_per_iteration = 7; - constexpr unsigned int num_rows_read_per_iteration = 3; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_mode == BorderMode::UNDEFINED, BorderSize(3)); - - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, input->info()->valid_region(), border_mode == BorderMode::UNDEFINED, border_size()); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(non_max_suppression); - _config_id += "_"; - _config_id += lower_string(string_from_border_mode(border_mode)); -} - -void CLFastCornersKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window slice = window.first_slice_window_2D(); - - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, slice); - add_2D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); -} - -CLCopyToArrayKernel::CLCopyToArrayKernel() - : ICLKernel(), _input(nullptr), _corners(nullptr), _num_buffer(nullptr) -{ -} - -void CLCopyToArrayKernel::configure(const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, update_number, corners, num_buffers); -} - -void CLCopyToArrayKernel::configure(const CLCompileContext &compile_context, const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers) -{ - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON(corners == nullptr); - ARM_COMPUTE_ERROR_ON(num_buffers == nullptr); - - _input = input; - _corners = corners; - _num_buffer = num_buffers; - - std::set build_opts; - - if(update_number) - { - build_opts.emplace("-DUPDATE_NUMBER"); - } - - // Create kernel - const std::string kernel_name = std::string("copy_to_keypoint"); - _kernel = create_kernel(compile_context, kernel_name, build_opts); - - //Get how many pixels skipped in the x dimension in the previous stages - unsigned int offset = _input->info()->valid_region().anchor.x(); - - // Set static kernel arguments - unsigned int idx = num_arguments_per_2D_tensor(); // Skip the input and output parameters - _kernel.setArg(idx++, _corners->max_num_values()); - _kernel.setArg(idx++, offset); - _kernel.setArg(idx++, *_num_buffer); - _kernel.setArg(idx++, _corners->cl_buffer()); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 1; - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - update_window_and_padding(win, - AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration)); - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); -} - -void CLCopyToArrayKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - //Initialise the _num_buffer as it used as both input and output - static const unsigned int zero_init = 0; - queue.enqueueWriteBuffer(*_num_buffer, CL_FALSE, 0, sizeof(unsigned int), &zero_init); - - Window slice = window.first_slice_window_2D(); - - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, slice); - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); -} diff --git a/src/core/CL/kernels/CLFastCornersKernel.h b/src/core/CL/kernels/CLFastCornersKernel.h deleted file mode 100644 index 0c1b564c2f..0000000000 --- a/src/core/CL/kernels/CLFastCornersKernel.h +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLFASTCORNERSKERNEL_H -#define ARM_COMPUTE_CLFASTCORNERSKERNEL_H - -#include "arm_compute/core/CL/ICLArray.h" -#include "arm_compute/core/Types.h" -#include "src/core/CL/ICLKernel.h" - -#include - -namespace cl -{ -class Buffer; -} - -namespace arm_compute -{ -class ICLTensor; -using ICLImage = ICLTensor; - -/** CL kernel to perform fast corners */ -class CLFastCornersKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLFastCornersKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFastCornersKernel(const CLFastCornersKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFastCornersKernel &operator=(const CLFastCornersKernel &) = delete; - /** Allow instances of this class to be moved */ - CLFastCornersKernel(CLFastCornersKernel &&) = default; - /** Allow instances of this class to be moved */ - CLFastCornersKernel &operator=(CLFastCornersKernel &&) = default; - /** Default destructor */ - ~CLFastCornersKernel() = default; - - /** Initialise the kernel. - * - * @param[in] input Source image. Data types supported: U8. - * @param[out] output Output image. Data types supported: U8. - * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. - * @param[in] non_max_suppression True if non-maxima suppresion is applied, false otherwise. - * @param[in] border_mode Strategy to use for borders. - */ - void configure(const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode); - /** Initialise the kernel. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source image. Data types supported: U8. - * @param[out] output Output image. Data types supported: U8. - * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. - * @param[in] non_max_suppression True if non-maxima suppresion is applied, false otherwise. - * @param[in] border_mode Strategy to use for borders. - */ - void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode); - - // Inherited methods overridden - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLImage *_input; - ICLImage *_output; -}; - -/** CL kernel to copy keypoints information to ICLKeyPointArray and counts the number of key points */ -class CLCopyToArrayKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLCopyToArrayKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLCopyToArrayKernel(const CLCopyToArrayKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLCopyToArrayKernel &operator=(const CLCopyToArrayKernel &) = delete; - /** Allow instances of this class to be moved */ - CLCopyToArrayKernel(CLCopyToArrayKernel &&) = default; - /** Allow instances of this class to be moved */ - CLCopyToArrayKernel &operator=(CLCopyToArrayKernel &&) = default; - /** Default destructor */ - ~CLCopyToArrayKernel() = default; - - /** Initialise the kernel. - * - * @param[in] input Source image. Data types supported: U8. - * @param[in] update_number Flag to indicate whether we need to update the number of corners - * @param[out] corners Array of keypoints to store the results. - * @param[out] num_buffers Number of keypoints to store the results. - */ - void configure(const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers); - /** Initialise the kernel. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source image. Data types supported: U8. - * @param[in] update_number Flag to indicate whether we need to update the number of corners - * @param[out] corners Array of keypoints to store the results. - * @param[out] num_buffers Number of keypoints to store the results. - */ - void configure(const CLCompileContext &compile_context, const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLImage *_input; /**< source image */ - ICLKeyPointArray *_corners; /**< destination array */ - cl::Buffer *_num_buffer; /**< CL memory to record number of key points in the array */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLFASTCORNERSKERNEL_H */ diff --git a/src/core/CL/kernels/CLGaussian3x3Kernel.cpp b/src/core/CL/kernels/CLGaussian3x3Kernel.cpp deleted file mode 100644 index 40e9658ab4..0000000000 --- a/src/core/CL/kernels/CLGaussian3x3Kernel.cpp +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLGaussian3x3Kernel.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include - -using namespace arm_compute; - -BorderSize CLGaussian3x3Kernel::border_size() const -{ - return BorderSize(1); -} - -void CLGaussian3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined); -} - -void CLGaussian3x3Kernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - - _input = input; - _output = output; - - // Set build options - std::set build_opts = { "-DMAT0=1", "-DMAT1=2", "-DMAT2=1", - "-DMAT3=2", "-DMAT4=4", "-DMAT5=2", - "-DMAT6=1", "-DMAT7=2", "-DMAT8=1", - "-DSCALE=16", "-DDATA_TYPE_OUT=uchar" - }; - - // Create kernel - _kernel = create_kernel(compile_context, "convolution3x3_static", build_opts); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_written_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_rows_read_per_iteration = 3; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - - AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - ICLKernel::configure_internal(win); -} diff --git a/src/core/CL/kernels/CLGaussian3x3Kernel.h b/src/core/CL/kernels/CLGaussian3x3Kernel.h deleted file mode 100644 index 139b05d44c..0000000000 --- a/src/core/CL/kernels/CLGaussian3x3Kernel.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H -#define ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H - -#include "src/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the Gaussian 3x3 filter kernel. - * - */ -class CLGaussian3x3Kernel : public ICLSimple2DKernel -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H */ diff --git a/src/core/CL/kernels/CLGaussian5x5Kernel.cpp b/src/core/CL/kernels/CLGaussian5x5Kernel.cpp deleted file mode 100644 index 46a7576154..0000000000 --- a/src/core/CL/kernels/CLGaussian5x5Kernel.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLGaussian5x5Kernel.h" - -#include - -using namespace arm_compute; - -void CLGaussian5x5HorKernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined); -} - -void CLGaussian5x5HorKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined) -{ - const std::array matrix = { 1, 4, 6, 4, 1 }; - - // Set arguments - CLSeparableConvolution5x5HorKernel::configure(compile_context, input, output, matrix.data(), border_undefined); -} - -void CLGaussian5x5VertKernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined); -} - -void CLGaussian5x5VertKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined) -{ - const uint32_t scale = 256; - const std::array matrix = { 1, 4, 6, 4, 1 }; - - // Set arguments - CLSeparableConvolution5x5VertKernel::configure(compile_context, input, output, matrix.data(), scale, border_undefined); -} diff --git a/src/core/CL/kernels/CLGaussian5x5Kernel.h b/src/core/CL/kernels/CLGaussian5x5Kernel.h deleted file mode 100644 index 711710b3b3..0000000000 --- a/src/core/CL/kernels/CLGaussian5x5Kernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H -#define ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H - -#include "src/core/CL/kernels/CLConvolutionKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to run the horizontal pass of 5x5 Gaussian filter on a tensor. */ -class CLGaussian5x5HorKernel : public CLSeparableConvolution5x5HorKernel -{ -public: - /** Initialise the kernel's source, destination and border. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); - /** Initialise the kernel's source, destination and border. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); - -private: - //Make the configure method of the parent class private - using CLSeparableConvolution5x5HorKernel::configure; -}; - -/** Interface for the kernel to run the vertical pass of 5x5 Gaussian filter on a tensor. */ -class CLGaussian5x5VertKernel : public CLSeparableConvolution5x5VertKernel -{ -public: - /** Initialise the kernel's source, destination and border. - * - * @param[in] input Input tensor(output of horizontal pass). Data types supported: S16. - * @param[out] output Destination tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); - /** Initialise the kernel's source, destination and border. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor(output of horizontal pass). Data types supported: S16. - * @param[out] output Destination tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); - -private: - //Make the configure method of the parent class private - using CLSeparableConvolution5x5VertKernel::configure; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H */ diff --git a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp b/src/core/CL/kernels/CLGaussianPyramidKernel.cpp deleted file mode 100644 index 065f7f7e92..0000000000 --- a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp +++ /dev/null @@ -1,247 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLGaussianPyramidKernel.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -using namespace arm_compute; - -CLGaussianPyramidHorKernel::CLGaussianPyramidHorKernel() - : _l2_load_offset(0) -{ -} - -BorderSize CLGaussianPyramidHorKernel::border_size() const -{ - return BorderSize{ 0, 2 }; -} - -void CLGaussianPyramidHorKernel::configure(const ICLTensor *input, ICLTensor *output) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output); -} - -void CLGaussianPyramidHorKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U16); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != output->info()->dimension(1)); - - for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i) - { - ARM_COMPUTE_ERROR_ON(input->info()->dimension(i) != output->info()->dimension(i)); - } - - _input = input; - _output = output; - - // Create kernel - const std::string kernel_name = std::string("gaussian1x5_sub_x"); - _kernel = create_kernel(compile_context, kernel_name); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 16; - constexpr unsigned int num_elems_read_per_iteration = 20; - constexpr unsigned int num_elems_written_per_iteration = 8; - const float scale_x = static_cast(output->info()->dimension(0)) / input->info()->dimension(0); - - Window win = calculate_max_window_horizontal(*input->info(), Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration, scale_x); - - // Sub sampling selects odd pixels (1, 3, 5, ...) for images with even - // width and even pixels (0, 2, 4, ...) for images with odd width. (Whether - // a pixel is even or odd is determined based on the tensor shape not the - // valid region!) - // Thus the offset from which the first pixel (L2) for the convolution is - // loaded depends on the anchor and shape of the valid region. - // In the case of an even shape (= even image width) we need to load L2 - // from -2 if the anchor is odd and from -1 if the anchor is even. That - // makes sure that L2 is always loaded from an odd pixel. - // On the other hand, for an odd shape (= odd image width) we need to load - // L2 from -1 if the anchor is odd and from -2 if the anchor is even to - // achieve the opposite effect. - // The condition can be simplified to checking whether anchor + shape is - // odd (-2) or even (-1) as only adding an odd and an even number will have - // an odd result. - _l2_load_offset = -border_size().left; - - if((_input->info()->valid_region().anchor[0] + _input->info()->valid_region().shape[0]) % 2 == 0) - { - _l2_load_offset += 1; - } - - update_window_and_padding(win, - AccessWindowHorizontal(input->info(), _l2_load_offset, num_elems_read_per_iteration), - output_access); - - output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(1)); -} - -void CLGaussianPyramidHorKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window win_in(window); - win_in.shift(Window::DimX, _l2_load_offset); - - //The output is half the width of the input: - Window win_out(window); - win_out.scale(Window::DimX, 0.5f); - - Window slice_in = win_in.first_slice_window_2D(); - Window slice_out = win_out.first_slice_window_2D(); - - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, slice_in); - add_2D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_out, lws_hint()); - } - while(win_in.slide_window_slice_2D(slice_in) && win_out.slide_window_slice_2D(slice_out)); -} - -CLGaussianPyramidVertKernel::CLGaussianPyramidVertKernel() - : _t2_load_offset(0) -{ -} - -BorderSize CLGaussianPyramidVertKernel::border_size() const -{ - return BorderSize{ 2, 0 }; -} - -void CLGaussianPyramidVertKernel::configure(const ICLTensor *input, ICLTensor *output) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output); -} - -void CLGaussianPyramidVertKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U16); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != output->info()->dimension(0)); - - for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i) - { - ARM_COMPUTE_ERROR_ON(input->info()->dimension(i) != output->info()->dimension(i)); - } - - _input = input; - _output = output; - - // Create kernel - const std::string kernel_name = std::string("gaussian5x1_sub_y"); - _kernel = create_kernel(compile_context, "gaussian5x1_sub_y"); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_rows_processed_per_iteration = 2; - constexpr unsigned int num_elems_written_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 8; - constexpr unsigned int num_rows_per_iteration = 5; - - const float scale_y = static_cast(output->info()->dimension(1)) / input->info()->dimension(1); - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration, num_rows_processed_per_iteration)); - AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_written_per_iteration, num_rows_per_iteration, 1.f, scale_y); - - // Determine whether we need to load even or odd rows. See above for a - // detailed explanation. - _t2_load_offset = -border_size().top; - - if((_input->info()->valid_region().anchor[1] + _input->info()->valid_region().shape[1]) % 2 == 0) - { - _t2_load_offset += 1; - } - - update_window_and_padding(win, - AccessWindowRectangle(input->info(), 0, _t2_load_offset, num_elems_read_per_iteration, num_rows_per_iteration), - output_access); - - output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(1)); -} - -void CLGaussianPyramidVertKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - ARM_COMPUTE_ERROR_ON(window.x().step() != 8); - ARM_COMPUTE_ERROR_ON(window.y().step() % 2); - - Window win_in(window); - win_in.shift(Window::DimY, _t2_load_offset); - - Window win_out(window); - win_out.scale(Window::DimY, 0.5f); - - Window slice_in = win_in.first_slice_window_2D(); - Window slice_out = win_out.first_slice_window_2D(); - - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, slice_in); - add_2D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_out, lws_hint()); - } - while(win_in.slide_window_slice_2D(slice_in) && win_out.slide_window_slice_2D(slice_out)); -} diff --git a/src/core/CL/kernels/CLGaussianPyramidKernel.h b/src/core/CL/kernels/CLGaussianPyramidKernel.h deleted file mode 100644 index a6595440f6..0000000000 --- a/src/core/CL/kernels/CLGaussianPyramidKernel.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H -#define ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H - -#include "src/core/CL/ICLSimpleKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform a Gaussian filter and half scaling across width (horizontal pass) */ -class CLGaussianPyramidHorKernel : public ICLSimpleKernel -{ -public: - /** Default constructor */ - CLGaussianPyramidHorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGaussianPyramidHorKernel(const CLGaussianPyramidHorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGaussianPyramidHorKernel &operator=(const CLGaussianPyramidHorKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGaussianPyramidHorKernel(CLGaussianPyramidHorKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGaussianPyramidHorKernel &operator=(CLGaussianPyramidHorKernel &&) = default; - /** Default destructor */ - ~CLGaussianPyramidHorKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16. - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - int _l2_load_offset; -}; - -/** OpenCL kernel to perform a Gaussian filter and half scaling across height (vertical pass) */ -class CLGaussianPyramidVertKernel : public ICLSimpleKernel -{ -public: - /** Default constructor */ - CLGaussianPyramidVertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGaussianPyramidVertKernel(const CLGaussianPyramidVertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGaussianPyramidVertKernel &operator=(const CLGaussianPyramidVertKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGaussianPyramidVertKernel(CLGaussianPyramidVertKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGaussianPyramidVertKernel &operator=(CLGaussianPyramidVertKernel &&) = default; - /** Default destructor */ - ~CLGaussianPyramidVertKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input Source tensor. Data types supported: U16. - * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8. - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U16. - * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - int _t2_load_offset; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H */ diff --git a/src/core/CL/kernels/CLHOGDescriptorKernel.cpp b/src/core/CL/kernels/CLHOGDescriptorKernel.cpp deleted file mode 100644 index cd3f1ee216..0000000000 --- a/src/core/CL/kernels/CLHOGDescriptorKernel.cpp +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLHOGDescriptorKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include -#include -#include - -using namespace arm_compute; - -CLHOGOrientationBinningKernel::CLHOGOrientationBinningKernel() - : _input_magnitude(nullptr), _input_phase(nullptr), _output(nullptr), _cell_size() -{ -} - -void CLHOGOrientationBinningKernel::configure(const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info) -{ - configure(CLKernelLibrary::get().get_compile_context(), input_magnitude, input_phase, output, hog_info); -} - -void CLHOGOrientationBinningKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_magnitude, 1, DataType::S16); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_phase, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON(hog_info == nullptr); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, hog_info->num_bins(), DataType::F32); - ARM_COMPUTE_ERROR_ON(input_magnitude->info()->dimension(Window::DimX) != input_phase->info()->dimension(Window::DimX)); - ARM_COMPUTE_ERROR_ON(input_magnitude->info()->dimension(Window::DimY) != input_phase->info()->dimension(Window::DimY)); - - _input_magnitude = input_magnitude; - _input_phase = input_phase; - _output = output; - _cell_size = hog_info->cell_size(); - - float phase_scale = (PhaseType::SIGNED == hog_info->phase_type() ? hog_info->num_bins() / 360.0f : hog_info->num_bins() / 180.0f); - phase_scale *= (PhaseType::SIGNED == hog_info->phase_type() ? 360.0f / 255.0f : 1.0f); - - std::stringstream args_str; - args_str << "-DCELL_WIDTH=" << hog_info->cell_size().width << " "; - args_str << "-DCELL_HEIGHT=" << hog_info->cell_size().height << " "; - args_str << "-DNUM_BINS=" << hog_info->num_bins() << " "; - args_str << "-DPHASE_SCALE=" << phase_scale << " "; - - // Construct kernel name - std::set build_opts = {}; - build_opts.insert(args_str.str()); - - // Create kernel - const std::string kernel_name = std::string("hog_orientation_binning"); - _kernel = create_kernel(compile_context, kernel_name, build_opts); - - constexpr unsigned int num_elems_processed_per_iteration = 1; - constexpr unsigned int num_elems_read_per_iteration = 1; - const unsigned int num_rows_read_per_iteration = hog_info->cell_size().height; - constexpr unsigned int num_elems_written_per_iteration = 1; - - // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, - AccessWindowRectangle(input_magnitude->info(), 0, 0, num_elems_read_per_iteration, num_rows_read_per_iteration), - AccessWindowRectangle(input_phase->info(), 0, 0, num_elems_read_per_iteration, num_rows_read_per_iteration), - output_access); - - output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input_magnitude->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input_magnitude->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input_magnitude->info()->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(1)); -} - -void CLHOGOrientationBinningKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window slice = window.first_slice_window_2D(); - do - { - // Compute slice for the magnitude and phase tensors - Window slice_mag_phase = window.first_slice_window_2D(); - slice_mag_phase.set(Window::DimX, Window::Dimension(window.x().start() * _cell_size.width, window.x().start() * _cell_size.width, _cell_size.width)); - slice_mag_phase.set(Window::DimY, Window::Dimension(window.y().start() * _cell_size.height, window.y().start() * _cell_size.height, _cell_size.height)); - - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input_magnitude, slice_mag_phase); - add_2D_tensor_argument(idx, _input_phase, slice_mag_phase); - add_2D_tensor_argument(idx, _output, slice); - - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); -} - -CLHOGBlockNormalizationKernel::CLHOGBlockNormalizationKernel() - : _input(nullptr), _output(nullptr), _num_cells_per_block_stride() -{ -} - -void CLHOGBlockNormalizationKernel::configure(const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output, hog_info); -} - -void CLHOGBlockNormalizationKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info) -{ - ARM_COMPUTE_ERROR_ON(hog_info == nullptr); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, hog_info->num_bins(), DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(output, DataType::F32); - - // Number of cells per block - const Size2D num_cells_per_block(hog_info->block_size().width / hog_info->cell_size().width, - hog_info->block_size().height / hog_info->cell_size().height); - - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, hog_info->num_bins() * num_cells_per_block.area(), DataType::F32); - - // Number of cells per block stride - const Size2D num_cells_per_block_stride(hog_info->block_stride().width / hog_info->cell_size().width, - hog_info->block_stride().height / hog_info->cell_size().height); - - _input = input; - _output = output; - _num_cells_per_block_stride = num_cells_per_block_stride; - - std::stringstream args_str; - args_str << "-DL2_HYST_THRESHOLD=" << hog_info->l2_hyst_threshold() << " "; - args_str << "-DNUM_CELLS_PER_BLOCK_HEIGHT=" << num_cells_per_block.height << " "; - args_str << "-DNUM_BINS_PER_BLOCK_X=" << num_cells_per_block.width *hog_info->num_bins() << " "; - args_str << "-DNUM_BINS_PER_BLOCK=" << _output->info()->num_channels() << " "; - args_str << "-DL2_NORM=" << static_cast(HOGNormType::L2_NORM) << " "; - args_str << "-DL1_NORM=" << static_cast(HOGNormType::L1_NORM) << " "; - args_str << "-DL2HYS_NORM=" << static_cast(HOGNormType::L2HYS_NORM) << " "; - args_str << "-DHOG_NORM_TYPE=" << static_cast(hog_info->normalization_type()) << " "; - - // Construct kernel name - std::set build_opts = {}; - build_opts.insert(args_str.str()); - - const std::string kernel_name = std::string("hog_block_normalization"); - _kernel = create_kernel(compile_context, kernel_name, build_opts); - - constexpr unsigned int num_elems_processed_per_iteration = 1; - constexpr unsigned int num_elems_read_per_iteration = 1; - const unsigned int num_rows_read_per_iteration = num_cells_per_block.height; - constexpr unsigned int num_elems_written_per_iteration = 1; - const unsigned int num_rows_written_per_iteration = num_cells_per_block.height; - - // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); - AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_written_per_iteration, num_rows_written_per_iteration); - - update_window_and_padding(win, - AccessWindowRectangle(input->info(), 0, 0, num_elems_read_per_iteration, num_rows_read_per_iteration), - output_access); - - output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(1)); -} - -void CLHOGBlockNormalizationKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window slice = window.first_slice_window_2D(); - do - { - // Compute slice for the magnitude and phase tensors - Window slice_in = window.first_slice_window_2D(); - slice_in.set_dimension_step(Window::DimX, _num_cells_per_block_stride.width); - slice_in.set_dimension_step(Window::DimY, _num_cells_per_block_stride.height); - - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, slice_in); - add_2D_tensor_argument(idx, _output, slice); - - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); -} diff --git a/src/core/CL/kernels/CLHOGDescriptorKernel.h b/src/core/CL/kernels/CLHOGDescriptorKernel.h deleted file mode 100644 index eee2fa36bc..0000000000 --- a/src/core/CL/kernels/CLHOGDescriptorKernel.h +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H -#define ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H - -#include "arm_compute/core/IHOG.h" -#include "arm_compute/core/Size2D.h" -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** OpenCL kernel to perform HOG Orientation Binning */ -class CLHOGOrientationBinningKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLHOGOrientationBinningKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHOGOrientationBinningKernel(const CLHOGOrientationBinningKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHOGOrientationBinningKernel &operator=(const CLHOGOrientationBinningKernel &) = delete; - /** Allow instances of this class to be moved */ - CLHOGOrientationBinningKernel(CLHOGOrientationBinningKernel &&) = default; - /** Allow instances of this class to be moved */ - CLHOGOrientationBinningKernel &operator=(CLHOGOrientationBinningKernel &&) = default; - /** Default destructor */ - ~CLHOGOrientationBinningKernel() = default; - - /** Initialise the kernel's inputs, output and HOG's metadata - * - * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16. - * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8 - * @param[out] output Output tensor which stores the local HOG for each cell. DataType supported: F32. Number of channels supported: equal to the number of histogram bins per cell - * @param[in] hog_info HOG's metadata - */ - void configure(const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info); - /** Initialise the kernel's inputs, output and HOG's metadata - * - * @param[in] compile_context The compile context to be used. - * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16. - * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8 - * @param[out] output Output tensor which stores the local HOG for each cell. DataType supported: F32. Number of channels supported: equal to the number of histogram bins per cell - * @param[in] hog_info HOG's metadata - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input_magnitude; - const ICLTensor *_input_phase; - ICLTensor *_output; - Size2D _cell_size; -}; - -/** OpenCL kernel to perform HOG block normalization */ -class CLHOGBlockNormalizationKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLHOGBlockNormalizationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHOGBlockNormalizationKernel(const CLHOGBlockNormalizationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHOGBlockNormalizationKernel &operator=(const CLHOGBlockNormalizationKernel &) = delete; - /** Allow instances of this class to be moved */ - CLHOGBlockNormalizationKernel(CLHOGBlockNormalizationKernel &&) = default; - /** Allow instances of this class to be moved */ - CLHOGBlockNormalizationKernel &operator=(CLHOGBlockNormalizationKernel &&) = default; - /** Default destructor */ - ~CLHOGBlockNormalizationKernel() = default; - - /** Initialise the kernel's input, output and HOG's metadata - * - * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell - * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block - * @param[in] hog_info HOG's metadata - */ - void configure(const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info); - /** Initialise the kernel's input, output and HOG's metadata - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell - * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block - * @param[in] hog_info HOG's metadata - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - Size2D _num_cells_per_block_stride; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H */ diff --git a/src/core/CL/kernels/CLHOGDetectorKernel.cpp b/src/core/CL/kernels/CLHOGDetectorKernel.cpp deleted file mode 100644 index 861155b9a2..0000000000 --- a/src/core/CL/kernels/CLHOGDetectorKernel.cpp +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLHOGDetectorKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLHOG.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -using namespace arm_compute; - -CLHOGDetectorKernel::CLHOGDetectorKernel() - : _input(nullptr), _detection_windows(), _num_detection_windows(nullptr) -{ -} - -void CLHOGDetectorKernel::configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, const Size2D &detection_window_stride, - float threshold, uint16_t idx_class) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, hog, detection_windows, num_detection_windows, detection_window_stride, threshold, idx_class); -} - -void CLHOGDetectorKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, - const Size2D &detection_window_stride, - float threshold, uint16_t idx_class) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(input, DataType::F32); - ARM_COMPUTE_ERROR_ON(hog == nullptr); - ARM_COMPUTE_ERROR_ON(detection_windows == nullptr); - ARM_COMPUTE_ERROR_ON(num_detection_windows == nullptr); - ARM_COMPUTE_ERROR_ON((detection_window_stride.width % hog->info()->block_stride().width) != 0); - ARM_COMPUTE_ERROR_ON((detection_window_stride.height % hog->info()->block_stride().height) != 0); - - const Size2D &detection_window_size = hog->info()->detection_window_size(); - const Size2D &block_size = hog->info()->block_size(); - const Size2D &block_stride = hog->info()->block_stride(); - - _input = input; - _detection_windows = detection_windows; - _num_detection_windows = num_detection_windows; - - const unsigned int num_bins_per_descriptor_x = ((detection_window_size.width - block_size.width) / block_stride.width + 1) * input->info()->num_channels(); - const unsigned int num_blocks_per_descriptor_y = (detection_window_size.height - block_size.height) / block_stride.height + 1; - - ARM_COMPUTE_ERROR_ON((num_bins_per_descriptor_x * num_blocks_per_descriptor_y + 1) != hog->info()->descriptor_size()); - - std::stringstream args_str; - args_str << "-DNUM_BLOCKS_PER_DESCRIPTOR_Y=" << num_blocks_per_descriptor_y << " "; - args_str << "-DNUM_BINS_PER_DESCRIPTOR_X=" << num_bins_per_descriptor_x << " "; - args_str << "-DTHRESHOLD=" << threshold << " "; - args_str << "-DMAX_NUM_DETECTION_WINDOWS=" << detection_windows->max_num_values() << " "; - args_str << "-DIDX_CLASS=" << idx_class << " "; - args_str << "-DDETECTION_WINDOW_WIDTH=" << detection_window_size.width << " "; - args_str << "-DDETECTION_WINDOW_HEIGHT=" << detection_window_size.height << " "; - args_str << "-DDETECTION_WINDOW_STRIDE_WIDTH=" << detection_window_stride.width << " "; - args_str << "-DDETECTION_WINDOW_STRIDE_HEIGHT=" << detection_window_stride.height << " "; - - // Construct kernel name - std::set build_opts = {}; - build_opts.insert(args_str.str()); - - // Create kernel - const std::string kernel_name = std::string("hog_detector"); - _kernel = create_kernel(compile_context, kernel_name, build_opts); - - // Set static kernel arguments - unsigned int idx = num_arguments_per_2D_tensor(); // Skip the input parameters - _kernel.setArg(idx++, hog->cl_buffer()); - _kernel.setArg(idx++, detection_windows->cl_buffer()); - _kernel.setArg(idx++, *_num_detection_windows); - - // Get the number of blocks along the x and y directions of the input tensor - const ValidRegion &valid_region = input->info()->valid_region(); - const size_t num_blocks_x = valid_region.shape[0]; - const size_t num_blocks_y = valid_region.shape[1]; - - // Get the number of blocks along the x and y directions of the detection window - const size_t num_blocks_per_detection_window_x = detection_window_size.width / block_stride.width; - const size_t num_blocks_per_detection_window_y = detection_window_size.height / block_stride.height; - - const size_t window_step_x = detection_window_stride.width / block_stride.width; - const size_t window_step_y = detection_window_stride.height / block_stride.height; - - // Configure kernel window - Window win; - win.set(Window::DimX, Window::Dimension(0, floor_to_multiple(num_blocks_x - num_blocks_per_detection_window_x, window_step_x) + window_step_x, window_step_x)); - win.set(Window::DimY, Window::Dimension(0, floor_to_multiple(num_blocks_y - num_blocks_per_detection_window_y, window_step_y) + window_step_y, window_step_y)); - - constexpr unsigned int num_elems_read_per_iteration = 1; - const unsigned int num_rows_read_per_iteration = num_blocks_per_descriptor_y; - - update_window_and_padding(win, AccessWindowRectangle(input->info(), 0, 0, num_elems_read_per_iteration, num_rows_read_per_iteration)); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); -} - -void CLHOGDetectorKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window slice = window.first_slice_window_2D(); - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, slice); - - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); -} diff --git a/src/core/CL/kernels/CLHOGDetectorKernel.h b/src/core/CL/kernels/CLHOGDetectorKernel.h deleted file mode 100644 index c28e6ebe74..0000000000 --- a/src/core/CL/kernels/CLHOGDetectorKernel.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLHOGDETECTORKERNEL_H -#define ARM_COMPUTE_CLHOGDETECTORKERNEL_H - -#include "arm_compute/core/CL/ICLArray.h" -#include "arm_compute/core/CL/ICLHOG.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "src/core/CL/ICLKernel.h" - -namespace cl -{ -class Buffer; -} - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform HOG detector kernel using linear SVM */ -class CLHOGDetectorKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLHOGDetectorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHOGDetectorKernel(const CLHOGDetectorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHOGDetectorKernel &operator=(const CLHOGDetectorKernel &) = delete; - /** Allow instances of this class to be moved */ - CLHOGDetectorKernel(CLHOGDetectorKernel &&) = default; - /** Allow instances of this class to be moved */ - CLHOGDetectorKernel &operator=(CLHOGDetectorKernel &&) = default; - /** Default destructor */ - ~CLHOGDetectorKernel() = default; - - /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect - * - * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref CLHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block - * @param[in] hog HOG data object used by @ref CLHOGOrientationBinningKernel and @ref CLHOGBlockNormalizationKernel - * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects - * @param[in] num_detection_windows Number of detected objects - * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. - * It must be multiple of the hog->info()->block_stride() - * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane - * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to - */ - void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, - uint16_t idx_class = 0); - /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref CLHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block - * @param[in] hog HOG data object used by @ref CLHOGOrientationBinningKernel and @ref CLHOGBlockNormalizationKernel - * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects - * @param[in] num_detection_windows Number of detected objects - * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. - * It must be multiple of the hog->info()->block_stride() - * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane - * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, - const Size2D &detection_window_stride, float threshold = 0.0f, - uint16_t idx_class = 0); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue); - -private: - const ICLTensor *_input; - ICLDetectionWindowArray *_detection_windows; - cl::Buffer *_num_detection_windows; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLHOGDETECTORKERNEL_H */ diff --git a/src/core/CL/kernels/CLHarrisCornersKernel.cpp b/src/core/CL/kernels/CLHarrisCornersKernel.cpp deleted file mode 100644 index cbc056fb77..0000000000 --- a/src/core/CL/kernels/CLHarrisCornersKernel.cpp +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLHarrisCornersKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "src/core/AccessWindowStatic.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include -#include -#include - -using namespace arm_compute; - -CLHarrisScoreKernel::CLHarrisScoreKernel() - : _input1(nullptr), _input2(nullptr), _output(nullptr), _sensitivity(), _strength_thresh(), _norm_factor(), _border_size(0) -{ -} - -BorderSize CLHarrisScoreKernel::border_size() const -{ - return _border_size; -} - -void CLHarrisScoreKernel::configure(const ICLImage *input1, const ICLImage *input2, ICLImage *output, - int32_t block_size, float norm_factor, float strength_thresh, float sensitivity, - bool border_undefined) -{ - configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, block_size, norm_factor, strength_thresh, sensitivity, border_undefined); -} - -void CLHarrisScoreKernel::configure(const CLCompileContext &compile_context, const ICLImage *input1, const ICLImage *input2, ICLImage *output, - int32_t block_size, float norm_factor, float strength_thresh, float sensitivity, - bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input1); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input2); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::S16, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::S16, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2); - ARM_COMPUTE_ERROR_ON(!(block_size == 3 || block_size == 5 || block_size == 7)); - ARM_COMPUTE_ERROR_ON(0.0f == norm_factor); - - _input1 = input1; - _input2 = input2; - _output = output; - _sensitivity = sensitivity; - _strength_thresh = strength_thresh; - _norm_factor = norm_factor; - _border_size = BorderSize(block_size / 2); - - // Select kernel - std::stringstream harris_score_kernel_name; - harris_score_kernel_name << "harris_score_" << block_size << "x" << block_size; - - // Create build options - std::set build_opts = { ("-DDATA_TYPE=" + get_cl_type_from_data_type(input1->info()->data_type())) }; - - // Create kernel - _kernel = create_kernel(compile_context, harris_score_kernel_name.str(), build_opts); - - // Set static kernel arguments - unsigned int idx = 3 * num_arguments_per_2D_tensor(); //Skip the input and output parameters - _kernel.setArg(idx++, sensitivity); - _kernel.setArg(idx++, strength_thresh); - _kernel.setArg(idx++, norm_factor); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 4; - constexpr unsigned int num_elems_written_per_iteration = 4; - const unsigned int num_elems_read_per_iteration = block_size == 7 ? 10 : 8; - const unsigned int num_rows_read_per_iteration = block_size; - - Window win = calculate_max_window(*_input1->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - - AccessWindowRectangle input1_access(input1->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration); - AccessWindowRectangle input2_access(input2->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, input1_access, input2_access, output_access); - - ValidRegion valid_region = intersect_valid_regions(input1->info()->valid_region(), input2->info()->valid_region()); - output_access.set_valid_region(win, valid_region, border_undefined, border_size()); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = harris_score_kernel_name.str(); - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input1->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input1->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input1->info()->dimension(1)); - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input2->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input2->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input2->info()->dimension(1)); -} - -void CLHarrisScoreKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window slice = window.first_slice_window_2D(); - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input1, slice); - add_2D_tensor_argument(idx, _input2, slice); - add_2D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); -} diff --git a/src/core/CL/kernels/CLHarrisCornersKernel.h b/src/core/CL/kernels/CLHarrisCornersKernel.h deleted file mode 100644 index 6482b0aa4e..0000000000 --- a/src/core/CL/kernels/CLHarrisCornersKernel.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLHARRISCORNERSKERNEL_H -#define ARM_COMPUTE_CLHARRISCORNERSKERNEL_H - -#include "src/core/CL/ICLKernel.h" - -#include - -namespace arm_compute -{ -class ICLTensor; -using ICLImage = ICLTensor; - -/** Interface for the harris score kernel. - * - * @note The implementation supports 3, 5, and 7 for the block_size. - */ -class CLHarrisScoreKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLHarrisScoreKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHarrisScoreKernel(const CLHarrisScoreKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHarrisScoreKernel &operator=(const CLHarrisScoreKernel &) = delete; - /** Allow instances of this class to be moved */ - CLHarrisScoreKernel(CLHarrisScoreKernel &&) = default; - /** Allow instances of this class to be moved */ - CLHarrisScoreKernel &operator=(CLHarrisScoreKernel &&) = default; - /** Default destructor */ - ~CLHarrisScoreKernel() = default; - - /** Setup the kernel parameters - * - * @param[in] input1 Source image (gradient X). Data types supported S16, S32. (Must be the same as input2) - * @param[in] input2 Source image (gradient Y). Data types supported S16, S32. (Must be the same as input1) - * @param[out] output Destination image (harris score). Data types supported F32 - * @param[in] block_size The block window size used to compute the Harris Corner score. Supports: 3, 5 and 7 - * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0) - * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). - * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLImage *input1, const ICLImage *input2, ICLImage *output, - int32_t block_size, float norm_factor, float strength_thresh, float sensitivity, - bool border_undefined); - /** Setup the kernel parameters - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 Source image (gradient X). Data types supported S16, S32. (Must be the same as input2) - * @param[in] input2 Source image (gradient Y). Data types supported S16, S32. (Must be the same as input1) - * @param[out] output Destination image (harris score). Data types supported F32 - * @param[in] block_size The block window size used to compute the Harris Corner score. Supports: 3, 5 and 7 - * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0) - * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). - * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLImage *input1, const ICLImage *input2, ICLImage *output, - int32_t block_size, float norm_factor, float strength_thresh, float sensitivity, - bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -protected: - const ICLImage *_input1; /**< Source image - Gx component */ - const ICLImage *_input2; /**< Source image - Gy component */ - ICLImage *_output; /**< Source image - Harris score */ - float _sensitivity; /**< Sensitivity value */ - float _strength_thresh; /**< Threshold value */ - float _norm_factor; /**< Normalization factor */ - BorderSize _border_size; /**< Border size */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLHARRISCORNERSKERNEL_H */ diff --git a/src/core/CL/kernels/CLHistogramKernel.cpp b/src/core/CL/kernels/CLHistogramKernel.cpp deleted file mode 100644 index ca5322aa51..0000000000 --- a/src/core/CL/kernels/CLHistogramKernel.cpp +++ /dev/null @@ -1,253 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLHistogramKernel.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLDistribution1D.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include -#include - -using namespace arm_compute; - -// each thread handle 16 pixels -constexpr signed int pixels_per_item = 16; - -// local work group size in X dimension -constexpr unsigned int local_x_size = 16; - -CLHistogramKernel::CLHistogramKernel() - : _input(nullptr), _output(nullptr) -{ -} - -void CLHistogramKernel::configure(const ICLImage *input, ICLDistribution1D *output) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output); -} - -void CLHistogramKernel::configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output) -{ - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - - // Check input size - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - - // Check offset - ARM_COMPUTE_ERROR_ON_MSG(0 > output->offset() || output->offset() > 256, "Offset is larger than the image value range."); - - // Check range - ARM_COMPUTE_ERROR_ON_MSG(output->range() > 256 /* max range */, "Range larger than the image value range."); - - _input = input; - _output = output; - - if(_input->info()->dimension(0) < pixels_per_item) - { - return; - } - - unsigned int num_bins = _output->num_bins(); - unsigned int window_size = _output->window(); - unsigned int offset = _output->offset(); - unsigned int range = _output->range(); - unsigned int offrange = offset + range; - unsigned int bin_size = _output->size(); - unsigned int buffer_size = bin_size + 1; // We need one extra place for pixels that don't meet the conditions - - // Create kernel - bool is_fixed_size = (256 == num_bins) && (1 == window_size) && (0 == offset) && (256 == offrange); - const std::string kernel_name = is_fixed_size ? "hist_local_kernel_fixed" : "hist_local_kernel"; - _kernel = create_kernel(compile_context, kernel_name); - - // Set static kernel arguments - unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input and output parameters - _kernel.setArg(idx++, buffer_size, nullptr); - _kernel.setArg(idx++, _output->cl_buffer()); - if(!is_fixed_size) - { - _kernel.setArg(idx++, num_bins); - _kernel.setArg(idx++, offset); - _kernel.setArg(idx++, range); - _kernel.setArg(idx++, offrange); - } - - // We only run histogram on Image, therefore only 2 dimensions here - unsigned int end_position = (_input->info()->dimension(0) / pixels_per_item) * pixels_per_item; - - // Configure kernel window - Window win; - win.set(0, Window::Dimension(0, end_position, pixels_per_item)); - win.set(1, Window::Dimension(0, _input->info()->dimension(1))); - - update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, pixels_per_item)); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); -} - -void CLHistogramKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window); - - // TODO (COMPMID-679): Add CLMemFill - _output->map(queue, true); - ARM_COMPUTE_ERROR_ON(_output->buffer() == nullptr); - memset(_output->buffer(), 0, _output->size()); - _output->unmap(queue); - - if(_input->info()->dimension(0) < pixels_per_item) - { - return; - } - - Window slice = window.first_slice_window_2D(); - const unsigned int gws_x = (window.x().end() - window.x().start()) / window.x().step(); - cl::NDRange lws = (local_x_size < gws_x) ? cl::NDRange(local_x_size, 1) : cl::NDRange(1, 1); - - do - { - /* Run the core part which has width can be divided by 16 */ - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, slice); - - enqueue(queue, *this, slice, lws); - } - while(window.slide_window_slice_2D(slice)); -} - -CLHistogramBorderKernel::CLHistogramBorderKernel() - : _input(nullptr), _output(nullptr) -{ -} - -void CLHistogramBorderKernel::configure(const ICLImage *input, ICLDistribution1D *output) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output); -} - -void CLHistogramBorderKernel::configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output) -{ - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); - ARM_COMPUTE_ERROR_ON(nullptr == output); - - // Check input size - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - - // Check offset - ARM_COMPUTE_ERROR_ON_MSG(0 > output->offset() || output->offset() > 256, "Offset is larger than the image value range."); - - // Check range - ARM_COMPUTE_ERROR_ON_MSG(output->range() > 256 /* max range */, "Range larger than the image value range."); - - // We only run histogram on Image, therefore only 2 dimensions here - unsigned int start_position = (input->info()->dimension(0) / pixels_per_item) * pixels_per_item; - - if(start_position >= input->info()->dimension(0)) - { - return; // no need to run histogram border kernel - } - - _input = input; - _output = output; - - unsigned int num_bins = _output->num_bins(); - unsigned int window_size = _output->window(); - unsigned int offset = _output->offset(); - unsigned int range = _output->range(); - unsigned int offrange = offset + range; - - // Create kernel - bool is_fixed_size = (256 == num_bins) && (1 == window_size) && (0 == offset) && (256 == offrange); - const std::string kernel_name = is_fixed_size ? "hist_border_kernel_fixed" : "hist_border_kernel"; - _kernel = create_kernel(compile_context, kernel_name); - - // Set static kernel arguments - unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input and output parameters - _kernel.setArg(idx++, _output->cl_buffer()); - if(!is_fixed_size) - { - _kernel.setArg(idx++, num_bins); - _kernel.setArg(idx++, offset); - _kernel.setArg(idx++, range); - _kernel.setArg(idx++, offrange); - } - - // Configure kernel window - Window win; - win.set(0, Window::Dimension(start_position, _input->info()->dimension(0))); - win.set(1, Window::Dimension(0, _input->info()->dimension(1))); - update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, 1)); - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); -} - -void CLHistogramBorderKernel::run(const Window &window, cl::CommandQueue &queue) -{ - if(window.x().start() >= window.x().end()) - { - return; - } - - ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window); - - cl::NDRange lws = cl::NDRange(1, 1); - - Window slice = window.first_slice_window_2D(); - - do - { - /* Run the border part which has width cannot be divided by 16 */ - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, slice); - - enqueue(queue, *this, slice, lws); - } - while(window.slide_window_slice_2D(slice)); -} diff --git a/src/core/CL/kernels/CLHistogramKernel.h b/src/core/CL/kernels/CLHistogramKernel.h deleted file mode 100644 index 9c97c6590d..0000000000 --- a/src/core/CL/kernels/CLHistogramKernel.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLHISTOGRAMKERNEL_H -#define ARM_COMPUTE_CLHISTOGRAMKERNEL_H - -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLDistribution1D; -class ICLTensor; -using ICLImage = ICLTensor; - -/** Interface to run the histogram kernel. This kernel processes the part of image with width can be divided by 16. - * If the image width is not a multiple of 16, remaining pixels have to be processed with the @ref CLHistogramBorderKernel - */ -class CLHistogramKernel : public ICLKernel -{ -public: - /** Constructor */ - CLHistogramKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHistogramKernel(const CLHistogramKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHistogramKernel &operator=(const CLHistogramKernel &) = delete; - /** Allow instances of this class to be moved */ - CLHistogramKernel(CLHistogramKernel &&) = default; - /** Allow instances of this class to be moved */ - CLHistogramKernel &operator=(CLHistogramKernel &&) = default; - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source image. Data types supported: U8. - * @param[out] output Destination distribution. - */ - void configure(const ICLImage *input, ICLDistribution1D *output); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source image. Data types supported: U8. - * @param[out] output Destination distribution. - */ - void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLImage *_input; - ICLDistribution1D *_output; -}; - -/** Interface to run the histogram kernel to handle the leftover part of image - * - */ -class CLHistogramBorderKernel : public ICLKernel -{ -public: - /** Constructor */ - CLHistogramBorderKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHistogramBorderKernel(const CLHistogramBorderKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHistogramBorderKernel &operator=(const CLHistogramBorderKernel &) = delete; - /** Allow instances of this class to be moved */ - CLHistogramBorderKernel(CLHistogramBorderKernel &&) = default; - /** Allow instances of this class to be moved */ - CLHistogramBorderKernel &operator=(CLHistogramBorderKernel &&) = default; - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source image. Data types supported: U8. - * @param[out] output Destination distribution. - */ - void configure(const ICLImage *input, ICLDistribution1D *output); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source image. Data types supported: U8. - * @param[out] output Destination distribution. - */ - void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLImage *_input; - ICLDistribution1D *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLHISTOGRAMKERNEL_H*/ diff --git a/src/core/CL/kernels/CLIntegralImageKernel.cpp b/src/core/CL/kernels/CLIntegralImageKernel.cpp deleted file mode 100644 index 5e5683d231..0000000000 --- a/src/core/CL/kernels/CLIntegralImageKernel.cpp +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLIntegralImageKernel.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include - -using namespace arm_compute; - -void CLIntegralImageHorKernel::configure(const ICLTensor *input, ICLTensor *output) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output); -} - -void CLIntegralImageHorKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U32); - - _input = input; - _output = output; - - // Create kernel - const std::string kernel_name = std::string("integral_horizontal"); - _kernel = create_kernel(compile_context, kernel_name); - - // Configure kernel window - const unsigned int num_elems_processed_per_iteration = input->info()->dimension(0); - const unsigned int num_elems_accessed_per_iteration = ceil_to_multiple(num_elems_processed_per_iteration, 16); - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_accessed_per_iteration); - - update_window_and_padding(win, - AccessWindowHorizontal(input->info(), 0, num_elems_accessed_per_iteration), - output_access); - - output_access.set_valid_region(win, input->info()->valid_region()); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(1)); -} - -CLIntegralImageVertKernel::CLIntegralImageVertKernel() - : _in_out(nullptr) -{ -} - -void CLIntegralImageVertKernel::configure(ICLTensor *in_out) -{ - configure(CLKernelLibrary::get().get_compile_context(), in_out); -} - -void CLIntegralImageVertKernel::configure(const CLCompileContext &compile_context, ICLTensor *in_out) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(in_out, 1, DataType::U32); - - _in_out = in_out; - - // Create kernel - const std::string kernel_name = std::string("integral_vertical"); - _kernel = create_kernel(compile_context, kernel_name); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration_x = 8; - const unsigned int num_elems_processed_per_iteration_y = in_out->info()->dimension(Window::DimY); - - Window win = calculate_max_window(*in_out->info(), Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); - - AccessWindowRectangle in_out_access(in_out->info(), 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y); - - update_window_and_padding(win, in_out_access); - - in_out_access.set_valid_region(win, in_out->info()->valid_region()); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(in_out->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(in_out->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(in_out->info()->dimension(1)); -} - -void CLIntegralImageVertKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const size_t height = _in_out->info()->dimension(1); - - Window slice = window.first_slice_window_2D(); - - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _in_out, slice); - _kernel.setArg(idx++, height); - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); -} diff --git a/src/core/CL/kernels/CLIntegralImageKernel.h b/src/core/CL/kernels/CLIntegralImageKernel.h deleted file mode 100644 index 0e40e3afbc..0000000000 --- a/src/core/CL/kernels/CLIntegralImageKernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H -#define ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H - -#include "src/core/CL/ICLKernel.h" -#include "src/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface to run the horizontal pass of the integral image kernel. */ -class CLIntegralImageHorKernel : public ICLSimple2DKernel -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output Destination tensor, Data types supported: U32. - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output Destination tensor, Data types supported: U32. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); -}; - -/** Interface to run the vertical pass of the integral image kernel. */ -class CLIntegralImageVertKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLIntegralImageVertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLIntegralImageVertKernel(const CLIntegralImageVertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLIntegralImageVertKernel &operator=(const CLIntegralImageVertKernel &) = delete; - /** Allow instances of this class to be moved */ - CLIntegralImageVertKernel(CLIntegralImageVertKernel &&) = default; - /** Allow instances of this class to be moved */ - CLIntegralImageVertKernel &operator=(CLIntegralImageVertKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in,out] in_out The input/output tensor. Data types supported: U32 - */ - void configure(ICLTensor *in_out); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in,out] in_out The input/output tensor. Data types supported: U32 - */ - void configure(const CLCompileContext &compile_context, ICLTensor *in_out); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - ICLTensor *_in_out; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H */ diff --git a/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp b/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp deleted file mode 100644 index 9845dd6169..0000000000 --- a/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include -#include - -using namespace arm_compute; - -CLMagnitudePhaseKernel::CLMagnitudePhaseKernel() - : _gx(nullptr), _gy(nullptr), _magnitude(nullptr), _phase(nullptr), _run_mag(false), _run_phase(false) -{ -} - -void CLMagnitudePhaseKernel::configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, - MagnitudeType mag_type, PhaseType phase_type) -{ - configure(CLKernelLibrary::get().get_compile_context(), gx, gy, magnitude, phase, mag_type, phase_type); -} - -void CLMagnitudePhaseKernel::configure(const CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, - MagnitudeType mag_type, PhaseType phase_type) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gx, 1, DataType::S16, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gy, 1, DataType::S16, DataType::S32); - ARM_COMPUTE_ERROR_ON((magnitude == nullptr) && (phase == nullptr)); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(gx, gy); - - _run_mag = (magnitude != nullptr); - _run_phase = (phase != nullptr); - if(_run_mag) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(magnitude, 1, DataType::S16, DataType::S32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(gx, magnitude); - } - if(_run_phase) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(phase, 1, DataType::U8); - } - - if(!_run_mag && !_run_phase) - { - ARM_COMPUTE_ERROR("At least one output must be NOT NULL"); - } - - _gx = gx; - _gy = gy; - _magnitude = magnitude; - _phase = phase; - - // Construct kernel name - std::set build_opts = {}; - - // Add magnitude type - if(_run_mag) - { - switch(mag_type) - { - case MagnitudeType::L1NORM: - build_opts.insert("-DMAGNITUDE=1"); - break; - case MagnitudeType::L2NORM: - build_opts.insert("-DMAGNITUDE=2"); - break; - default: - ARM_COMPUTE_ERROR("Unsupported magnitude calculation type."); - build_opts.insert("-DMAGNITUDE=0"); - break; - } - } - - // Add phase type - if(_run_phase) - { - switch(phase_type) - { - case PhaseType::UNSIGNED: - build_opts.insert("-DPHASE=1"); - break; - case PhaseType::SIGNED: - build_opts.insert("-DPHASE=2"); - break; - default: - ARM_COMPUTE_ERROR("Unsupported phase calculation type."); - build_opts.insert("-DPHASE=0"); - break; - } - } - - // Add data_type - build_opts.insert("-DDATA_TYPE=" + get_cl_type_from_data_type(gx->info()->data_type())); - - // Create kernel - const std::string kernel_name = std::string("magnitude_phase"); - _kernel = create_kernel(compile_context, kernel_name, build_opts); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 16; - - Window win = calculate_max_window(*gx->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal gx_access(gx->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal gy_access(gy->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_magnitude_access(magnitude == nullptr ? nullptr : magnitude->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_phase_access(phase == nullptr ? nullptr : phase->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, - gx_access, gy_access, - output_magnitude_access, output_phase_access); - - ValidRegion valid_region = intersect_valid_regions(gx->info()->valid_region(), - gy->info()->valid_region()); - output_magnitude_access.set_valid_region(win, valid_region); - output_phase_access.set_valid_region(win, valid_region); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(gx->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(gx->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(gx->info()->dimension(1)); -} - -void CLMagnitudePhaseKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window slice = window.first_slice_window_2D(); - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _gx, slice); - add_2D_tensor_argument(idx, _gy, slice); - add_2D_tensor_argument_if((_run_mag), idx, _magnitude, slice); - add_2D_tensor_argument_if((_run_phase), idx, _phase, slice); - - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); -} diff --git a/src/core/CL/kernels/CLMagnitudePhaseKernel.h b/src/core/CL/kernels/CLMagnitudePhaseKernel.h deleted file mode 100644 index 514036b2ff..0000000000 --- a/src/core/CL/kernels/CLMagnitudePhaseKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H -#define ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Template interface for the kernel to compute magnitude and phase. - * - */ -class CLMagnitudePhaseKernel : public ICLKernel -{ -public: - /** Default constructor. */ - CLMagnitudePhaseKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMagnitudePhaseKernel(const CLMagnitudePhaseKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMagnitudePhaseKernel &operator=(const CLMagnitudePhaseKernel &) = delete; - /** Allow instances of this class to be moved */ - CLMagnitudePhaseKernel(CLMagnitudePhaseKernel &&) = default; - /** Allow instances of this class to be moved */ - CLMagnitudePhaseKernel &operator=(CLMagnitudePhaseKernel &&) = default; - /** Initialise the kernel's input, output. - * - * @note At least one of output1 or output2 must be set. - * - * @param[in] gx The input gradient X tensor. Data types supported: S16/S32. - * @param[in] gy The input gradient Y tensor. Data types supported: S16/S32. - * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16/S32. - * @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8. - * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM. - * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED. - */ - void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, - MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED); - /** Initialise the kernel's input, output. - * - * @note At least one of output1 or output2 must be set. - * - * @param[in] compile_context The compile context to be used. - * @param[in] gx The input gradient X tensor. Data types supported: S16/S32. - * @param[in] gy The input gradient Y tensor. Data types supported: S16/S32. - * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16/S32. - * @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8. - * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM. - * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, - MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_gx; /**< Input gradient X. */ - const ICLTensor *_gy; /**< Input gradient Y. */ - ICLTensor *_magnitude; /**< Output - Magnitude. */ - ICLTensor *_phase; /**< Output - Phase. */ - bool _run_mag; /**< Calculate magnitude ? */ - bool _run_phase; /**< Calculate phase ? */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H */ diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.cpp b/src/core/CL/kernels/CLMeanStdDevKernel.cpp deleted file mode 100644 index aed6e6eaf7..0000000000 --- a/src/core/CL/kernels/CLMeanStdDevKernel.cpp +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLMeanStdDevKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "src/core/CL/CLValidate.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include -#include - -using namespace arm_compute; - -CLMeanStdDevKernel::CLMeanStdDevKernel() - : _input(nullptr), _mean(nullptr), _stddev(nullptr), _global_sum(nullptr), _global_sum_squared(nullptr), _border_size(0) -{ -} - -BorderSize CLMeanStdDevKernel::border_size() const -{ - return _border_size; -} - -Status CLMeanStdDevKernel::validate(const ITensorInfo *input, float *mean, cl::Buffer *global_sum, float *stddev, cl::Buffer *global_sum_squared) -{ - ARM_COMPUTE_UNUSED(mean); - ARM_COMPUTE_UNUSED(stddev); - ARM_COMPUTE_UNUSED(global_sum); - ARM_COMPUTE_UNUSED(global_sum_squared); - ARM_COMPUTE_RETURN_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED(); - ARM_COMPUTE_RETURN_ERROR_ON_TENSOR_NOT_2D(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - - return Status{}; -} - -void CLMeanStdDevKernel::configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev, cl::Buffer *global_sum_squared) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, mean, global_sum, stddev, global_sum_squared); -} - -void CLMeanStdDevKernel::configure(const CLCompileContext &compile_context, const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev, cl::Buffer *global_sum_squared) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, mean, global_sum); - ARM_COMPUTE_ERROR_ON(stddev && nullptr == global_sum_squared); - ARM_COMPUTE_ERROR_THROW_ON(CLMeanStdDevKernel::validate(input->info(), mean, global_sum, stddev, global_sum_squared)); - - _input = input; - _mean = mean; - _stddev = stddev; - _global_sum = global_sum; - _global_sum_squared = global_sum_squared; - - // Create kernel - std::set build_opts; - - if(_stddev != nullptr) - { - build_opts.insert("-DSTDDEV"); - } - - _kernel = create_kernel(compile_context, "mean_stddev_accumulate", build_opts); - - // Set fixed arguments - unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input parameters - - _kernel.setArg(idx++, static_cast(input->info()->dimension(1))); - _kernel.setArg(idx++, *_global_sum); - - if(_stddev != nullptr) - { - _kernel.setArg(idx++, *_global_sum_squared); - } - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration_x = 8; - const unsigned int num_elems_processed_per_iteration_y = input->info()->dimension(1); - - _border_size = BorderSize(ceil_to_multiple(input->info()->dimension(0), num_elems_processed_per_iteration_x) - input->info()->dimension(0)); - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); - AccessWindowRectangle input_access(input->info(), 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y); - update_window_and_padding(win, input_access); - - ICLKernel::configure_internal(win); -} - -void CLMeanStdDevKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - // Clear sums - static const cl_ulong zero = 0; - queue.enqueueWriteBuffer(*_global_sum, CL_FALSE, 0, sizeof(cl_ulong), &zero); - - if(_stddev != nullptr) - { - queue.enqueueWriteBuffer(*_global_sum_squared, CL_FALSE, 0, sizeof(cl_ulong), &zero); - } - - Window slice = window.first_slice_window_2D(); - - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, slice); - // Set slice step equal to height to force gws[1] to 1, - // as each thread calculates the sum across all rows and columns equal to the number of elements processed by each work-item - slice.set_dimension_step(Window::DimY, _input->info()->dimension(1)); - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); - - // Calculate mean and stddev - cl_ulong global_sum = 0; - cl_ulong global_sum_squared = 0; - const float num_pixels = _input->info()->dimension(0) * _input->info()->dimension(1); - - queue.enqueueReadBuffer(*_global_sum, CL_TRUE, 0, sizeof(cl_ulong), static_cast(&global_sum)); - const float mean = global_sum / num_pixels; - *_mean = mean; - - if(_stddev != nullptr) - { - queue.enqueueReadBuffer(*_global_sum_squared, CL_TRUE, 0, sizeof(cl_ulong), static_cast(&global_sum_squared)); - *_stddev = std::sqrt((global_sum_squared / num_pixels) - (mean * mean)); - } -} diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.h b/src/core/CL/kernels/CLMeanStdDevKernel.h deleted file mode 100644 index 179a2025b7..0000000000 --- a/src/core/CL/kernels/CLMeanStdDevKernel.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLMEANSTDDEVKERNEL_H -#define ARM_COMPUTE_CLMEANSTDDEVKERNEL_H - -#include "src/core/CL/ICLKernel.h" - -namespace cl -{ -class Buffer; -} - -namespace arm_compute -{ -class ICLTensor; -using ICLImage = ICLTensor; - -/** Interface for the kernel to calculate mean and standard deviation of input image pixels. */ -class CLMeanStdDevKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLMeanStdDevKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMeanStdDevKernel(const CLMeanStdDevKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMeanStdDevKernel &operator=(const CLMeanStdDevKernel &) = delete; - /** Allow instances of this class to be moved */ - CLMeanStdDevKernel(CLMeanStdDevKernel &&) = default; - /** Allow instances of this class to be moved */ - CLMeanStdDevKernel &operator=(CLMeanStdDevKernel &&) = default; - /** Initialise the kernel's input and outputs. - * - * @param[in] input Input image. Data types supported: U8. - * @param[out] mean Input average pixel value. - * @param[out] global_sum Keeps global sum of pixel values (Buffer size: 1 cl_ulong). - * @param[out] stddev (Optional) Output standard deviation of pixel values. - * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong). - */ - void configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr); - /** Initialise the kernel's input and outputs. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input image. Data types supported: U8. - * @param[out] mean Input average pixel value. - * @param[out] global_sum Keeps global sum of pixel values (Buffer size: 1 cl_ulong). - * @param[out] stddev (Optional) Output standard deviation of pixel values. - * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong). - */ - void configure(const CLCompileContext &compile_context, const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr); - /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevKernel. - * - * @param[in] input Input image info. Data types supported: U8. - * @param[in] mean Input average pixel value. - * @param[in] global_sum Keeps global sum of pixel values. - * @param[in] stddev (Optional) Output standard deviation of pixel values. - * @param[in] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - - BorderSize border_size() const override; - -private: - const ICLImage *_input; - float *_mean; - float *_stddev; - cl::Buffer *_global_sum; - cl::Buffer *_global_sum_squared; - BorderSize _border_size; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLMEANSTDDEVKERNEL_H */ diff --git a/src/core/CL/kernels/CLMedian3x3Kernel.cpp b/src/core/CL/kernels/CLMedian3x3Kernel.cpp deleted file mode 100644 index 23a21d6b19..0000000000 --- a/src/core/CL/kernels/CLMedian3x3Kernel.cpp +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLMedian3x3Kernel.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -using namespace arm_compute; - -BorderSize CLMedian3x3Kernel::border_size() const -{ - return BorderSize(1); -} - -void CLMedian3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined); -} - -void CLMedian3x3Kernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - - _input = input; - _output = output; - - // Create kernel - const std::string kernel_name = std::string("non_linear_filter_box3x3"); - _kernel = create_kernel(compile_context, kernel_name, { "-DMEDIAN" }); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_elems_written_per_iteration = 8; - constexpr unsigned int num_rows_read_per_iteration = 3; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - - AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(border_undefined); -} diff --git a/src/core/CL/kernels/CLMedian3x3Kernel.h b/src/core/CL/kernels/CLMedian3x3Kernel.h deleted file mode 100644 index 8cc5ed7279..0000000000 --- a/src/core/CL/kernels/CLMedian3x3Kernel.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLMEDIAN3X3KERNEL_H -#define ARM_COMPUTE_CLMEDIAN3X3KERNEL_H - -#include "src/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the median 3x3 filter kernel. - * - */ -class CLMedian3x3Kernel : public ICLSimple2DKernel -{ -public: - /** Initialise the kernel's input and output. - * - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLMEDIAN3X3KERNEL_H */ diff --git a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp deleted file mode 100644 index 675cfc19a9..0000000000 --- a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp +++ /dev/null @@ -1,246 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLMinMaxLocationKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include - -namespace arm_compute -{ -inline int32_t FloatFlip(float val) -{ - static_assert(sizeof(float) == sizeof(int32_t), "Float must be same size as int32_t"); - int32_t int_val = 0; - - memcpy(&int_val, &val, sizeof(float)); - int_val = (int_val >= 0) ? int_val : int_val ^ 0x7FFFFFFF; - return int_val; -} - -inline float IFloatFlip(int32_t val) -{ - static_assert(sizeof(float) == sizeof(int32_t), "Float must be same size as int32_t"); - float flt_val = 0.f; - - val = (val >= 0) ? val : val ^ 0x7FFFFFFF; - memcpy(&flt_val, &val, sizeof(float)); - return flt_val; -} - -CLMinMaxKernel::CLMinMaxKernel() - : _input(nullptr), _min_max(), _data_type_max_min() -{ -} - -void CLMinMaxKernel::configure(const ICLImage *input, cl::Buffer *min_max) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, min_max); -} - -void CLMinMaxKernel::configure(const CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F32); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); - ARM_COMPUTE_ERROR_ON(min_max == nullptr); - - _input = input; - _min_max = min_max; - const unsigned int num_elems_processed_per_iteration = input->info()->dimension(0); - - switch(input->info()->data_type()) - { - case DataType::U8: - _data_type_max_min[0] = UCHAR_MAX; - _data_type_max_min[1] = 0; - break; - case DataType::S16: - _data_type_max_min[0] = SHRT_MAX; - _data_type_max_min[1] = SHRT_MIN; - break; - case DataType::F32: - _data_type_max_min[0] = FloatFlip(std::numeric_limits::max()); - _data_type_max_min[1] = FloatFlip(std::numeric_limits::lowest()); - break; - default: - ARM_COMPUTE_ERROR("You called with the wrong image data types"); - } - - // Set kernel build options - std::set build_opts{ "-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()) }; - - if(num_elems_processed_per_iteration % max_cl_vector_width != 0) - { - build_opts.emplace("-DNON_MULTIPLE_OF_16"); - } - - if(input->info()->data_type() == DataType::F32) - { - build_opts.emplace("-DDATA_TYPE_MAX=" + support::cpp11::to_string(std::numeric_limits::max())); - build_opts.emplace("-DDATA_TYPE_MIN=" + support::cpp11::to_string(std::numeric_limits::lowest())); - build_opts.emplace("-DIS_DATA_TYPE_FLOAT"); - } - else - { - build_opts.emplace("-DDATA_TYPE_MAX=" + support::cpp11::to_string(_data_type_max_min[0])); - build_opts.emplace("-DDATA_TYPE_MIN=" + support::cpp11::to_string(_data_type_max_min[1])); - } - - // Create kernel - _kernel = create_kernel(compile_context, "minmax", build_opts); - - // Set fixed arguments - unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input and output parameters - _kernel.setArg(idx++, *_min_max); - _kernel.setArg(idx++, static_cast(input->info()->dimension(0))); - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, ceil_to_multiple(num_elems_processed_per_iteration, 16))); - ICLKernel::configure_internal(win); -} - -void CLMinMaxKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - // Reset mininum and maximum values - queue.enqueueWriteBuffer(*_min_max, CL_FALSE /* blocking */, 0, _data_type_max_min.size() * sizeof(int), _data_type_max_min.data()); - - Window slice = window.first_slice_window_2D(); - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, slice); - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); - - cl_int min = 0; - cl_int max = 0; - queue.enqueueReadBuffer(*_min_max, CL_TRUE /* blocking */, 0 * sizeof(cl_int), sizeof(cl_int), static_cast(&min)); - queue.enqueueReadBuffer(*_min_max, CL_TRUE /* blocking */, 1 * sizeof(cl_int), sizeof(cl_int), static_cast(&max)); - - if(_input->info()->data_type() == DataType::F32) - { - std::array min_max = - { - { - IFloatFlip(min), - IFloatFlip(max) - } - }; - queue.enqueueWriteBuffer(*_min_max, CL_TRUE /* blocking */, 0, min_max.size() * sizeof(float), min_max.data()); - } - else - { - std::array min_max = { { min, max } }; - queue.enqueueWriteBuffer(*_min_max, CL_TRUE /* blocking */, 0, min_max.size() * sizeof(int32_t), min_max.data()); - } -} - -CLMinMaxLocationKernel::CLMinMaxLocationKernel() - : _input(nullptr), _min_max_count(nullptr) -{ -} - -void CLMinMaxLocationKernel::configure(const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, ICLCoordinates2DArray *min_loc, ICLCoordinates2DArray *max_loc) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, min_max, min_max_count, min_loc, max_loc); -} - -void CLMinMaxLocationKernel::configure(const CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, ICLCoordinates2DArray *min_loc, - ICLCoordinates2DArray *max_loc) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F32); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); - ARM_COMPUTE_ERROR_ON(min_max == nullptr); - ARM_COMPUTE_ERROR_ON(min_max_count == nullptr && min_loc == nullptr && max_loc == nullptr); - - _input = input; - _min_max_count = min_max_count; - - // Set kernel build options - std::set build_opts; - build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.emplace((min_max_count != nullptr) ? "-DCOUNT_MIN_MAX" : ""); - build_opts.emplace((min_loc != nullptr) ? "-DLOCATE_MIN" : ""); - build_opts.emplace((max_loc != nullptr) ? "-DLOCATE_MAX" : ""); - if(input->info()->data_type() == DataType::F32) - { - build_opts.emplace("-DIS_DATA_TYPE_FLOAT"); - } - - // Create kernel - _kernel = create_kernel(compile_context, "minmaxloc", build_opts); - - // Set static arguments - unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input and output parameters - _kernel.setArg(idx++, *min_max); - _kernel.setArg(idx++, *min_max_count); - if(min_loc != nullptr) - { - _kernel.setArg(idx++, min_loc->cl_buffer()); - _kernel.setArg(idx++, min_loc->max_num_values()); - } - if(max_loc != nullptr) - { - _kernel.setArg(idx++, max_loc->cl_buffer()); - _kernel.setArg(idx++, max_loc->max_num_values()); - } - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 1; - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration)); - ICLKernel::configure_internal(win); -} - -void CLMinMaxLocationKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - static const unsigned int zero_count = 0; - queue.enqueueWriteBuffer(*_min_max_count, CL_FALSE, 0 * sizeof(zero_count), sizeof(zero_count), &zero_count); - queue.enqueueWriteBuffer(*_min_max_count, CL_FALSE, 1 * sizeof(zero_count), sizeof(zero_count), &zero_count); - - Window slice = window.first_slice_window_2D(); - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, slice); - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); -} -} // namespace arm_compute diff --git a/src/core/CL/kernels/CLMinMaxLocationKernel.h b/src/core/CL/kernels/CLMinMaxLocationKernel.h deleted file mode 100644 index 2196abe033..0000000000 --- a/src/core/CL/kernels/CLMinMaxLocationKernel.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H -#define ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H - -#include "arm_compute/core/CL/ICLArray.h" -#include "src/core/CL/ICLKernel.h" - -#include - -namespace arm_compute -{ -class ICLTensor; -using ICLImage = ICLTensor; - -/** Interface for the kernel to perform min max search on an image. - */ -class CLMinMaxKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLMinMaxKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMinMaxKernel(const CLMinMaxKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMinMaxKernel &operator=(const CLMinMaxKernel &) = delete; - /** Allow instances of this class to be moved */ - CLMinMaxKernel(CLMinMaxKernel &&) = default; - /** Allow instances of this class to be moved */ - CLMinMaxKernel &operator=(CLMinMaxKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input Image. Data types supported: U8/S16/F32. - * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32. - */ - void configure(const ICLImage *input, cl::Buffer *min_max); - /** Initialise the kernel's input and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input Image. Data types supported: U8/S16/F32. - * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32. - */ - void configure(const CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Input image. */ - cl::Buffer *_min_max; /**< Minimum/maximum value. */ - std::array _data_type_max_min; /**< Maximum and minimum data type value respectively. */ -}; - -/** Interface for the kernel to find min max locations of an image. - */ -class CLMinMaxLocationKernel : public ICLKernel -{ -public: - /** Constructor */ - CLMinMaxLocationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMinMaxLocationKernel(const CLMinMaxLocationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLMinMaxLocationKernel &operator=(const CLMinMaxLocationKernel &) = delete; - /** Allow instances of this class to be moved */ - CLMinMaxLocationKernel(CLMinMaxLocationKernel &&) = default; - /** Allow instances of this class to be moved */ - CLMinMaxLocationKernel &operator=(CLMinMaxLocationKernel &&) = default; - /** Initialise the kernel's input and outputs. - * - * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size. - * - * @param[in] input Input image. Data types supported: U8/S16/F32. - * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32. - * @param[out] min_max_count Buffer of 2 elements to store the min value occurrences at position 0 and the max value occurrences at position 1. Data type supported: S32 - * @param[out] min_loc (Optional) Array of Coordinates2D used to store minimum value locations. - * @param[out] max_loc (Optional) Array of Coordinates2D used to store maximum value locations. - */ - void configure(const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, - ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr); - /** Initialise the kernel's input and outputs. - * - * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input image. Data types supported: U8/S16/F32. - * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32. - * @param[out] min_max_count Buffer of 2 elements to store the min value occurrences at position 0 and the max value occurrences at position 1. Data type supported: S32 - * @param[out] min_loc (Optional) Array of Coordinates2D used to store minimum value locations. - * @param[out] max_loc (Optional) Array of Coordinates2D used to store maximum value locations. - */ - void configure(const CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, - ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLImage *_input; /**< Input image. */ - cl::Buffer *_min_max_count; /**< Minimum/maximum value occurrences. */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H */ diff --git a/src/core/CL/kernels/CLNonLinearFilterKernel.cpp b/src/core/CL/kernels/CLNonLinearFilterKernel.cpp deleted file mode 100644 index c73acaf1d8..0000000000 --- a/src/core/CL/kernels/CLNonLinearFilterKernel.cpp +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLNonLinearFilterKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include -#include -#include -#include -#include - -using namespace arm_compute; - -CLNonLinearFilterKernel::CLNonLinearFilterKernel() - : _border_size(0) -{ -} - -BorderSize CLNonLinearFilterKernel::border_size() const -{ - return _border_size; -} - -void CLNonLinearFilterKernel::configure(const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, - unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, - bool border_undefined) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output, function, mask_size, pattern, mask, border_undefined); -} - -void CLNonLinearFilterKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, - unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, - bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON(mask_size != 3 && mask_size != 5); - ARM_COMPUTE_ERROR_ON_MSG(pattern == MatrixPattern::OTHER, "MatrixPattern::OTHER is not supported!"); - ARM_COMPUTE_UNUSED(mask); - - _input = input; - _output = output; - _border_size = BorderSize(mask_size / 2); - - // Define build options - std::set build_opts; - build_opts.emplace("-D" + string_from_non_linear_filter_function(function)); - - // Define kernel - std::string pattern_name = string_from_matrix_pattern(pattern); - std::transform(pattern_name.begin(), pattern_name.end(), pattern_name.begin(), ::tolower); - std::stringstream ss; - ss << "non_linear_filter_" << pattern_name << mask_size << "x" << mask_size; - - // Create kernel - _kernel = create_kernel(compile_context, ss.str(), build_opts); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - const unsigned int num_rows_read_per_iteration = mask_size; - - Window win = calculate_max_window(*input->info(), num_elems_processed_per_iteration, border_undefined, border_size()); - - AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - ICLKernel::configure_internal(win); -} diff --git a/src/core/CL/kernels/CLNonLinearFilterKernel.h b/src/core/CL/kernels/CLNonLinearFilterKernel.h deleted file mode 100644 index ed42063d2b..0000000000 --- a/src/core/CL/kernels/CLNonLinearFilterKernel.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H -#define ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/CL/ICLSimple2DKernel.h" - -#include - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to apply a non-linear filter */ -class CLNonLinearFilterKernel : public ICLSimple2DKernel -{ -public: - /** Default constructor */ - CLNonLinearFilterKernel(); - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data types supported: U8 - * @param[out] output Destination tensor. Data types supported: U8 - * @param[in] function Non linear function to perform - * @param[in] mask_size Mask size. Supported sizes: 3, 5 - * @param[in] pattern Mask pattern - * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, - unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, - bool border_undefined); - /** Set the source, destination and border mode of the kernel - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8 - * @param[out] output Destination tensor. Data types supported: U8 - * @param[in] function Non linear function to perform - * @param[in] mask_size Mask size. Supported sizes: 3, 5 - * @param[in] pattern Mask pattern - * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, - unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, - bool border_undefined); - - // Inherited methods overridden: - BorderSize border_size() const override; - -private: - BorderSize _border_size; /**< Border size */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H */ diff --git a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp deleted file mode 100644 index 7d5c5ba7e1..0000000000 --- a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include - -using namespace arm_compute; - -BorderSize CLNonMaximaSuppression3x3Kernel::border_size() const -{ - return BorderSize(1); -} - -void CLNonMaximaSuppression3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined); -} - -void CLNonMaximaSuppression3x3Kernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::F32); - - _input = input; - _output = output; - - // Create kernel - std::set build_opts = { ("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())) }; - _kernel = create_kernel(compile_context, "non_max_suppression", build_opts); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_written_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_rows_read_per_iteration = 3; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - - AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - ICLKernel::configure_internal(win); -} diff --git a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h deleted file mode 100644 index d9ed60ce6b..0000000000 --- a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H -#define ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H - -#include "src/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface to perform Non-Maxima suppression over a 3x3 window using OpenCL - * - * @note Used by @ref CLFastCorners and @ref CLHarrisCorners - */ -class CLNonMaximaSuppression3x3Kernel : public ICLSimple2DKernel -{ -public: - /** Initialise the kernel's sources, destinations and border mode. - * - * @param[in] input Source tensor. Data types supported: U8, F32. (Must be the same as the output tensor) - * @param[out] output Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor) - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined); - /** Initialise the kernel's sources, destinations and border mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8, F32. (Must be the same as the output tensor) - * @param[out] output Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor) - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H */ diff --git a/src/core/CL/kernels/CLScharr3x3Kernel.cpp b/src/core/CL/kernels/CLScharr3x3Kernel.cpp deleted file mode 100644 index 7ceddc9626..0000000000 --- a/src/core/CL/kernels/CLScharr3x3Kernel.cpp +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLScharr3x3Kernel.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include - -using namespace arm_compute; - -CLScharr3x3Kernel::CLScharr3x3Kernel() - : _run_scharr_x(false), _run_scharr_y(false), _input(nullptr), _output_x(nullptr), _output_y(nullptr) -{ -} - -BorderSize CLScharr3x3Kernel::border_size() const -{ - return BorderSize(1); -} - -void CLScharr3x3Kernel::configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_undefined); -} - -void CLScharr3x3Kernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr)); - - _run_scharr_x = output_x != nullptr; - _run_scharr_y = output_y != nullptr; - - if(_run_scharr_x) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_x, 1, DataType::S16); - } - - if(_run_scharr_y) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_y, 1, DataType::S16); - } - - _input = input; - _output_x = output_x; - _output_y = output_y; - - // Set build options - std::set build_opts; - - if(_run_scharr_x) - { - build_opts.insert("-DGRAD_X"); - } - - if(_run_scharr_y) - { - build_opts.insert("-DGRAD_Y"); - } - - // Create kernel - _kernel = create_kernel(compile_context, "scharr3x3", build_opts); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_elems_written_per_iteration = 8; - constexpr unsigned int num_rows_read_per_iteration = 3; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - - AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration); - AccessWindowHorizontal output_x_access(output_x == nullptr ? nullptr : output_x->info(), 0, num_elems_written_per_iteration); - AccessWindowHorizontal output_y_access(output_y == nullptr ? nullptr : output_y->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, input_access, output_x_access, output_y_access); - - output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - ICLKernel::configure_internal(win); -} - -void CLScharr3x3Kernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window slice = window.first_slice_window_2D(); - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, slice); - add_2D_tensor_argument_if((_run_scharr_x), idx, _output_x, slice); - add_2D_tensor_argument_if((_run_scharr_y), idx, _output_y, slice); - - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); -} diff --git a/src/core/CL/kernels/CLScharr3x3Kernel.h b/src/core/CL/kernels/CLScharr3x3Kernel.h deleted file mode 100644 index a670da5b6f..0000000000 --- a/src/core/CL/kernels/CLScharr3x3Kernel.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLSCHARR3X3KERNEL_H -#define ARM_COMPUTE_CLSCHARR3X3KERNEL_H - -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to run a 3x3 Scharr filter on a tensor. - * - * @f[ - * \mathbf{G}_x=\begin{vmatrix} - * -3 & 0 & +3\\ - * -10& 0 & +10\\ - * -3 & 0 & +3 - * \end{vmatrix} - * @f] - * @f[ - * \mathbf{G}_y=\begin{vmatrix} - * -3 & -10 & -3\\ - * 0 & 0 & 0\\ - * +3 & +10 & +3 - * \end{vmatrix} - * @f] - */ -class CLScharr3x3Kernel : public ICLKernel -{ -public: - /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ - CLScharr3x3Kernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLScharr3x3Kernel(const CLScharr3x3Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLScharr3x3Kernel &operator=(const CLScharr3x3Kernel &) = delete; - /** Allow instances of this class to be moved */ - CLScharr3x3Kernel(CLScharr3x3Kernel &&) = default; - /** Allow instances of this class to be moved */ - CLScharr3x3Kernel &operator=(CLScharr3x3Kernel &&) = default; - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - bool _run_scharr_x; /**< Do we need to run Scharr X ? */ - bool _run_scharr_y; /**< Do we need to run Scharr Y ? */ - const ICLTensor *_input; /**< Input image */ - ICLTensor *_output_x; /**< Output image for scharr X */ - ICLTensor *_output_y; /**< Output image for scharr Y */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLSCHARR3X3KERNEL_H */ diff --git a/src/core/CL/kernels/CLSobel3x3Kernel.cpp b/src/core/CL/kernels/CLSobel3x3Kernel.cpp deleted file mode 100644 index a87677a38f..0000000000 --- a/src/core/CL/kernels/CLSobel3x3Kernel.cpp +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLSobel3x3Kernel.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include -#include - -using namespace arm_compute; - -CLSobel3x3Kernel::CLSobel3x3Kernel() - : _input(nullptr), _output_x(nullptr), _output_y(nullptr), _run_sobel_x(false), _run_sobel_y(false) -{ -} - -BorderSize CLSobel3x3Kernel::border_size() const -{ - return BorderSize(1); -} - -void CLSobel3x3Kernel::configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_undefined); -} - -void CLSobel3x3Kernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr)); - - _run_sobel_x = output_x != nullptr; - _run_sobel_y = output_y != nullptr; - - if(_run_sobel_x) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_x, 1, DataType::S16); - } - - if(_run_sobel_y) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_y, 1, DataType::S16); - } - - _input = input; - _output_x = output_x; - _output_y = output_y; - - // Set build options - std::set build_opts; - - if(_run_sobel_x) - { - build_opts.insert("-DGRAD_X"); - } - - if(_run_sobel_y) - { - build_opts.insert("-DGRAD_Y"); - } - - // Create kernel - const std::string kernel_name = std::string("sobel3x3"); - _kernel = create_kernel(compile_context, kernel_name, build_opts); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_elems_written_per_iteration = 8; - constexpr unsigned int num_rows_read_per_iteration = 3; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - - AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration); - AccessWindowHorizontal output_x_access(output_x == nullptr ? nullptr : output_x->info(), 0, num_elems_written_per_iteration); - AccessWindowHorizontal output_y_access(output_y == nullptr ? nullptr : output_y->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, input_access, output_x_access, output_y_access); - - output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(border_undefined); -} - -void CLSobel3x3Kernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window slice = window.first_slice_window_2D(); - - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, slice); - add_2D_tensor_argument_if((_run_sobel_x), idx, _output_x, slice); - add_2D_tensor_argument_if((_run_sobel_y), idx, _output_y, slice); - - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); -} diff --git a/src/core/CL/kernels/CLSobel3x3Kernel.h b/src/core/CL/kernels/CLSobel3x3Kernel.h deleted file mode 100644 index fed8068762..0000000000 --- a/src/core/CL/kernels/CLSobel3x3Kernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLSOBEL3X3KERNEL_H -#define ARM_COMPUTE_CLSOBEL3X3KERNEL_H - -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to run a 3x3 Sobel filter on a tensor. */ -class CLSobel3x3Kernel : public ICLKernel -{ -public: - /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ - CLSobel3x3Kernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel3x3Kernel(const CLSobel3x3Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel3x3Kernel &operator=(const CLSobel3x3Kernel &) = delete; - /** Allow instances of this class to be moved */ - CLSobel3x3Kernel(CLSobel3x3Kernel &&) = default; - /** Allow instances of this class to be moved */ - CLSobel3x3Kernel &operator=(CLSobel3x3Kernel &&) = default; - /** Default destructor */ - ~CLSobel3x3Kernel() = default; - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input; /**< Input tensor */ - ICLTensor *_output_x; /**< Output tensor for Sobel X */ - ICLTensor *_output_y; /**< Output tensor for Sobel Y */ - bool _run_sobel_x; /**< Do we need to run Sobel X ? */ - bool _run_sobel_y; /**< Do we need to run Sobel Y ? */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLSOBEL3X3KERNEL_H */ diff --git a/src/core/CL/kernels/CLSobel5x5Kernel.cpp b/src/core/CL/kernels/CLSobel5x5Kernel.cpp deleted file mode 100644 index c450becd1d..0000000000 --- a/src/core/CL/kernels/CLSobel5x5Kernel.cpp +++ /dev/null @@ -1,251 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLSobel5x5Kernel.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include -#include - -using namespace arm_compute; - -CLSobel5x5HorKernel::CLSobel5x5HorKernel() - : _input(nullptr), _output_x(nullptr), _output_y(nullptr), _run_sobel_x(false), _run_sobel_y(false), _border_size(0) -{ -} - -BorderSize CLSobel5x5HorKernel::border_size() const -{ - return _border_size; -} - -void CLSobel5x5HorKernel::configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_undefined); -} - -void CLSobel5x5HorKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr)); - - _run_sobel_x = output_x != nullptr; - _run_sobel_y = output_y != nullptr; - - if(_run_sobel_x) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_x, 1, DataType::S16); - } - - if(_run_sobel_y) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_y, 1, DataType::S16); - } - - _input = input; - _output_x = output_x; - _output_y = output_y; - _border_size = BorderSize(border_undefined ? 0 : 2, 2); - - // Set build options - std::set build_opts; - - if(_run_sobel_x) - { - build_opts.insert("-DGRAD_X"); - } - - if(_run_sobel_y) - { - build_opts.insert("-DGRAD_Y"); - } - - // Create kernel - const std::string kernel_name = std::string("sobel_separable1x5"); - _kernel = create_kernel(compile_context, kernel_name, build_opts); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_elems_written_per_iteration = 8; - - Window win = calculate_max_window_horizontal(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - - AccessWindowHorizontal input_access(input->info(), -border_size().left, num_elems_read_per_iteration); - AccessWindowHorizontal output_x_access(output_x == nullptr ? nullptr : output_x->info(), 0, num_elems_written_per_iteration); - AccessWindowHorizontal output_y_access(output_y == nullptr ? nullptr : output_y->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, input_access, output_x_access, output_y_access); - - output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(border_undefined); -} - -void CLSobel5x5HorKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window slice = window.first_slice_window_2D(); - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, slice); - add_2D_tensor_argument_if((_run_sobel_x), idx, _output_x, slice); - add_2D_tensor_argument_if((_run_sobel_y), idx, _output_y, slice); - - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); -} - -CLSobel5x5VertKernel::CLSobel5x5VertKernel() - : _input_x(nullptr), _input_y(nullptr), _output_x(nullptr), _output_y(nullptr), _run_sobel_x(false), _run_sobel_y(false) -{ -} - -BorderSize CLSobel5x5VertKernel::border_size() const -{ - return BorderSize{ 2, 0 }; -} - -void CLSobel5x5VertKernel::configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) -{ - configure(CLKernelLibrary::get().get_compile_context(), input_x, input_y, output_x, output_y, border_undefined); -} - -void CLSobel5x5VertKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr)); - - _run_sobel_x = output_x != nullptr; - _run_sobel_y = output_y != nullptr; - - if(_run_sobel_x) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_x, 1, DataType::S16); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_x, 1, DataType::S16); - } - - if(_run_sobel_y) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_y, 1, DataType::S16); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_y, 1, DataType::S16); - } - - _input_x = input_x; - _input_y = input_y; - _output_x = output_x; - _output_y = output_y; - - // Set build options - std::set build_opts; - - if(_run_sobel_x) - { - build_opts.insert("-DGRAD_X"); - } - - if(_run_sobel_y) - { - build_opts.insert("-DGRAD_Y"); - } - - // Create kernel - const std::string kernel_name = std::string("sobel_separable5x1"); - _kernel = create_kernel(compile_context, kernel_name, build_opts); - - const ICLTensor *input = _run_sobel_x ? _input_x : _input_y; - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_written_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 8; - constexpr unsigned int num_rows_read_per_iteration = 5; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - - AccessWindowRectangle input_x_access(input_x == nullptr ? nullptr : input_x->info(), 0, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration); - AccessWindowRectangle input_y_access(input_y == nullptr ? nullptr : input_y->info(), 0, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration); - AccessWindowHorizontal output_x_access(output_x == nullptr ? nullptr : output_x->info(), 0, num_elems_written_per_iteration); - AccessWindowHorizontal output_y_access(output_y == nullptr ? nullptr : output_y->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, input_x_access, input_y_access, output_x_access, output_y_access); - - output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(border_undefined); -} - -void CLSobel5x5VertKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window slice = window.first_slice_window_2D(); - do - { - unsigned int idx = 0; - add_2D_tensor_argument_if((_run_sobel_x), idx, _input_x, slice); - add_2D_tensor_argument_if((_run_sobel_x), idx, _output_x, slice); - add_2D_tensor_argument_if((_run_sobel_y), idx, _input_y, slice); - add_2D_tensor_argument_if((_run_sobel_y), idx, _output_y, slice); - - _kernel.setArg(idx++, 0 /*dummy*/); - - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); -} diff --git a/src/core/CL/kernels/CLSobel5x5Kernel.h b/src/core/CL/kernels/CLSobel5x5Kernel.h deleted file mode 100644 index a163ac932a..0000000000 --- a/src/core/CL/kernels/CLSobel5x5Kernel.h +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLSOBEL5X5KERNEL_H -#define ARM_COMPUTE_CLSOBEL5X5KERNEL_H - -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to run the horizontal pass of 5x5 Sobel filter on a tensor. */ -class CLSobel5x5HorKernel : public ICLKernel -{ -public: - /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ - CLSobel5x5HorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel5x5HorKernel(const CLSobel5x5HorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel5x5HorKernel &operator=(const CLSobel5x5HorKernel &) = delete; - /** Allow instances of this class to be moved */ - CLSobel5x5HorKernel(CLSobel5x5HorKernel &&) = default; - /** Allow instances of this class to be moved */ - CLSobel5x5HorKernel &operator=(CLSobel5x5HorKernel &&) = default; - /** Default destructor */ - ~CLSobel5x5HorKernel() = default; - - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input; /**< Input tensor */ - ICLTensor *_output_x; /**< X output of horizontal pass */ - ICLTensor *_output_y; /**< Y output of horizontal pass */ - bool _run_sobel_x; /**< Do we need to run Sobel X ? */ - bool _run_sobel_y; /**< Do we need to run Sobel Y ? */ - BorderSize _border_size; /**< Border size */ -}; - -/** Interface for the kernel to run the vertical pass of 5x5 Sobel filter on a tensor. */ -class CLSobel5x5VertKernel : public ICLKernel -{ -public: - /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ - CLSobel5x5VertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel5x5VertKernel(const CLSobel5x5VertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel5x5VertKernel &operator=(const CLSobel5x5VertKernel &) = delete; - /** Allow instances of this class to be moved */ - CLSobel5x5VertKernel(CLSobel5x5VertKernel &&) = default; - /** Allow instances of this class to be moved */ - CLSobel5x5VertKernel &operator=(CLSobel5x5VertKernel &&) = default; - /** Default destructor */ - ~CLSobel5x5VertKernel() = default; - - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set and the corresponding input. - * - * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S16. - * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S16. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set and the corresponding input. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S16. - * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S16. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input_x; /**< X input (X output of the horizontal pass) */ - const ICLTensor *_input_y; /**< Y input (Y output of the horizontal pass) */ - ICLTensor *_output_x; /**< X output of sobel */ - ICLTensor *_output_y; /**< Y output of sobel */ - bool _run_sobel_x; /**< Do we need to run sobel X? */ - bool _run_sobel_y; /**< Do we need to run sobel Y? */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLSOBEL5X5KERNEL_H */ diff --git a/src/core/CL/kernels/CLSobel7x7Kernel.cpp b/src/core/CL/kernels/CLSobel7x7Kernel.cpp deleted file mode 100644 index 1cfa74f7b3..0000000000 --- a/src/core/CL/kernels/CLSobel7x7Kernel.cpp +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLSobel7x7Kernel.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include -#include - -using namespace arm_compute; - -CLSobel7x7HorKernel::CLSobel7x7HorKernel() - : _input(nullptr), _output_x(nullptr), _output_y(nullptr), _run_sobel_x(false), _run_sobel_y(false), _border_size(0) -{ -} - -BorderSize CLSobel7x7HorKernel::border_size() const -{ - return _border_size; -} - -void CLSobel7x7HorKernel::configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_undefined); -} - -void CLSobel7x7HorKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr)); - - _run_sobel_x = output_x != nullptr; - _run_sobel_y = output_y != nullptr; - - if(_run_sobel_x) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_x, 1, DataType::S32); - } - - if(_run_sobel_y) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_y, 1, DataType::S32); - } - - _input = input; - _output_x = output_x; - _output_y = output_y; - _border_size = BorderSize(border_undefined ? 0 : 3, 3); - - // Construct kernel name - const std::string kernel_name = "sobel_separable1x7"; - - // Set build options - std::set build_opts; - - if(_run_sobel_x) - { - build_opts.insert("-DGRAD_X"); - } - - if(_run_sobel_y) - { - build_opts.insert("-DGRAD_Y"); - } - - // Create kernel - _kernel = create_kernel(compile_context, kernel_name, build_opts); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 16; - constexpr unsigned int num_elems_written_per_iteration = 8; - - Window win = calculate_max_window_horizontal(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - - AccessWindowHorizontal input_access(input->info(), -border_size().left, num_elems_read_per_iteration); - AccessWindowHorizontal output_x_access(output_x == nullptr ? nullptr : output_x->info(), 0, num_elems_written_per_iteration); - AccessWindowHorizontal output_y_access(output_y == nullptr ? nullptr : output_y->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, input_access, output_x_access, output_y_access); - - output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(border_undefined); -} - -void CLSobel7x7HorKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window slice = window.first_slice_window_2D(); - do - { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, slice); - add_2D_tensor_argument_if((_run_sobel_x), idx, _output_x, slice); - add_2D_tensor_argument_if((_run_sobel_y), idx, _output_y, slice); - - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); -} - -CLSobel7x7VertKernel::CLSobel7x7VertKernel() - : _input_x(nullptr), _input_y(nullptr), _output_x(nullptr), _output_y(nullptr), _run_sobel_x(false), _run_sobel_y(false) -{ -} - -BorderSize CLSobel7x7VertKernel::border_size() const -{ - return BorderSize{ 3, 0 }; -} - -void CLSobel7x7VertKernel::configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) -{ - configure(CLKernelLibrary::get().get_compile_context(), input_x, input_y, output_x, output_y, border_undefined); -} - -void CLSobel7x7VertKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr)); - - _run_sobel_x = output_x != nullptr; - _run_sobel_y = output_y != nullptr; - - if(_run_sobel_x) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_x, 1, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_x, 1, DataType::S32); - } - - if(_run_sobel_y) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_y, 1, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_y, 1, DataType::S32); - } - - _input_x = input_x; - _input_y = input_y; - _output_x = output_x; - _output_y = output_y; - - // Set build options - std::set build_opts; - - if(_run_sobel_x) - { - build_opts.insert("-DGRAD_X"); - } - - if(_run_sobel_y) - { - build_opts.insert("-DGRAD_Y"); - } - - // Create kernel - const std::string kernel_name = std::string("sobel_separable7x1"); - _kernel = create_kernel(compile_context, kernel_name, build_opts); - - const ICLTensor *input = _run_sobel_x ? _input_x : _input_y; - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 8; - constexpr unsigned int num_elems_written_per_iteration = 8; - constexpr unsigned int num_elems_read_per_iteration = 8; - constexpr unsigned int num_rows_read_per_iteration = 7; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - - AccessWindowRectangle input_x_access(input_x == nullptr ? nullptr : input_x->info(), 0, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration); - AccessWindowRectangle input_y_access(input_y == nullptr ? nullptr : input_y->info(), 0, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration); - AccessWindowHorizontal output_x_access(output_x == nullptr ? nullptr : output_x->info(), 0, num_elems_written_per_iteration); - AccessWindowHorizontal output_y_access(output_y == nullptr ? nullptr : output_y->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, input_x_access, input_y_access, output_x_access, output_y_access); - - output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(border_undefined); -} - -void CLSobel7x7VertKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window slice = window.first_slice_window_2D(); - - do - { - unsigned int idx = 0; - - add_2D_tensor_argument_if((_run_sobel_x), idx, _input_x, slice); - add_2D_tensor_argument_if((_run_sobel_x), idx, _output_x, slice); - add_2D_tensor_argument_if((_run_sobel_y), idx, _input_y, slice); - add_2D_tensor_argument_if((_run_sobel_y), idx, _output_y, slice); - - _kernel.setArg(idx++, 0 /*dummy*/); - - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_2D(slice)); -} diff --git a/src/core/CL/kernels/CLSobel7x7Kernel.h b/src/core/CL/kernels/CLSobel7x7Kernel.h deleted file mode 100644 index c85f0aedf9..0000000000 --- a/src/core/CL/kernels/CLSobel7x7Kernel.h +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLSOBEL7X7KERNEL_H -#define ARM_COMPUTE_CLSOBEL7X7KERNEL_H - -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to run the horizontal pass of 7x7 Sobel filter on a tensor. */ -class CLSobel7x7HorKernel : public ICLKernel -{ -public: - /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ - CLSobel7x7HorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel7x7HorKernel(const CLSobel7x7HorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel7x7HorKernel &operator=(const CLSobel7x7HorKernel &) = delete; - /** Allow instances of this class to be moved */ - CLSobel7x7HorKernel(CLSobel7x7HorKernel &&) = default; - /** Allow instances of this class to be moved */ - CLSobel7x7HorKernel &operator=(CLSobel7x7HorKernel &&) = default; - /** Default destructor */ - ~CLSobel7x7HorKernel() = default; - - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input; /**< Input tensor */ - ICLTensor *_output_x; /**< X output of horizontal pass */ - ICLTensor *_output_y; /**< Y output of horizontal pass */ - bool _run_sobel_x; /**< Do we need to run Sobel X ? */ - bool _run_sobel_y; /**< Do we need to run Sobel Y ? */ - BorderSize _border_size; /**< Border size */ -}; - -/** Interface for the kernel to run the vertical pass of 7x7 Sobel filter on a tensor. */ -class CLSobel7x7VertKernel : public ICLKernel -{ -public: - /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */ - CLSobel7x7VertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel7x7VertKernel(const CLSobel7x7VertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLSobel7x7VertKernel &operator=(const CLSobel7x7VertKernel &) = delete; - /** Allow instances of this class to be moved */ - CLSobel7x7VertKernel(CLSobel7x7VertKernel &&) = default; - /** Allow instances of this class to be moved */ - CLSobel7x7VertKernel &operator=(CLSobel7x7VertKernel &&) = default; - /** Default destructor */ - ~CLSobel7x7VertKernel() = default; - - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set and the corresponding input. - * - * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S32. - * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S32. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set and the corresponding input. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S32. - * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S32. - * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32. - * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input_x; /**< X input (X output of the horizontal pass) */ - const ICLTensor *_input_y; /**< Y input (Y output of the horizontal pass) */ - ICLTensor *_output_x; /**< X output of sobel */ - ICLTensor *_output_y; /**< Y output of sobel */ - bool _run_sobel_x; /**< Do we need to run sobel X? */ - bool _run_sobel_y; /**< Do we need to run sobel Y? */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLSOBEL7X7KERNEL_H */ diff --git a/src/core/CL/kernels/CLTableLookupKernel.cpp b/src/core/CL/kernels/CLTableLookupKernel.cpp deleted file mode 100644 index b82f4c9889..0000000000 --- a/src/core/CL/kernels/CLTableLookupKernel.cpp +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLTableLookupKernel.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLLut.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" - -#include -#include - -using namespace arm_compute; - -void CLTableLookupKernel::configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, lut, output); -} - -void CLTableLookupKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLLut *lut, ICLTensor *output) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16); - ARM_COMPUTE_ERROR_ON(lut == nullptr); - ARM_COMPUTE_ERROR_ON(DataType::U8 != lut->type() && DataType::S16 != lut->type()); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - // Create kernel - std::string kernel_name = (DataType::S16 == lut->type()) ? "tablelookup_S16" : "tablelookup_U8"; - _kernel = create_kernel(compile_context, kernel_name); - - // Set lut argument - unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters - _kernel.setArg(idx++, lut->cl_buffer()); - if(DataType::S16 == lut->type()) - { - _kernel.setArg(idx++, lut->index_offset()); - _kernel.setArg(idx++, static_cast(lut->num_elements())); - } - - // Configure kernel - constexpr unsigned int num_elems_processed_per_iteration = 8; - ICLSimple2DKernel::configure(input, output, num_elems_processed_per_iteration); -} diff --git a/src/core/CL/kernels/CLTableLookupKernel.h b/src/core/CL/kernels/CLTableLookupKernel.h deleted file mode 100644 index c8d15cbee2..0000000000 --- a/src/core/CL/kernels/CLTableLookupKernel.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLTABLELOOKUPKERNEL_H -#define ARM_COMPUTE_CLTABLELOOKUPKERNEL_H - -#include "src/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; -class ICLLut; - -/** Interface for the kernel to perform table lookup calculations. */ -class CLTableLookupKernel : public ICLSimple2DKernel -{ -public: - /** Initialise the kernel's input, lut and output. - * - * @param[in] input An input tensor. Data types supported: U8, S16. - * @param[in] lut The input LUT. Data types supported: U8, S16. - * @param[out] output The output tensor. Data types supported: U8, S16. - */ - void configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output); - /** Initialise the kernel's input, lut and output. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input An input tensor. Data types supported: U8, S16. - * @param[in] lut The input LUT. Data types supported: U8, S16. - * @param[out] output The output tensor. Data types supported: U8, S16. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLLut *lut, ICLTensor *output); -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLTABLELOOKUPKERNEL_H */ diff --git a/src/core/CL/kernels/CLThresholdKernel.cpp b/src/core/CL/kernels/CLThresholdKernel.cpp deleted file mode 100644 index 72c22f043c..0000000000 --- a/src/core/CL/kernels/CLThresholdKernel.cpp +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLThresholdKernel.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" - -#include - -namespace arm_compute -{ -void CLThresholdKernel::configure(const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output, info); -} - -void CLThresholdKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - - // Construct kernel name - std::string kernel_name = "threshold"; - - switch(info.type) - { - case ThresholdType::BINARY: - kernel_name += "_binary"; - break; - case ThresholdType::RANGE: - kernel_name += "_range"; - break; - default: - ARM_COMPUTE_ERROR("Thresholding type not recognized"); - break; - } - - // Create kernel - _kernel = create_kernel(compile_context, kernel_name); - - // Set arguments - unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters - _kernel.setArg(idx++, info.false_value); - _kernel.setArg(idx++, info.true_value); - _kernel.setArg(idx++, info.threshold); - - if(ThresholdType::RANGE == info.type) - { - _kernel.setArg(idx++, info.upper); - } - - // Make sure _kernel is initialized before calling the parent's configure - constexpr unsigned int num_elems_processed_per_iteration = 16; - ICLSimple2DKernel::configure(input, output, num_elems_processed_per_iteration); -} -} // namespace arm_compute diff --git a/src/core/CL/kernels/CLThresholdKernel.h b/src/core/CL/kernels/CLThresholdKernel.h deleted file mode 100644 index 511eaed1bf..0000000000 --- a/src/core/CL/kernels/CLThresholdKernel.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLTHRESHOLDKERNEL_H -#define ARM_COMPUTE_CLTHRESHOLDKERNEL_H - -#include "arm_compute/core/KernelDescriptors.h" -#include "arm_compute/core/Types.h" -#include "src/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ICLTensor; - -/** Interface for the thresholding kernel. */ -class CLThresholdKernel : public ICLSimple2DKernel -{ -public: - /**Initialise the kernel's input, output and threshold parameters. - * - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] info Threshold descriptor - */ - void configure(const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info); - /**Initialise the kernel's input, output and threshold parameters. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input An input tensor. Data types supported: U8 - * @param[out] output The output tensor. Data types supported: U8. - * @param[in] info Threshold descriptor - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info); -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NETHRESHOLDKERNEL_H */ diff --git a/src/core/CL/kernels/CLWarpAffineKernel.cpp b/src/core/CL/kernels/CLWarpAffineKernel.cpp deleted file mode 100644 index 600c67a528..0000000000 --- a/src/core/CL/kernels/CLWarpAffineKernel.cpp +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLWarpAffineKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "src/core/AccessWindowStatic.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -#include -#include -#include -#include - -namespace arm_compute -{ -namespace -{ -void options_add_matrix(std::set &options, const std::array &matrix) -{ - for(size_t i = 0; i < 6; ++i) - { - std::stringstream mat_str; - mat_str << "-DMAT" << i << "=" << matrix[i] << " "; - options.insert(mat_str.str()); - } -} -} // namespace - -BorderSize CLWarpAffineKernel::border_size() const -{ - return BorderSize(1); -} - -void CLWarpAffineKernel::configure(const ICLTensor *input, ICLTensor *output, const std::array &matrix, InterpolationPolicy policy) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output, matrix, policy); -} - -void CLWarpAffineKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array &matrix, InterpolationPolicy policy) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON(InterpolationPolicy::AREA == policy); - - _input = input; - _output = output; - - // Create build options - std::set options; - options_add_matrix(options, matrix); - options.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); - - // Create kernel - std::string interpolation_name = string_from_interpolation_policy(policy); - std::transform(interpolation_name.begin(), interpolation_name.end(), interpolation_name.begin(), ::tolower); - const std::string kernel_name = "warp_affine_" + interpolation_name; - _kernel = create_kernel(compile_context, kernel_name, options); - - // Set static kernel arguments - unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters - _kernel.setArg(idx++, input->info()->dimension(0)); - _kernel.setArg(idx++, input->info()->dimension(1)); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 4; - - Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); - - int total_right = ceil_to_multiple(input->info()->dimension(0), num_elems_processed_per_iteration); - const int access_right = total_right + (((total_right - input->info()->dimension(0)) == 0) ? border_size().right : 0); - - AccessWindowStatic input_access(input->info(), -border_size().left, -border_size().top, access_right, input->info()->dimension(1) + border_size().bottom); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - - ICLKernel::configure_internal(win); - - // Set config_id for enabling LWS tuning - _config_id = kernel_name; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(2)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(3)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(2)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(3)); - _config_id += "_"; - _config_id += lower_string(string_from_interpolation_policy(policy)); -} -} // namespace arm_compute diff --git a/src/core/CL/kernels/CLWarpAffineKernel.h b/src/core/CL/kernels/CLWarpAffineKernel.h deleted file mode 100644 index c600ee780d..0000000000 --- a/src/core/CL/kernels/CLWarpAffineKernel.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLWARPAFFINEKERNEL_H -#define ARM_COMPUTE_CLWARPAFFINEKERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the warp affine kernel.*/ -class CLWarpAffineKernel : public ICLSimple2DKernel -{ -public: - /** Initialize the function's source, destination, interpolation policy and border_mode. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U8. - * @param[in] matrix The perspective matrix. Must be 2x3 of type float - * The matrix argument requires 9 values, the last 3 values are ignored. - * @param[in] policy The interpolation type. - */ - void configure(const ICLTensor *input, ICLTensor *output, const std::array &matrix, InterpolationPolicy policy); - /** Initialize the function's source, destination, interpolation policy and border_mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U8. - * @param[in] matrix The perspective matrix. Must be 2x3 of type float - * The matrix argument requires 9 values, the last 3 values are ignored. - * @param[in] policy The interpolation type. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array &matrix, InterpolationPolicy policy); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLWARPAFFINEKERNEL_H */ diff --git a/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp b/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp deleted file mode 100644 index 5f20a0bdd3..0000000000 --- a/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLWarpPerspectiveKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "src/core/AccessWindowStatic.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include -#include -#include - -using namespace arm_compute; - -namespace -{ -inline void options_add_matrix(std::set &options, const std::array &matrix) -{ - for(size_t i = 0; i < 9; ++i) - { - std::stringstream mat_str; - mat_str << "-DMAT" << i << "=" << matrix[i] << " "; - options.insert(mat_str.str()); - } -} -} // namespace - -BorderSize CLWarpPerspectiveKernel::border_size() const -{ - return BorderSize(1); -} - -void CLWarpPerspectiveKernel::configure(const ICLTensor *input, ICLTensor *output, const std::array &matrix, InterpolationPolicy policy) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output, matrix, policy); -} - -void CLWarpPerspectiveKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array &matrix, InterpolationPolicy policy) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON(InterpolationPolicy::AREA == policy); - - _input = input; - _output = output; - - // Create build options - std::set options; - options_add_matrix(options, matrix); - options.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); - - // Create kernel - std::string interpolation_name = string_from_interpolation_policy(policy); - std::transform(interpolation_name.begin(), interpolation_name.end(), interpolation_name.begin(), ::tolower); - std::string kernel_name = "warp_perspective_" + interpolation_name; - _kernel = create_kernel(compile_context, kernel_name, options); - - // Set static kernel arguments - unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters - _kernel.setArg(idx++, input->info()->dimension(0)); - _kernel.setArg(idx++, input->info()->dimension(1)); - - // Configure kernel window - constexpr unsigned int num_elems_processed_per_iteration = 4; - - Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowStatic input_access(input->info(), -border_size().left, -border_size().top, input->info()->dimension(0) + border_size().right, input->info()->dimension(1) + border_size().bottom); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - - ICLKernel::configure_internal(win); -} diff --git a/src/core/CL/kernels/CLWarpPerspectiveKernel.h b/src/core/CL/kernels/CLWarpPerspectiveKernel.h deleted file mode 100644 index dcbe1c5560..0000000000 --- a/src/core/CL/kernels/CLWarpPerspectiveKernel.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H -#define ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/CL/ICLSimple2DKernel.h" - -namespace arm_compute -{ -class ICLTensor; -/** Interface for the warp perspective kernel.*/ -class CLWarpPerspectiveKernel : public ICLSimple2DKernel -{ -public: - /** Initialize the function's source, destination, interpolation policy and border_mode. - * - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U8. - * @param[in] matrix The perspective matrix. Must be 3x3 of type float. - * @param[in] policy The interpolation type. - */ - void configure(const ICLTensor *input, ICLTensor *output, const std::array &matrix, InterpolationPolicy policy); - /** Initialize the function's source, destination, interpolation policy and border_mode. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[out] output Destination tensor, Data types supported: U8. - * @param[in] matrix The perspective matrix. Must be 3x3 of type float. - * @param[in] policy The interpolation type. - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array &matrix, InterpolationPolicy policy); - - // Inherited methods overridden: - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H */ -- cgit v1.2.1