From 7d61ff041826782d14e67b7f5b7a2864905ff38b Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Mon, 18 Jan 2021 21:15:59 +0000 Subject: Make all CL Concatenate kernels and functions state-less Resolves COMPMID-3995 Change-Id: I84172bed20924f1d9ae3b4d14d7b321e9494296e Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4887 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas --- src/core/CL/CLKernels.h | 8 +- .../CL/kernels/CLBatchConcatenateLayerKernel.cpp | 150 ----------------- .../CL/kernels/CLBatchConcatenateLayerKernel.h | 82 ---------- .../CL/kernels/CLDepthConcatenateLayerKernel.cpp | 135 ---------------- .../CL/kernels/CLDepthConcatenateLayerKernel.h | 80 --------- .../CL/kernels/CLHeightConcatenateLayerKernel.cpp | 128 --------------- .../CL/kernels/CLHeightConcatenateLayerKernel.h | 77 --------- .../kernels/CLWidthConcatenate2TensorsKernel.cpp | 150 ----------------- .../CL/kernels/CLWidthConcatenate2TensorsKernel.h | 73 --------- .../kernels/CLWidthConcatenate4TensorsKernel.cpp | 180 --------------------- .../CL/kernels/CLWidthConcatenate4TensorsKernel.h | 77 --------- .../CL/kernels/CLWidthConcatenateLayerKernel.cpp | 123 -------------- .../CL/kernels/CLWidthConcatenateLayerKernel.h | 74 --------- 13 files changed, 1 insertion(+), 1336 deletions(-) delete mode 100644 src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp delete mode 100644 src/core/CL/kernels/CLBatchConcatenateLayerKernel.h delete mode 100644 src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp delete mode 100644 src/core/CL/kernels/CLDepthConcatenateLayerKernel.h delete mode 100644 src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp delete mode 100644 src/core/CL/kernels/CLHeightConcatenateLayerKernel.h delete mode 100644 src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp delete mode 100644 src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h delete mode 100644 src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp 
delete mode 100644 src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h delete mode 100644 src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp delete mode 100644 src/core/CL/kernels/CLWidthConcatenateLayerKernel.h (limited to 'src/core/CL') diff --git a/src/core/CL/CLKernels.h b/src/core/CL/CLKernels.h index f23871d4db..11f1d2d7cf 100644 --- a/src/core/CL/CLKernels.h +++ b/src/core/CL/CLKernels.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 Arm Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,7 +29,6 @@ #include "src/core/CL/kernels/CLAccumulateKernel.h" #include "src/core/CL/kernels/CLActivationLayerKernel.h" #include "src/core/CL/kernels/CLArgMinMaxLayerKernel.h" -#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h" #include "src/core/CL/kernels/CLBatchNormalizationLayerKernel.h" #include "src/core/CL/kernels/CLBatchToSpaceLayerKernel.h" #include "src/core/CL/kernels/CLBitwiseKernel.h" @@ -48,7 +47,6 @@ #include "src/core/CL/kernels/CLCropKernel.h" #include "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h" #include "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h" -#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h" #include "src/core/CL/kernels/CLDepthConvertLayerKernel.h" #include "src/core/CL/kernels/CLDepthToSpaceLayerKernel.h" #include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h" @@ -92,7 +90,6 @@ #include "src/core/CL/kernels/CLHOGDescriptorKernel.h" #include "src/core/CL/kernels/CLHOGDetectorKernel.h" #include "src/core/CL/kernels/CLHarrisCornersKernel.h" -#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h" #include "src/core/CL/kernels/CLHistogramKernel.h" #include "src/core/CL/kernels/CLIm2ColKernel.h" #include "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h" @@ -144,9 +141,6 @@ #include "src/core/CL/kernels/CLWarpAffineKernel.h" #include "src/core/CL/kernels/CLWarpPerspectiveKernel.h" #include 
"src/core/CL/kernels/CLWeightsReshapeKernel.h" -#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" -#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h" -#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h" #include "src/core/CL/kernels/CLWinogradFilterTransformKernel.h" #include "src/core/CL/kernels/CLWinogradInputTransformKernel.h" #include "src/core/CL/kernels/CLWinogradOutputTransformKernel.h" diff --git a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp deleted file mode 100644 index ccd6a5a0fc..0000000000 --- a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Utils.h" -#include "src/core/CL/CLValidate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/Cast.h" - -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace -{ -Status validate_arguments(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimX) != output->dimension(Window::DimX)); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimY) != output->dimension(Window::DimY)); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimZ) != output->dimension(Window::DimZ)); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(3) + batch_offset > output->dimension(3)); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(4, input, output); - - return Status{}; -} -} // namespace - -CLBatchConcatenateLayerKernel::CLBatchConcatenateLayerKernel() - : _batch_offset(0) -{ -} - -void CLBatchConcatenateLayerKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, batch_offset, output)); - - auto padding_info = get_padding_info({ input, output }); - - _batch_offset = batch_offset; - - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / input->element_size(), input->dimension(0)); - - // Add build options - CLBuildOptions build_opts; - 
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->data_type())); - build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->dimension(0) % num_elems_processed_per_iteration)); - if(is_data_type_quantized_asymmetric(input->data_type()) && input->quantization_info() != output->quantization_info()) - { - const UniformQuantizationInfo iq_info = input->quantization_info().uniform(); - const UniformQuantizationInfo oq_info = output->quantization_info().uniform(); - - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); - } - - // Create kernel - _kernel = create_kernel(compile_context, "concatenate", build_opts.options()); - - // Configure kernel window - auto win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); - win.set(3, Window::Dimension(0, input->tensor_shape()[3], 1)); - ICLKernel::configure_internal(win); - - // Set output valid region - output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); - - // Set config_id for enabling LWS tuning - _config_id = "concatenate_"; - _config_id += support::cpp11::to_string(3); - _config_id += "_"; - _config_id += support::cpp11::to_string(batch_offset); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->dimension(2)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->dimension(3)); - - 
ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); -} - -Status CLBatchConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *input, - unsigned int batch_offset, - const arm_compute::ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, batch_offset, output)); - return Status{}; -} - -void CLBatchConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const auto src = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC)); - auto dst = utils::cast::polymorphic_downcast(tensors.get_tensor(TensorType::ACL_DST)); - - Window slice = window.first_slice_window_3D(); - - const int offset_to_first_elements_in_bytes = _batch_offset * dst->info()->strides_in_bytes()[3]; - - unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the input and output parameters - _kernel.setArg(idx, offset_to_first_elements_in_bytes); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, src, slice); - add_3D_tensor_argument(idx, dst, slice); - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_3D(slice)); -} -} // namespace arm_compute diff --git a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.h b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.h deleted file mode 100644 index 54a89eb243..0000000000 --- a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H -#define ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the batch concatenate kernel. - * The input tensor will be concatenated into the output tensor. 
- */ -class CLBatchConcatenateLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLBatchConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBatchConcatenateLayerKernel(const CLBatchConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBatchConcatenateLayerKernel &operator=(const CLBatchConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLBatchConcatenateLayerKernel(CLBatchConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLBatchConcatenateLayerKernel &operator=(CLBatchConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~CLBatchConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All. - * @param[in] batch_offset The offset on axis # 3. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * - * @note: The output tensor's low two dimensions can't be smaller than the input one's. - * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. - * - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLBatchConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: All. - * @param[in] batch_offset The offset on axis # 3. - * @param[in] output Output tensor info. Data types supported: Same as @p input. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; - -private: - unsigned int _batch_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H */ diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp deleted file mode 100644 index eb5bfc2d86..0000000000 --- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Utils.h" -#include "src/core/CL/CLValidate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/Cast.h" - -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace -{ -Status validate_arguments(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimX) != output->dimension(Window::DimX)); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimY) != output->dimension(Window::DimY)); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(2) + depth_offset > output->dimension(2)); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(3, input, output); - - return Status{}; -} -} // namespace - -CLDepthConcatenateLayerKernel::CLDepthConcatenateLayerKernel() - : _depth_offset(0) -{ -} - -void CLDepthConcatenateLayerKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, depth_offset, output)); - - auto padding_info = get_padding_info({ input, output }); - - _depth_offset = depth_offset; - - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / input->element_size(), input->dimension(0)); - - // Add build options - CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + 
get_cl_type_from_data_type(input->data_type())); - build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->dimension(0) % num_elems_processed_per_iteration)); - if(is_data_type_quantized_asymmetric(input->data_type()) && input->quantization_info() != output->quantization_info()) - { - const UniformQuantizationInfo iq_info = input->quantization_info().uniform(); - const UniformQuantizationInfo oq_info = output->quantization_info().uniform(); - - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); - } - - // Create kernel - _kernel = create_kernel(compile_context, "concatenate", build_opts.options()); - - // Configure kernel window - auto win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); - win.set(Window::DimZ, Window::Dimension(0, input->tensor_shape().z(), 1)); - ICLKernel::configure_internal(win); - - // Set output valid region - output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); - - ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); -} - -Status CLDepthConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *input, - unsigned int depth_offset, - const arm_compute::ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, depth_offset, output)); - return Status{}; -} - -void CLDepthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const auto src = 
utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC)); - auto dst = utils::cast::polymorphic_downcast(tensors.get_tensor(TensorType::ACL_DST)); - - Window slice = window.first_slice_window_3D(); - - const int offset_to_first_elements_in_bytes = _depth_offset * dst->info()->strides_in_bytes()[2]; - - unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the input and output parameters - _kernel.setArg(idx, offset_to_first_elements_in_bytes); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, src, slice); - add_3D_tensor_argument(idx, dst, slice); - enqueue(queue, *this, slice, lws_hint()); - } - while(window.slide_window_slice_3D(slice)); -} -} // namespace arm_compute diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.h b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.h deleted file mode 100644 index 6c73bd4bf4..0000000000 --- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H -#define ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -/** Interface for the depth concatenate kernel. - * The input tensor will be concatenated into the output tensor. - */ -class CLDepthConcatenateLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLDepthConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthConcatenateLayerKernel(const CLDepthConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthConcatenateLayerKernel &operator=(const CLDepthConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLDepthConcatenateLayerKernel(CLDepthConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLDepthConcatenateLayerKernel &operator=(CLDepthConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~CLDepthConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] depth_offset The offset on the Z axis. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * - * @note: The output tensor's low two dimensions can't be smaller than the input one's. - * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. 
- * - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] depth_offset The offset on the Z axis. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; - -private: - unsigned int _depth_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H */ diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp deleted file mode 100644 index 8aa7366d50..0000000000 --- a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "src/core/CL/CLValidate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/Cast.h" - -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace -{ -Status validate_arguments(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimY) + height_offset > output->dimension(Window::DimY)); - - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != output->dimension(0)); - for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i) - { - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != output->dimension(i)); - } - ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); - - return Status{}; -} -} // namespace - -CLHeightConcatenateLayerKernel::CLHeightConcatenateLayerKernel() - : _height_offset(0) -{ -} - -Status CLHeightConcatenateLayerKernel::validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output) -{ 
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, height_offset, output)); - return Status{}; -} - -void CLHeightConcatenateLayerKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int height_offset, ITensorInfo *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, height_offset, output)); - - auto padding_info = get_padding_info({ input, output }); - - _height_offset = height_offset; - - // Add build options - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(4, input->dimension(0)); - - CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->element_size())); - build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.add_option("-DHEIGHT_OFFSET=" + support::cpp11::to_string(_height_offset)); - build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input->dimension(2))); - build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->dimension(0) % num_elems_processed_per_iteration)); - - if(is_data_type_quantized_asymmetric(input->data_type()) && input->quantization_info() != output->quantization_info()) - { - const UniformQuantizationInfo iq_info = input->quantization_info().uniform(); - const UniformQuantizationInfo oq_info = output->quantization_info().uniform(); - - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); - } - - // Create kernel - _kernel = create_kernel(compile_context, "concatenate_height", build_opts.options()); - // Configure kernel window - - // The window needs to 
be based on input as we copy all the heights of input - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); - ICLKernel::configure_internal(win.collapse(win, Window::DimZ)); - - // Set output valid region - output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); - - ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); -} - -void CLHeightConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const auto src = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC)); - auto dst = utils::cast::polymorphic_downcast(tensors.get_tensor(TensorType::ACL_DST)); - - unsigned int idx = 0; - add_4D_tensor_argument(idx, src, window); - add_4D_tensor_argument(idx, dst, window); - enqueue(queue, *this, window, lws_hint()); -} -} // namespace arm_compute diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.h b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.h deleted file mode 100644 index f4cb627052..0000000000 --- a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H -#define ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -/** Interface for the height concatenate kernel. - * The input tensor will be concatenated into the output tensor. - */ -class CLHeightConcatenateLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLHeightConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHeightConcatenateLayerKernel(const CLHeightConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLHeightConcatenateLayerKernel &operator=(const CLHeightConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLHeightConcatenateLayerKernel(CLHeightConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLHeightConcatenateLayerKernel &operator=(CLHeightConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~CLHeightConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All. - * @param[in] height_offset The starting offset on the Y axis for the output tensor. - * @param[out] output Output tensor. 
Data types supported: Same as @p input. - * - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int height_offset, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLHeightConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: All. - * @param[in] height_offset The starting offset on the Y axis for the output tensor. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; - -private: - unsigned int _height_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H */ diff --git a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp deleted file mode 100644 index d6697ba46b..0000000000 --- a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "src/core/CL/CLValidate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "src/core/utils/helpers/tensor_info.h" -#include "support/Cast.h" - -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace -{ -Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input1); - ARM_COMPUTE_RETURN_ERROR_ON(input1->data_type() == DataType::UNKNOWN); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2, output); - ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) + input2->dimension(0) > output->dimension(0)); - - for(size_t i = 1; i < Coordinates::num_max_dimensions; ++i) - { - ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(i) != output->dimension(i)); - ARM_COMPUTE_RETURN_ERROR_ON(input2->dimension(i) != output->dimension(i)); - } - ARM_COMPUTE_RETURN_ERROR_ON(input1->num_dimensions() > 4); - - return Status{}; -} -} // namespace - -Status CLWidthConcatenate2TensorsKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) -{ - 
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, output)); - return Status{}; -} - -void CLWidthConcatenate2TensorsKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1, input2, output)); - - auto padding_info = get_padding_info({ input1, input2, output }); - - const unsigned int min_dimension = std::min(input1->dimension(0), input2->dimension(0)); - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(8, min_dimension); - const unsigned int vec_size_leftover = output->dimension(0) % num_elems_processed_per_iteration; - - // Add build options - CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input1->data_type())); - build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(vec_size_leftover)); - build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input1->dimension(2))); - build_opts.add_option("-DINPUT1_WIDTH=" + support::cpp11::to_string(input1->dimension(0))); - build_opts.add_option("-DINPUT2_WIDTH=" + support::cpp11::to_string(input2->dimension(0))); - build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(input1->element_size())); - build_opts.add_option("-DINPUT1_ROTATE_N=" + support::cpp11::to_string((input1->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration)); - - // If input have different quantization info set quantization parameters needed for the re-quantization process - const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(output, input1, input2); - if(is_data_type_quantized_asymmetric(input1->data_type()) && have_different_qinfo) - { - const UniformQuantizationInfo iq1_info = 
input1->quantization_info().uniform(); - const UniformQuantizationInfo iq2_info = input2->quantization_info().uniform(); - const UniformQuantizationInfo oq_info = output->quantization_info().uniform(); - - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq1_info.offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale)); - build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(iq2_info.offset)); - build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); - } - - // Create kernel - _kernel = create_kernel(compile_context, "concatenate_width_x2", build_opts.options()); - - // Configure kernel window - Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); - ICLKernel::configure_internal(win.collapse(win, Window::DimZ)); - - // Set output valid region - output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); - ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); - - // Set config_id for enabling LWS tuning - _config_id = "concatenate_width_x2_"; - _config_id += lower_string(string_from_data_type(input1->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input1->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input1->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input2->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input2->dimension(1)); -} - -void CLWidthConcatenate2TensorsKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - 
Window slice = window.first_slice_window_4D(); - - const auto src0 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC)); - const auto src1 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 1)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); - - do - { - unsigned int idx = 0; - add_4D_tensor_argument(idx, src0, slice); - add_4D_tensor_argument(idx, src1, slice); - add_4D_tensor_argument(idx, dst, slice); - enqueue(queue, *this, window, lws_hint()); - } - while(window.slide_window_slice_4D(slice)); -} -} // namespace arm_compute diff --git a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h deleted file mode 100644 index 2af89e12eb..0000000000 --- a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H -#define ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -/** Interface for the width concatenate kernel of 2 tensors. - * The input1 and input2 tensors will be concatenated into the output tensor. - */ -class CLWidthConcatenate2TensorsKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLWidthConcatenate2TensorsKernel() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenate2TensorsKernel(const CLWidthConcatenate2TensorsKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenate2TensorsKernel &operator=(const CLWidthConcatenate2TensorsKernel &) = delete; - /** Allow instances of this class to be moved */ - CLWidthConcatenate2TensorsKernel(CLWidthConcatenate2TensorsKernel &&) = default; - /** Allow instances of this class to be moved */ - CLWidthConcatenate2TensorsKernel &operator=(CLWidthConcatenate2TensorsKernel &&) = default; - /** Default destructor */ - ~CLWidthConcatenate2TensorsKernel() = default; - /** Initialise the kernel's input1s and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 First input tensor. Data types supported: All. - * @param[in] input2 Second input tensor. Data types supported: same as @p input1 - * @param[out] output Output tensor. Data types supported: Same as @p input1. 
- */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate2TensorsKernel - * - * @param[in] input1 First tensor info. Data types supported: All. - * @param[in] input2 Second tensor info. Data types supported: same as @p input1 - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H */ diff --git a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp deleted file mode 100644 index 7ecdd30224..0000000000 --- a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Utils.h" -#include "src/core/CL/CLValidate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "src/core/utils/helpers/tensor_info.h" -#include "support/Cast.h" - -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace -{ -Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, input3, input4, output); - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input1); - ARM_COMPUTE_RETURN_ERROR_ON(input1->data_type() == DataType::UNKNOWN); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2, input3, input4, output); - ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) + input2->dimension(0) + input3->dimension(0) + input4->dimension(0) > output->dimension(0)); - - for(size_t i = 1; i < Coordinates::num_max_dimensions; ++i) - { - ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(i) != output->dimension(i)); - ARM_COMPUTE_RETURN_ERROR_ON(input2->dimension(i) != output->dimension(i)); - ARM_COMPUTE_RETURN_ERROR_ON(input3->dimension(i) != output->dimension(i)); - ARM_COMPUTE_RETURN_ERROR_ON(input4->dimension(i) != 
output->dimension(i)); - } - ARM_COMPUTE_RETURN_ERROR_ON(input1->num_dimensions() > 4); - - return Status{}; -} -} // namespace - -CLWidthConcatenate4TensorsKernel::CLWidthConcatenate4TensorsKernel() -{ -} - -Status CLWidthConcatenate4TensorsKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, input3, input4, output)); - return Status{}; -} - -void CLWidthConcatenate4TensorsKernel::configure(const CLCompileContext &compile_context, - ITensorInfo *input1, ITensorInfo *input2, - ITensorInfo *input3, ITensorInfo *input4, - ITensorInfo *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, input3, input4, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1, input2, input3, input4, output)); - - auto padding_info = get_padding_info({ input1, input2, input3, input4, output }); - const unsigned int min_dimension = std::min(std::min(input1->dimension(0), input2->dimension(0)), std::min(input3->dimension(0), input4->dimension(0))); - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(8, min_dimension); - const unsigned int vec_size_leftover = output->dimension(0) % num_elems_processed_per_iteration; - - // Add build options - CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input1->data_type())); - build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(vec_size_leftover)); - build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input1->dimension(2))); - build_opts.add_option("-DINPUT1_WIDTH=" + support::cpp11::to_string(input1->dimension(0))); - build_opts.add_option("-DINPUT2_WIDTH=" + support::cpp11::to_string(input2->dimension(0))); - build_opts.add_option("-DINPUT3_WIDTH=" + 
support::cpp11::to_string(input3->dimension(0))); - build_opts.add_option("-DINPUT4_WIDTH=" + support::cpp11::to_string(input4->dimension(0))); - build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(input1->element_size())); - build_opts.add_option("-DINPUT1_ROTATE_N=" + support::cpp11::to_string((input1->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration)); - build_opts.add_option("-DINPUT2_ROTATE_N=" + support::cpp11::to_string((input1->dimension(0) + input2->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration)); - build_opts.add_option("-DINPUT3_ROTATE_N=" + support::cpp11::to_string((input1->dimension(0) + input2->dimension(0) + input3->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration)); - - // If input have different quantization info set quantization parameters needed for the re-quantization process - const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(output, input1, input2, input3, input4); - if(is_data_type_quantized_asymmetric(input1->data_type()) && have_different_qinfo) - { - const UniformQuantizationInfo iq1_info = input1->quantization_info().uniform(); - const UniformQuantizationInfo iq2_info = input2->quantization_info().uniform(); - const UniformQuantizationInfo iq3_info = input3->quantization_info().uniform(); - const UniformQuantizationInfo iq4_info = input4->quantization_info().uniform(); - const UniformQuantizationInfo oq_info = output->quantization_info().uniform(); - - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq1_info.offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale)); - build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(iq2_info.offset)); - build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale)); - build_opts.add_option("-DOFFSET_IN3=" + 
float_to_string_with_full_precision(iq3_info.offset)); - build_opts.add_option("-DSCALE_IN3=" + float_to_string_with_full_precision(iq3_info.scale)); - build_opts.add_option("-DOFFSET_IN4=" + float_to_string_with_full_precision(iq4_info.offset)); - build_opts.add_option("-DSCALE_IN4=" + float_to_string_with_full_precision(iq4_info.scale)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); - } - - // Create kernel - _kernel = create_kernel(compile_context, "concatenate_width_x4", build_opts.options()); - - // Configure kernel window - Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration)); - ICLKernel::configure_internal(win.collapse(win, Window::DimZ)); - - // Set output valid region - output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); - ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); - - // Set config_id for enabling LWS tuning - _config_id = "concatenate_width_x4_"; - _config_id += lower_string(string_from_data_type(input1->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input1->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input1->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input2->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input2->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input3->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input3->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input4->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input4->dimension(1)); -} - -void CLWidthConcatenate4TensorsKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) -{ - 
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const auto src0 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC)); - const auto src1 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 1)); - const auto src2 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 2)); - const auto src3 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 3)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); - - Window slice = window.first_slice_window_4D(); - - do - { - unsigned int idx = 0; - add_4D_tensor_argument(idx, src0, slice); - add_4D_tensor_argument(idx, src1, slice); - add_4D_tensor_argument(idx, src2, slice); - add_4D_tensor_argument(idx, src3, slice); - add_4D_tensor_argument(idx, dst, slice); - enqueue(queue, *this, window, lws_hint()); - } - while(window.slide_window_slice_4D(slice)); -} -} // namespace arm_compute diff --git a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h deleted file mode 100644 index 0caf87114d..0000000000 --- a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited.
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H -#define ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -/** Interface for the width concatenate kernel of 4 tensors. - * All input tensors will be concatenated into the output tensor. 
- */ -class CLWidthConcatenate4TensorsKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLWidthConcatenate4TensorsKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenate4TensorsKernel(const CLWidthConcatenate4TensorsKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenate4TensorsKernel &operator=(const CLWidthConcatenate4TensorsKernel &) = delete; - /** Allow instances of this class to be moved */ - CLWidthConcatenate4TensorsKernel(CLWidthConcatenate4TensorsKernel &&) = default; - /** Allow instances of this class to be moved */ - CLWidthConcatenate4TensorsKernel &operator=(CLWidthConcatenate4TensorsKernel &&) = default; - /** Default destructor */ - ~CLWidthConcatenate4TensorsKernel() = default; - /** Initialise the kernel's input1s and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input1 First input tensor. Data types supported: All. - * @param[in] input2 Second input tensor. Data types supported: same as @p input1 - * @param[in] input3 Third input tensor. Data types supported: same as @p input1 - * @param[in] input4 Fourth input tensor. Data types supported: same as @p input1 - * @param[out] output Output tensor. Data types supported: Same as @p input1. - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *input3, ITensorInfo *input4, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate4TensorsKernel - * - * @param[in] input1 First tensor info. Data types supported: All. - * @param[in] input2 Second tensor info. Data types supported: same as @p input1 - * @param[in] input3 Third tensor info. Data types supported: same as @p input1 - * @param[in] input4 Fourth tensor info. 
Data types supported: same as @p input1 - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H */ diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp deleted file mode 100644 index 30d0a481bd..0000000000 --- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Utils.h" -#include "src/core/CL/CLValidate.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/Cast.h" - -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace -{ -Status validate_arguments(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); - - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) + width_offset > output->dimension(0)); - - for(size_t i = 1; i < Coordinates::num_max_dimensions; ++i) - { - ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != output->dimension(i)); - } - ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); - - return Status{}; -} -} // namespace - -CLWidthConcatenateLayerKernel::CLWidthConcatenateLayerKernel() -{ -} - -Status CLWidthConcatenateLayerKernel::validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, width_offset, output)); - return Status{}; -} - -void CLWidthConcatenateLayerKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int width_offset, ITensorInfo *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, width_offset, output)); - - auto padding_info = get_padding_info({ input, output }); - - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16, input->dimension(0)); - - // Add build options - CLBuildOptions build_opts; - 
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->data_type())); - build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); - build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->dimension(0) % num_elems_processed_per_iteration)); - build_opts.add_option("-DWIDTH_OFFSET=" + support::cpp11::to_string(width_offset)); - build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input->dimension(2))); - - if(is_data_type_quantized_asymmetric(input->data_type()) && input->quantization_info() != output->quantization_info()) - { - const UniformQuantizationInfo iqinfo = input->quantization_info().uniform(); - const UniformQuantizationInfo oqinfo = output->quantization_info().uniform(); - - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iqinfo.offset)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oqinfo.offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iqinfo.scale)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oqinfo.scale)); - } - - // Create kernel - _kernel = create_kernel(compile_context, "concatenate_width", build_opts.options()); - // Configure kernel window - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); - ICLKernel::configure_internal(win.collapse(win, Window::DimZ)); - - // Set output valid region - output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); - - ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); -} - -void CLWidthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const auto src = utils::cast::polymorphic_downcast(tensors.get_const_tensor(TensorType::ACL_SRC)); - auto dst = 
utils::cast::polymorphic_downcast(tensors.get_tensor(TensorType::ACL_DST)); - - unsigned int idx = 0; - add_4D_tensor_argument(idx, src, window); - add_4D_tensor_argument(idx, dst, window); - enqueue(queue, *this, window, lws_hint()); -} -} // namespace arm_compute diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.h b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.h deleted file mode 100644 index 09c3f4455d..0000000000 --- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H -#define ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H - -#include "arm_compute/core/Types.h" -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -/** Interface for the width concatenate kernel. 
- * The input tensor will be concatenated into the output tensor. - */ -class CLWidthConcatenateLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLWidthConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenateLayerKernel(const CLWidthConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLWidthConcatenateLayerKernel &operator=(const CLWidthConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLWidthConcatenateLayerKernel(CLWidthConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLWidthConcatenateLayerKernel &operator=(CLWidthConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~CLWidthConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] compile_context The compile context to be used. - * @param[in] input Input tensor. Data types supported: All. - * @param[in] width_offset The offset on the X axis. - * @param[in,out] output Output tensor. Data types supported: Same as @p input. - * - */ - void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int width_offset, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: All. - * @param[in] width_offset The offset on the X axis. - * @param[in] output Output tensor info. Data types supported: Same as @p input. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H */ -- cgit v1.2.1