From e2696b1f9bb28b69beff99f54addd48f60823ddb Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 3 Dec 2020 20:37:43 +0000 Subject: Wrap Flatten layer over reshape Flatten layer is lowered into a Reshape layer. Remove (CL/NE)FlatternLayerKernel. Partially Resolves: COMPMID-3996 Signed-off-by: Georgios Pinitas Change-Id: Id9e2ddfe2e2dd793541badff3490c05e4c908f88 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4660 Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio --- Android.bp | 2 - arm_compute/runtime/CL/functions/CLFlattenLayer.h | 11 +- .../runtime/NEON/functions/NEFlattenLayer.h | 11 +- .../runtime/NEON/functions/NEFullyConnectedLayer.h | 4 +- docs/00_introduction.dox | 4 +- src/core/CL/CLKernelLibrary.cpp | 5 - src/core/CL/CLKernels.h | 1 - src/core/CL/cl_kernels/flatten.cl | 74 ----------- src/core/CL/kernels/CLFlattenLayerKernel.cpp | 137 -------------------- src/core/CL/kernels/CLFlattenLayerKernel.h | 83 ------------- src/core/NEON/NEKernels.h | 1 - src/core/NEON/kernels/NEFlattenLayerKernel.cpp | 138 --------------------- src/core/NEON/kernels/NEFlattenLayerKernel.h | 81 ------------ src/runtime/CL/functions/CLFlattenLayer.cpp | 32 +++-- src/runtime/NEON/functions/NEFlattenLayer.cpp | 24 +++- .../NEON/functions/NEFullyConnectedLayer.cpp | 11 +- src/runtime/NEON/functions/NERNNLayer.cpp | 1 - 17 files changed, 67 insertions(+), 553 deletions(-) delete mode 100644 src/core/CL/cl_kernels/flatten.cl delete mode 100644 src/core/CL/kernels/CLFlattenLayerKernel.cpp delete mode 100644 src/core/CL/kernels/CLFlattenLayerKernel.h delete mode 100644 src/core/NEON/kernels/NEFlattenLayerKernel.cpp delete mode 100644 src/core/NEON/kernels/NEFlattenLayerKernel.h diff --git a/Android.bp b/Android.bp index c5213f4f09..5a219610f8 100644 --- a/Android.bp +++ b/Android.bp @@ -123,7 +123,6 @@ cc_library_static { "src/core/CL/kernels/CLFFTScaleKernel.cpp", "src/core/CL/kernels/CLFastCornersKernel.cpp", "src/core/CL/kernels/CLFillBorderKernel.cpp", - "src/core/CL/kernels/CLFlattenLayerKernel.cpp", "src/core/CL/kernels/CLFloorKernel.cpp", "src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp", "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp", @@ -274,7 +273,6 @@ cc_library_static { "src/core/NEON/kernels/NEFastCornersKernel.cpp", "src/core/NEON/kernels/NEFillArrayKernel.cpp", "src/core/NEON/kernels/NEFillBorderKernel.cpp", - "src/core/NEON/kernels/NEFlattenLayerKernel.cpp", "src/core/NEON/kernels/NEFloorKernel.cpp", "src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp", "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp", diff --git a/arm_compute/runtime/CL/functions/CLFlattenLayer.h b/arm_compute/runtime/CL/functions/CLFlattenLayer.h index f5f4ff554f..ffe06aa610 100644 --- a/arm_compute/runtime/CL/functions/CLFlattenLayer.h +++ b/arm_compute/runtime/CL/functions/CLFlattenLayer.h @@ -25,7 +25,8 @@ #define ARM_COMPUTE_CLFLATTENLAYER_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" +#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h" +#include "arm_compute/runtime/IFunction.h" namespace arm_compute { @@ -38,7 +39,7 @@ class ITensorInfo; * -# @ref CLFlattenLayerKernel * */ -class CLFlattenLayer : public ICLSimpleFunction +class CLFlattenLayer : public IFunction { public: /** Initialise the kernel's input and output. @@ -68,6 +69,12 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run() override; + +private: + CLReshapeLayer _reshape{}; }; } // namespace arm_compute diff --git a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h index 9f0d5226de..1104aac77f 100644 --- a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h @@ -25,7 +25,8 @@ #define ARM_COMPUTE_NEFLATTENLAYER_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" namespace arm_compute { @@ -33,7 +34,7 @@ class ITensor; class ITensorInfo; /** Basic function to execute flatten layer kernel. */ -class NEFlattenLayer : public INESimpleFunctionNoBorder +class NEFlattenLayer : public IFunction { public: /** Initialise the kernel's input and output. @@ -54,6 +55,12 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output); + + // Inherited methods overridden: + void run() override; + +private: + NEReshapeLayer _reshape{}; }; } // namespace arm_compute diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h index 0a7748a94b..8bf3e95f81 100644 --- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h @@ -35,8 +35,6 @@ namespace arm_compute { -class NEFlattenLayerKernel; - /** Basic function to reshape the weights of Fully Connected layer with NEON. This function calls the following kernels: * * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. @@ -181,7 +179,7 @@ private: MemoryGroup _memory_group; IWeightsManager *_weights_manager; - std::unique_ptr _flatten_kernel; + NEFlattenLayer _flatten; NEConvertFullyConnectedWeights _convert_weights; weights_transformations::NEConvertFullyConnectedWeightsManaged _convert_weights_managed; NEFullyConnectedLayerReshapeWeights _reshape_weights_function; diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox index 4baf33d7c1..7ad4831082 100644 --- a/docs/00_introduction.dox +++ b/docs/00_introduction.dox @@ -926,7 +926,7 @@ v18.11 Public major release - Improved doxygen documentation adding a list of the existing functions. - Add 4D tensors support to - CLWidthConcatenateLayer - - @ref CLFlattenLayer + - CLFlattenLayer - @ref CLSoftmaxLayer - Add dot product support for @ref CLDepthwiseConvolutionLayer3x3NHWCKernel non-unit stride - Add SVE support @@ -1185,7 +1185,7 @@ v17.09 Public major release - @ref CLDepthwiseConvolutionLayer3x3NCHWKernel @ref CLDepthwiseConvolutionLayer3x3NHWCKernel CLDepthwiseIm2ColKernel CLDepthwiseVectorToTensorKernel CLDepthwiseWeightsReshapeKernel / CLDepthwiseConvolutionLayer3x3 @ref CLDepthwiseConvolutionLayer CLDepthwiseSeparableConvolutionLayer - @ref CLDequantizationLayerKernel / @ref CLDequantizationLayer - @ref CLDirectConvolutionLayerKernel / @ref CLDirectConvolutionLayer - - @ref CLFlattenLayer + - CLFlattenLayer - @ref CLFloorKernel / @ref CLFloor - CLGEMMTranspose1xW - @ref CLGEMMMatrixVectorMultiplyKernel diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp index ae8b879be3..dadb3f4db1 100644 --- a/src/core/CL/CLKernelLibrary.cpp +++ b/src/core/CL/CLKernelLibrary.cpp @@ -198,7 +198,6 @@ const std::map CLKernelLibrary::_kernel_program_map = { "fill_image_borders_constant", "fill_border.cl" }, { "fill_image_borders_replicate", "fill_border.cl" }, { "finalize", "optical_flow_pyramid_lk.cl" }, - { "flatten", "flatten.cl" }, { "floor_layer", "floor.cl" }, { "fuse_batchnormalization_layer", "batchnormalization_layer.cl" }, { "gather", "gather.cl" }, @@ -670,10 +669,6 @@ const std::map CLKernelLibrary::_program_source_map = { "fill_border.cl", #include "./cl_kernels/fill_border.clembed" - }, - { - "flatten.cl", -#include "./cl_kernels/flatten.clembed" }, { "floor.cl", diff --git a/src/core/CL/CLKernels.h b/src/core/CL/CLKernels.h index eea90eb599..a9654ecc1f 100644 --- a/src/core/CL/CLKernels.h +++ b/src/core/CL/CLKernels.h @@ -70,7 +70,6 @@ #include "src/core/CL/kernels/CLFFTScaleKernel.h" #include "src/core/CL/kernels/CLFastCornersKernel.h" #include "src/core/CL/kernels/CLFillBorderKernel.h" -#include "src/core/CL/kernels/CLFlattenLayerKernel.h" #include "src/core/CL/kernels/CLFloorKernel.h" #include "src/core/CL/kernels/CLFuseBatchNormalizationKernel.h" #include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" diff --git a/src/core/CL/cl_kernels/flatten.cl b/src/core/CL/cl_kernels/flatten.cl deleted file mode 100644 index a1a2e4696b..0000000000 --- a/src/core/CL/cl_kernels/flatten.cl +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "helpers.h" - -#if defined(DATA_TYPE) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) && defined(SRC_DEPTH) - -/** This opencl kernel flattens the first 3 dimensions of the input tensor - * - * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float - * @note The width, height and depth of the input tensor must be passed at compile time using -DSRC_WIDTH, -DSRC_HEIGHT and -DSRC_DEPTH. e.g. -DSRC_WIDTH=24, -DSRC_HEIGHT=24, -DSRC_DEPTH=16 - * @note If the output has 3 dimensions, the 2nd dimension of the output tensor must be passed at compile time using -DDST_DIM1. e.g -DDST_DIM1=3 - * - * @param[in] src_ptr Pointer to the source tensor. Supported data types: All - * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) - * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] src_step_z src_stride_z * number of elements along Y processed per workitem(in bytes) - * @param[in] src_stride_w Stride of the source tensor in W dimension (in bytes) - * @param[in] src_step_w src_stride_w * number of elements along Y processed per workitem(in bytes) - * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor - * @param[out] dst_ptr Pointer to the destination tensor. Same as @p src_ptr - * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) - * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) - * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) - * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor - */ -__kernel void flatten( - TENSOR4D_DECLARATION(src), - TENSOR3D_DECLARATION(dst)) -{ - Tensor4D src = CONVERT_TO_TENSOR4D_STRUCT(src, SRC_DEPTH); - - uint c = get_global_id(2) % SRC_DEPTH; // input feature map - uint b0 = get_global_id(2) / SRC_DEPTH; // batch id - uint b1 = 0; - -#if defined(DST_DIM1) - uint b_tmp = b0; - b0 = b_tmp % DST_DIM1; // batch id0 - b1 = b_tmp / DST_DIM1; // batch id1 -#endif // defined(DST_DIM1) - - __global uchar *output_ptr = dst_ptr + dst_offset_first_element_in_bytes + (get_global_id(0) + get_global_id(1) * (uint)SRC_WIDTH + c * (uint)(SRC_WIDTH * SRC_HEIGHT)) * sizeof( - DATA_TYPE) + b0 * dst_stride_y + b1 * dst_stride_z; - - *((__global DATA_TYPE *)output_ptr) = *((__global DATA_TYPE *)src.ptr); -} -#endif // defined(DATA_TYPE) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) \ No newline at end of file diff --git a/src/core/CL/kernels/CLFlattenLayerKernel.cpp b/src/core/CL/kernels/CLFlattenLayerKernel.cpp deleted file mode 100644 index b3f84b6928..0000000000 --- a/src/core/CL/kernels/CLFlattenLayerKernel.cpp +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/CL/kernels/CLFlattenLayerKernel.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "support/StringSupport.h" - -using namespace arm_compute::misc::shape_calculator; - -namespace arm_compute -{ -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); - - // Checks performed when output is configured - if(output->total_size() != 0) - { - const TensorInfo tensor_info_output = input->clone()->set_tensor_shape(compute_flatten_shape(input)); - - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info_output); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output); - } - - return Status{}; -} -} // namespace - -CLFlattenLayerKernel::CLFlattenLayerKernel() - : _input(nullptr), _output(nullptr) -{ -} - -void CLFlattenLayerKernel::configure(const ICLTensor *input, ICLTensor *output) -{ - configure(CLKernelLibrary::get().get_compile_context(), input, output); -} - -void CLFlattenLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - // Output tensor auto initialization if not yet initialized - auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(compute_flatten_shape(input->info()))); - - auto padding_info = get_padding_info({ input, output }); - - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info())); - - _input = input; - _output = output; - - CLBuildOptions build_opts; - build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(data_size_from_type(input->info()->data_type()))); - build_opts.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(input->info()->dimension(0))); - build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(input->info()->dimension(1))); - build_opts.add_option("-DSRC_DEPTH=" + support::cpp11::to_string(input->info()->dimension(2))); - build_opts.add_option_if(output->info()->num_dimensions() > 2, "-DDST_DIM1=" + support::cpp11::to_string(output->info()->dimension(1))); - - // Create kernel - _kernel = create_kernel(compile_context, "flatten", build_opts.options()); - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps()); - ICLKernel::configure_internal(win); - - output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - - ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); - - // Set config_id for enabling LWS tuning - _config_id = "flatten"; - _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(2)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(output->info()->dimension(1)); -} - -Status CLFlattenLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output)); - return Status{}; -} - -void CLFlattenLayerKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window); - - Window collapsed_window = window.collapse(ICLKernel::window(), Window::DimZ); - - Window output_window; - output_window.use_tensor_dimensions(_output->info()->tensor_shape()); - - // Run kernel - unsigned int idx = 0; - add_4D_tensor_argument(idx, _input, collapsed_window); - add_3D_tensor_argument(idx, _output, output_window); - enqueue(queue, *this, collapsed_window, lws_hint()); -} -} // namespace arm_compute diff --git a/src/core/CL/kernels/CLFlattenLayerKernel.h b/src/core/CL/kernels/CLFlattenLayerKernel.h deleted file mode 100644 index 2471cf2e4a..0000000000 --- a/src/core/CL/kernels/CLFlattenLayerKernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLFLATTENLAYERKERNEL_H -#define ARM_COMPUTE_CLFLATTENLAYERKERNEL_H - -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL interface for the flatten kernel.*/ -class CLFlattenLayerKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLFlattenLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFlattenLayerKernel(const CLFlattenLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLFlattenLayerKernel &operator=(const CLFlattenLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - CLFlattenLayerKernel(CLFlattenLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - CLFlattenLayerKernel &operator=(CLFlattenLayerKernel &&) = default; - /** Set the input and output of the kernel. - * - * @param[in] input First input tensor to flatten with at least 3 dimensions. - * The dimensions above the third will be interpreted as batches. Data types supported: All. - * @param[out] output Output tensor with shape [w*h*d, input_batches] where: - * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input - */ - void configure(const ICLTensor *input, ICLTensor *output); - /** Set the input and output of the kernel. - * - * @param[in] compile_context The compile context to be used. - * @param[in] input First input tensor to flatten with at least 3 dimensions. - * The dimensions above the third will be interpreted as batches. Data types supported: All. - * @param[out] output Output tensor with shape [w*h*d, input_batches] where: - * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLFlattenLayerKernel - * - * @param[in] input First input tensor to flatten with at least 3 dimensions. - * The dimensions above the third will be interpreted as batches. Data types supported: All. - * @param[out] output Output tensor with shape [w*h*d, input_batches] where: - * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -public: - const ICLTensor *_input; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CLFLATTENLAYERKERNEL_H */ diff --git a/src/core/NEON/NEKernels.h b/src/core/NEON/NEKernels.h index 091130c23a..55aa514f36 100644 --- a/src/core/NEON/NEKernels.h +++ b/src/core/NEON/NEKernels.h @@ -69,7 +69,6 @@ #include "src/core/NEON/kernels/NEFastCornersKernel.h" #include "src/core/NEON/kernels/NEFillArrayKernel.h" #include "src/core/NEON/kernels/NEFillBorderKernel.h" -#include "src/core/NEON/kernels/NEFlattenLayerKernel.h" #include "src/core/NEON/kernels/NEFloorKernel.h" #include "src/core/NEON/kernels/NEFuseBatchNormalizationKernel.h" #include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" diff --git a/src/core/NEON/kernels/NEFlattenLayerKernel.cpp b/src/core/NEON/kernels/NEFlattenLayerKernel.cpp deleted file mode 100644 index 8c0dc10ee8..0000000000 --- a/src/core/NEON/kernels/NEFlattenLayerKernel.cpp +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEFlattenLayerKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "src/core/CPP/Validate.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include "arm_compute/core/utils/misc/ShapeCalculator.h" - -namespace arm_compute -{ -using namespace misc::shape_calculator; - -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); - // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. - - // Checks performed when output is configured - if(output->total_size() != 0) - { - const TensorInfo tensor_info_output = input->clone()->set_tensor_shape(compute_flatten_shape(input)); - - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info_output); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output); - } - - return Status{}; -} - -std::pair validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) -{ - // Output tensor auto initialization if not yet initialized - auto_init_if_empty(*output, input->clone()->set_tensor_shape(compute_flatten_shape(input))); - - Window win = calculate_max_window(*input, Steps()); // Flatten does not need paddings - - output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape())); - - return std::make_pair(Status{}, win); -} -} // namespace - -NEFlattenLayerKernel::NEFlattenLayerKernel() - : _input(nullptr), _output(nullptr) -{ -} - -void NEFlattenLayerKernel::configure(const ITensor *input, ITensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info())); - - _input = input; - _output = output; - - // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), output->info()); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - INEKernel::configure(win_config.second); -} - -Status NEFlattenLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get()).first); - return Status{}; -} - -void NEFlattenLayerKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - - const size_t in_width = _input->info()->dimension(0); - const size_t in_height = _input->info()->dimension(1); - const size_t out_step_x = in_width * _input->info()->element_size(); - const size_t out_step_y = out_step_x * in_height; - - Window in_window(window); - in_window.set(Window::DimX, Window::Dimension(0, 1, 1)); - - Window out_window; - out_window.use_tensor_dimensions(_output->info()->tensor_shape()); - out_window.set(Window::DimX, Window::Dimension(out_window.x().start(), out_window.x().end(), in_width)); - - Window in_slice = in_window.first_slice_window_3D(); - Window out_slice = out_window.first_slice_window_1D(); - - do - { - Iterator in(_input, in_slice); - Iterator out(_output, out_slice); - - uint8_t *out_ptr = out.ptr(); - - execute_window_loop(in_slice, [&](const Coordinates & id) - { - memcpy(out_ptr + id.y() * out_step_x + id.z() * out_step_y, in.ptr(), out_step_x); - }, - in); - } - while(in_window.slide_window_slice_3D(in_slice) && out_window.slide_window_slice_1D(out_slice)); -} -} // namespace arm_compute diff --git a/src/core/NEON/kernels/NEFlattenLayerKernel.h b/src/core/NEON/kernels/NEFlattenLayerKernel.h deleted file mode 100644 index 5fd5f436b2..0000000000 --- a/src/core/NEON/kernels/NEFlattenLayerKernel.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEFLATTENLAYERKERNEL_H -#define ARM_COMPUTE_NEFLATTENLAYERKERNEL_H - -#include "src/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the flatten layer kernel. */ -class NEFlattenLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFlattenLayerKernel"; - } - /** Default constructor */ - NEFlattenLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFlattenLayerKernel(const NEFlattenLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFlattenLayerKernel &operator=(const NEFlattenLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEFlattenLayerKernel(NEFlattenLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEFlattenLayerKernel &operator=(NEFlattenLayerKernel &&) = default; - /** Default destructor */ - ~NEFlattenLayerKernel() = default; - - /** Set the input and output of the kernel. - * - * @param[in] input First input tensor to flatten with at least 3 dimensions. - * The dimensions above the third will be interpreted as batches. Data types supported: All - * @param[out] output Output tensor with shape [w*h*d, input_batches] where: - * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEFlattenLayerKernel - * - * @param[in] input First input tensor to flatten with at least 3 dimensions. - * The dimensions above the third will be interpreted as batches. Data types supported: All - * @param[out] output Output tensor with shape [w*h*d, input_batches] where: - * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEFLATTENLAYERKERNEL_H */ diff --git a/src/runtime/CL/functions/CLFlattenLayer.cpp b/src/runtime/CL/functions/CLFlattenLayer.cpp index c10e91bf96..b2860ea24a 100644 --- a/src/runtime/CL/functions/CLFlattenLayer.cpp +++ b/src/runtime/CL/functions/CLFlattenLayer.cpp @@ -23,11 +23,16 @@ */ #include "arm_compute/runtime/CL/functions/CLFlattenLayer.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" -#include "src/core/CL/kernels/CLFlattenLayerKernel.h" - -using namespace arm_compute; +#include "src/core/helpers/AutoConfiguration.h" +namespace arm_compute +{ void CLFlattenLayer::configure(const ICLTensor *input, ICLTensor *output) { configure(CLKernelLibrary::get().get_compile_context(), input, output); @@ -35,13 +40,24 @@ void CLFlattenLayer::configure(const ICLTensor *input, ICLTensor *output) void CLFlattenLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output) { - auto k = std::make_unique(); - k->configure(compile_context, input, output); - _kernel = std::move(k); - CLScheduler::get().tune_kernel_static(*_kernel); + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(misc::shape_calculator::compute_flatten_shape(input->info()))); + _reshape.configure(compile_context, input, output); } Status CLFlattenLayer::validate(const ITensorInfo *input, const ITensorInfo *output) { - return CLFlattenLayerKernel::validate(input, output); + // Checks performed when output is configured + if(output->total_size() != 0) + { + const TensorInfo tensor_info_output = input->clone()->set_tensor_shape(misc::shape_calculator::compute_flatten_shape(input)); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info_output); + } + return CLReshapeLayer::validate(input, output); +} + +void CLFlattenLayer::run() +{ + _reshape.run(); } +} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEFlattenLayer.cpp b/src/runtime/NEON/functions/NEFlattenLayer.cpp index 21e55665cd..c5aa162f0a 100644 --- a/src/runtime/NEON/functions/NEFlattenLayer.cpp +++ b/src/runtime/NEON/functions/NEFlattenLayer.cpp @@ -23,20 +23,32 @@ */ #include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h" -#include "arm_compute/core/Size2D.h" -#include "src/core/NEON/kernels/NEFlattenLayerKernel.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" namespace arm_compute { void NEFlattenLayer::configure(const ITensor *input, ITensor *output) { - auto k = std::make_unique(); - k->configure(input, output); - _kernel = std::move(k); + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(misc::shape_calculator::compute_flatten_shape(input->info()))); + _reshape.configure(input, output); } Status NEFlattenLayer::validate(const ITensorInfo *input, const ITensorInfo *output) { - return NEFlattenLayerKernel::validate(input, output); + // Checks performed when output is configured + if(output->total_size() != 0) + { + const TensorInfo tensor_info_output = input->clone()->set_tensor_shape(misc::shape_calculator::compute_flatten_shape(input)); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info_output); + } + return NEReshapeLayer::validate(input, output); +} +void NEFlattenLayer::run() +{ + _reshape.run(); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp index f12c410a59..ec782fc163 100644 --- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp +++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp @@ -31,8 +31,6 @@ #include "arm_compute/runtime/NEON/NEScheduler.h" #include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" #include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" -#include "src/core/NEON/kernels/NEFlattenLayerKernel.h" -#include "src/core/NEON/kernels/NEFlattenLayerKernel.h" #include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" #include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" #include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" @@ -159,7 +157,7 @@ Status NEFullyConnectedLayerReshapeWeights::validate(const ITensorInfo *input, c NEFullyConnectedLayer::~NEFullyConnectedLayer() = default; NEFullyConnectedLayer::NEFullyConnectedLayer(std::shared_ptr memory_manager, IWeightsManager *weights_manager) - : _memory_group(std::move(memory_manager)), _weights_manager(weights_manager), _flatten_kernel(), _convert_weights(), _convert_weights_managed(), _reshape_weights_function(), + : _memory_group(std::move(memory_manager)), _weights_manager(weights_manager), _flatten(), _convert_weights(), _convert_weights_managed(), _reshape_weights_function(), _reshape_weights_managed_function(), _mm_gemm(nullptr, weights_manager), _mm_gemmlowp(nullptr, weights_manager), _flatten_output(), _converted_weights_output(), _reshape_weights_output(), _original_weights(nullptr), _are_weights_converted(true), _are_weights_reshaped(false), _is_fc_after_conv(false), _is_quantized_asymmetric(false), _is_prepared(false) { @@ -213,8 +211,7 @@ void NEFullyConnectedLayer::configure_conv_fc(const ITensor *input, const ITenso // Configure flatten kernel _memory_group.manage(&_flatten_output); - _flatten_kernel = std::make_unique(); - _flatten_kernel->configure(input, &_flatten_output); + _flatten.configure(input, &_flatten_output); // Configure matrix multiply kernel configure_mm(&_flatten_output, weights, biases, output, act); @@ -392,7 +389,7 @@ Status NEFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn ARM_COMPUTE_RETURN_ERROR_ON((weights_to_use->dimension(1) != (input->dimension(0) * input->dimension(1) * input->dimension(2)))); // Validate flatten kernel - ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayerKernel::validate(input, &flatten_input)); + ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayer::validate(input, &flatten_input)); input_to_use = &flatten_input; } else @@ -415,7 +412,7 @@ void NEFullyConnectedLayer::run() // Linearize input if it comes from a convolutional layer if(_is_fc_after_conv) { - NEScheduler::get().schedule(_flatten_kernel.get(), Window::DimY); + _flatten.run(); } // Run matrix multiply diff --git a/src/runtime/NEON/functions/NERNNLayer.cpp b/src/runtime/NEON/functions/NERNNLayer.cpp index c16d09f60c..93e37cc000 100644 --- a/src/runtime/NEON/functions/NERNNLayer.cpp +++ b/src/runtime/NEON/functions/NERNNLayer.cpp @@ -33,7 +33,6 @@ #include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" #include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" #include "src/core/NEON/kernels/NECopyKernel.h" -#include "src/core/NEON/kernels/NEFlattenLayerKernel.h" #include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" #include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" #include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" -- cgit v1.2.1