author     Georgios Pinitas <georgios.pinitas@arm.com>      2020-12-03 20:37:43 +0000
committer  Michele Di Giorgio <michele.digiorgio@arm.com>   2020-12-08 11:56:12 +0000
commit     e2696b1f9bb28b69beff99f54addd48f60823ddb (patch)
tree       68705f1cdff45e4d0c174b6037f6e5ff696717d2
parent     8c3c0e7b117723bc98b6acc85565ffa521b10c0d (diff)
download   ComputeLibrary-e2696b1f9bb28b69beff99f54addd48f60823ddb.tar.gz
Wrap Flatten layer over reshape
The Flatten layer is lowered into a Reshape layer, and the (CL/NE)FlattenLayerKernel kernels are removed.

Partially Resolves: COMPMID-3996

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: Id9e2ddfe2e2dd793541badff3490c05e4c908f88
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4660
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
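For readers unfamiliar with the function-level API, the indented sketch below (not part of the patch) illustrates how the reworked flatten function is driven after this change; the 8x8x16 F32 input is an illustrative assumption. With this patch, configure() auto-initialises the output shape via compute_flatten_shape() and delegates to the reshape function, so run() simply dispatches the wrapped NEReshapeLayer.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/TensorShape.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Illustrative 3D input [W=8, H=8, D=16]; flattening collapses the first
        // three dimensions, so the output shape becomes [8 * 8 * 16] = [1024].
        Tensor src{};
        Tensor dst{};
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 16U), 1, DataType::F32));

        NEFlattenLayer flatten{};
        flatten.configure(&src, &dst); // dst shape is auto-initialised from compute_flatten_shape()

        src.allocator()->allocate();
        dst.allocator()->allocate();

        flatten.run(); // now just runs the wrapped NEReshapeLayer
        return 0;
    }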
-rw-r--r--  Android.bp                                                  |   2
-rw-r--r--  arm_compute/runtime/CL/functions/CLFlattenLayer.h           |  11
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFlattenLayer.h         |  11
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h  |   4
-rw-r--r--  docs/00_introduction.dox                                    |   4
-rw-r--r--  src/core/CL/CLKernelLibrary.cpp                             |   5
-rw-r--r--  src/core/CL/CLKernels.h                                     |   1
-rw-r--r--  src/core/CL/cl_kernels/flatten.cl                           |  74
-rw-r--r--  src/core/CL/kernels/CLFlattenLayerKernel.cpp                | 137
-rw-r--r--  src/core/CL/kernels/CLFlattenLayerKernel.h                  |  83
-rw-r--r--  src/core/NEON/NEKernels.h                                   |   1
-rw-r--r--  src/core/NEON/kernels/NEFlattenLayerKernel.cpp              | 138
-rw-r--r--  src/core/NEON/kernels/NEFlattenLayerKernel.h                |  81
-rw-r--r--  src/runtime/CL/functions/CLFlattenLayer.cpp                 |  32
-rw-r--r--  src/runtime/NEON/functions/NEFlattenLayer.cpp               |  24
-rw-r--r--  src/runtime/NEON/functions/NEFullyConnectedLayer.cpp        |  11
-rw-r--r--  src/runtime/NEON/functions/NERNNLayer.cpp                   |   1
17 files changed, 67 insertions(+), 553 deletions(-)
diff --git a/Android.bp b/Android.bp
index c5213f4f09..5a219610f8 100644
--- a/Android.bp
+++ b/Android.bp
@@ -123,7 +123,6 @@ cc_library_static {
"src/core/CL/kernels/CLFFTScaleKernel.cpp",
"src/core/CL/kernels/CLFastCornersKernel.cpp",
"src/core/CL/kernels/CLFillBorderKernel.cpp",
- "src/core/CL/kernels/CLFlattenLayerKernel.cpp",
"src/core/CL/kernels/CLFloorKernel.cpp",
"src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp",
"src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp",
@@ -274,7 +273,6 @@ cc_library_static {
"src/core/NEON/kernels/NEFastCornersKernel.cpp",
"src/core/NEON/kernels/NEFillArrayKernel.cpp",
"src/core/NEON/kernels/NEFillBorderKernel.cpp",
- "src/core/NEON/kernels/NEFlattenLayerKernel.cpp",
"src/core/NEON/kernels/NEFloorKernel.cpp",
"src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp",
"src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp",
diff --git a/arm_compute/runtime/CL/functions/CLFlattenLayer.h b/arm_compute/runtime/CL/functions/CLFlattenLayer.h
index f5f4ff554f..ffe06aa610 100644
--- a/arm_compute/runtime/CL/functions/CLFlattenLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFlattenLayer.h
@@ -25,7 +25,8 @@
#define ARM_COMPUTE_CLFLATTENLAYER_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
+#include "arm_compute/runtime/IFunction.h"
namespace arm_compute
{
@@ -38,7 +39,7 @@ class ITensorInfo;
* -# @ref CLFlattenLayerKernel
*
*/
-class CLFlattenLayer : public ICLSimpleFunction
+class CLFlattenLayer : public IFunction
{
public:
/** Initialise the kernel's input and output.
@@ -68,6 +69,12 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ CLReshapeLayer _reshape{};
};
} // namespace arm_compute
diff --git a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h
index 9f0d5226de..1104aac77f 100644
--- a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h
@@ -25,7 +25,8 @@
#define ARM_COMPUTE_NEFLATTENLAYER_H
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
namespace arm_compute
{
@@ -33,7 +34,7 @@ class ITensor;
class ITensorInfo;
/** Basic function to execute flatten layer kernel. */
-class NEFlattenLayer : public INESimpleFunctionNoBorder
+class NEFlattenLayer : public IFunction
{
public:
/** Initialise the kernel's input and output.
@@ -54,6 +55,12 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ NEReshapeLayer _reshape{};
};
} // namespace arm_compute
diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
index 0a7748a94b..8bf3e95f81 100644
--- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
@@ -35,8 +35,6 @@
namespace arm_compute
{
-class NEFlattenLayerKernel;
-
/** Basic function to reshape the weights of Fully Connected layer with NEON. This function calls the following kernels:
*
* @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
@@ -181,7 +179,7 @@ private:
MemoryGroup _memory_group;
IWeightsManager *_weights_manager;
- std::unique_ptr<NEFlattenLayerKernel> _flatten_kernel;
+ NEFlattenLayer _flatten;
NEConvertFullyConnectedWeights _convert_weights;
weights_transformations::NEConvertFullyConnectedWeightsManaged _convert_weights_managed;
NEFullyConnectedLayerReshapeWeights _reshape_weights_function;
diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox
index 4baf33d7c1..7ad4831082 100644
--- a/docs/00_introduction.dox
+++ b/docs/00_introduction.dox
@@ -926,7 +926,7 @@ v18.11 Public major release
- Improved doxygen documentation adding a list of the existing functions.
- Add 4D tensors support to
- CLWidthConcatenateLayer
- - @ref CLFlattenLayer
+ - CLFlattenLayer
- @ref CLSoftmaxLayer
- Add dot product support for @ref CLDepthwiseConvolutionLayer3x3NHWCKernel non-unit stride
- Add SVE support
@@ -1185,7 +1185,7 @@ v17.09 Public major release
- @ref CLDepthwiseConvolutionLayer3x3NCHWKernel @ref CLDepthwiseConvolutionLayer3x3NHWCKernel CLDepthwiseIm2ColKernel CLDepthwiseVectorToTensorKernel CLDepthwiseWeightsReshapeKernel / CLDepthwiseConvolutionLayer3x3 @ref CLDepthwiseConvolutionLayer CLDepthwiseSeparableConvolutionLayer
- @ref CLDequantizationLayerKernel / @ref CLDequantizationLayer
- @ref CLDirectConvolutionLayerKernel / @ref CLDirectConvolutionLayer
- - @ref CLFlattenLayer
+ - CLFlattenLayer
- @ref CLFloorKernel / @ref CLFloor
- CLGEMMTranspose1xW
- @ref CLGEMMMatrixVectorMultiplyKernel
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index ae8b879be3..dadb3f4db1 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -198,7 +198,6 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "fill_image_borders_constant", "fill_border.cl" },
{ "fill_image_borders_replicate", "fill_border.cl" },
{ "finalize", "optical_flow_pyramid_lk.cl" },
- { "flatten", "flatten.cl" },
{ "floor_layer", "floor.cl" },
{ "fuse_batchnormalization_layer", "batchnormalization_layer.cl" },
{ "gather", "gather.cl" },
@@ -672,10 +671,6 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
#include "./cl_kernels/fill_border.clembed"
},
{
- "flatten.cl",
-#include "./cl_kernels/flatten.clembed"
- },
- {
"floor.cl",
#include "./cl_kernels/floor.clembed"
},
diff --git a/src/core/CL/CLKernels.h b/src/core/CL/CLKernels.h
index eea90eb599..a9654ecc1f 100644
--- a/src/core/CL/CLKernels.h
+++ b/src/core/CL/CLKernels.h
@@ -70,7 +70,6 @@
#include "src/core/CL/kernels/CLFFTScaleKernel.h"
#include "src/core/CL/kernels/CLFastCornersKernel.h"
#include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLFlattenLayerKernel.h"
#include "src/core/CL/kernels/CLFloorKernel.h"
#include "src/core/CL/kernels/CLFuseBatchNormalizationKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
diff --git a/src/core/CL/cl_kernels/flatten.cl b/src/core/CL/cl_kernels/flatten.cl
deleted file mode 100644
index a1a2e4696b..0000000000
--- a/src/core/CL/cl_kernels/flatten.cl
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "helpers.h"
-
-#if defined(DATA_TYPE) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) && defined(SRC_DEPTH)
-
-/** This opencl kernel flattens the first 3 dimensions of the input tensor
- *
- * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float
- * @note The width, height and depth of the input tensor must be passed at compile time using -DSRC_WIDTH, -DSRC_HEIGHT and -DSRC_DEPTH. e.g. -DSRC_WIDTH=24, -DSRC_HEIGHT=24, -DSRC_DEPTH=16
- * @note If the output has 3 dimensions, the 2nd dimension of the output tensor must be passed at compile time using -DDST_DIM1. e.g -DDST_DIM1=3
- *
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: All
- * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] src_step_z src_stride_z * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_stride_w Stride of the source tensor in W dimension (in bytes)
- * @param[in] src_step_w src_stride_w * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[out] dst_ptr Pointer to the destination tensor. Same as @p src_ptr
- * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
- * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
- */
-__kernel void flatten(
- TENSOR4D_DECLARATION(src),
- TENSOR3D_DECLARATION(dst))
-{
- Tensor4D src = CONVERT_TO_TENSOR4D_STRUCT(src, SRC_DEPTH);
-
- uint c = get_global_id(2) % SRC_DEPTH; // input feature map
- uint b0 = get_global_id(2) / SRC_DEPTH; // batch id
- uint b1 = 0;
-
-#if defined(DST_DIM1)
- uint b_tmp = b0;
- b0 = b_tmp % DST_DIM1; // batch id0
- b1 = b_tmp / DST_DIM1; // batch id1
-#endif // defined(DST_DIM1)
-
- __global uchar *output_ptr = dst_ptr + dst_offset_first_element_in_bytes + (get_global_id(0) + get_global_id(1) * (uint)SRC_WIDTH + c * (uint)(SRC_WIDTH * SRC_HEIGHT)) * sizeof(
- DATA_TYPE) + b0 * dst_stride_y + b1 * dst_stride_z;
-
- *((__global DATA_TYPE *)output_ptr) = *((__global DATA_TYPE *)src.ptr);
-}
-#endif // defined(DATA_TYPE) && defined(SRC_WIDTH) && defined(SRC_HEIGHT)
\ No newline at end of file
diff --git a/src/core/CL/kernels/CLFlattenLayerKernel.cpp b/src/core/CL/kernels/CLFlattenLayerKernel.cpp
deleted file mode 100644
index b3f84b6928..0000000000
--- a/src/core/CL/kernels/CLFlattenLayerKernel.cpp
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLFlattenLayerKernel.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "src/core/helpers/AutoConfiguration.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "support/StringSupport.h"
-
-using namespace arm_compute::misc::shape_calculator;
-
-namespace arm_compute
-{
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
-
- // Checks performed when output is configured
- if(output->total_size() != 0)
- {
- const TensorInfo tensor_info_output = input->clone()->set_tensor_shape(compute_flatten_shape(input));
-
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info_output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
- }
-
- return Status{};
-}
-} // namespace
-
-CLFlattenLayerKernel::CLFlattenLayerKernel()
- : _input(nullptr), _output(nullptr)
-{
-}
-
-void CLFlattenLayerKernel::configure(const ICLTensor *input, ICLTensor *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output);
-}
-
-void CLFlattenLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- // Output tensor auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(compute_flatten_shape(input->info())));
-
- auto padding_info = get_padding_info({ input, output });
-
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
-
- _input = input;
- _output = output;
-
- CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(data_size_from_type(input->info()->data_type())));
- build_opts.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(input->info()->dimension(0)));
- build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(input->info()->dimension(1)));
- build_opts.add_option("-DSRC_DEPTH=" + support::cpp11::to_string(input->info()->dimension(2)));
- build_opts.add_option_if(output->info()->num_dimensions() > 2, "-DDST_DIM1=" + support::cpp11::to_string(output->info()->dimension(1)));
-
- // Create kernel
- _kernel = create_kernel(compile_context, "flatten", build_opts.options());
-
- // Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps());
- ICLKernel::configure_internal(win);
-
- output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
- ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
-
- // Set config_id for enabling LWS tuning
- _config_id = "flatten";
- _config_id += "_";
- _config_id += lower_string(string_from_data_type(input->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(2));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(output->info()->dimension(1));
-}
-
-Status CLFlattenLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
- return Status{};
-}
-
-void CLFlattenLayerKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
-
- Window collapsed_window = window.collapse(ICLKernel::window(), Window::DimZ);
-
- Window output_window;
- output_window.use_tensor_dimensions(_output->info()->tensor_shape());
-
- // Run kernel
- unsigned int idx = 0;
- add_4D_tensor_argument(idx, _input, collapsed_window);
- add_3D_tensor_argument(idx, _output, output_window);
- enqueue(queue, *this, collapsed_window, lws_hint());
-}
-} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLFlattenLayerKernel.h b/src/core/CL/kernels/CLFlattenLayerKernel.h
deleted file mode 100644
index 2471cf2e4a..0000000000
--- a/src/core/CL/kernels/CLFlattenLayerKernel.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLFLATTENLAYERKERNEL_H
-#define ARM_COMPUTE_CLFLATTENLAYERKERNEL_H
-
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL interface for the flatten kernel.*/
-class CLFlattenLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLFlattenLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFlattenLayerKernel(const CLFlattenLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFlattenLayerKernel &operator=(const CLFlattenLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLFlattenLayerKernel(CLFlattenLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLFlattenLayerKernel &operator=(CLFlattenLayerKernel &&) = default;
- /** Set the input and output of the kernel.
- *
- * @param[in] input First input tensor to flatten with at least 3 dimensions.
- * The dimensions above the third will be interpreted as batches. Data types supported: All.
- * @param[out] output Output tensor with shape [w*h*d, input_batches] where:
- * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Set the input and output of the kernel.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input First input tensor to flatten with at least 3 dimensions.
- * The dimensions above the third will be interpreted as batches. Data types supported: All.
- * @param[out] output Output tensor with shape [w*h*d, input_batches] where:
- * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLFlattenLayerKernel
- *
- * @param[in] input First input tensor to flatten with at least 3 dimensions.
- * The dimensions above the third will be interpreted as batches. Data types supported: All.
- * @param[out] output Output tensor with shape [w*h*d, input_batches] where:
- * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-public:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLFLATTENLAYERKERNEL_H */
diff --git a/src/core/NEON/NEKernels.h b/src/core/NEON/NEKernels.h
index 091130c23a..55aa514f36 100644
--- a/src/core/NEON/NEKernels.h
+++ b/src/core/NEON/NEKernels.h
@@ -69,7 +69,6 @@
#include "src/core/NEON/kernels/NEFastCornersKernel.h"
#include "src/core/NEON/kernels/NEFillArrayKernel.h"
#include "src/core/NEON/kernels/NEFillBorderKernel.h"
-#include "src/core/NEON/kernels/NEFlattenLayerKernel.h"
#include "src/core/NEON/kernels/NEFloorKernel.h"
#include "src/core/NEON/kernels/NEFuseBatchNormalizationKernel.h"
#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
diff --git a/src/core/NEON/kernels/NEFlattenLayerKernel.cpp b/src/core/NEON/kernels/NEFlattenLayerKernel.cpp
deleted file mode 100644
index 8c0dc10ee8..0000000000
--- a/src/core/NEON/kernels/NEFlattenLayerKernel.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/NEON/kernels/NEFlattenLayerKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "src/core/CPP/Validate.h"
-#include "src/core/helpers/AutoConfiguration.h"
-#include "src/core/helpers/WindowHelpers.h"
-
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-
-namespace arm_compute
-{
-using namespace misc::shape_calculator;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
- // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
-
- // Checks performed when output is configured
- if(output->total_size() != 0)
- {
- const TensorInfo tensor_info_output = input->clone()->set_tensor_shape(compute_flatten_shape(input));
-
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info_output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
- }
-
- return Status{};
-}
-
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
-{
- // Output tensor auto initialization if not yet initialized
- auto_init_if_empty(*output, input->clone()->set_tensor_shape(compute_flatten_shape(input)));
-
- Window win = calculate_max_window(*input, Steps()); // Flatten does not need paddings
-
- output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape()));
-
- return std::make_pair(Status{}, win);
-}
-} // namespace
-
-NEFlattenLayerKernel::NEFlattenLayerKernel()
- : _input(nullptr), _output(nullptr)
-{
-}
-
-void NEFlattenLayerKernel::configure(const ITensor *input, ITensor *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
-
- _input = input;
- _output = output;
-
- // Configure kernel window
- auto win_config = validate_and_configure_window(input->info(), output->info());
- ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- INEKernel::configure(win_config.second);
-}
-
-Status NEFlattenLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get()).first);
- return Status{};
-}
-
-void NEFlattenLayerKernel::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
-
- const size_t in_width = _input->info()->dimension(0);
- const size_t in_height = _input->info()->dimension(1);
- const size_t out_step_x = in_width * _input->info()->element_size();
- const size_t out_step_y = out_step_x * in_height;
-
- Window in_window(window);
- in_window.set(Window::DimX, Window::Dimension(0, 1, 1));
-
- Window out_window;
- out_window.use_tensor_dimensions(_output->info()->tensor_shape());
- out_window.set(Window::DimX, Window::Dimension(out_window.x().start(), out_window.x().end(), in_width));
-
- Window in_slice = in_window.first_slice_window_3D();
- Window out_slice = out_window.first_slice_window_1D();
-
- do
- {
- Iterator in(_input, in_slice);
- Iterator out(_output, out_slice);
-
- uint8_t *out_ptr = out.ptr();
-
- execute_window_loop(in_slice, [&](const Coordinates & id)
- {
- memcpy(out_ptr + id.y() * out_step_x + id.z() * out_step_y, in.ptr(), out_step_x);
- },
- in);
- }
- while(in_window.slide_window_slice_3D(in_slice) && out_window.slide_window_slice_1D(out_slice));
-}
-} // namespace arm_compute
diff --git a/src/core/NEON/kernels/NEFlattenLayerKernel.h b/src/core/NEON/kernels/NEFlattenLayerKernel.h
deleted file mode 100644
index 5fd5f436b2..0000000000
--- a/src/core/NEON/kernels/NEFlattenLayerKernel.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEFLATTENLAYERKERNEL_H
-#define ARM_COMPUTE_NEFLATTENLAYERKERNEL_H
-
-#include "src/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the flatten layer kernel. */
-class NEFlattenLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEFlattenLayerKernel";
- }
- /** Default constructor */
- NEFlattenLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFlattenLayerKernel(const NEFlattenLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFlattenLayerKernel &operator=(const NEFlattenLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEFlattenLayerKernel(NEFlattenLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEFlattenLayerKernel &operator=(NEFlattenLayerKernel &&) = default;
- /** Default destructor */
- ~NEFlattenLayerKernel() = default;
-
- /** Set the input and output of the kernel.
- *
- * @param[in] input First input tensor to flatten with at least 3 dimensions.
- * The dimensions above the third will be interpreted as batches. Data types supported: All
- * @param[out] output Output tensor with shape [w*h*d, input_batches] where:
- * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEFlattenLayerKernel
- *
- * @param[in] input First input tensor to flatten with at least 3 dimensions.
- * The dimensions above the third will be interpreted as batches. Data types supported: All
- * @param[out] output Output tensor with shape [w*h*d, input_batches] where:
- * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input;
- ITensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEFLATTENLAYERKERNEL_H */
diff --git a/src/runtime/CL/functions/CLFlattenLayer.cpp b/src/runtime/CL/functions/CLFlattenLayer.cpp
index c10e91bf96..b2860ea24a 100644
--- a/src/runtime/CL/functions/CLFlattenLayer.cpp
+++ b/src/runtime/CL/functions/CLFlattenLayer.cpp
@@ -23,11 +23,16 @@
*/
#include "arm_compute/runtime/CL/functions/CLFlattenLayer.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLFlattenLayerKernel.h"
-
-using namespace arm_compute;
+#include "src/core/helpers/AutoConfiguration.h"
+namespace arm_compute
+{
void CLFlattenLayer::configure(const ICLTensor *input, ICLTensor *output)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output);
@@ -35,13 +40,24 @@ void CLFlattenLayer::configure(const ICLTensor *input, ICLTensor *output)
void CLFlattenLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
{
- auto k = std::make_unique<CLFlattenLayerKernel>();
- k->configure(compile_context, input, output);
- _kernel = std::move(k);
- CLScheduler::get().tune_kernel_static(*_kernel);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(misc::shape_calculator::compute_flatten_shape(input->info())));
+ _reshape.configure(compile_context, input, output);
}
Status CLFlattenLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
{
- return CLFlattenLayerKernel::validate(input, output);
+ // Checks performed when output is configured
+ if(output->total_size() != 0)
+ {
+ const TensorInfo tensor_info_output = input->clone()->set_tensor_shape(misc::shape_calculator::compute_flatten_shape(input));
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info_output);
+ }
+ return CLReshapeLayer::validate(input, output);
+}
+
+void CLFlattenLayer::run()
+{
+ _reshape.run();
}
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NEFlattenLayer.cpp b/src/runtime/NEON/functions/NEFlattenLayer.cpp
index 21e55665cd..c5aa162f0a 100644
--- a/src/runtime/NEON/functions/NEFlattenLayer.cpp
+++ b/src/runtime/NEON/functions/NEFlattenLayer.cpp
@@ -23,20 +23,32 @@
*/
#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h"
-#include "arm_compute/core/Size2D.h"
-#include "src/core/NEON/kernels/NEFlattenLayerKernel.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/helpers/AutoConfiguration.h"
namespace arm_compute
{
void NEFlattenLayer::configure(const ITensor *input, ITensor *output)
{
- auto k = std::make_unique<NEFlattenLayerKernel>();
- k->configure(input, output);
- _kernel = std::move(k);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(misc::shape_calculator::compute_flatten_shape(input->info())));
+ _reshape.configure(input, output);
}
Status NEFlattenLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
{
- return NEFlattenLayerKernel::validate(input, output);
+ // Checks performed when output is configured
+ if(output->total_size() != 0)
+ {
+ const TensorInfo tensor_info_output = input->clone()->set_tensor_shape(misc::shape_calculator::compute_flatten_shape(input));
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info_output);
+ }
+ return NEReshapeLayer::validate(input, output);
+}
+void NEFlattenLayer::run()
+{
+ _reshape.run();
}
} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
index f12c410a59..ec782fc163 100644
--- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
+++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
@@ -31,8 +31,6 @@
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
-#include "src/core/NEON/kernels/NEFlattenLayerKernel.h"
-#include "src/core/NEON/kernels/NEFlattenLayerKernel.h"
#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
@@ -159,7 +157,7 @@ Status NEFullyConnectedLayerReshapeWeights::validate(const ITensorInfo *input, c
NEFullyConnectedLayer::~NEFullyConnectedLayer() = default;
NEFullyConnectedLayer::NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
- : _memory_group(std::move(memory_manager)), _weights_manager(weights_manager), _flatten_kernel(), _convert_weights(), _convert_weights_managed(), _reshape_weights_function(),
+ : _memory_group(std::move(memory_manager)), _weights_manager(weights_manager), _flatten(), _convert_weights(), _convert_weights_managed(), _reshape_weights_function(),
_reshape_weights_managed_function(), _mm_gemm(nullptr, weights_manager), _mm_gemmlowp(nullptr, weights_manager), _flatten_output(), _converted_weights_output(), _reshape_weights_output(),
_original_weights(nullptr), _are_weights_converted(true), _are_weights_reshaped(false), _is_fc_after_conv(false), _is_quantized_asymmetric(false), _is_prepared(false)
{
@@ -213,8 +211,7 @@ void NEFullyConnectedLayer::configure_conv_fc(const ITensor *input, const ITenso
// Configure flatten kernel
_memory_group.manage(&_flatten_output);
- _flatten_kernel = std::make_unique<NEFlattenLayerKernel>();
- _flatten_kernel->configure(input, &_flatten_output);
+ _flatten.configure(input, &_flatten_output);
// Configure matrix multiply kernel
configure_mm(&_flatten_output, weights, biases, output, act);
@@ -392,7 +389,7 @@ Status NEFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn
ARM_COMPUTE_RETURN_ERROR_ON((weights_to_use->dimension(1) != (input->dimension(0) * input->dimension(1) * input->dimension(2))));
// Validate flatten kernel
- ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayerKernel::validate(input, &flatten_input));
+ ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayer::validate(input, &flatten_input));
input_to_use = &flatten_input;
}
else
@@ -415,7 +412,7 @@ void NEFullyConnectedLayer::run()
// Linearize input if it comes from a convolutional layer
if(_is_fc_after_conv)
{
- NEScheduler::get().schedule(_flatten_kernel.get(), Window::DimY);
+ _flatten.run();
}
// Run matrix multiply
diff --git a/src/runtime/NEON/functions/NERNNLayer.cpp b/src/runtime/NEON/functions/NERNNLayer.cpp
index c16d09f60c..93e37cc000 100644
--- a/src/runtime/NEON/functions/NERNNLayer.cpp
+++ b/src/runtime/NEON/functions/NERNNLayer.cpp
@@ -33,7 +33,6 @@
#include "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
#include "src/core/NEON/kernels/NECopyKernel.h"
-#include "src/core/NEON/kernels/NEFlattenLayerKernel.h"
#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"