aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
authorMichalis Spyrou <michalis.spyrou@arm.com>2021-04-08 12:02:58 +0100
committerMichalis Spyrou <michalis.spyrou@arm.com>2021-04-19 13:45:08 +0000
commit60c3b0e6821a80d78ffca5be30e05d062d071cd2 (patch)
tree3e263a45aa9617cfd7704b2b33ea4337f1582321 /arm_compute
parent4f1650f0c9919f0bac5024b8e31c0f754d25aec3 (diff)
downloadComputeLibrary-60c3b0e6821a80d78ffca5be30e05d062d071cd2.tar.gz
Port DepthwiseConvolution to new API
Resolves: COMPMID-4185 Change-Id: Ib5f22356356a022d567bb18d44ea272b62d10ebf Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5424 Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--arm_compute/core/Types.h13
-rw-r--r--arm_compute/core/utils/misc/ShapeCalculator.h15
-rw-r--r--arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h64
-rw-r--r--arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h123
4 files changed, 29 insertions, 186 deletions
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 53333ff608..b1f340d18e 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -1861,6 +1861,19 @@ private:
bool _broadcast_bias;
};
+struct ConvolutionInfo
+{
+ ConvolutionInfo() = default;
+ ConvolutionInfo(const PadStrideInfo &pad_stride_info, unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
+ : pad_stride_info(pad_stride_info), depth_multiplier(depth_multiplier), act_info(act_info), dilation(dilation)
+ {
+ }
+ PadStrideInfo pad_stride_info{}; /**< Convolution info (Pads, strides,...) */
+ unsigned int depth_multiplier{ 1 }; /**< Multiplier to apply to input's depth to retrieve the output depth. Defaults to 1 */
+ ActivationLayerInfo act_info{}; /**< Fused activation to apply after convolution. */
+ Size2D dilation{ Size2D(1, 1) }; /**< Dilation, in elements, across x and y. Defaults to (1, 1). */
+};
+
struct DepthwiseConvolutionReshapeInfo
{
unsigned int c0{ 1 }; /**< Number of channels processed by the depth-wise convolution */
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index 56038dd853..ba37f9a61e 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -435,16 +435,13 @@ inline TensorShape compute_transposed_shape(const ITensorInfo &input)
/** Calculate the depthwise convolution output shape of a tensor
*
- * @param[in] input Input tensor info
- * @param[in] weights Weights tensor info
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth.
- * @param[in] dilation Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] input Input tensor info
+ * @param[in] weights Weights tensor info
+ * @param[in] info Convolution info
*
* @return the calculated shape
*/
-inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info, unsigned int depth_multiplier, const Size2D &dilation = Size2D(1U,
- 1U))
+inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const ConvolutionInfo &info)
{
const TensorShape input_shape{ input.tensor_shape() };
const TensorShape weights_shape{ weights.tensor_shape() };
@@ -462,12 +459,12 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input,
unsigned int output_height = 0;
std::tie(output_width, output_height) = scaled_dimensions(input_shape[width_idx], input_shape[height_idx],
weights_shape[weights_width_idx], weights_shape[weights_height_idx],
- conv_info, dilation);
+ info.pad_stride_info, info.dilation);
TensorShape output_shape{ input_shape };
output_shape.set(width_idx, output_width);
output_shape.set(height_idx, output_height);
- output_shape.set(channel_idx, input_shape[channel_idx] * depth_multiplier);
+ output_shape.set(channel_idx, input_shape[channel_idx] * info.depth_multiplier);
return output_shape;
}
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index 9aa8f04eb8..c74b2a93ee 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -24,9 +24,10 @@
#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H
#define ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H
+#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPermute.h"
-#include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h"
#include <memory>
namespace arm_compute
@@ -91,25 +92,6 @@ public:
void prepare() override;
private:
- /** Static function to choose the best depthwise convolution function for @ref NEDepthwiseConvolutionLayer
- *
- * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] output Destination tensor. Data type supported: same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 quantized are supported.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- * @return a Depthwise Convolution Function
- */
- static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
- const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
- ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
-
/** Basic function to execute optimized depthwise convolution routines. This function calls the following kernels:
*
* @note At the moment 3x3 and 5x5 convolution of stride 1, 2 are supported
@@ -173,23 +155,9 @@ private:
void prepare() override;
private:
- MemoryGroup _memory_group;
- NEDepthwiseConvolutionAssemblyDispatch _dwc_optimized_func;
- NEPermute _permute_input;
- NEPermute _permute_weights;
- NEPermute _permute_output;
- NEActivationLayer _activationlayer_function;
- Tensor _accumulator;
- Tensor _permuted_input;
- Tensor _permuted_weights;
- Tensor _permuted_output;
- const ITensor *_original_weights;
- bool _has_bias;
- bool _is_quantized;
- bool _is_nchw;
- bool _permute;
- bool _is_activationlayer_enabled;
- bool _is_prepared;
+ MemoryGroup _memory_group;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
/** Basic function to execute a generic depthwise convolution. This function calls the following kernel:
@@ -248,26 +216,14 @@ private:
// Inherited methods overriden:
void run() override;
- void prepare() override;
private:
- std::unique_ptr<NEDepthwiseConvolutionLayerNativeKernel> _depthwise_conv_kernel;
- NEPermute _permute_input;
- NEPermute _permute_weights;
- NEPermute _permute_output;
- NEActivationLayer _activationlayer_function;
- Tensor _permuted_input;
- Tensor _permuted_weights;
- Tensor _permuted_output;
- bool _is_prepared;
- bool _is_nchw;
- bool _is_activationlayer_enabled;
- const ITensor *_original_weights;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-
- DepthwiseConvolutionFunction _depth_conv_func;
- NEDepthwiseConvolutionLayerOptimizedInternal _func_optimized;
- NEDepthwiseConvolutionLayerGeneric _func_generic;
+ MemoryGroup _memory_group;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H */
diff --git a/arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h b/arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h
deleted file mode 100644
index 7f63717b02..0000000000
--- a/arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H
-#define ARM_COMPUTE_NEDEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/Tensor.h"
-
-namespace arm_compute
-{
-/** Depthwise convolution assembly kernel glue */
-class NEDepthwiseConvolutionAssemblyDispatch : public IFunction
-{
-public:
- /** Default constructor
- *
- * @param[in,out] memory_manager Memory manager to use
- */
- NEDepthwiseConvolutionAssemblyDispatch(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionAssemblyDispatch(const NEDepthwiseConvolutionAssemblyDispatch &) = delete;
- /** Default move constructor */
- NEDepthwiseConvolutionAssemblyDispatch(NEDepthwiseConvolutionAssemblyDispatch &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionAssemblyDispatch &operator=(const NEDepthwiseConvolutionAssemblyDispatch &) = delete;
- /** Default move assignment operator */
- NEDepthwiseConvolutionAssemblyDispatch &operator=(NEDepthwiseConvolutionAssemblyDispatch &&) = default;
- /** Default destructor */
- ~NEDepthwiseConvolutionAssemblyDispatch();
- /** Initialize the function's source, destination, kernels and border_size.
- *
- * @note Supports only NHWC format
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
- * @param[in] weights Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
- * @param[in] bias (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input.
- * @param[out] output Destination tensor. Data type supported: same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- */
- void configure(const ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output,
- const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(),
- const Size2D &dilation = Size2D(1, 1));
- /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionAssemblyDispatch
- *
- * @note Supports only NHWC format
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
- * @param[in] weights Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
- * @param[in] bias (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input.
- * @param[out] output Destination tensor. Data type supported: same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- * @return An error status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output,
- const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(),
- const Size2D &dilation = Size2D(1, 1));
- /** Check if the optimized kernel can be used for the given kernel sizes and strides
- *
- * @warning Even if this return true the inputs and outputs might need to get permuted as the only layout supported is NHWC
- *
- * @param[in] input Input tensor info.
- * @param[in] weights Weights tensor info.
- * @param[in] conv_info Convolution layer metadata.
- * @param[in] depth_multiplier (Optional) Depth multiplier to be used.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- * @return True if the assembly kernel could be used else false. Note that transformations of input/output could be needed.
- */
- static bool is_optimized_supported(const ITensorInfo *input, const ITensorInfo *weights, PadStrideInfo conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1, 1));
-
- // Inherited methods overridden:
- void run() override;
- void prepare() override;
-
-private:
- struct LocalImpl;
-
-private:
- MemoryGroup _memory_group;
- const ITensor *_input;
- const ITensor *_weights;
- const ITensor *_bias;
- ITensor *_output;
- Tensor _packed_weights;
- Tensor _workspace;
- bool _is_prepared;
- std::unique_ptr<LocalImpl> _pImpl;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H */