aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
authorManuel Bottini <manuel.bottini@arm.com>2019-09-26 17:18:26 +0100
committerManuel Bottini <manuel.bottini@arm.com>2019-10-23 16:56:45 +0000
commit05069f07bcf95676597698a79926327555276362 (patch)
treea4a861127660aa439c9468da7479d92cecc85138 /arm_compute
parente36b5266e4c6593932432bc0289e431d007b8710 (diff)
downloadComputeLibrary-05069f07bcf95676597698a79926327555276362.tar.gz
COMPMID-2515: Merge optimized depthwise convolution to the generic depthwise convolution function
3RDPARTY_UPDATE Change-Id: Iff9e915c5329c617527b6f5042979f4e21a8b2b8 Signed-off-by: Manuel Bottini <manuel.bottini@arm.com> Reviewed-on: https://review.mlplatform.org/c/2022 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Giorgio Arena <giorgio.arena@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--arm_compute/core/Types.h7
-rw-r--r--arm_compute/graph/TypePrinter.h3
-rw-r--r--arm_compute/graph/backends/FunctionHelpers.h25
-rw-r--r--arm_compute/graph/backends/ValidateHelpers.h8
-rw-r--r--arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h298
-rw-r--r--arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h415
6 files changed, 461 insertions, 295 deletions
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 9641089e7b..d7b47ac512 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -139,6 +139,13 @@ enum class ConvolutionMethod
FFT /**< Convolution using FFT */
};
+/** Available DepthwiseConvolutionFunction*/
+enum class DepthwiseConvolutionFunction
+{
+ OPTIMIZED, /**< Optimized Depthwise Convolution */
+ GENERIC, /**< Generic Depthwise Convolution */
+};
+
/** Available DeconvolutionMethod*/
enum class DeconvolutionMethod
{
diff --git a/arm_compute/graph/TypePrinter.h b/arm_compute/graph/TypePrinter.h
index e4188125b9..131fd39277 100644
--- a/arm_compute/graph/TypePrinter.h
+++ b/arm_compute/graph/TypePrinter.h
@@ -251,9 +251,6 @@ inline ::std::ostream &operator<<(::std::ostream &os, const DepthwiseConvolution
case DepthwiseConvolutionMethod::Default:
os << "DEFAULT";
break;
- case DepthwiseConvolutionMethod::GEMV:
- os << "GEMV";
- break;
case DepthwiseConvolutionMethod::Optimized3x3:
os << "Optimized3x3";
break;
diff --git a/arm_compute/graph/backends/FunctionHelpers.h b/arm_compute/graph/backends/FunctionHelpers.h
index 94b385e81e..ee257e3abf 100644
--- a/arm_compute/graph/backends/FunctionHelpers.h
+++ b/arm_compute/graph/backends/FunctionHelpers.h
@@ -538,7 +538,7 @@ std::unique_ptr<IFunction> create_deconvolution_layer(DeconvolutionLayerNode &no
*
* @return Backend depth-wise convolution layer function
*/
-template <typename DepthwiseConvolutionLayerFunctions, typename TargetInfo>
+template <typename DepthwiseConvolutionLayer, typename TargetInfo>
std::unique_ptr<IFunction> create_depthwise_convolution_layer(DepthwiseConvolutionLayerNode &node)
{
validate_node<TargetInfo>(node, 3 /* expected inputs */, 1 /* expected outputs */);
@@ -556,26 +556,17 @@ std::unique_ptr<IFunction> create_depthwise_convolution_layer(DepthwiseConvoluti
biases->info()->set_data_type(DataType::S32);
}
- const PadStrideInfo conv_info = node.convolution_info();
- const DepthwiseConvolutionMethod dwc_algorithm = node.depthwise_convolution_method();
- const unsigned int depth_multiplier = node.depth_multiplier();
- const ActivationLayerInfo fused_act = node.fused_activation();
+ const PadStrideInfo conv_info = node.convolution_info();
+ const unsigned int depth_multiplier = node.depth_multiplier();
+ const ActivationLayerInfo fused_act = node.fused_activation();
// Create and configure function (we assume that functions have been validated before creation)
std::unique_ptr<IFunction> func;
std::string func_name;
- if(dwc_algorithm == DepthwiseConvolutionMethod::Optimized3x3)
- {
- std::tie(func, func_name) = create_named_function<typename DepthwiseConvolutionLayerFunctions::OptimizedDepthwiseConvolutionLayer>(
- std::string("DepthwiseConvolutionLayer3x3"),
- input, weights, biases, output, conv_info, depth_multiplier, fused_act);
- }
- else
- {
- std::tie(func, func_name) = create_named_function<typename DepthwiseConvolutionLayerFunctions::GenericDepthwiseConvolutionLayer>(
- std::string("DepthwiseConvolutionLayer"),
- input, weights, biases, output, conv_info, depth_multiplier, fused_act);
- }
+
+ std::tie(func, func_name) = create_named_function<DepthwiseConvolutionLayer>(
+ std::string("DepthwiseConvolutionLayer"),
+ input, weights, biases, output, conv_info, depth_multiplier, fused_act);
// Log info
std::ostringstream qss;
diff --git a/arm_compute/graph/backends/ValidateHelpers.h b/arm_compute/graph/backends/ValidateHelpers.h
index 13de273bdf..9170006d9c 100644
--- a/arm_compute/graph/backends/ValidateHelpers.h
+++ b/arm_compute/graph/backends/ValidateHelpers.h
@@ -163,13 +163,12 @@ Status validate_convolution_layer(ConvolutionLayerNode &node)
/** Validates a Depthwise Convolution layer node
*
* @tparam DepthwiseConvolutionLayer Default Depthwise Convolution layer type
- * @tparam DepthwiseConvolutionLayer3x3 Optimized 3x3 Depthwise Convolution layer type
*
* @param[in] node Node to validate
*
* @return Status
*/
-template <typename DepthwiseConvolutionLayer, typename DepthwiseConvolutionLayer3x3>
+template <typename DepthwiseConvolutionLayer>
Status validate_depthwise_convolution_layer(DepthwiseConvolutionLayerNode &node)
{
ARM_COMPUTE_LOG_GRAPH_VERBOSE("Validating DepthwiseConvolutionLayer node with ID : " << node.id() << " and Name: " << node.name() << std::endl);
@@ -191,11 +190,8 @@ Status validate_depthwise_convolution_layer(DepthwiseConvolutionLayerNode &node)
switch(dwc_algorithm)
{
case DepthwiseConvolutionMethod::Default:
- case DepthwiseConvolutionMethod::GEMV:
- status = DepthwiseConvolutionLayer::validate(input, weights, biases, output, conv_info, depth_multiplier);
- break;
case DepthwiseConvolutionMethod::Optimized3x3:
- status = DepthwiseConvolutionLayer3x3::validate(input, weights, biases, output, conv_info, depth_multiplier);
+ status = DepthwiseConvolutionLayer::validate(input, weights, biases, output, conv_info, depth_multiplier);
break;
default:
ARM_COMPUTE_RETURN_ERROR_MSG("Unsupported depthwise convolution method");
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
index 98581a21fe..b8b11f08b2 100644
--- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
@@ -40,151 +40,301 @@ namespace arm_compute
{
class ICLTensor;
-/** Basic function to execute a depthwise convolution for kernel size 3x3xC (when data layout NCHW) or Cx3x3 (when data layout NHWC). This function calls the following OpenCL kernels:
- *
- * -# @ref CLDepthwiseConvolutionLayer3x3NCHWKernel (if data_layout == NCHW)
- * -# @ref CLDepthwiseConvolutionLayer3x3NHWCKernel (if data_layout == NHWC)
- * -# @ref CLDepthwiseConvolutionLayerReshapeWeightsKernel (if data_layout == NHWC)
- * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)
- *
+/** Function to execute a depthwise convolution
*/
-class CLDepthwiseConvolutionLayer3x3 : public IFunction
+class CLDepthwiseConvolutionLayer : public IFunction
{
public:
/** Default constructor */
- CLDepthwiseConvolutionLayer3x3(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ CLDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthwiseConvolutionLayer3x3(const CLDepthwiseConvolutionLayer3x3 &) = delete;
+ CLDepthwiseConvolutionLayer(const CLDepthwiseConvolutionLayer &) = delete;
/** Default move constructor */
- CLDepthwiseConvolutionLayer3x3(CLDepthwiseConvolutionLayer3x3 &&) = default;
+ CLDepthwiseConvolutionLayer(CLDepthwiseConvolutionLayer &&) = default;
/** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthwiseConvolutionLayer3x3 &operator=(const CLDepthwiseConvolutionLayer3x3 &) = delete;
+ CLDepthwiseConvolutionLayer &operator=(const CLDepthwiseConvolutionLayer &) = delete;
/** Default move assignment operator */
- CLDepthwiseConvolutionLayer3x3 &operator=(CLDepthwiseConvolutionLayer3x3 &&) = default;
- /** Initialize the function's source, destination, conv and border_size.
+ CLDepthwiseConvolutionLayer &operator=(CLDepthwiseConvolutionLayer &&) = default;
+ /** Initialize the function's source, destination, weights and convolution information.
*
- * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
- * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input.
+ * @param[in, out] input Source tensor. Data type supported: QASYMM8/FP16/FP32. Data layout supported: NHWC, NCHW
+ * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
* @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8.
* @param[out] output Destination tensor. Data type supported: same as @p input.
* @param[in] conv_info Padding and stride information to use for the convolution.
* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
*/
void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
- /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3
+ /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer
*
- * @param[in] input Source tensor info. Data type supported: QASYMM8 for all layouts, F16/F32 for NCHW.
- * @param[in] weights Weights tensor info. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input.
+ * @param[in] input Source tensor info. Data type supported: QASYMM8/FP16/FP32. Data layout supported: NHWC, NCHW
+ * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
* @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
* Data type supported: Same as @p input, S32 when input is QASYMM8.
* @param[in] output Destination tensor. Data type supported: same as @p input.
* @param[in] conv_info Padding and stride information to use for the convolution.
* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
- * @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
*
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
- ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD, const Size2D &dilation = Size2D(1U, 1U));
+ ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+
// Inherited methods overriden:
void run() override;
void prepare() override;
private:
- MemoryGroup _memory_group;
- std::unique_ptr<ICLDepthwiseConvolutionLayer3x3Kernel> _kernel;
- CLFillBorderKernel _border_handler;
- CLPermute _permute_input_to_nchw;
- CLPermute _permute_weights_to_nchw;
- CLPermute _permute_output_to_nhwc;
- CLDepthwiseConvolutionLayerReshapeWeightsKernel _reshape_weights;
- CLTensor _permuted_input;
- CLTensor _permuted_weights;
- CLTensor _permuted_output;
- const ITensor *_original_weights;
- bool _needs_permute;
- bool _needs_weights_reshape;
- bool _is_prepared;
+ /** Static function to choose the best depthwise convolution function for @ref CLDepthwiseConvolutionLayer
+ *
+ * @param[in] input Source tensor info. Data type supported: QASYMM8/FP16/FP32. Data layout supported: NHWC, NCHW
+ * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8.
+ * @param[in] output Destination tensor. Data type supported: same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard.
+ *
+ * @return a Depthwise Convolution Function
+ */
+ static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
+ const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
+ ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U), GPUTarget gpu_target = GPUTarget::MIDGARD);
+
+ /** Basic function to execute a depthwise convolution for kernel size 3x3xC (when data layout NCHW) or Cx3x3 (when data layout NHWC). This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLDepthwiseConvolutionLayer3x3NCHWKernel (if data_layout == NCHW)
+ * -# @ref CLDepthwiseConvolutionLayer3x3NHWCKernel (if data_layout == NHWC)
+ * -# @ref CLDepthwiseConvolutionLayerReshapeWeightsKernel (if data_layout == NHWC)
+ * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)
+ *
+ */
+ class CLDepthwiseConvolutionLayerInternal3x3 : public IFunction
+ {
+ public:
+ /** Default constructor */
+ CLDepthwiseConvolutionLayerInternal3x3(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthwiseConvolutionLayerInternal3x3(const CLDepthwiseConvolutionLayerInternal3x3 &) = delete;
+ /** Default move constructor */
+ CLDepthwiseConvolutionLayerInternal3x3(CLDepthwiseConvolutionLayerInternal3x3 &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthwiseConvolutionLayerInternal3x3 &operator=(const CLDepthwiseConvolutionLayerInternal3x3 &) = delete;
+ /** Default move assignment operator */
+ CLDepthwiseConvolutionLayerInternal3x3 &operator=(CLDepthwiseConvolutionLayerInternal3x3 &&) = default;
+ /** Initialize the function's source, destination, conv and border_size.
+ *
+ * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
+ * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input.
+ * @param[out] output Destination tensor. Data type supported: same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ */
+ void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
+ ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3
+ *
+ * @param[in] input Source tensor info. Data type supported: QASYMM8 for all layouts, F16/F32 for NCHW.
+ * @param[in] weights Weights tensor info. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8.
+ * @param[in] output Destination tensor. Data type supported: same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
+ ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD, const Size2D &dilation = Size2D(1U, 1U));
+
+ // Inherited methods overriden:
+ void run() override;
+ void prepare() override;
+
+ void set_memory_group(std::shared_ptr<IMemoryManager> memory_manager)
+ {
+ _memory_group = MemoryGroup(std::move(memory_manager));
+ };
+
+ private:
+ MemoryGroup _memory_group;
+ std::unique_ptr<ICLDepthwiseConvolutionLayer3x3Kernel> _kernel;
+ CLFillBorderKernel _border_handler;
+ CLPermute _permute_input_to_nchw;
+ CLPermute _permute_weights_to_nchw;
+ CLPermute _permute_output_to_nhwc;
+ CLDepthwiseConvolutionLayerReshapeWeightsKernel _reshape_weights;
+ CLTensor _permuted_input;
+ CLTensor _permuted_weights;
+ CLTensor _permuted_output;
+ const ITensor *_original_weights;
+ bool _needs_permute;
+ bool _needs_weights_reshape;
+ bool _is_prepared;
+ };
+
+ /** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLDepthwiseConvolutionLayerNativeKernel
+ * -# @ref CLPermute (x 3) if the data layout is NCHW
+ *
+ */
+ class CLDepthwiseConvolutionLayerGeneric : public IFunction
+ {
+ public:
+ /** Default constructor */
+ CLDepthwiseConvolutionLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthwiseConvolutionLayerGeneric(const CLDepthwiseConvolutionLayerGeneric &) = delete;
+ /** Default move constructor */
+ CLDepthwiseConvolutionLayerGeneric(CLDepthwiseConvolutionLayerGeneric &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthwiseConvolutionLayerGeneric &operator=(const CLDepthwiseConvolutionLayerGeneric &) = delete;
+ /** Default move assignment operator */
+ CLDepthwiseConvolutionLayerGeneric &operator=(CLDepthwiseConvolutionLayerGeneric &&) = default;
+ /** Initialize the function's source, destination, weights and convolution information.
+ *
+ * @param[in, out] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling).
+ * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8.
+ * @param[out] output Destination tensor. Data type supported: same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ */
+ void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerGeneric
+ *
+ * @param[in] input Source tensor info. Data type supported: QASYMM8/F32.
+ * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8.
+ * @param[in] output Destination tensor. Data type supported: same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+
+ // Inherited methods overriden:
+ void run() override;
+ void prepare() override;
+
+ void set_memory_group(std::shared_ptr<IMemoryManager> memory_manager)
+ {
+ _memory_group = MemoryGroup(std::move(memory_manager));
+ };
+
+ private:
+ MemoryGroup _memory_group;
+
+ CLDepthwiseConvolutionLayerNativeKernel _dwc_native_kernel;
+ CLPermute _permute_input_to_nhwc;
+ CLPermute _permute_weights_to_nhwc;
+ CLPermute _permute_output_to_nchw;
+
+ CLTensor _permuted_input;
+ CLTensor _permuted_weights;
+ CLTensor _permuted_output;
+ const ITensor *_original_weights;
+
+ bool _needs_permute;
+ bool _is_prepared;
+ };
+
+ std::shared_ptr<IMemoryManager> _memory_manager;
+
+ DepthwiseConvolutionFunction _depth_conv_func;
+ CLDepthwiseConvolutionLayerInternal3x3 _func_3x3;
+ CLDepthwiseConvolutionLayerGeneric _func_generic;
};
-/** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels:
+/** Basic function to execute a depthwise convolution for kernel size 3x3xC (when data layout NCHW) or Cx3x3 (when data layout NHWC). This function calls the following OpenCL kernels:
*
- * -# @ref CLDepthwiseConvolutionLayerNativeKernel
- * -# @ref CLPermute (x 3) if the data layout is NCHW
+ * -# @ref CLDepthwiseConvolutionLayer3x3NCHWKernel (if data_layout == NCHW)
+ * -# @ref CLDepthwiseConvolutionLayer3x3NHWCKernel (if data_layout == NHWC)
+ * -# @ref CLDepthwiseConvolutionLayerReshapeWeightsKernel (if data_layout == NHWC)
+ * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)
*
*/
-class CLDepthwiseConvolutionLayer : public IFunction
+class CLDepthwiseConvolutionLayer3x3 : public IFunction
{
public:
/** Default constructor */
- CLDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ CLDepthwiseConvolutionLayer3x3(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthwiseConvolutionLayer(const CLDepthwiseConvolutionLayer &) = delete;
+ CLDepthwiseConvolutionLayer3x3(const CLDepthwiseConvolutionLayer3x3 &) = delete;
/** Default move constructor */
- CLDepthwiseConvolutionLayer(CLDepthwiseConvolutionLayer &&) = default;
+ CLDepthwiseConvolutionLayer3x3(CLDepthwiseConvolutionLayer3x3 &&) = default;
/** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthwiseConvolutionLayer &operator=(const CLDepthwiseConvolutionLayer &) = delete;
+ CLDepthwiseConvolutionLayer3x3 &operator=(const CLDepthwiseConvolutionLayer3x3 &) = delete;
/** Default move assignment operator */
- CLDepthwiseConvolutionLayer &operator=(CLDepthwiseConvolutionLayer &&) = default;
- /** Initialize the function's source, destination, weights and convolution information.
+ CLDepthwiseConvolutionLayer3x3 &operator=(CLDepthwiseConvolutionLayer3x3 &&) = default;
+ /** Initialize the function's source, destination, conv and border_size.
*
- * @param[in, out] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling).
- * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
+ * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
+ * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input.
* @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8.
+ * Data type supported: Same as @p input.
* @param[out] output Destination tensor. Data type supported: same as @p input.
* @param[in] conv_info Padding and stride information to use for the convolution.
* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
*/
- void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+ ARM_COMPUTE_DEPRECATED_REL_REPLACE(20.02, CLDepthwiseConvolutionLayer)
+ void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
+ ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
- /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer
+ /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3
*
- * @param[in] input Source tensor info. Data type supported: QASYMM8/F32.
- * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
+ * @param[in] input Source tensor info. Data type supported: QASYMM8 for all layouts, F16/F32 for NCHW.
+ * @param[in] weights Weights tensor info. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input.
* @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
* Data type supported: Same as @p input, S32 when input is QASYMM8.
* @param[in] output Destination tensor. Data type supported: same as @p input.
* @param[in] conv_info Padding and stride information to use for the convolution.
* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
+ * @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
+ ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD, const Size2D &dilation = Size2D(1U, 1U));
// Inherited methods overriden:
void run() override;
void prepare() override;
private:
- MemoryGroup _memory_group;
-
- std::unique_ptr<IFunction> _optimised_function;
- CLDepthwiseConvolutionLayerNativeKernel _dwc_native_kernel;
- CLPermute _permute_input_to_nhwc;
- CLPermute _permute_weights_to_nhwc;
- CLPermute _permute_output_to_nchw;
-
- CLTensor _permuted_input;
- CLTensor _permuted_weights;
- CLTensor _permuted_output;
- const ITensor *_original_weights;
-
- bool _needs_permute;
- bool _is_prepared;
+ CLDepthwiseConvolutionLayer _func;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index ea3ef9bf38..8fe9644963 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -37,48 +37,43 @@ namespace arm_compute
// Forward declarations
class ITensor;
-/** Basic function to execute a depthwise convolution for kernel size 3x3xC. This function calls the following NEON kernels:
- *
- * -# @ref NEDepthwiseConvolutionLayer3x3
- * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0)
- *
+/** Function to execute a depthwise convolution.
*/
-class NEDepthwiseConvolutionLayer3x3 : public IFunction
+class NEDepthwiseConvolutionLayer : public IFunction
{
public:
/** Default constructor */
- NEDepthwiseConvolutionLayer3x3(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ NEDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionLayer3x3(const NEDepthwiseConvolutionLayer3x3 &) = delete;
+ NEDepthwiseConvolutionLayer(const NEDepthwiseConvolutionLayer &) = delete;
/** Default move constructor */
- NEDepthwiseConvolutionLayer3x3(NEDepthwiseConvolutionLayer3x3 &&) = default;
+ NEDepthwiseConvolutionLayer(NEDepthwiseConvolutionLayer &&) = default;
/** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionLayer3x3 &operator=(const NEDepthwiseConvolutionLayer3x3 &) = delete;
+ NEDepthwiseConvolutionLayer &operator=(const NEDepthwiseConvolutionLayer &) = delete;
/** Default move assignment operator */
- NEDepthwiseConvolutionLayer3x3 &operator=(NEDepthwiseConvolutionLayer3x3 &&) = default;
- /** Initialize the function's source, destination, kernels and border_size.
+ NEDepthwiseConvolutionLayer &operator=(NEDepthwiseConvolutionLayer &&) = default;
+ /** Initialize the function's source, destination, weights and convolution information.
*
- * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
- * @param[in] weights Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input.
+ * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32
+ * @param[out] output Destination tensor. Data type supported: same as @p input.
+ * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
* @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
* Data type supported: Same as @p input.
- * @param[out] output Destination tensor. Data type supported: same as @p input.
* @param[in] conv_info Padding and stride information to use for the convolution.
* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
*/
- ARM_COMPUTE_DEPRECATED_REL_REPLACE(19.08, NEDepthwiseConvolutionLayerOptimized)
void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
- /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer3x3
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer
*
- * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
- * @param[in] weights Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input.
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32
+ * @param[in] output Destination tensor. Data type supported: same as @p input.
+ * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
* @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
* Data type supported: Same as @p input.
- * @param[in] output Destination tensor. Data type supported: same as @p input.
* @param[in] conv_info Padding and stride information to use for the convolution.
* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
@@ -94,61 +89,214 @@ public:
void prepare() override;
private:
- /** Configure the kernels/functions for the generic pipeline.
+ /** Static function to choose the best depthwise convolution function for @ref NEDepthwiseConvolutionLayer
*
- * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
- * @param[in] weights Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input.
- * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input.
- * @param[out] output Destination tensor. Data type supported: same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info Activation layer information in case of a fused activation.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32
+ * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input.
+ * @param[in] output Destination tensor. Data type supported: same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
*
+ * @return a Depthwise Convolution Function
*/
- void configure_generic(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U));
- /** Configure the kernels/functions for the optimized pipeline.
+ static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
+ const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
+ ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+
+ /** Basic function to execute optimized depthwise convolution routines. This function calls the following NEON kernels:
+ *
+ * @note At the moment 3x3 and 5x5 convolution of stride 1, 2 are supported
+ *
+ * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) and no assembly kernel implementation is present
+ * -# @ref NEDepthwiseConvolutionLayer3x3Kernel if 3x3 and no assembly kernel implementation is present
+ * -# @ref NEDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present
+ * -# @ref NEDirectConvolutionLayerOutputStageKernel if re-quantization of output is required
+ * -# @ref NEActivationLayer if fused activation is required
+ *
+ */
+ class NEDepthwiseConvolutionLayerOptimizedInternal : public IFunction
+ {
+ public:
+ /** Default constructor */
+ NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDepthwiseConvolutionLayerOptimizedInternal(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete;
+ /** Default move constructor */
+ NEDepthwiseConvolutionLayerOptimizedInternal(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDepthwiseConvolutionLayerOptimizedInternal &operator=(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete;
+ /** Default move assignment operator */
+ NEDepthwiseConvolutionLayerOptimizedInternal &operator=(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default;
+ /** Initialize the function's source, destination, kernels and border_size.
+ *
+ * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
+ * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input.
+ * @param[out] output Destination tensor. Data type supported: same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ */
+ void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer3x3
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
+ * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input.
+ * @param[in] output Destination tensor. Data type supported: same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+
+ // Inherited methods overriden:
+ void run() override;
+ void prepare() override;
+
+ private:
+ /** Configure the kernels/functions for the generic pipeline.
+ *
+ * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
+ * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input.
+ * @param[out] output Destination tensor. Data type supported: same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info Activation layer information in case of a fused activation.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ *
+ */
+ void configure_generic(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U));
+ /** Configure the kernels/functions for the optimized pipeline.
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
+ * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input.
+ * @param[out] output Destination tensor. Data type supported: same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info Activation layer information in case of a fused activation.
+ */
+ void configure_optimized(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U));
+ /** Run generic kernel */
+ void run_generic();
+ /** Run optimized function */
+ void run_optimized();
+
+ MemoryGroup _memory_group;
+ NEDepthwiseConvolutionLayer3x3Kernel _dwc_kernel;
+ NEDepthwiseConvolutionAssemblyDispatch _dwc_optimized_func;
+ NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
+ NEFillBorderKernel _border_handler;
+ NEPermute _permute_input;
+ NEPermute _permute_weights;
+ NEPermute _permute_output;
+ NEActivationLayer _activationlayer_function;
+ Tensor _accumulator;
+ Tensor _permuted_input;
+ Tensor _permuted_weights;
+ Tensor _permuted_output;
+ const ITensor *_original_weights;
+ bool _has_bias;
+ bool _is_quantized;
+ bool _is_optimized;
+ bool _is_nchw;
+ bool _permute;
+ bool _is_activationlayer_enabled;
+ bool _is_prepared;
+ };
+
+ /** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernel:
+ *
+ * -# @ref NEDepthwiseConvolutionLayerNativeKernel
*
- * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
- * @param[in] weights Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input.
- * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input.
- * @param[out] output Destination tensor. Data type supported: same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info Activation layer information in case of a fused activation.
*/
- void configure_optimized(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier, const ActivationLayerInfo &act_info);
- /** Run generic kernel */
- void run_generic();
- /** Run optimized function */
- void run_optimized();
+ class NEDepthwiseConvolutionLayerGeneric : public IFunction
+ {
+ public:
+ /** Default constructor */
+ NEDepthwiseConvolutionLayerGeneric();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDepthwiseConvolutionLayerGeneric(const NEDepthwiseConvolutionLayerGeneric &) = delete;
+ /** Default move constructor */
+ NEDepthwiseConvolutionLayerGeneric(NEDepthwiseConvolutionLayerGeneric &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEDepthwiseConvolutionLayerGeneric &operator=(const NEDepthwiseConvolutionLayerGeneric &) = delete;
+ /** Default move assignment operator */
+ NEDepthwiseConvolutionLayerGeneric &operator=(NEDepthwiseConvolutionLayerGeneric &&) = default;
+ /** Initialize the function's source, destination, weights and convolution information.
+ *
+ * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
+ * @param[out] output Destination tensor. Data type supported: same as @p input.
+ * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ */
+ void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
-private:
- MemoryGroup _memory_group;
- NEDepthwiseConvolutionLayer3x3Kernel _dwc_kernel;
- NEDepthwiseConvolutionAssemblyDispatch _dwc_optimized_func;
- NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
- NEFillBorderKernel _border_handler;
- NEPermute _permute_input;
- NEPermute _permute_weights;
- NEPermute _permute_output;
- NEActivationLayer _activationlayer_function;
- Tensor _accumulator;
- Tensor _permuted_input;
- Tensor _permuted_weights;
- Tensor _permuted_output;
- const ITensor *_original_weights;
- bool _has_bias;
- bool _is_quantized;
- bool _is_optimized;
- bool _is_nchw;
- bool _permute;
- bool _is_activationlayer_enabled;
- bool _is_prepared;
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerGeneric
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
+ * @param[in] output Destination tensor. Data type supported: same as @p input.
+ * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+
+ // Inherited methods overriden:
+ void run() override;
+ void prepare() override;
+
+ private:
+ NEDepthwiseConvolutionLayerNativeKernel _depthwise_conv_kernel;
+ NEFillBorderKernel _fill_border;
+ NEPermute _permute_input;
+ NEPermute _permute_weights;
+ NEPermute _permute_output;
+ NEActivationLayer _activationlayer_function;
+ Tensor _permuted_input;
+ Tensor _permuted_weights;
+ Tensor _permuted_output;
+ bool _is_prepared;
+ bool _is_nchw;
+ bool _is_activationlayer_enabled;
+ const ITensor *_original_weights;
+ };
+
+ DepthwiseConvolutionFunction _depth_conv_func;
+ NEDepthwiseConvolutionLayerOptimizedInternal _func_optimized;
+ NEDepthwiseConvolutionLayerGeneric _func_generic;
};
/** Basic function to execute optimized depthwise convolution routines. This function calls the following NEON kernels:
@@ -187,10 +335,11 @@ public:
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
*/
+ ARM_COMPUTE_DEPRECATED_REL_REPLACE(20.02, NEDepthwiseConvolutionLayer)
void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
- /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer3x3
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerOptimized
*
* @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
* @param[in] weights Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
@@ -212,131 +361,7 @@ public:
void prepare() override;
private:
- /** Configure the kernels/functions for the generic pipeline.
- *
- * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
- * @param[in] weights Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
- * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input.
- * @param[out] output Destination tensor. Data type supported: same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info Activation layer information in case of a fused activation.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- */
- void configure_generic(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U));
- /** Configure the kernels/functions for the optimized pipeline.
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
- * @param[in] weights Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
- * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input.
- * @param[out] output Destination tensor. Data type supported: same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info Activation layer information in case of a fused activation.
- */
- void configure_optimized(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U));
- /** Run generic kernel */
- void run_generic();
- /** Run optimized function */
- void run_optimized();
-
-private:
- MemoryGroup _memory_group;
- NEDepthwiseConvolutionLayer3x3Kernel _dwc_kernel;
- NEDepthwiseConvolutionAssemblyDispatch _dwc_optimized_func;
- NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
- NEFillBorderKernel _border_handler;
- NEPermute _permute_input;
- NEPermute _permute_weights;
- NEPermute _permute_output;
- NEActivationLayer _activationlayer_function;
- Tensor _accumulator;
- Tensor _permuted_input;
- Tensor _permuted_weights;
- Tensor _permuted_output;
- const ITensor *_original_weights;
- bool _has_bias;
- bool _is_quantized;
- bool _is_optimized;
- bool _is_nchw;
- bool _permute;
- bool _is_activationlayer_enabled;
- bool _is_prepared;
-};
-
-/** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernel:
- *
- * -# @ref NEDepthwiseConvolutionLayerNativeKernel
- *
- */
-class NEDepthwiseConvolutionLayer : public IFunction
-{
-public:
- /** Default constructor */
- NEDepthwiseConvolutionLayer();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionLayer(const NEDepthwiseConvolutionLayer &) = delete;
- /** Default move constructor */
- NEDepthwiseConvolutionLayer(NEDepthwiseConvolutionLayer &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionLayer &operator=(const NEDepthwiseConvolutionLayer &) = delete;
- /** Default move assignment operator */
- NEDepthwiseConvolutionLayer &operator=(NEDepthwiseConvolutionLayer &&) = default;
- /** Initialize the function's source, destination, weights and convolution information.
- *
- * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
- * @param[out] output Destination tensor. Data type supported: same as @p input.
- * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
- * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- */
- void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayer
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
- * @param[in] output Destination tensor. Data type supported: same as @p input.
- * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
- * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
-
- // Inherited methods overriden:
- void run() override;
- void prepare() override;
-
-private:
- NEDepthwiseConvolutionLayerNativeKernel _depthwise_conv_kernel;
- NEFillBorderKernel _fill_border;
- NEPermute _permute_input;
- NEPermute _permute_weights;
- NEPermute _permute_output;
- NEActivationLayer _activationlayer_function;
- Tensor _permuted_input;
- Tensor _permuted_weights;
- Tensor _permuted_output;
- bool _is_prepared;
- bool _is_nchw;
- bool _is_activationlayer_enabled;
- const ITensor *_original_weights;
+ NEDepthwiseConvolutionLayer _func;
};
} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__ */ \ No newline at end of file