aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2020-09-04 15:25:24 +0100
committerMichele Di Giorgio <michele.digiorgio@arm.com>2020-09-08 13:58:36 +0000
commit7d0adc602b3a3ff66184632fd388b25384a9bc99 (patch)
treecc314f8902eef0d037fd9353c3e01b2feca526ff /arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
parente8f05da5fb919aa209e1bf0e5c70dd15fff84b7f (diff)
downloadComputeLibrary-7d0adc602b3a3ff66184632fd388b25384a9bc99.tar.gz
COMPMID-3151: Remove NEDepthwiseConvolutionLayer3x3Kernel
Prefer NEDepthwiseConvolutionLayerNativeKernel as it has a native format of NHWC avoiding extra transformation to the NCHW domain. Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Change-Id: If5d8de11691b8ef7f4c3816941f87417d0c8646b Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3930 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h')
-rw-r--r--arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h36
1 files changed, 0 insertions, 36 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index 116ac16ce7..3b75bb11a7 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -24,7 +24,6 @@
#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H
#define ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H
-#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
@@ -171,41 +170,7 @@ private:
void prepare() override;
private:
- /** Configure the kernels/functions for the generic pipeline.
- *
- * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
- * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
- * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Data type supported: same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info Activation layer information in case of a fused activation.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- */
- void configure_generic(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U));
- /** Configure the kernels/functions for the optimized pipeline.
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
- * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
- * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Data type supported: same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info Activation layer information in case of a fused activation.
- */
- void configure_optimized(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation = Size2D(1U, 1U));
- /** Run generic kernel */
- void run_generic();
- /** Run optimized function */
- void run_optimized();
-
MemoryGroup _memory_group;
- NEDepthwiseConvolutionLayer3x3Kernel _dwc_kernel;
NEDepthwiseConvolutionAssemblyDispatch _dwc_optimized_func;
NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
NEFillBorderKernel _border_handler;
@@ -220,7 +185,6 @@ private:
const ITensor *_original_weights;
bool _has_bias;
bool _is_quantized;
- bool _is_optimized;
bool _is_nchw;
bool _permute;
bool _is_activationlayer_enabled;