diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-06-24 14:56:34 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-07-09 09:31:37 +0000 |
commit | 30271c779c36a2abe6995c4454674d92bbc1f91f (patch) | |
tree | 531257ff87cf2cb8d6f3b8da0abe3e6cb77a2a0e /arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h | |
parent | 30dbeef2f46bdd6fe05d25dfa27cb4b2359dced3 (diff) | |
download | ComputeLibrary-30271c779c36a2abe6995c4454674d92bbc1f91f.tar.gz |
COMPMID-2156: Optimized dilated convolution for NEON.
Change-Id: I3a8abe8cc9637c8983d9bd69dcbaee1a15eac8d0
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1492
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
Diffstat (limited to 'arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h')
-rw-r--r-- | arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h | 39 |
1 file changed, 22 insertions, 17 deletions
diff --git a/arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h b/arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h index 7d2cff7315..b88e750fa9 100644 --- a/arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h +++ b/arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h @@ -30,9 +30,6 @@ #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/Tensor.h" -#include "arm_compute/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h" -#include "arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp" - namespace arm_compute { /** Depthwise convolution assembly kernel glue */ @@ -52,38 +49,44 @@ public: NEDepthwiseConvolutionAssemblyDispatch &operator=(const NEDepthwiseConvolutionAssemblyDispatch &) = delete; /** Default move assignment operator */ NEDepthwiseConvolutionAssemblyDispatch &operator=(NEDepthwiseConvolutionAssemblyDispatch &&) = default; + /** Default destructor */ + ~NEDepthwiseConvolutionAssemblyDispatch(); /** Initialize the function's source, destination, kernels and border_size. * * @note Supports only NHWC format * * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling). - * @param[in] weights Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input. + * @param[in] weights Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input. * @param[in] bias (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. * Data type supported: Same as @p input. * @param[out] output Destination tensor. Data type supported: same as @p input. * @param[in] conv_info Padding and stride information to use for the convolution. 
* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). */ void configure(const ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, - const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), + const Size2D &dilation = Size2D(1, 1)); /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionAssemblyDispatch * * @note Supports only NHWC format * * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling). - * @param[in] weights Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input. + * @param[in] weights Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input. * @param[in] bias (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. * Data type supported: Same as @p input. * @param[out] output Destination tensor. Data type supported: same as @p input. * @param[in] conv_info Padding and stride information to use for the convolution. * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
* * @return An error status */ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, - const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo()); + const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), + const Size2D &dilation = Size2D(1, 1)); /** Check if the optimized kernel can be used for the given kernel sizes and strides * * @warning Even if this return true the inputs and outputs might need to get permuted as the only layout supported is NHWC @@ -103,16 +106,18 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - const ITensor *_input; - const ITensor *_weights; - const ITensor *_bias; - ITensor *_output; - Tensor _packed_weights; - Tensor _workspace; - bool _is_prepared; - std::unique_ptr<depthwise::IDepthwiseConvolution> _dwc_assembly_kernel; - NEDepthwiseConvolutionAssemblyKernelWrapper _dwc_acl_kernel; + struct LocalImpl; + +private: + MemoryGroup _memory_group; + const ITensor *_input; + const ITensor *_weights; + const ITensor *_bias; + ITensor *_output; + Tensor _packed_weights; + Tensor _workspace; + bool _is_prepared; + std::unique_ptr<LocalImpl> _pImpl; }; } // namespace arm_compute #endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H__ */ |