author    Michalis Spyrou <michalis.spyrou@arm.com>    2021-04-08 12:02:58 +0100
committer Michalis Spyrou <michalis.spyrou@arm.com>    2021-04-19 13:45:08 +0000
commit    60c3b0e6821a80d78ffca5be30e05d062d071cd2 (patch)
tree      3e263a45aa9617cfd7704b2b33ea4337f1582321
parent    4f1650f0c9919f0bac5024b8e31c0f754d25aec3 (diff)
download  ComputeLibrary-60c3b0e6821a80d78ffca5be30e05d062d071cd2.tar.gz
Port DepthwiseConvolution to new API
Resolves: COMPMID-4185
Change-Id: Ib5f22356356a022d567bb18d44ea272b62d10ebf
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5424
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- | Android.bp | 5
-rw-r--r-- | arm_compute/core/Types.h | 13
-rw-r--r-- | arm_compute/core/utils/misc/ShapeCalculator.h | 15
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h | 64
-rw-r--r-- | arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h | 123
-rw-r--r-- | docs/00_introduction.dox | 6
-rw-r--r-- | docs/06_functions_list.dox | 1
-rwxr-xr-x | scripts/clang_tidy_rules.py | 9
-rw-r--r-- | src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp | 9
-rw-r--r-- | src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp | 5
-rw-r--r-- | src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp | 8
-rw-r--r-- | src/core/NEON/NEKernels.h | 1
-rw-r--r-- | src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h | 131
-rw-r--r-- | src/core/cpu/kernels/CpuActivationKernel.cpp | 2
-rw-r--r-- | src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.cpp (renamed from src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp) | 139
-rw-r--r-- | src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h | 117
-rw-r--r-- | src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp | 14
-rw-r--r-- | src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp | 409
-rw-r--r-- | src/runtime/cpu/operators/CpuDepthwiseConvolution.cpp | 521
-rw-r--r-- | src/runtime/cpu/operators/CpuDepthwiseConvolution.h | 230
-rw-r--r-- | src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.cpp (renamed from src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp) | 265
-rw-r--r-- | src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.h | 97
-rw-r--r-- | tests/NEON/Helper.h | 15
-rw-r--r-- | tests/validation/NEON/DepthwiseConvolutionLayerNative.cpp | 17
-rw-r--r-- | tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h | 33
25 files changed, 1452 insertions(+), 797 deletions(-)
diff --git a/Android.bp b/Android.bp
index 3736ef754f..d968f029a2 100644
--- a/Android.bp
+++ b/Android.bp
@@ -176,7 +176,6 @@ cc_library_static {
"src/core/NEON/kernels/NECropKernel.cpp",
"src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp",
"src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp",
- "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp",
"src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp",
"src/core/NEON/kernels/NEFFTRadixStageKernel.cpp",
"src/core/NEON/kernels/NEFFTScaleKernel.cpp",
@@ -299,6 +298,7 @@ cc_library_static {
"src/core/cpu/kernels/CpuConcatenateHeightKernel.cpp",
"src/core/cpu/kernels/CpuConcatenateWidthKernel.cpp",
"src/core/cpu/kernels/CpuCopyKernel.cpp",
+ "src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.cpp",
"src/core/cpu/kernels/CpuDequantizationKernel.cpp",
"src/core/cpu/kernels/CpuDirectConvolutionKernel.cpp",
"src/core/cpu/kernels/CpuDirectConvolutionStageKernel.cpp",
@@ -612,7 +612,6 @@ cc_library_static {
"src/runtime/NEON/functions/NETranspose.cpp",
"src/runtime/NEON/functions/NEUnstack.cpp",
"src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp",
- "src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp",
"src/runtime/OMP/OMPScheduler.cpp",
"src/runtime/OffsetLifetimeManager.cpp",
"src/runtime/OffsetMemoryPool.cpp",
@@ -630,6 +629,8 @@ cc_library_static {
"src/runtime/cpu/operators/CpuAdd.cpp",
"src/runtime/cpu/operators/CpuConcatenate.cpp",
"src/runtime/cpu/operators/CpuCopy.cpp",
+ "src/runtime/cpu/operators/CpuDepthwiseConvolution.cpp",
+ "src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.cpp",
"src/runtime/cpu/operators/CpuDequantization.cpp",
"src/runtime/cpu/operators/CpuDirectConvolution.cpp",
"src/runtime/cpu/operators/CpuElementwise.cpp",
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 53333ff608..b1f340d18e 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -1861,6 +1861,19 @@ private:
bool _broadcast_bias;
};
+struct ConvolutionInfo
+{
+ ConvolutionInfo() = default;
+ ConvolutionInfo(const PadStrideInfo &pad_stride_info, unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
+ : pad_stride_info(pad_stride_info), depth_multiplier(depth_multiplier), act_info(act_info), dilation(dilation)
+ {
+ }
+ PadStrideInfo pad_stride_info{}; /**< Convolution info (Pads, strides,...) */
+ unsigned int depth_multiplier{ 1 }; /**< Multiplier to apply to input's depth to retrieve the output depth. Defaults to 1 */
+ ActivationLayerInfo act_info{}; /**< Fused activation to apply after convolution. */
+ Size2D dilation{ Size2D(1, 1) }; /**< Dilation, in elements, across x and y. Defaults to (1, 1). */
+};
+
struct DepthwiseConvolutionReshapeInfo
{
unsigned int c0{ 1 }; /**< Number of channels processed by the depth-wise convolution */
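The new ConvolutionInfo struct bundles the four parameters that previously travelled separately through every depthwise entry point. For reference, a hedged sketch of how a call site would populate the aggregate; the concrete values are illustrative, not taken from this patch:

    // Hypothetical call site: stride 2 with 1-pixel padding, depth multiplier 2,
    // fused ReLU, no dilation.
    const ConvolutionInfo info{ PadStrideInfo(2, 2, 1, 1),
                                2,
                                ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
                                Size2D(1, 1) };
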
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index 56038dd853..ba37f9a61e 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -435,16 +435,13 @@ inline TensorShape compute_transposed_shape(const ITensorInfo &input)
/** Calculate the depthwise convolution output shape of a tensor
*
- * @param[in] input Input tensor info
- * @param[in] weights Weights tensor info
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth.
- * @param[in] dilation Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] input Input tensor info
+ * @param[in] weights Weights tensor info
+ * @param[in] info Convolution info
*
* @return the calculated shape
*/
-inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info, unsigned int depth_multiplier, const Size2D &dilation = Size2D(1U,
- 1U))
+inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const ConvolutionInfo &info)
{
const TensorShape input_shape{ input.tensor_shape() };
const TensorShape weights_shape{ weights.tensor_shape() };
@@ -462,12 +459,12 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input,
unsigned int output_height = 0;
std::tie(output_width, output_height) = scaled_dimensions(input_shape[width_idx], input_shape[height_idx],
weights_shape[weights_width_idx], weights_shape[weights_height_idx],
- conv_info, dilation);
+ info.pad_stride_info, info.dilation);
TensorShape output_shape{ input_shape };
output_shape.set(width_idx, output_width);
output_shape.set(height_idx, output_height);
- output_shape.set(channel_idx, input_shape[channel_idx] * depth_multiplier);
+ output_shape.set(channel_idx, input_shape[channel_idx] * info.depth_multiplier);
return output_shape;
}
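With the new signature, shape inference reads straight off the aggregated metadata. A minimal sketch, assuming NHWC F32 tensors with made-up shapes (in NHWC, index 0 is the channel dimension):

    TensorInfo src(TensorShape(16U, 10U, 10U), 1, DataType::F32);   // [C, W, H]
    src.set_data_layout(DataLayout::NHWC);
    TensorInfo weights(TensorShape(32U, 3U, 3U), 1, DataType::F32); // [C * multiplier, Kw, Kh]
    weights.set_data_layout(DataLayout::NHWC);
    const ConvolutionInfo info{ PadStrideInfo(1, 1, 1, 1), 2, ActivationLayerInfo(), Size2D(1, 1) };
    // 3x3 kernel, stride 1, pad 1 keeps the spatial size; channels scale by depth_multiplier.
    const TensorShape out = misc::shape_calculator::compute_depthwise_convolution_shape(src, weights, info);
    // out == [32, 10, 10]
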
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index 9aa8f04eb8..c74b2a93ee 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -24,9 +24,10 @@
#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H
#define ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H
+#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPermute.h"
-#include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h"
#include <memory>
namespace arm_compute
@@ -91,25 +92,6 @@ public:
void prepare() override;
private:
- /** Static function to choose the best depthwise convolution function for @ref NEDepthwiseConvolutionLayer
- *
- * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] output Destination tensor. Data type supported: same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 quantized are supported.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- * @return a Depthwise Convolution Function
- */
- static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
- const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
- ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
-
/** Basic function to execute optimized depthwise convolution routines. This function calls the following kernels:
*
* @note At the moment 3x3 and 5x5 convolution of stride 1, 2 are supported
@@ -173,23 +155,9 @@ private:
void prepare() override;
private:
- MemoryGroup _memory_group;
- NEDepthwiseConvolutionAssemblyDispatch _dwc_optimized_func;
- NEPermute _permute_input;
- NEPermute _permute_weights;
- NEPermute _permute_output;
- NEActivationLayer _activationlayer_function;
- Tensor _accumulator;
- Tensor _permuted_input;
- Tensor _permuted_weights;
- Tensor _permuted_output;
- const ITensor *_original_weights;
- bool _has_bias;
- bool _is_quantized;
- bool _is_nchw;
- bool _permute;
- bool _is_activationlayer_enabled;
- bool _is_prepared;
+ MemoryGroup _memory_group;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
/** Basic function to execute a generic depthwise convolution. This function calls the following kernel:
@@ -248,26 +216,14 @@ private:
// Inherited methods overridden:
void run() override;
- void prepare() override;
private:
- std::unique_ptr<NEDepthwiseConvolutionLayerNativeKernel> _depthwise_conv_kernel;
- NEPermute _permute_input;
- NEPermute _permute_weights;
- NEPermute _permute_output;
- NEActivationLayer _activationlayer_function;
- Tensor _permuted_input;
- Tensor _permuted_weights;
- Tensor _permuted_output;
- bool _is_prepared;
- bool _is_nchw;
- bool _is_activationlayer_enabled;
- const ITensor *_original_weights;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-
- DepthwiseConvolutionFunction _depth_conv_func;
- NEDepthwiseConvolutionLayerOptimizedInternal _func_optimized;
- NEDepthwiseConvolutionLayerGeneric _func_generic;
+ MemoryGroup _memory_group;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H */
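The header change above replaces the long member lists with the pimpl idiom: all state moves into a struct Impl that is only defined in the .cpp file, so the public header no longer needs the operator and kernel includes it used to drag in. A generic, self-contained sketch of the pattern as applied here (names are illustrative):

    #include <memory>

    // Header side: only an opaque forward declaration leaks out.
    class Function
    {
    public:
        Function();
        ~Function(); // defaulted in the .cpp, where Impl is a complete type
        void run();
    private:
        struct Impl;
        std::unique_ptr<Impl> _impl;
    };

    // Source side: the full state (operator, tensors, flags) lives here.
    struct Function::Impl
    {
        int state{ 0 }; // stands in for the operator and workspace members
    };
    Function::Function() : _impl(std::make_unique<Impl>()) {}
    Function::~Function() = default;
    void Function::run() { ++_impl->state; }

Defining the destructor out of line is what makes std::unique_ptr<Impl> legal with an incomplete type, which is why the .cpp below adds NEDepthwiseConvolutionLayer::~NEDepthwiseConvolutionLayer() = default.
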
diff --git a/arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h b/arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h
deleted file mode 100644
index 7f63717b02..0000000000
--- a/arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H
-#define ARM_COMPUTE_NEDEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/Tensor.h"
-
-namespace arm_compute
-{
-/** Depthwise convolution assembly kernel glue */
-class NEDepthwiseConvolutionAssemblyDispatch : public IFunction
-{
-public:
- /** Default constructor
- *
- * @param[in,out] memory_manager Memory manager to use
- */
- NEDepthwiseConvolutionAssemblyDispatch(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionAssemblyDispatch(const NEDepthwiseConvolutionAssemblyDispatch &) = delete;
- /** Default move constructor */
- NEDepthwiseConvolutionAssemblyDispatch(NEDepthwiseConvolutionAssemblyDispatch &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionAssemblyDispatch &operator=(const NEDepthwiseConvolutionAssemblyDispatch &) = delete;
- /** Default move assignment operator */
- NEDepthwiseConvolutionAssemblyDispatch &operator=(NEDepthwiseConvolutionAssemblyDispatch &&) = default;
- /** Default destructor */
- ~NEDepthwiseConvolutionAssemblyDispatch();
- /** Initialize the function's source, destination, kernels and border_size.
- *
- * @note Supports only NHWC format
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
- * @param[in] weights Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
- * @param[in] bias (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input.
- * @param[out] output Destination tensor. Data type supported: same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- */
- void configure(const ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output,
- const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(),
- const Size2D &dilation = Size2D(1, 1));
- /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionAssemblyDispatch
- *
- * @note Supports only NHWC format
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
- * @param[in] weights Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
- * @param[in] bias (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input.
- * @param[out] output Destination tensor. Data type supported: same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- * @return An error status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output,
- const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(),
- const Size2D &dilation = Size2D(1, 1));
- /** Check if the optimized kernel can be used for the given kernel sizes and strides
- *
- * @warning Even if this return true the inputs and outputs might need to get permuted as the only layout supported is NHWC
- *
- * @param[in] input Input tensor info.
- * @param[in] weights Weights tensor info.
- * @param[in] conv_info Convolution layer metadata.
- * @param[in] depth_multiplier (Optional) Depth multiplier to be used.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- * @return True if the assembly kernel could be used else false. Note that transformations of input/output could be needed.
- */
- static bool is_optimized_supported(const ITensorInfo *input, const ITensorInfo *weights, PadStrideInfo conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1, 1));
-
- // Inherited methods overridden:
- void run() override;
- void prepare() override;
-
-private:
- struct LocalImpl;
-
-private:
- MemoryGroup _memory_group;
- const ITensor *_input;
- const ITensor *_weights;
- const ITensor *_bias;
- ITensor *_output;
- Tensor _packed_weights;
- Tensor _workspace;
- bool _is_prepared;
- std::unique_ptr<LocalImpl> _pImpl;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H */
diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox
index efc2963f6e..cfb27ff8b2 100644
--- a/docs/00_introduction.dox
+++ b/docs/00_introduction.dox
@@ -266,7 +266,7 @@ v20.11 Public major release
- @ref NEGEMMTranspose1xWKernel
- NEPoolingLayerKernel
- NEConvolutionKernel
- - @ref NEDepthwiseConvolutionLayerNativeKernel
+ - NEDepthwiseConvolutionLayerNativeKernel
- @ref NEGEMMLowpMatrixMultiplyKernel
- @ref NEGEMMMatrixMultiplyKernel
- NEDirectConvolutionLayerOutputStageKernel
@@ -818,7 +818,7 @@ v19.08 Public major release
- @ref NESinLayer
- NEBatchConcatenateLayerKernel
- @ref NEDepthToSpaceLayerKernel / @ref NEDepthToSpaceLayer
- - @ref NEDepthwiseConvolutionLayerNativeKernel
+ - NEDepthwiseConvolutionLayerNativeKernel
- @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
- @ref NEMeanStdDevNormalizationKernel / @ref NEMeanStdDevNormalizationLayer
- @ref NESpaceToDepthLayerKernel / @ref NESpaceToDepthLayer
@@ -863,7 +863,7 @@ v19.05 Public major release
- @ref NEBatchToSpaceLayerKernel / @ref NEBatchToSpaceLayer
- NEComplexPixelWiseMultiplicationKernel / @ref NEComplexPixelWiseMultiplication
- @ref NECropKernel / @ref NECropResize
- - @ref NEDepthwiseConvolutionAssemblyDispatch
+ - NEDepthwiseConvolutionAssemblyDispatch
- @ref NEFFTDigitReverseKernel
- @ref NEFFTRadixStageKernel
- @ref NEFFTScaleKernel
diff --git a/docs/06_functions_list.dox b/docs/06_functions_list.dox
index de43a0cee7..bd044203f2 100644
--- a/docs/06_functions_list.dox
+++ b/docs/06_functions_list.dox
@@ -88,7 +88,6 @@ namespace arm_compute
- @ref NEConvolutionLayerReshapeWeights
- @ref NECropResize
- @ref NEDeconvolutionLayer
- - @ref NEDepthwiseConvolutionAssemblyDispatch
- @ref NEDepthwiseConvolutionLayer
- @ref NEDequantizationLayer
- @ref NEDetectionPostProcessLayer
diff --git a/scripts/clang_tidy_rules.py b/scripts/clang_tidy_rules.py
index fd6fd9412e..c38e3ddde4 100755
--- a/scripts/clang_tidy_rules.py
+++ b/scripts/clang_tidy_rules.py
@@ -111,14 +111,15 @@ def filter_clang_tidy_lines( lines ):
("NEWinogradLayerKernel.cpp" in line and "use '= default' to define a trivial destructor" in line) or
("NEGEMMLowpMatrixMultiplyCore.cpp" in line and "constructor does not initialize these fields" in line) or
("NEGEMMLowpAssemblyMatrixMultiplyCore" in line and "constructor does not initialize these fields" in line) or
- ("NEDepthwiseConvolutionLayerNativeKernel" in line and re.search(r"parameter '[^']+' is unused", line)) or
- ("NEDepthwiseConvolutionAssemblyDispatch" in line and re.search(r"parameter '[^']+' is unused", line)) or
+ ("CpuDepthwiseConvolutionNativeKernel" in line and re.search(r"parameter '[^']+' is unused", line)) or
+ ("CpuDepthwiseConvolutionAssemblyDispatch" in line and re.search(r"parameter '[^']+' is unused", line)) or
+ ("CpuDepthwiseConvolutionAssemblyDispatch" in line and "modernize-use-equals-default" in line) or
("CPUUtils.cpp" in line and "consider replacing 'unsigned long' with 'uint64'" in line) or
("CPUUtils.cpp" in line and "parameter 'cpusv' is unused" in line) or
("CPUUtils.cpp" in line and "warning: uninitialized record type" in line) or
("Utils.h" in line and "warning: Use of zero-allocated memory" in line) or
- ("NEDepthwiseConvolutionLayerNativeKernel.cpp" in line and "misc-non-private-member-variables-in-classes" in line) or # This is to prevent false positive, should be reassessed with the newer clang-tidy
- ("NEDepthwiseConvolutionLayerNativeKernel.cpp" in line and "cppcoreguidelines-pro-type-member-init" in line)): # This is to prevent false positive, should be reassessed with the newer clang-tidy
+ ("CpuDepthwiseConvolutionNativeKernel.cpp" in line and "misc-non-private-member-variables-in-classes" in line) or # This is to prevent false positive, should be reassessed with the newer clang-tidy
+ ("CpuDepthwiseConvolutionNativeKernel.cpp" in line and "cppcoreguidelines-pro-type-member-init" in line)): # This is to prevent false positive, should be reassessed with the newer clang-tidy
print_context=False
continue
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
index e8daa56f36..287a965f5b 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
@@ -105,7 +105,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
if(output->total_size() != 0)
{
- const TensorShape output_shape = compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
+ const ConvolutionInfo info{ conv_info, depth_multiplier, ActivationLayerInfo(), dilation };
+ const TensorShape output_shape = compute_depthwise_convolution_shape(*input, *weights, info);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
}
@@ -116,7 +117,11 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
unsigned int depth_multiplier, GPUTarget gpu_target, std::string &kernel_name, const Size2D dilation)
{
// Output auto initialization if not yet initialized
- const TensorShape output_shape = compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
+ const ConvolutionInfo info
+ {
+ conv_info, depth_multiplier, ActivationLayerInfo(), dilation
+ };
+ const TensorShape output_shape = compute_depthwise_convolution_shape(*input, *weights, info);
auto_init_if_empty(*output, input->clone()->set_tensor_shape(output_shape).set_quantization_info(output->quantization_info()));
const unsigned int conv_stride_x = conv_info.stride().first;
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
index 6400ba53e2..f7603e6397 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
@@ -65,8 +65,9 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
const size_t weights_width = 3;
const size_t weights_height = 3;
- const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_depthwise_convolution_shape(
- *input, TensorInfo(TensorShape(weights_width, weights_height), 1, weights->data_type()).set_data_layout(DataLayout::NCHW), conv_info, depth_multiplier, dilation);
+ const ConvolutionInfo info{ conv_info, depth_multiplier, ActivationLayerInfo(), dilation };
+ const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_depthwise_convolution_shape(
+ *input, TensorInfo(TensorShape(weights_width, weights_height), 1, weights->data_type()).set_data_layout(DataLayout::NCHW), info);
if(is_qasymm)
{
DepthwiseConvolutionReshapeInfo info;
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
index c34018a000..fcfa7f878d 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -58,7 +58,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
ARM_COMPUTE_UNUSED(idx_c);
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_c) != (input->dimension(idx_c) * depth_multiplier));
- const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
+ const ConvolutionInfo info{ conv_info, depth_multiplier, ActivationLayerInfo(), dilation };
+ const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, info);
const bool is_quantized = is_data_type_quantized(input->data_type());
@@ -156,7 +157,8 @@ void CLDepthwiseConvolutionLayerNativeKernel::configure(const CLCompileContext &
auto padding_info = get_padding_info({ input, output });
- const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_depthwise_convolution_shape(*(input->info()), *(weights->info()), conv_info, depth_multiplier, dilation);
+ const ConvolutionInfo info{ conv_info, depth_multiplier, ActivationLayerInfo(), dilation };
+ const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_depthwise_convolution_shape(*(input->info()), *(weights->info()), info);
auto_init_if_empty(*(output->info()), input->info()->clone()->set_tensor_shape(output_shape).set_quantization_info(output->info()->quantization_info()));
_input = input;
diff --git a/src/core/NEON/NEKernels.h b/src/core/NEON/NEKernels.h
index 264f521be2..e982470402 100644
--- a/src/core/NEON/NEKernels.h
+++ b/src/core/NEON/NEKernels.h
@@ -38,7 +38,6 @@
#include "src/core/NEON/kernels/NECropKernel.h"
#include "src/core/NEON/kernels/NEDepthConvertLayerKernel.h"
#include "src/core/NEON/kernels/NEDepthToSpaceLayerKernel.h"
-#include "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h"
#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h"
#include "src/core/NEON/kernels/NEFFTScaleKernel.h"
diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h
deleted file mode 100644
index 713cdcd9d9..0000000000
--- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H
-#define ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H
-
-#include "arm_compute/core/utils/misc/Traits.h"
-#include "src/core/NEON/INEKernel.h"
-#include "support/Requires.h"
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#include <arm_neon.h>
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the kernel to run a depthwise convolution native on a tensor. */
-class NEDepthwiseConvolutionLayerNativeKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEDepthwiseConvolutionLayerNativeKernel";
- }
- /** Default constructor */
- NEDepthwiseConvolutionLayerNativeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionLayerNativeKernel(const NEDepthwiseConvolutionLayerNativeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionLayerNativeKernel &operator=(const NEDepthwiseConvolutionLayerNativeKernel &) = delete;
- /** Default Move Constructor. */
- NEDepthwiseConvolutionLayerNativeKernel(NEDepthwiseConvolutionLayerNativeKernel &&) = default;
- /** Default move assignment operator */
- NEDepthwiseConvolutionLayerNativeKernel &operator=(NEDepthwiseConvolutionLayerNativeKernel &&) = default;
- /** Default destructor */
- ~NEDepthwiseConvolutionLayerNativeKernel() = default;
- /** Initialize the function's source, destination and parameters.
- *
- * @note Supported data layouts: NHWC
- *
- * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [IFM, W, H].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- */
- void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
- const Size2D &dilation = Size2D(1U, 1U));
- /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerNativeKernel
- *
- * @note Supported data layouts: NHWC
- *
- * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor info. This is a 3D tensor with dimensions [IFM, W, H].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] output Destination tensor info. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
- const Size2D &dilation = Size2D(1U, 1U));
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- template <typename T>
- using FloatEnalber = typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, int>::type;
-
- template <typename T, typename TW, FloatEnalber<T> = 0>
- void run_depthwise(const Window &window, bool has_biases);
-
- template <typename T>
- using Quantized8bitEnalber = typename std::enable_if < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int >::type;
-
- template <typename T, typename TW, Quantized8bitEnalber<T> = 0>
- void run_depthwise(const Window &window, bool has_biases);
-
- /** Common signature for all the specialised depthwise convolution native functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using DepthwiseFunctionPtr = void (NEDepthwiseConvolutionLayerNativeKernel::*)(const Window &window, bool has_biases);
-
- DepthwiseFunctionPtr _func;
- const ITensor *_input;
- const ITensor *_weights;
- const ITensor *_biases;
- ITensor *_output;
- PadStrideInfo _conv_info;
- unsigned int _depth_multiplier;
- Size2D _dilation;
- std::vector<int> _output_multiplier;
- std::vector<int> _output_shift;
- bool _has_biases;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H */
diff --git a/src/core/cpu/kernels/CpuActivationKernel.cpp b/src/core/cpu/kernels/CpuActivationKernel.cpp
index 761258941d..eb38c18cff 100644
--- a/src/core/cpu/kernels/CpuActivationKernel.cpp
+++ b/src/core/cpu/kernels/CpuActivationKernel.cpp
@@ -205,7 +205,7 @@ std::pair<Status, Window> validate_and_configure_window(const ITensorInfo *src,
void CpuActivationKernel::configure(const ITensorInfo *src, ITensorInfo *dst, ActivationLayerInfo activation_info)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src);
_act_info = activation_info;
diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.cpp
index 24fd01fee1..a5d1b61c08 100644
--- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp
+++ b/src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.cpp
@@ -21,8 +21,10 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
+#include "src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "src/core/CPP/Validate.h"
@@ -35,6 +37,10 @@
namespace arm_compute
{
+namespace cpu
+{
+namespace kernels
+{
namespace
{
constexpr auto data_layout = DataLayout::NHWC;
@@ -716,19 +722,18 @@ void depthwise_loop_pow2_quantized_per_tensor(const ITensor *input, const ITenso
input_it, weights_it, biases_it, output_it);
}
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,
- const Size2D &dilation)
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON(depth_multiplier == 0);
- ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(1) + (weights->dimension(1) - 1) * (dilation.x() - 1) > input->dimension(1) + conv_info.pad_left() + conv_info.pad_right());
- ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(2) + (weights->dimension(2) - 1) * (dilation.y() - 1) > input->dimension(2) + conv_info.pad_top() + conv_info.pad_bottom());
- ARM_COMPUTE_RETURN_ERROR_ON((input->dimension(0) * depth_multiplier) != weights->dimension(0));
- ARM_COMPUTE_RETURN_ERROR_ON((dilation.x() < 1) || (dilation.y() < 1));
- ARM_COMPUTE_RETURN_ERROR_ON((conv_info.stride().first < 1) || (conv_info.stride().second < 1));
+ ARM_COMPUTE_RETURN_ERROR_ON(info.depth_multiplier == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(1) + (weights->dimension(1) - 1) * (info.dilation.x() - 1) > input->dimension(1) + info.pad_stride_info.pad_left() + info.pad_stride_info.pad_right());
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(2) + (weights->dimension(2) - 1) * (info.dilation.y() - 1) > input->dimension(2) + info.pad_stride_info.pad_top() + info.pad_stride_info.pad_bottom());
+ ARM_COMPUTE_RETURN_ERROR_ON((input->dimension(0) * info.depth_multiplier) != weights->dimension(0));
+ ARM_COMPUTE_RETURN_ERROR_ON((info.dilation.x() < 1) || (info.dilation.y() < 1));
+ ARM_COMPUTE_RETURN_ERROR_ON((info.pad_stride_info.stride().first < 1) || (info.pad_stride_info.stride().second < 1));
if(is_data_type_quantized_per_channel(weights->data_type()))
{
@@ -757,7 +762,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
if(output->total_size() != 0)
{
- const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
+ const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, info);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
}
@@ -766,35 +771,30 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
}
} // namespace
-NEDepthwiseConvolutionLayerNativeKernel::NEDepthwiseConvolutionLayerNativeKernel()
- : _func(), _input(), _weights(), _biases(), _output(), _conv_info(), _depth_multiplier(1), _dilation(), _output_multiplier(), _output_shift(), _has_biases()
+CpuDepthwiseConvolutionNativeKernel::CpuDepthwiseConvolutionNativeKernel()
+ : _func(), _conv_info(), _depth_multiplier(1), _dilation(), _output_multiplier(), _output_shift(), _has_biases()
{
}
-void NEDepthwiseConvolutionLayerNativeKernel::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output,
- const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation)
+void CpuDepthwiseConvolutionNativeKernel::configure(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), conv_info, depth_multiplier, dilation));
-
- _input = input;
- _weights = weights;
- _biases = biases;
- _output = output;
- _conv_info = conv_info;
- _depth_multiplier = depth_multiplier;
- _dilation = dilation;
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, weights, (biases != nullptr) ? biases : nullptr, output, info));
+
+ _conv_info = info.pad_stride_info;
+ _depth_multiplier = info.depth_multiplier;
+ _dilation = info.dilation;
_has_biases = (biases != nullptr);
- if(is_data_type_quantized(_input->info()->data_type()))
+ if(is_data_type_quantized(input->data_type()))
{
- const auto input_scale = input->info()->quantization_info().uniform().scale;
- const auto output_scale = output->info()->quantization_info().uniform().scale;
+ const auto input_scale = input->quantization_info().uniform().scale;
+ const auto output_scale = output->quantization_info().uniform().scale;
- auto weights_scale = weights->info()->quantization_info().scale();
- if(!is_data_type_quantized_per_channel(_weights->info()->data_type()))
+ auto weights_scale = weights->quantization_info().scale();
+ if(!is_data_type_quantized_per_channel(weights->data_type()))
{
- for(size_t i = 1; i < _weights->info()->dimension(channel_idx); ++i)
+ for(size_t i = 1; i < weights->dimension(channel_idx); ++i)
{
weights_scale.push_back(weights_scale.front());
}
@@ -812,100 +812,107 @@ void NEDepthwiseConvolutionLayerNativeKernel::configure(const ITensor *input, co
}
}
- switch(_weights->info()->data_type())
+ switch(weights->data_type())
{
case DataType::QASYMM8:
- _func = &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<uint8_t, uint8_t>;
+ _func = &CpuDepthwiseConvolutionNativeKernel::run_depthwise<uint8_t, uint8_t>;
break;
case DataType::QASYMM8_SIGNED:
- _func = &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<int8_t, int8_t>;
+ _func = &CpuDepthwiseConvolutionNativeKernel::run_depthwise<int8_t, int8_t>;
break;
case DataType::QSYMM8_PER_CHANNEL:
- if(_input->info()->data_type() == DataType::QASYMM8)
+ if(input->data_type() == DataType::QASYMM8)
{
- _func = &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<uint8_t, int8_t>;
+ _func = &CpuDepthwiseConvolutionNativeKernel::run_depthwise<uint8_t, int8_t>;
}
else
{
- _func = &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<int8_t, int8_t>;
+ _func = &CpuDepthwiseConvolutionNativeKernel::run_depthwise<int8_t, int8_t>;
}
break;
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
- _func = &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<float16_t, float16_t>;
+ _func = &CpuDepthwiseConvolutionNativeKernel::run_depthwise<float16_t, float16_t>;
break;
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F32:
- _func = &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<float, float>;
+ _func = &CpuDepthwiseConvolutionNativeKernel::run_depthwise<float, float>;
break;
default:
ARM_COMPUTE_ERROR("Data type not supported");
break;
}
- const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info, depth_multiplier, dilation);
- auto_init_if_empty(*output->info(), input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape).set_quantization_info(output->info()->quantization_info()));
+ const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, info);
+ auto_init_if_empty(*output, input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape).set_quantization_info(output->quantization_info()));
- Window win = calculate_max_window(*output->info(), Steps());
- INEKernel::configure(win);
+ Window win = calculate_max_window(*output, Steps());
+ ICpuKernel::configure(win);
}
-Status NEDepthwiseConvolutionLayerNativeKernel::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier,
- const Size2D &dilation)
+Status CpuDepthwiseConvolutionNativeKernel::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, biases, output, conv_info, depth_multiplier, dilation));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, biases, output, info));
return Status{};
}
-void NEDepthwiseConvolutionLayerNativeKernel::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
-
- (this->*_func)(window, _has_biases);
-}
-
-template <typename T, typename TW, NEDepthwiseConvolutionLayerNativeKernel::FloatEnalber<T>>
-void NEDepthwiseConvolutionLayerNativeKernel::run_depthwise(const Window &window, bool has_biases)
+template <typename T, typename TW, CpuDepthwiseConvolutionNativeKernel::FloatEnalber<T>>
+void CpuDepthwiseConvolutionNativeKernel::run_depthwise(const ITensor *src, const ITensor *weights, const ITensor *biases,
+ ITensor *dst, const Window &window, bool has_biases)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
if(_depth_multiplier == 1)
{
- depthwise_loop_multiplier1_fp<T>(_input, _weights, _biases, _output, _conv_info, _dilation, window, has_biases);
+ depthwise_loop_multiplier1_fp<T>(src, weights, biases, dst, _conv_info, _dilation, window, has_biases);
}
else
{
- depthwise_loop_generic_fp<T>(_input, _weights, _biases, _output, _conv_info, _dilation, _depth_multiplier, window, has_biases);
+ depthwise_loop_generic_fp<T>(src, weights, biases, dst, _conv_info, _dilation, _depth_multiplier, window, has_biases);
}
}
-template <typename T, typename TW, NEDepthwiseConvolutionLayerNativeKernel::Quantized8bitEnalber<T>>
-void NEDepthwiseConvolutionLayerNativeKernel::run_depthwise(const Window &window, bool has_biases)
+template <typename T, typename TW, CpuDepthwiseConvolutionNativeKernel::Quantized8bitEnalber<T>>
+void CpuDepthwiseConvolutionNativeKernel::run_depthwise(const ITensor *src, const ITensor *weights, const ITensor *biases,
+ ITensor *dst, const Window &window, bool has_biases)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
if(_depth_multiplier == 1)
{
- depthwise_loop_multiplier1_quantized<T, TW>(_input, _weights, _biases, _output, _conv_info, _dilation, _output_multiplier, _output_shift, window, has_biases);
+ depthwise_loop_multiplier1_quantized<T, TW>(src, weights, biases, dst, _conv_info, _dilation, _output_multiplier, _output_shift, window, has_biases);
}
else
{
const bool is_pow2 = ((_depth_multiplier & (_depth_multiplier - 1)) == 0);
- const bool is_quantized_per_tensor = !(is_data_type_quantized_per_channel(_weights->info()->data_type()));
+ const bool is_quantized_per_tensor = !(is_data_type_quantized_per_channel(weights->info()->data_type()));
if(is_pow2 && is_quantized_per_tensor && _depth_multiplier >= 8)
{
- depthwise_loop_pow2_quantized_per_tensor<T, TW>(_input, _weights, _biases, _output, _conv_info, _dilation, _depth_multiplier, _output_multiplier, _output_shift, window, has_biases);
+ depthwise_loop_pow2_quantized_per_tensor<T, TW>(src, weights, biases, dst, _conv_info, _dilation, _depth_multiplier, _output_multiplier, _output_shift, window, has_biases);
}
else
{
- depthwise_loop_generic_quantized<T, TW>(_input, _weights, _biases, _output, _conv_info, _dilation, _depth_multiplier, _output_multiplier, _output_shift, window, has_biases);
+ depthwise_loop_generic_quantized<T, TW>(src, weights, biases, dst, _conv_info, _dilation, _depth_multiplier, _output_multiplier, _output_shift, window, has_biases);
}
}
}
+
+void CpuDepthwiseConvolutionNativeKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
+{
+ ARM_COMPUTE_UNUSED(info);
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
+ ARM_COMPUTE_ERROR_ON(_func == nullptr);
+
+ const auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
+ const auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
+ const auto biases = tensors.get_const_tensor(TensorType::ACL_SRC_2);
+ auto dst = tensors.get_tensor(TensorType::ACL_DST);
+ (this->*_func)(src, weights, biases, dst, window, _has_biases);
+}
+} // namespace kernels
+} // namespace cpu
} // namespace arm_compute
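run_op replaces the stateful run(): the kernel no longer caches tensor pointers at configure() time, so the caller supplies them per execution through an ITensorPack keyed by the ACL_SRC_*/ACL_DST slots read back above. A hedged sketch of the dispatch, assuming already-allocated tensors src, weights, biases and dst and a configured kernel; the scheduler call mirrors how cpu operators dispatch kernels in this version and should be treated as an assumption:

    ITensorPack pack;
    pack.add_const_tensor(TensorType::ACL_SRC_0, src);     // input
    pack.add_const_tensor(TensorType::ACL_SRC_1, weights); // weights
    pack.add_const_tensor(TensorType::ACL_SRC_2, biases);  // nullptr when no bias is used
    pack.add_tensor(TensorType::ACL_DST, dst);
    // Assumed dispatch path: split the kernel window over DimY and hand the
    // pack to run_op() on each thread.
    NEScheduler::get().schedule_op(&kernel, IScheduler::Hints(Window::DimY), kernel.window(), pack);
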
diff --git a/src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h b/src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h
new file mode 100644
index 0000000000..242536d441
--- /dev/null
+++ b/src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2019-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CPU_DEPTHWISECONVOLUTIONNATIVEKERNEL_H
+#define ARM_COMPUTE_CPU_DEPTHWISECONVOLUTIONNATIVEKERNEL_H
+
+#include "arm_compute/core/utils/misc/Traits.h"
+#include "src/core/common/Macros.h"
+#include "src/core/cpu/ICpuKernel.h"
+#include "support/Requires.h"
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#include <arm_neon.h>
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+namespace arm_compute
+{
+namespace cpu
+{
+namespace kernels
+{
+/** Interface for the kernel to run a depthwise convolution native on a tensor. */
+class CpuDepthwiseConvolutionNativeKernel : public ICpuKernel
+{
+public:
+ const char *name() const override
+ {
+ return "CpuDepthwiseConvolutionNativeKernel";
+ }
+ /** Default constructor */
+ CpuDepthwiseConvolutionNativeKernel();
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuDepthwiseConvolutionNativeKernel);
+
+ /** Initialize the function's source, destination and parameters.
+ *
+ * @note Supported data layouts: NHWC
+ *
+ * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [IFM, W, H].
+ * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] info Depthwise convolution meta-data.
+ *
+ */
+ void configure(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info);
+ /** Static function to check if given info will lead to a valid configuration of @ref CpuDepthwiseConvolutionNativeKernel
+ *
+ * @note Supported data layouts: NHWC
+ *
+ * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] weights Weights tensor info. This is a 3D tensor with dimensions [IFM, W, H].
+ * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] output Destination tensor info. Data type supported: Same as @p input.
+ * @param[in] info Depthwise convolution meta-data.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+
+private:
+ template <typename T>
+ using FloatEnalber = typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, int>::type;
+
+ template <typename T, typename TW, FloatEnalber<T> = 0>
+ void run_depthwise(const ITensor *src, const ITensor *weights, const ITensor *bias, ITensor *dst, const Window &window, bool has_biases);
+
+ template <typename T>
+ using Quantized8bitEnalber = typename std::enable_if < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int >::type;
+
+ template <typename T, typename TW, Quantized8bitEnalber<T> = 0>
+ void run_depthwise(const ITensor *src, const ITensor *weights, const ITensor *bias, ITensor *dst, const Window &window, bool has_biases);
+
+ /** Common signature for all the specialised depthwise convolution native functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using DepthwiseFunctionPtr = void (CpuDepthwiseConvolutionNativeKernel::*)(const ITensor *src, const ITensor *weights, const ITensor *bias, ITensor *dst, const Window &window, bool has_biases);
+
+ DepthwiseFunctionPtr _func;
+ PadStrideInfo _conv_info;
+ unsigned int _depth_multiplier;
+ Size2D _dilation;
+ std::vector<int> _output_multiplier;
+ std::vector<int> _output_shift;
+ bool _has_biases;
+};
+} // namespace kernels
+} // namespace cpu
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CPU_DEPTHWISECONVOLUTIONNATIVEKERNEL_H */
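Because the ported kernel is configured purely from ITensorInfo, it can be set up before any memory is allocated. A minimal usage sketch, assuming NHWC F32 tensors with made-up shapes and written inside namespace arm_compute:

    using cpu::kernels::CpuDepthwiseConvolutionNativeKernel;

    TensorInfo src(TensorShape(32U, 16U, 16U), 1, DataType::F32);   // [C, W, H]
    src.set_data_layout(DataLayout::NHWC);
    TensorInfo weights(TensorShape(32U, 3U, 3U), 1, DataType::F32); // [IFM, W, H]
    weights.set_data_layout(DataLayout::NHWC);
    TensorInfo biases(TensorShape(32U), 1, DataType::F32);
    TensorInfo dst{}; // left empty; configure() auto-initialises it

    const ConvolutionInfo info{ PadStrideInfo(1, 1, 1, 1), 1, ActivationLayerInfo(), Size2D(1, 1) };
    const Status s = CpuDepthwiseConvolutionNativeKernel::validate(&src, &weights, &biases, &dst, info);
    if(s.error_code() == ErrorCode::OK)
    {
        CpuDepthwiseConvolutionNativeKernel kernel;
        kernel.configure(&src, &weights, &biases, &dst, info);
        // kernel.window() now spans the inferred dst shape; execution goes through run_op().
    }

The function-level NEDepthwiseConvolutionLayer below still owns the tensors and the memory group; only the kernel has become stateless.
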
diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
index f7517a50a3..8e3d010786 100644
--- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
@@ -82,9 +82,10 @@ Status validate_arguments_3x3(const ITensorInfo *input, const ITensorInfo *weigh
if(needs_permute)
{
- TensorShape permuted_input_shape = input->tensor_shape();
- TensorShape permuted_weights_shape = weights->tensor_shape();
- TensorShape permuted_output_shape = shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
+ TensorShape permuted_input_shape = input->tensor_shape();
+ TensorShape permuted_weights_shape = weights->tensor_shape();
+ const ConvolutionInfo info{ conv_info, depth_multiplier, ActivationLayerInfo(), dilation };
+ TensorShape permuted_output_shape = shape_calculator::compute_depthwise_convolution_shape(*input, *weights, info);
permute(permuted_input_shape, PermutationVector(1U, 2U, 0U));
permute(permuted_weights_shape, PermutationVector(1U, 2U, 0U));
@@ -272,9 +273,10 @@ Status CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::validate
if(needs_permute)
{
- TensorShape permuted_input_shape = input->tensor_shape();
- TensorShape permuted_weights_shape = weights->tensor_shape();
- TensorShape permuted_output_shape = shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
+ TensorShape permuted_input_shape = input->tensor_shape();
+ TensorShape permuted_weights_shape = weights->tensor_shape();
+ const ConvolutionInfo info{ conv_info, depth_multiplier, ActivationLayerInfo(), dilation };
+ TensorShape permuted_output_shape = shape_calculator::compute_depthwise_convolution_shape(*input, *weights, info);
permute(permuted_input_shape, PermutationVector(2U, 0U, 1U));
permute(permuted_weights_shape, PermutationVector(2U, 0U, 1U));
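
Both CL hunks above show the recurring mechanical change of this port: the loose (conv_info, depth_multiplier, dilation) argument triple is folded into a single ConvolutionInfo aggregate that also carries the fused activation. A minimal standalone sketch of that consolidation follows; the struct members mirror the ACL types in spirit, but these definitions are simplified stand-ins, not the real API.

#include <cstddef>

// Stand-ins that mirror (but are not) the ACL types, for illustration only.
struct PadStrideInfo
{
    unsigned int stride_x{ 1 }, stride_y{ 1 }, pad_x{ 0 }, pad_y{ 0 };
};
struct ActivationLayerInfo
{
    bool enabled{ false };
};
struct Size2D
{
    std::size_t x{ 1 }, y{ 1 };
};

// Before the port, helpers took (conv_info, depth_multiplier, dilation) loose;
// now one meta-data aggregate is threaded through configure/validate/shape code.
struct ConvolutionInfo
{
    PadStrideInfo       pad_stride_info{};
    unsigned int        depth_multiplier{ 1 };
    ActivationLayerInfo act_info{};
    Size2D              dilation{ 1, 1 };
};

// Toy analogue of compute_depthwise_convolution_shape() for one dimension.
unsigned int output_dim(unsigned int in, unsigned int kernel, const ConvolutionInfo &info)
{
    const unsigned int dilated = kernel + (kernel - 1) * (static_cast<unsigned int>(info.dilation.x) - 1);
    return (in + 2 * info.pad_stride_info.pad_x - dilated) / info.pad_stride_info.stride_x + 1;
}

int main()
{
    const ConvolutionInfo info{ PadStrideInfo{ 1, 1, 1, 1 }, 1, ActivationLayerInfo{}, Size2D{ 2, 2 } };
    return output_dim(32, 3, info) == 30 ? 0 : 1; // 3x3 kernel, dilation 2, pad 1
}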
diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
index d17f6b5cd9..e1ceb0f083 100644
--- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,54 +27,39 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
+#include "src/runtime/cpu/operators/CpuDepthwiseConvolution.h"
using namespace arm_compute::misc;
using namespace arm_compute::misc::shape_calculator;
namespace arm_compute
{
-namespace
-{
-Status validate_arguments_optimized(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
+NEDepthwiseConvolutionLayer::~NEDepthwiseConvolutionLayer() = default;
+
+struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::Impl
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
- if(!is_data_type_quantized_per_channel(weights->data_type()))
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
- }
- ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
- ARM_COMPUTE_RETURN_ERROR_ON(dilation.x() < 1 || dilation.y() < 1);
- const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
- const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
- ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (dilation.x() - 1) > input->dimension(idx_w) + conv_info.pad_left() + conv_info.pad_right());
- ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (dilation.y() - 1) > input->dimension(idx_h) + conv_info.pad_top() + conv_info.pad_bottom());
-
- if(biases != nullptr)
+ ITensor *src{ nullptr }; // SRC_0
+ ITensor *dst{ nullptr }; // DST_0
+ const ITensor *weights
{
- const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
- ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
- ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(channel_idx));
- }
-
- ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionAssemblyDispatch::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation));
-
- //Validate Activation Layer
- if(act_info.enabled())
+ nullptr
+ }; // SRC_1
+ const ITensor *biases
{
- ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
- }
- return Status{};
-}
-} // namespace
-
-NEDepthwiseConvolutionLayer::~NEDepthwiseConvolutionLayer() = default;
+ nullptr
+ }; // SRC_2
+ Tensor permuted_input{}; // INT_0
+ Tensor permuted_weights{}; // INT_1
+ Tensor permuted_output{}; // INT_2
+ Tensor workspace{}; // INT_3
+ Tensor packed_weights{}; // INT_4
+ std::shared_ptr<cpu::CpuDepthwiseConvolution> op{ nullptr };
+ bool is_prepared{ false };
+ bool permute{ false };
+};
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(memory_manager), _dwc_optimized_func(memory_manager), _permute_input(), _permute_weights(), _permute_output(), _activationlayer_function(), _accumulator(), _permuted_input(),
- _permuted_weights(), _permuted_output(), _original_weights(nullptr), _has_bias(false), _is_quantized(false), _is_nchw(true), _permute(false), _is_activationlayer_enabled(false), _is_prepared(false)
+ : _memory_group(memory_manager), _impl(std::make_unique<Impl>())
{
}
@@ -87,65 +72,76 @@ void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::
const Size2D &dilation)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
- // Perform validation step
- ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayerOptimizedInternal::validate(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(),
- output->info(), conv_info, depth_multiplier, act_info, dilation));
-
- _original_weights = weights;
- _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
- _has_bias = biases != nullptr;
- _is_nchw = input->info()->data_layout() == DataLayout::NCHW;
- _permute = _is_nchw;
- _is_prepared = false;
- _is_activationlayer_enabled = act_info.enabled();
+
+ bool is_nchw = input->info()->data_layout() == DataLayout::NCHW;
+ _impl->src = input;
+ _impl->weights = weights;
+ _impl->biases = biases;
+ _impl->dst = output;
+ _impl->permute = is_nchw;
+
+ _impl->op = std::make_unique<cpu::CpuDepthwiseConvolution>();
+ ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
+ _impl->op->configure(_impl->src->info(), _impl->weights->info(), _impl->biases == nullptr ? nullptr : _impl->biases->info(),
+ _impl->dst->info(), info);
// Configure pipeline
- ActivationLayerInfo act_info_to_use = ActivationLayerInfo();
- const bool is_relu = arm_compute::utils::info_helpers::is_relu(act_info);
- const bool is_relu6 = arm_compute::utils::info_helpers::is_relu6(act_info);
- _is_activationlayer_enabled = act_info.enabled() && !(is_relu || is_relu6);
- if(!_is_activationlayer_enabled)
+ ActivationLayerInfo act_info_to_use = ActivationLayerInfo();
+ const bool is_relu = arm_compute::utils::info_helpers::is_relu(act_info);
+ const bool is_relu6 = arm_compute::utils::info_helpers::is_relu6(act_info);
+ bool is_activationlayer_enabled = act_info.enabled() && !(is_relu || is_relu6);
+
+ if(!is_activationlayer_enabled)
{
act_info_to_use = act_info;
}
+ info = ConvolutionInfo{ conv_info, depth_multiplier, act_info_to_use, dilation };
- if(_is_nchw)
+ auto dwc_optimized_func = std::make_unique<cpu::CpuDepthwiseConvolutionAssemblyDispatch>();
+
+ if(is_nchw)
{
- _memory_group.manage(&_permuted_input);
- _memory_group.manage(&_permuted_output);
+ auto permute_input = std::make_unique<cpu::CpuPermute>();
+ auto permute_weights = std::make_unique<cpu::CpuPermute>();
+ auto permute_output = std::make_unique<cpu::CpuPermute>();
+
+ _memory_group.manage(&_impl->permuted_input);
+ _memory_group.manage(&_impl->permuted_weights);
+ _memory_group.manage(&_impl->permuted_output);
// Configure the function to transform the input tensor from NCHW -> NHWC
- _permute_input.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));
- _permuted_input.info()->set_data_layout(DataLayout::NHWC);
+ permute_input->configure(input->info(), _impl->permuted_input.info(), PermutationVector(2U, 0U, 1U));
+ _impl->permuted_input.info()->set_data_layout(DataLayout::NHWC);
// Configure the function to transform the weights tensor from IHW -> HWI
- _permute_weights.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));
- _permuted_weights.info()->set_data_layout(DataLayout::NHWC);
+ permute_weights->configure(weights->info(), _impl->permuted_weights.info(), PermutationVector(2U, 0U, 1U));
+ _impl->permuted_weights.info()->set_data_layout(DataLayout::NHWC);
- _permuted_output.info()->set_data_layout(DataLayout::NHWC);
- _permuted_output.info()->set_quantization_info(output->info()->quantization_info());
+ _impl->permuted_output.info()->set_data_layout(DataLayout::NHWC);
+ _impl->permuted_output.info()->set_quantization_info(output->info()->quantization_info());
// Configure optimized depthwise
- _dwc_optimized_func.configure(&_permuted_input, &_permuted_weights, biases, &_permuted_output, conv_info, depth_multiplier, act_info_to_use, dilation);
+ dwc_optimized_func->configure(_impl->permuted_input.info(), _impl->permuted_weights.info(), biases == nullptr ? nullptr : biases->info(), _impl->permuted_output.info(), info);
// Configure the function to transform the convolved output to ACL's native ordering format NCHW
- _permuted_output.info()->set_data_layout(DataLayout::NHWC);
- _permute_output.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));
+ _impl->permuted_output.info()->set_data_layout(DataLayout::NHWC);
+ permute_output->configure(_impl->permuted_output.info(), output->info(), PermutationVector(1U, 2U, 0U));
- // Allocate tensors
- _permuted_input.allocator()->allocate();
- _permuted_output.allocator()->allocate();
+ _impl->permuted_input.allocator()->allocate();
+ _impl->permuted_output.allocator()->allocate();
}
else
{
- _dwc_optimized_func.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info_to_use, dilation);
+ dwc_optimized_func->configure(_impl->src->info(), _impl->weights->info(), biases == nullptr ? nullptr : biases->info(), _impl->dst->info(), info);
}
- // Configure activation
- if(_is_activationlayer_enabled)
- {
- _activationlayer_function.configure(output, nullptr, act_info);
- }
+ // Allocate memory based on the internal memory requirements
+ experimental::MemoryRequirements mem_req = dwc_optimized_func->workspace();
+ _impl->workspace.allocator()->init(TensorInfo(TensorShape{ mem_req[0].size }, 1, DataType::S8), mem_req[0].alignment);
+ _impl->packed_weights.allocator()->init(TensorInfo(TensorShape{ mem_req[1].size }, 1, DataType::S8), mem_req[1].alignment);
+
+ _impl->workspace.allocator()->allocate();
+ _impl->packed_weights.allocator()->allocate();
}
Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::validate(const ITensorInfo *input,
@@ -157,63 +153,66 @@ Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal
const ActivationLayerInfo &act_info,
const Size2D &dilation)
{
- return validate_arguments_optimized(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
+ ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
+ return cpu::CpuDepthwiseConvolution::validate(input, weights, biases, output, info);
}
void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::run()
{
prepare();
-
MemoryGroupResourceScope scope_mg(_memory_group);
- // Permute input
- if(_permute)
- {
- _permute_input.run();
- }
-
- // Run assembly function
- _dwc_optimized_func.run();
-
- // Permute output
- if(_is_nchw)
- {
- _permute_output.run();
- }
-
- // Run activation
- if(_is_activationlayer_enabled)
- {
- _activationlayer_function.run();
- }
+ ITensorPack pack;
+ pack.add_tensor(TensorType::ACL_SRC_0, _impl->src);
+ pack.add_tensor(TensorType::ACL_SRC_1, _impl->weights);
+ pack.add_tensor(TensorType::ACL_SRC_2, _impl->biases);
+ pack.add_tensor(TensorType::ACL_INT_0, &_impl->permuted_input);
+ pack.add_tensor(TensorType::ACL_INT_1, &_impl->permuted_weights);
+ pack.add_tensor(TensorType::ACL_INT_2, &_impl->permuted_output);
+ pack.add_tensor(TensorType::ACL_INT_3, &_impl->workspace);
+ pack.add_tensor(TensorType::ACL_INT_4, &_impl->packed_weights);
+ pack.add_tensor(TensorType::ACL_DST_0, _impl->dst);
+
+ _impl->op->run(pack);
}
void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::prepare()
{
- if(!_is_prepared)
+ if(!_impl->is_prepared)
{
// Permute weights
- if(_permute)
+ if(_impl->permute)
{
- _permuted_weights.allocator()->allocate();
- _permute_weights.run();
- _original_weights->mark_as_unused();
+ _impl->permuted_weights.allocator()->allocate();
+ _impl->weights->mark_as_unused();
}
- // Prepare optimized function
- _dwc_optimized_func.prepare();
- if(!_permuted_weights.is_used())
+ if(!_impl->permuted_weights.is_used())
{
- _permuted_weights.allocator()->free();
+ _impl->permuted_weights.allocator()->free();
}
- _is_prepared = true;
+ _impl->is_prepared = true;
}
}
+struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::Impl
+{
+ Tensor permuted_input{};
+ Tensor permuted_weights{};
+ Tensor permuted_output{};
+ bool is_prepared{ false };
+ bool is_nchw{ false };
+ bool is_activationlayer_enabled{ false };
+ const ITensor *weights{ nullptr };
+ const ITensor *biases{ nullptr };
+ const ITensor *src{ nullptr };
+ ITensor *dst{ nullptr };
+ std::shared_ptr<cpu::CpuDepthwiseConvolution> op{ nullptr };
+};
+
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::NEDepthwiseConvolutionLayerGeneric()
- : _depthwise_conv_kernel(), _permute_input(), _permute_weights(), _permute_output(), _activationlayer_function(), _permuted_input(), _permuted_weights(), _permuted_output(), _is_prepared(false),
- _is_nchw(false), _is_activationlayer_enabled(false), _original_weights(nullptr)
+ : _impl(std::make_unique<Impl>())
{
}
@@ -224,45 +223,49 @@ void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::configure(
ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionLayer::validate(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(),
output->info(), conv_info, depth_multiplier, act_info, dilation));
- _is_nchw = input->info()->data_layout() == DataLayout::NCHW;
- _is_prepared = !_is_nchw;
+ const ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
+ _impl->op = std::make_unique<cpu::CpuDepthwiseConvolution>();
+ _impl->op->configure(input->info(), weights->info(), biases == nullptr ? nullptr : biases->info(), output->info(), info);
+
+ _impl->src = input;
+ _impl->dst = output;
+ _impl->weights = weights;
+ _impl->biases = biases;
+ _impl->is_nchw = input->info()->data_layout() == DataLayout::NCHW;
+ _impl->is_prepared = !_impl->is_nchw;
ITensor *input_to_use = input;
const ITensor *weights_to_use = weights;
ITensor *output_to_use = output;
- if(_is_nchw)
+ if(_impl->is_nchw)
{
- _permute_input.configure(input, &_permuted_input, PermutationVector(2U, 0U, 1U));
- _permuted_input.info()->set_data_layout(DataLayout::NHWC);
- input_to_use = &_permuted_input;
+ auto permute_input = std::make_unique<cpu::CpuPermute>();
+ auto permute_weights = std::make_unique<cpu::CpuPermute>();
- _permute_weights.configure(weights, &_permuted_weights, PermutationVector(2U, 0U, 1U));
- _permuted_weights.info()->set_data_layout(DataLayout::NHWC);
- weights_to_use = &_permuted_weights;
+ permute_input->configure(input->info(), _impl->permuted_input.info(), PermutationVector(2U, 0U, 1U));
+ _impl->permuted_input.info()->set_data_layout(DataLayout::NHWC);
+ input_to_use = &_impl->permuted_input;
- _permuted_output.allocator()->init(output->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(TensorShape()));
- output_to_use = &_permuted_output;
+ permute_weights->configure(weights->info(), _impl->permuted_weights.info(), PermutationVector(2U, 0U, 1U));
+ _impl->permuted_weights.info()->set_data_layout(DataLayout::NHWC);
+ weights_to_use = &_impl->permuted_weights;
+
+ _impl->permuted_output.allocator()->init(output->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(TensorShape()));
+ output_to_use = &_impl->permuted_output;
}
- _original_weights = weights_to_use;
- _depthwise_conv_kernel = std::make_unique<NEDepthwiseConvolutionLayerNativeKernel>();
- _depthwise_conv_kernel->configure(input_to_use, weights_to_use, biases, output_to_use, conv_info, depth_multiplier, dilation);
+ auto depthwise_conv_kernel = std::make_unique<cpu::kernels::CpuDepthwiseConvolutionNativeKernel>();
+ depthwise_conv_kernel->configure(input_to_use->info(), weights_to_use->info(), biases == nullptr ? nullptr : biases->info(), output_to_use->info(), info);
- if(_is_nchw)
+ if(_impl->is_nchw)
{
- _permute_output.configure(&_permuted_output, output, PermutationVector(1U, 2U, 0U));
- _permuted_output.info()->set_data_layout(DataLayout::NHWC);
+ auto permute_output = std::make_unique<cpu::CpuPermute>();
+ permute_output->configure(_impl->permuted_output.info(), output->info(), PermutationVector(1U, 2U, 0U));
+ _impl->permuted_output.info()->set_data_layout(DataLayout::NHWC);
- _permuted_input.allocator()->allocate();
- _permuted_weights.allocator()->allocate();
- _permuted_output.allocator()->allocate();
- }
-
- //Configure Activation Layer
- _is_activationlayer_enabled = act_info.enabled();
- if(_is_activationlayer_enabled)
- {
- _activationlayer_function.configure(output, nullptr, act_info);
+ _impl->permuted_input.allocator()->allocate();
+ _impl->permuted_weights.allocator()->allocate();
+ _impl->permuted_output.allocator()->allocate();
}
}
@@ -270,89 +273,53 @@ Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::validate
const PadStrideInfo &conv_info,
unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
- if(input->data_layout() == DataLayout::NCHW)
- {
- TensorShape permuted_input_shape = input->tensor_shape();
- TensorShape permuted_weights_shape = weights->tensor_shape();
- TensorShape permuted_output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
- permute(permuted_input_shape, PermutationVector(2U, 0U, 1U));
- permute(permuted_weights_shape, PermutationVector(2U, 0U, 1U));
- permute(permuted_output_shape, PermutationVector(2U, 0U, 1U));
-
- const TensorInfo permuted_input = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NHWC));
- const TensorInfo permuted_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(DataLayout::NHWC));
- const TensorInfo permuted_output = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_output_shape).set_data_layout(DataLayout::NCHW));
-
- ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(input, &permuted_input, PermutationVector(2U, 0U, 1U)));
- ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(weights, &permuted_weights, PermutationVector(2U, 0U, 1U)));
- ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(&permuted_output, output, PermutationVector(1U, 2U, 0U)));
-
- ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayerNativeKernel::validate(&permuted_input, &permuted_weights, biases, &permuted_output, conv_info, depth_multiplier, dilation));
- }
- else
- {
- ARM_COMPUTE_RETURN_ON_ERROR(NEDepthwiseConvolutionLayerNativeKernel::validate(input, weights, biases, output, conv_info, depth_multiplier, dilation));
- }
-
- // Validate Activation Layer
- if(act_info.enabled())
- {
- ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output, nullptr, act_info));
- }
-
- return Status{};
+ ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
+ return cpu::CpuDepthwiseConvolution::validate(input, weights, biases, output, info);
}
void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::run()
{
- if(_is_nchw)
- {
- prepare();
- _permute_input.run();
- }
-
- NEScheduler::get().schedule(_depthwise_conv_kernel.get(), Window::DimY);
-
- if(_is_nchw)
- {
- _permute_output.run();
- }
-
- if(_is_activationlayer_enabled)
- {
- _activationlayer_function.run();
- }
+ ITensorPack pack;
+ pack.add_tensor(TensorType::ACL_SRC_0, _impl->src);
+ pack.add_tensor(TensorType::ACL_SRC_1, _impl->weights);
+ pack.add_tensor(TensorType::ACL_SRC_2, _impl->biases);
+ pack.add_tensor(TensorType::ACL_INT_0, &_impl->permuted_input);
+ pack.add_tensor(TensorType::ACL_INT_1, &_impl->permuted_weights);
+ pack.add_tensor(TensorType::ACL_INT_2, &_impl->permuted_output);
+ pack.add_tensor(TensorType::ACL_DST_0, _impl->dst);
+
+ _impl->op->run(pack);
}
-void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::prepare()
+NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _impl(std::make_unique<Impl>())
{
- if(!_is_prepared)
- {
- ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
-
- _permute_weights.run();
- _original_weights->mark_as_unused();
- _is_prepared = true;
- }
}
-NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _depth_conv_func(DepthwiseConvolutionFunction::GENERIC), _func_optimized(std::move(memory_manager)), _func_generic()
+#ifndef DOXYGEN_SKIP_THIS
+struct NEDepthwiseConvolutionLayer::Impl
{
-}
+ DepthwiseConvolutionFunction depth_conv_func{ DepthwiseConvolutionFunction::OPTIMIZED };
+ NEDepthwiseConvolutionLayerOptimizedInternal func_optimized{ nullptr };
+ NEDepthwiseConvolutionLayerGeneric func_generic{};
+ std::shared_ptr<cpu::CpuDepthwiseConvolution> op{ nullptr };
+};
+#endif // DOXYGEN_SKIP_THIS
void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,
const ActivationLayerInfo &act_info, const Size2D &dilation)
{
- _depth_conv_func = get_depthwiseconvolution_function(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), conv_info, depth_multiplier, act_info, dilation);
- switch(_depth_conv_func)
+ const ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
+ _impl->op = std::make_shared<cpu::CpuDepthwiseConvolution>();
+ _impl->depth_conv_func = _impl->op->get_depthwiseconvolution_function(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(),
+ info);
+ switch(_impl->depth_conv_func)
{
case DepthwiseConvolutionFunction::OPTIMIZED:
- _func_optimized.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
+ _impl->func_optimized.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
break;
case DepthwiseConvolutionFunction::GENERIC:
- _func_generic.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
+ _impl->func_generic.configure(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
break;
default:
ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
@@ -362,43 +329,19 @@ void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weigh
Status NEDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
- DepthwiseConvolutionFunction depth_conv_func = get_depthwiseconvolution_function(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
- switch(depth_conv_func)
- {
- case DepthwiseConvolutionFunction::OPTIMIZED:
- return NEDepthwiseConvolutionLayerOptimizedInternal::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
- break;
- case DepthwiseConvolutionFunction::GENERIC:
- return NEDepthwiseConvolutionLayerGeneric::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
- }
-}
-
-DepthwiseConvolutionFunction NEDepthwiseConvolutionLayer::get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
- const PadStrideInfo &conv_info,
- unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation)
-{
- if(bool(NEDepthwiseConvolutionLayerOptimizedInternal::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation)))
- {
- return DepthwiseConvolutionFunction::OPTIMIZED;
- }
- else
- {
- return DepthwiseConvolutionFunction::GENERIC;
- }
+ ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
+ return cpu::CpuDepthwiseConvolution::validate(input, weights, biases, output, info);
}
void NEDepthwiseConvolutionLayer::run()
{
- switch(_depth_conv_func)
+ switch(_impl->depth_conv_func)
{
case DepthwiseConvolutionFunction::OPTIMIZED:
- _func_optimized.run();
+ _impl->func_optimized.run();
break;
case DepthwiseConvolutionFunction::GENERIC:
- _func_generic.run();
+ _impl->func_generic.run();
break;
default:
ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
@@ -407,13 +350,13 @@ void NEDepthwiseConvolutionLayer::run()
void NEDepthwiseConvolutionLayer::prepare()
{
- switch(_depth_conv_func)
+ switch(_impl->depth_conv_func)
{
case DepthwiseConvolutionFunction::OPTIMIZED:
- _func_optimized.prepare();
+ _impl->func_optimized.prepare();
break;
case DepthwiseConvolutionFunction::GENERIC:
- _func_generic.prepare();
+ _impl->func_generic.prepare();
break;
default:
ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
diff --git a/src/runtime/cpu/operators/CpuDepthwiseConvolution.cpp b/src/runtime/cpu/operators/CpuDepthwiseConvolution.cpp
new file mode 100644
index 0000000000..183a2af0cd
--- /dev/null
+++ b/src/runtime/cpu/operators/CpuDepthwiseConvolution.cpp
@@ -0,0 +1,521 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/cpu/operators/CpuDepthwiseConvolution.h"
+
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/InfoHelpers.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+namespace
+{
+Status validate_arguments_optimized(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
+ if(!is_data_type_quantized_per_channel(weights->data_type()))
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
+ }
+ ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
+ ARM_COMPUTE_RETURN_ERROR_ON(info.dilation.x() < 1 || info.dilation.y() < 1);
+ const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
+ const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (info.dilation.x() - 1) > input->dimension(idx_w) + info.pad_stride_info.pad_left() +
+ info.pad_stride_info.pad_right());
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (info.dilation.y() - 1) > input->dimension(idx_h) + info.pad_stride_info.pad_top() +
+ info.pad_stride_info.pad_bottom());
+
+ if(biases != nullptr)
+ {
+ const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
+ ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
+ ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(channel_idx));
+ }
+
+ ARM_COMPUTE_RETURN_ON_ERROR(CpuDepthwiseConvolutionAssemblyDispatch::validate(input, weights, biases, output, info));
+
+ //Validate Activation Layer
+ if(info.act_info.enabled())
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(CpuActivation::validate(output, nullptr, info.act_info));
+ }
+ return Status{};
+}
+} // namespace
+
+CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::CpuDepthwiseConvolutionOptimizedInternal()
+ : _dwc_optimized_func(nullptr), _permute_input(nullptr), _permute_weights(nullptr), _permute_output(nullptr), _activationlayer_function(nullptr), _has_bias(false), _is_quantized(false),
+ _is_nchw(true), _permute(false), _is_activationlayer_enabled(false), _is_prepared(false)
+{
+}
+
+void CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::configure(ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ ITensorInfo *output,
+ const ConvolutionInfo &info)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+ // Perform validation step
+ ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConvolutionOptimizedInternal::validate(input, weights, biases, output, info));
+
+ _is_quantized = is_data_type_quantized_asymmetric(input->data_type());
+ _has_bias = biases != nullptr;
+ _is_nchw = input->data_layout() == DataLayout::NCHW;
+ _permute = _is_nchw;
+ _is_prepared = false;
+
+ // Configure pipeline
+ ActivationLayerInfo act_info_to_use = ActivationLayerInfo();
+ const bool is_relu = arm_compute::utils::info_helpers::is_relu(info.act_info);
+ const bool is_relu6 = arm_compute::utils::info_helpers::is_relu6(info.act_info);
+ _is_activationlayer_enabled = info.act_info.enabled() && !(is_relu || is_relu6);
+
+ if(!_is_activationlayer_enabled)
+ {
+ act_info_to_use = info.act_info;
+ }
+
+ _dwc_optimized_func = std::make_unique<CpuDepthwiseConvolutionAssemblyDispatch>();
+ if(_is_nchw)
+ {
+ _permute_input = std::make_unique<cpu::CpuPermute>();
+ _permute_weights = std::make_unique<cpu::CpuPermute>();
+ _permute_output = std::make_unique<cpu::CpuPermute>();
+
+ auto input_perm = std::make_unique<TensorInfo>();
+ auto weights_perm = std::make_unique<TensorInfo>();
+ auto output_perm = std::make_unique<TensorInfo>();
+
+ // Configure the function to transform the input tensor from NCHW -> NHWC
+ _permute_input->configure(input, input_perm.get(), PermutationVector(2U, 0U, 1U));
+ input_perm->set_data_layout(DataLayout::NHWC);
+
+ // Configure the function to transform the weights tensor from IHW -> HWI
+ _permute_weights->configure(weights, weights_perm.get(), PermutationVector(2U, 0U, 1U));
+ weights_perm->set_data_layout(DataLayout::NHWC);
+
+ output_perm->set_data_layout(DataLayout::NHWC);
+ output_perm->set_quantization_info(output->quantization_info());
+
+ // Configure optimized depthwise
+ _dwc_optimized_func->configure(input_perm.get(), weights_perm.get(), biases, output_perm.get(), info);
+
+ // Configure the function to transform the convolved output to ACL's native ordering format NCHW
+ output_perm->set_data_layout(DataLayout::NHWC);
+ _permute_output->configure(output_perm.get(), output, PermutationVector(1U, 2U, 0U));
+ }
+ else
+ {
+ _dwc_optimized_func->configure(input, weights, biases, output, info);
+ }
+
+ // Configure activation
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function = std::make_unique<cpu::CpuActivation>();
+ _activationlayer_function->configure(output, nullptr, info.act_info);
+ }
+}
+
+Status CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *output,
+ const ConvolutionInfo &info)
+{
+ return validate_arguments_optimized(input, weights, biases, output, info);
+}
+
+void CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::run(ITensorPack &tensors)
+{
+ ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
+ prepare(tensors);
+
+ auto bias = tensors.get_const_tensor(TensorType::ACL_SRC_2);
+ auto dst = tensors.get_tensor(TensorType::ACL_DST_0);
+ auto workspace = tensors.get_tensor(TensorType::ACL_INT_3);
+ auto packed_weights = tensors.get_tensor(TensorType::ACL_INT_4);
+
+ // Permute input
+ if(_permute)
+ {
+ ITensorPack pack;
+ auto src = tensors.get_tensor(TensorType::ACL_SRC_0);
+ auto src_perm = tensors.get_tensor(TensorType::ACL_INT_0);
+ pack.add_tensor(TensorType::ACL_SRC, src);
+ pack.add_tensor(TensorType::ACL_DST, src_perm);
+ _permute_input->run(pack);
+ }
+
+ // Run assembly function
+ if(_is_nchw)
+ {
+ auto src_perm = tensors.get_tensor(TensorType::ACL_INT_0);
+ auto weights_perm = tensors.get_tensor(TensorType::ACL_INT_1);
+ auto dst_perm = tensors.get_tensor(TensorType::ACL_INT_2);
+
+ ITensorPack pack;
+ pack.add_tensor(TensorType::ACL_SRC_0, src_perm);
+ pack.add_tensor(TensorType::ACL_SRC_1, weights_perm);
+ pack.add_tensor(TensorType::ACL_SRC_2, bias);
+ pack.add_tensor(TensorType::ACL_INT_0, workspace);
+ pack.add_tensor(TensorType::ACL_INT_1, packed_weights);
+ pack.add_tensor(TensorType::ACL_DST, dst_perm);
+ _dwc_optimized_func->run(pack);
+ }
+ else
+ {
+ auto src = tensors.get_tensor(TensorType::ACL_SRC_0);
+ auto weights = tensors.get_tensor(TensorType::ACL_SRC_1);
+
+ ITensorPack pack;
+ pack.add_tensor(TensorType::ACL_SRC_0, src);
+ pack.add_tensor(TensorType::ACL_SRC_1, weights);
+ pack.add_tensor(TensorType::ACL_SRC_2, bias);
+ pack.add_tensor(TensorType::ACL_INT_0, workspace);
+ pack.add_tensor(TensorType::ACL_INT_1, packed_weights);
+ pack.add_tensor(TensorType::ACL_DST, dst);
+ _dwc_optimized_func->run(pack);
+ }
+
+ // Permute output
+ if(_is_nchw)
+ {
+ ITensorPack pack;
+ auto dst_perm = tensors.get_tensor(TensorType::ACL_INT_2);
+ pack.add_tensor(TensorType::ACL_SRC, dst_perm);
+ pack.add_tensor(TensorType::ACL_DST, dst);
+ _permute_output->run(pack);
+ }
+
+ // Run activation
+ if(_is_activationlayer_enabled)
+ {
+ ITensorPack pack;
+ pack.add_tensor(TensorType::ACL_SRC, dst);
+ pack.add_tensor(TensorType::ACL_DST, dst);
+ _activationlayer_function->run(pack);
+ }
+}
+
+void CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::prepare(ITensorPack &tensors)
+{
+ if(!_is_prepared)
+ {
+ auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
+ auto bias = tensors.get_const_tensor(TensorType::ACL_SRC_2);
+ auto packed_weights = tensors.get_tensor(TensorType::ACL_INT_4);
+
+ // Permute weights
+ if(_permute)
+ {
+ auto permuted_weights = tensors.get_tensor(TensorType::ACL_INT_1);
+
+ ITensorPack pack;
+ pack.add_tensor(TensorType::ACL_SRC, weights);
+ pack.add_tensor(TensorType::ACL_DST, permuted_weights);
+ _permute_weights->run(pack);
+
+ ITensorPack pack_opt;
+ pack_opt.add_const_tensor(TensorType::ACL_SRC_1, permuted_weights);
+ pack_opt.add_tensor(TensorType::ACL_SRC_2, bias);
+ pack_opt.add_tensor(TensorType::ACL_INT_1, packed_weights);
+
+ // Prepare optimized function
+ _dwc_optimized_func->prepare(pack_opt);
+ }
+ else
+ {
+ ITensorPack pack_opt;
+ pack_opt.add_tensor(TensorType::ACL_SRC_1, weights);
+ pack_opt.add_tensor(TensorType::ACL_SRC_2, bias);
+ pack_opt.add_tensor(TensorType::ACL_INT_1, packed_weights);
+
+ // Prepare optimized function
+ _dwc_optimized_func->prepare(pack_opt);
+ }
+
+ _is_prepared = true;
+ }
+}
+
+CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::CpuDepthwiseConvolutionGeneric()
+ : _depthwise_conv_kernel(nullptr), _permute_input(nullptr), _permute_weights(nullptr), _permute_output(nullptr), _activationlayer_function(nullptr), _is_nchw(true), _is_prepared(false),
+ _is_activationlayer_enabled(false)
+{
+}
+
+void CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::configure(ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+ ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConvolution::validate(input, weights, biases, output, info));
+
+ _is_nchw = input->data_layout() == DataLayout::NCHW;
+ _is_prepared = !_is_nchw;
+
+ ITensorInfo *input_to_use = input;
+ const ITensorInfo *weights_to_use = weights;
+ ITensorInfo *output_to_use = output;
+
+ auto input_perm = std::make_unique<TensorInfo>();
+ auto weights_perm = std::make_unique<TensorInfo>();
+ auto output_perm = std::make_unique<TensorInfo>(output->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(TensorShape()));
+
+ if(_is_nchw)
+ {
+ _permute_input = std::make_unique<cpu::CpuPermute>();
+ _permute_weights = std::make_unique<cpu::CpuPermute>();
+
+ _permute_input->configure(input, input_perm.get(), PermutationVector(2U, 0U, 1U));
+ input_perm->set_data_layout(DataLayout::NHWC);
+ input_to_use = input_perm.get();
+
+ _permute_weights->configure(weights, weights_perm.get(), PermutationVector(2U, 0U, 1U));
+ weights_perm->set_data_layout(DataLayout::NHWC);
+ weights_to_use = weights_perm.get();
+
+ output_to_use = output_perm.get();
+ }
+
+ _depthwise_conv_kernel = std::make_unique<cpu::kernels::CpuDepthwiseConvolutionNativeKernel>();
+ _depthwise_conv_kernel->configure(input_to_use, weights_to_use, biases, output_to_use, info);
+
+ if(_is_nchw)
+ {
+ _permute_output = std::make_unique<cpu::CpuPermute>();
+ _permute_output->configure(output_perm.get(), output, PermutationVector(1U, 2U, 0U));
+ output_perm->set_data_layout(DataLayout::NHWC);
+ }
+
+ //Configure Activation Layer
+ _is_activationlayer_enabled = info.act_info.enabled();
+ if(_is_activationlayer_enabled)
+ {
+ _activationlayer_function = std::make_unique<cpu::CpuActivation>();
+ _activationlayer_function->configure(output, nullptr, info.act_info);
+ }
+}
+
+Status CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
+ const ConvolutionInfo &info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
+ if(input->data_layout() == DataLayout::NCHW)
+ {
+ TensorShape permuted_input_shape = input->tensor_shape();
+ TensorShape permuted_weights_shape = weights->tensor_shape();
+ TensorShape permuted_output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, info);
+ permute(permuted_input_shape, PermutationVector(2U, 0U, 1U));
+ permute(permuted_weights_shape, PermutationVector(2U, 0U, 1U));
+ permute(permuted_output_shape, PermutationVector(2U, 0U, 1U));
+
+ const TensorInfo permuted_input = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NHWC));
+ const TensorInfo permuted_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(DataLayout::NHWC));
+ const TensorInfo permuted_output = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_output_shape).set_data_layout(DataLayout::NCHW));
+
+ ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(input, &permuted_input, PermutationVector(2U, 0U, 1U)));
+ ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(weights, &permuted_weights, PermutationVector(2U, 0U, 1U)));
+ ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(&permuted_output, output, PermutationVector(1U, 2U, 0U)));
+
+ ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuDepthwiseConvolutionNativeKernel::validate(&permuted_input, &permuted_weights, biases, &permuted_output, info));
+ }
+ else
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuDepthwiseConvolutionNativeKernel::validate(input, weights, biases, output, info));
+ }
+
+ // Validate Activation Layer
+ if(info.act_info.enabled())
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(CpuActivation::validate(output, nullptr, info.act_info));
+ }
+
+ return Status{};
+}
+
+void CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::run(ITensorPack &tensors)
+{
+ auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
+ auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
+ auto biases = tensors.get_const_tensor(TensorType::ACL_SRC_2);
+ auto dst = tensors.get_tensor(TensorType::ACL_DST_0);
+
+ if(_is_nchw)
+ {
+ prepare(tensors);
+ auto src_perm = tensors.get_tensor(TensorType::ACL_INT_0);
+ auto weights_perm = tensors.get_tensor(TensorType::ACL_INT_1);
+ auto dst_perm = tensors.get_tensor(TensorType::ACL_INT_2);
+
+ ITensorPack pack;
+ pack.add_tensor(TensorType::ACL_SRC, src);
+ pack.add_tensor(TensorType::ACL_DST, src_perm);
+ _permute_input->run(pack);
+
+ ITensorPack pack_depth;
+ pack_depth.add_const_tensor(TensorType::ACL_SRC_0, src_perm);
+ pack_depth.add_const_tensor(TensorType::ACL_SRC_1, weights_perm);
+ pack_depth.add_tensor(TensorType::ACL_SRC_2, biases);
+ pack_depth.add_tensor(TensorType::ACL_DST, dst_perm);
+ NEScheduler::get().schedule_op(_depthwise_conv_kernel.get(), Window::DimY, _depthwise_conv_kernel->window(), pack_depth);
+ }
+ else
+ {
+ ITensorPack pack_depth;
+ pack_depth.add_tensor(TensorType::ACL_SRC_0, src);
+ pack_depth.add_tensor(TensorType::ACL_SRC_1, weights);
+ pack_depth.add_tensor(TensorType::ACL_SRC_2, biases);
+ pack_depth.add_tensor(TensorType::ACL_DST, dst);
+ NEScheduler::get().schedule_op(_depthwise_conv_kernel.get(), Window::DimY, _depthwise_conv_kernel->window(), pack_depth);
+ }
+
+ if(_is_nchw)
+ {
+ ITensorPack pack;
+ auto dst_perm = tensors.get_tensor(TensorType::ACL_INT_2);
+ pack.add_tensor(TensorType::ACL_SRC, dst_perm);
+ pack.add_tensor(TensorType::ACL_DST, dst);
+ _permute_output->run(pack);
+ }
+
+ if(_is_activationlayer_enabled)
+ {
+ ITensorPack pack;
+ pack.add_tensor(TensorType::ACL_SRC, dst);
+ pack.add_tensor(TensorType::ACL_DST, dst);
+ _activationlayer_function->run(pack);
+ }
+}
+
+void CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::prepare(ITensorPack &tensors)
+{
+ if(!_is_prepared)
+ {
+ auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
+ auto weights_perm = tensors.get_tensor(TensorType::ACL_INT_1);
+
+ ARM_COMPUTE_ERROR_ON(!weights->is_used());
+
+ ITensorPack pack;
+ pack.add_tensor(TensorType::ACL_SRC, weights);
+ pack.add_tensor(TensorType::ACL_DST, weights_perm);
+
+ _permute_weights->run(pack);
+ weights->mark_as_unused();
+ _is_prepared = true;
+ }
+}
+
+CpuDepthwiseConvolution::CpuDepthwiseConvolution()
+ : _depth_conv_func(DepthwiseConvolutionFunction::GENERIC), _func_optimized(), _func_generic()
+{
+}
+
+void CpuDepthwiseConvolution::configure(ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info)
+{
+ _depth_conv_func = get_depthwiseconvolution_function(input, weights, biases, output, info);
+ switch(_depth_conv_func)
+ {
+ case DepthwiseConvolutionFunction::OPTIMIZED:
+ _func_optimized.configure(input, weights, biases, output, info);
+ break;
+ case DepthwiseConvolutionFunction::GENERIC:
+ _func_generic.configure(input, weights, biases, output, info);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
+ }
+}
+
+Status CpuDepthwiseConvolution::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info)
+{
+ DepthwiseConvolutionFunction depth_conv_func = get_depthwiseconvolution_function(input, weights, biases, output, info);
+ switch(depth_conv_func)
+ {
+ case DepthwiseConvolutionFunction::OPTIMIZED:
+ return CpuDepthwiseConvolutionOptimizedInternal::validate(input, weights, biases, output, info);
+ case DepthwiseConvolutionFunction::GENERIC:
+ return CpuDepthwiseConvolutionGeneric::validate(input, weights, biases, output, info);
+ default:
+ ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
+ }
+}
+
+DepthwiseConvolutionFunction CpuDepthwiseConvolution::get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
+ const ConvolutionInfo &info)
+{
+ if(bool(CpuDepthwiseConvolutionOptimizedInternal::validate(input, weights, biases, output, info)))
+ {
+ return DepthwiseConvolutionFunction::OPTIMIZED;
+ }
+ else
+ {
+ return DepthwiseConvolutionFunction::GENERIC;
+ }
+}
+
+void CpuDepthwiseConvolution::run(ITensorPack &tensors)
+{
+ switch(_depth_conv_func)
+ {
+ case DepthwiseConvolutionFunction::OPTIMIZED:
+ _func_optimized.run(tensors);
+ break;
+ case DepthwiseConvolutionFunction::GENERIC:
+ _func_generic.run(tensors);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
+ }
+}
+
+void CpuDepthwiseConvolution::prepare(ITensorPack &tensors)
+{
+ switch(_depth_conv_func)
+ {
+ case DepthwiseConvolutionFunction::OPTIMIZED:
+ _func_optimized.prepare(tensors);
+ break;
+ case DepthwiseConvolutionFunction::GENERIC:
+ _func_generic.prepare(tensors);
+ break;
+ default:
+ ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
+ }
+}
+} // namespace cpu
+} // namespace arm_compute
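
get_depthwiseconvolution_function() above leans on Status's explicit bool conversion (true when validation succeeded) to probe whether the optimized path accepts the configuration before falling back to the generic kernel. A standalone sketch of that probe-and-fallback idiom follows; the Status struct is a toy stand-in for arm_compute::Status, and the kernel-size rule is invented purely for illustration.

#include <iostream>

struct Status
{
    bool ok{ true };
    explicit operator bool() const
    {
        return ok;
    }
};

enum class Function
{
    OPTIMIZED,
    GENERIC
};

// Pretend the assembly path only accepts 3x3 and 5x5 kernels
Status validate_optimized(int kernel_size)
{
    return Status{ kernel_size == 3 || kernel_size == 5 };
}

Function select(int kernel_size)
{
    // Same shape as get_depthwiseconvolution_function(): if the optimized
    // validate passes, take it; otherwise fall back to the generic kernel.
    return bool(validate_optimized(kernel_size)) ? Function::OPTIMIZED : Function::GENERIC;
}

int main()
{
    std::cout << (select(3) == Function::OPTIMIZED) << "\n"; // 1: assembly path
    std::cout << (select(7) == Function::GENERIC) << "\n";   // 1: native fallback
}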
diff --git a/src/runtime/cpu/operators/CpuDepthwiseConvolution.h b/src/runtime/cpu/operators/CpuDepthwiseConvolution.h
new file mode 100644
index 0000000000..e39cb7db4d
--- /dev/null
+++ b/src/runtime/cpu/operators/CpuDepthwiseConvolution.h
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CPU_DEPTHWISECONVOLUTION_H
+#define ARM_COMPUTE_CPU_DEPTHWISECONVOLUTION_H
+
+#include "arm_compute/core/ITensorInfo.h"
+#include "arm_compute/core/experimental/Types.h"
+#include "src/core/cpu/ICpuKernel.h"
+#include "src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h"
+#include "src/runtime/cpu/ICpuOperator.h"
+#include "src/runtime/cpu/operators/CpuActivation.h"
+#include "src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.h"
+#include "src/runtime/cpu/operators/CpuPermute.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+namespace cpu
+{
+/** Function to execute a depthwise convolution.
+ */
+class CpuDepthwiseConvolution : public ICpuOperator
+{
+public:
+ /** Default constructor */
+ CpuDepthwiseConvolution();
+ /** Initialize the function's source, destination, weights and convolution information.
+ *
+ * @param[in, out] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * @param[in] weights Weights tensor info. These are 3D tensor infos with shape [kernel_x, kernel_y, IFM].
+ * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+ * @param[out] output Destination tensor info. Data type supported: same as @p input.
+ * @param[in] info Depthwise convolution meta-data.
+ */
+ void configure(ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CpuDepthwiseConvolution
+ *
+ * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * @param[in] weights Weights tensor info. These are 3D tensor infos with shape [kernel_x, kernel_y, IFM].
+ * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] output Destination tensor info. Data type supported: same as @p input.
+ * @param[in] info Depthwise convolution meta-data.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info);
+
+ /** Static function to choose the best depthwise convolution function for @ref CpuDepthwiseConvolution
+ *
+ * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
+ * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] output Destination tensor info. Data type supported: same as @p input.
+ * @param[in] info Depthwise convolution meta-data.
+ *
+ * @return a Depthwise Convolution Function
+ */
+ static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
+ const ConvolutionInfo &info);
+
+ // Inherited methods overridden:
+ void run(ITensorPack &tensors) override;
+ void prepare(ITensorPack &tensors) override;
+
+private:
+ /** Basic function to execute optimized depthwise convolution routines. This function calls the following operators:
+ *
+ * @note At the moment only 3x3 and 5x5 convolutions with stride 1 or 2 are supported
+ *
+ * -# @ref CpuPermute if the data layout is NCHW (to convert to and from NHWC)
+ * -# @ref CpuDepthwiseConvolutionAssemblyDispatch if an assembly kernel implementation is present
+ * -# @ref CpuActivation if a fused activation that the assembly kernel cannot handle is required
+ *
+ */
+ class CpuDepthwiseConvolutionOptimizedInternal : public ICpuOperator
+ {
+ public:
+ /** Default constructor */
+ CpuDepthwiseConvolutionOptimizedInternal();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CpuDepthwiseConvolutionOptimizedInternal(const CpuDepthwiseConvolutionOptimizedInternal &) = delete;
+ /** Default move constructor */
+ CpuDepthwiseConvolutionOptimizedInternal(CpuDepthwiseConvolutionOptimizedInternal &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CpuDepthwiseConvolutionOptimizedInternal &operator=(const CpuDepthwiseConvolutionOptimizedInternal &) = delete;
+ /** Default move assignment operator */
+ CpuDepthwiseConvolutionOptimizedInternal &operator=(CpuDepthwiseConvolutionOptimizedInternal &&) = default;
+ /** Default destructor */
+ ~CpuDepthwiseConvolutionOptimizedInternal() = default;
+ /** Initialize the function's source, destination, kernels and border_size.
+ *
+ * @param[in, out] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
+ * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+ * @param[out] output Destination tensor info. Data type supported: same as @p input.
+ * @param[in] info Depthwise convolution meta-data.
+ */
+ void configure(ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CpuDepthwiseConvolutionOptimizedInternal
+ *
+ * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
+ * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] output Destination tensor info. Data type supported: same as @p input.
+ * @param[in] info Depthwise convolution meta-data.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info);
+
+ // Inherited methods overridden:
+ void run(ITensorPack &tensors) override;
+ void prepare(ITensorPack &tensors) override;
+
+ private:
+ std::unique_ptr<CpuDepthwiseConvolutionAssemblyDispatch> _dwc_optimized_func{ nullptr };
+ std::unique_ptr<CpuPermute> _permute_input{ nullptr };
+ std::unique_ptr<CpuPermute> _permute_weights{ nullptr };
+ std::unique_ptr<CpuPermute> _permute_output{ nullptr };
+ std::unique_ptr<CpuActivation> _activationlayer_function{ nullptr };
+ bool _has_bias{ false };
+ bool _is_quantized{ false };
+ bool _is_nchw{ true };
+ bool _permute{ false };
+ bool _is_activationlayer_enabled{ false };
+ bool _is_prepared{ false };
+ };
+
+ /** Basic function to execute a generic depthwise convolution. This function calls the following kernel:
+ *
+ * -# @ref CpuDepthwiseConvolutionNativeKernel
+ *
+ */
+ class CpuDepthwiseConvolutionGeneric : public ICpuOperator
+ {
+ public:
+ /** Default constructor */
+ CpuDepthwiseConvolutionGeneric();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CpuDepthwiseConvolutionGeneric(const CpuDepthwiseConvolutionGeneric &) = delete;
+ /** Default move constructor */
+ CpuDepthwiseConvolutionGeneric(CpuDepthwiseConvolutionGeneric &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CpuDepthwiseConvolutionGeneric &operator=(const CpuDepthwiseConvolutionGeneric &) = delete;
+ /** Default move assignment operator */
+ CpuDepthwiseConvolutionGeneric &operator=(CpuDepthwiseConvolutionGeneric &&) = default;
+ /** Default destructor */
+ ~CpuDepthwiseConvolutionGeneric() = default;
+ /** Initialize the function's source, destination, weights and convolution information.
+ *
+ * @param[in, out] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
+ * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
+ * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+ * @param[out] output Destination tensor info. Data type supported: same as @p input.
+ * @param[in] info Depthwise convolution meta-data.
+ */
+ void configure(ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CpuDepthwiseConvolutionGeneric
+ *
+ * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
+ * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
+ * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] output Destination tensor info. Data type supported: same as @p input.
+ * @param[in] info Depthwise convolution meta-data.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info);
+
+ // Inherited methods overridden:
+ void run(ITensorPack &tensors) override;
+ void prepare(ITensorPack &tensors) override;
+
+ private:
+ std::unique_ptr<kernels::CpuDepthwiseConvolutionNativeKernel> _depthwise_conv_kernel{ nullptr };
+ std::unique_ptr<CpuPermute> _permute_input{ nullptr };
+ std::unique_ptr<CpuPermute> _permute_weights{ nullptr };
+ std::unique_ptr<CpuPermute> _permute_output{ nullptr };
+ std::unique_ptr<CpuActivation> _activationlayer_function{ nullptr };
+ bool _is_nchw{ true };
+ bool _is_prepared{ false };
+ bool _is_activationlayer_enabled{ false };
+ };
+
+ DepthwiseConvolutionFunction _depth_conv_func;
+ CpuDepthwiseConvolutionOptimizedInternal _func_optimized;
+ CpuDepthwiseConvolutionGeneric _func_generic;
+};
+} // namespace cpu
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CPU_DEPTHWISECONVOLUTION_H */
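Usage sketch (not part of the patch): after this port, configuration works on ITensorInfo objects and execution binds the real tensors through an ITensorPack. The snippet below assumes the outer class declared in this header is cpu::CpuDepthwiseConvolution with the same configure signature as its internal helpers, and it omits the ACL_INT_0/ACL_INT_1 scratch tensors that the optimized path additionally requests via workspace().

    // Hypothetical caller, shown for illustration only.
    #include "arm_compute/runtime/Tensor.h"
    #include "src/runtime/cpu/operators/CpuDepthwiseConvolution.h"

    using namespace arm_compute;

    void run_depthwise(Tensor &src, Tensor &weights, Tensor &biases, Tensor &dst)
    {
        // ConvolutionInfo bundles what used to be loose arguments:
        // pad/stride, depth multiplier, activation and dilation.
        const ConvolutionInfo info{ PadStrideInfo(1, 1, 1, 1), 1, ActivationLayerInfo(), Size2D(1, 1) };

        cpu::CpuDepthwiseConvolution dwc;
        // Configure on tensor infos only; no buffers are touched here.
        dwc.configure(src.info(), weights.info(), biases.info(), dst.info(), info);

        // Bind the actual tensors at run time through a pack.
        ITensorPack pack;
        pack.add_const_tensor(TensorType::ACL_SRC_0, &src);
        pack.add_const_tensor(TensorType::ACL_SRC_1, &weights);
        pack.add_const_tensor(TensorType::ACL_SRC_2, &biases);
        pack.add_tensor(TensorType::ACL_DST, &dst);
        dwc.run(pack); // workspace tensors omitted for brevity (see workspace())
    }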
diff --git a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp b/src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.cpp
index 101df98b7d..5f5304cded 100644
--- a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp
+++ b/src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,7 +22,7 @@
* SOFTWARE.
*/
-#include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h"
+#include "src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Utils.h"
@@ -41,6 +41,8 @@
namespace arm_compute
{
+namespace cpu
+{
namespace
{
std::unique_ptr<depthwise::IDepthwiseConvolution> get_qasymm8_convolver(int kernel_size, int stride_x,
@@ -209,40 +211,37 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> get_fp32_convolver(int kernel_
}
}
-std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensor *input,
- const ITensor *weights,
- ITensor *output,
- PadStrideInfo conv_info,
- ActivationLayerInfo act_info,
- const Size2D &dilation)
+std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ ITensorInfo *output,
+ const ConvolutionInfo &info)
{
- ARM_COMPUTE_UNUSED(dilation);
- const DataType data_type = input->info()->data_type();
- const TensorShape shape = input->info()->tensor_shape();
+ const DataType data_type = input->data_type();
+ const TensorShape shape = input->tensor_shape();
const int n_batches = shape[3];
const int in_rows = shape.z();
const int in_cols = shape.y();
const int n_channels = shape.x();
- const int dilation_factor = dilation.x();
- const int padding_top = conv_info.pad_top();
- const int padding_left = conv_info.pad_left();
- const int padding_bottom = conv_info.pad_bottom();
- const int padding_right = conv_info.pad_right();
+ const int dilation_factor = info.dilation.x();
+ const int padding_top = info.pad_stride_info.pad_top();
+ const int padding_left = info.pad_stride_info.pad_left();
+ const int padding_bottom = info.pad_stride_info.pad_bottom();
+ const int padding_right = info.pad_stride_info.pad_right();
- const bool is_uniform_quantized = (data_type == DataType::QASYMM8) && (weights->info()->data_type() == DataType::QASYMM8);
- const bool is_perchannel_quantized = (data_type == DataType::QASYMM8) && (weights->info()->data_type() == DataType::QSYMM8_PER_CHANNEL);
+ const bool is_uniform_quantized = (data_type == DataType::QASYMM8) && (weights->data_type() == DataType::QASYMM8);
+ const bool is_perchannel_quantized = (data_type == DataType::QASYMM8) && (weights->data_type() == DataType::QSYMM8_PER_CHANNEL);
- const unsigned int stride_x = conv_info.stride().first;
- const unsigned int kernel_size = weights->info()->tensor_shape().y();
+ const unsigned int stride_x = info.pad_stride_info.stride().first;
+ const unsigned int kernel_size = weights->tensor_shape().y();
// Map activation function
neon_convolution_kernels::ActivationFunction activation = neon_convolution_kernels::ActivationFunction::None;
- if(arm_compute::utils::info_helpers::is_relu(act_info))
+ if(arm_compute::utils::info_helpers::is_relu(info.act_info))
{
activation = neon_convolution_kernels::ActivationFunction::ReLU;
}
- else if(arm_compute::utils::info_helpers::is_relu6(act_info))
+ else if(arm_compute::utils::info_helpers::is_relu6(info.act_info))
{
activation = neon_convolution_kernels::ActivationFunction::ReLU6;
}
@@ -250,9 +249,9 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensor
// Create quantized convolver
if(is_uniform_quantized)
{
- const UniformQuantizationInfo input_qinfo = input->info()->quantization_info().uniform();
- const UniformQuantizationInfo weights_qinfo = weights->info()->quantization_info().uniform();
- const UniformQuantizationInfo output_qinfo = output->info()->quantization_info().uniform();
+ const UniformQuantizationInfo input_qinfo = input->quantization_info().uniform();
+ const UniformQuantizationInfo weights_qinfo = weights->quantization_info().uniform();
+ const UniformQuantizationInfo output_qinfo = output->quantization_info().uniform();
// Check that quantization info are in the range [0, 255]
ARM_COMPUTE_ERROR_ON(input_qinfo.offset < 0 || input_qinfo.offset > 255);
@@ -274,9 +273,9 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensor
}
else if(is_perchannel_quantized)
{
- const UniformQuantizationInfo input_qinfo = input->info()->quantization_info().uniform();
- const QuantizationInfo weights_qinfo = weights->info()->quantization_info();
- const UniformQuantizationInfo output_qinfo = output->info()->quantization_info().uniform();
+ const UniformQuantizationInfo input_qinfo = input->quantization_info().uniform();
+ const QuantizationInfo weights_qinfo = weights->quantization_info();
+ const UniformQuantizationInfo output_qinfo = output->quantization_info().uniform();
// Check that quantization info are in the range [0, 255]
ARM_COMPUTE_ERROR_ON(input_qinfo.offset < 0 || input_qinfo.offset > 255);
@@ -328,83 +327,75 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensor
}
} // namespace
-struct NEDepthwiseConvolutionAssemblyDispatch::LocalImpl
+struct CpuDepthwiseConvolutionAssemblyDispatch::LocalImpl
{
- std::unique_ptr<depthwise::IDepthwiseConvolution> _dwc_assembly_kernel{ nullptr };
- NEDepthwiseConvolutionAssemblyKernelWrapper _dwc_acl_kernel{};
+ std::unique_ptr<depthwise::IDepthwiseConvolution> dwc_assembly_kernel{ nullptr };
+ NEDepthwiseConvolutionAssemblyKernelWrapper dwc_acl_kernel{};
+ bool is_prepared{ false };
+ experimental::MemoryRequirements mem_req{};
};
#ifndef DOXYGEN_SKIP_THIS
-NEDepthwiseConvolutionAssemblyDispatch::NEDepthwiseConvolutionAssemblyDispatch(std::shared_ptr<arm_compute::IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr), _packed_weights(), _workspace(), _is_prepared(false),
- _pImpl(std::make_unique<LocalImpl>())
+CpuDepthwiseConvolutionAssemblyDispatch::CpuDepthwiseConvolutionAssemblyDispatch()
+ : _pImpl(std::make_unique<LocalImpl>())
{
}
#endif /* DOXYGEN_SKIP_THIS */
-NEDepthwiseConvolutionAssemblyDispatch::~NEDepthwiseConvolutionAssemblyDispatch() = default;
+CpuDepthwiseConvolutionAssemblyDispatch::~CpuDepthwiseConvolutionAssemblyDispatch() = default;
-void NEDepthwiseConvolutionAssemblyDispatch::configure(const ITensor *input,
- const ITensor *weights,
- const ITensor *bias,
- ITensor *output,
- const PadStrideInfo &conv_info,
- unsigned int depth_multiplier,
- const ActivationLayerInfo &act_info,
- const Size2D &dilation)
+void CpuDepthwiseConvolutionAssemblyDispatch::configure(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *bias,
+ ITensorInfo *output,
+ const ConvolutionInfo &info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
- ARM_COMPUTE_UNUSED(depth_multiplier);
- ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionAssemblyDispatch::validate(input->info(),
- weights->info(),
- bias != nullptr ? bias->info() : nullptr,
- output->info(),
- conv_info,
- depth_multiplier,
- act_info,
- dilation));
+ ARM_COMPUTE_UNUSED(bias);
+ ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConvolutionAssemblyDispatch::validate(input,
+ weights,
+ bias,
+ output,
+ info));
// Output auto inizialitation if not yet initialized
- const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info, depth_multiplier, dilation);
- auto_init_if_empty(*output->info(), input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape).set_quantization_info(output->info()->quantization_info()));
+ const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, info);
+ auto_init_if_empty(*output, input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape).set_quantization_info(output->quantization_info()));
- _input = input;
- _weights = weights;
- _bias = bias;
- _output = output;
- _is_prepared = false;
+ _pImpl->is_prepared = false;
// Create convolver
- _pImpl->_dwc_assembly_kernel = create_convolver(input, weights, output, conv_info, act_info, dilation);
- ARM_COMPUTE_ERROR_ON(_pImpl->_dwc_assembly_kernel == nullptr);
+ _pImpl->dwc_assembly_kernel = create_convolver(input, weights, output, info);
+ ARM_COMPUTE_ERROR_ON(_pImpl->dwc_assembly_kernel == nullptr);
// Create assembly kernel wrapper
- _pImpl->_dwc_acl_kernel.configure(_pImpl->_dwc_assembly_kernel.get());
+ _pImpl->dwc_acl_kernel.configure(_pImpl->dwc_assembly_kernel.get());
constexpr size_t alignment = 128;
// Create workspace
const unsigned int num_threads = NEScheduler::get().num_threads();
- const size_t workspace_size = _pImpl->_dwc_assembly_kernel->get_working_space_size(num_threads);
+ const size_t workspace_size = _pImpl->dwc_assembly_kernel->get_working_space_size(num_threads);
ARM_COMPUTE_ERROR_ON_MSG(workspace_size == 0, "Workspace size cannot be 0 !");
- _workspace.allocator()->init(TensorInfo(TensorShape{ workspace_size }, 1, DataType::S8), alignment);
- _memory_group.manage(&_workspace);
- _workspace.allocator()->allocate();
+ _pImpl->mem_req.push_back({ TensorType::ACL_INT_0, workspace_size, alignment });
// Create packing tensor
- const size_t pack_tensor_size = _pImpl->_dwc_assembly_kernel->get_packed_params_size();
+ const size_t pack_tensor_size = _pImpl->dwc_assembly_kernel->get_packed_params_size();
ARM_COMPUTE_ERROR_ON_MSG(pack_tensor_size == 0, "Pack tensor size cannot be 0 !");
- _packed_weights.allocator()->init(TensorInfo(TensorShape{ pack_tensor_size }, 1, DataType::S8), alignment);
+
+ _pImpl->mem_req.push_back({ TensorType::ACL_INT_1, pack_tensor_size, alignment });
+}
+
+experimental::MemoryRequirements CpuDepthwiseConvolutionAssemblyDispatch::workspace() const
+{
+ return _pImpl->mem_req;
}
-Status NEDepthwiseConvolutionAssemblyDispatch::validate(const ITensorInfo *input,
- const ITensorInfo *weights,
- const ITensorInfo *bias,
- const ITensorInfo *output,
- const PadStrideInfo &conv_info,
- unsigned int depth_multiplier,
- const ActivationLayerInfo &act_info,
- const Size2D &dilation)
+Status CpuDepthwiseConvolutionAssemblyDispatch::validate(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ITensorInfo *bias,
+ const ITensorInfo *output,
+ const ConvolutionInfo &info)
{
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
@@ -415,12 +406,12 @@ Status NEDepthwiseConvolutionAssemblyDispatch::validate(const ITensorInfo
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
// Validate convolver
- ARM_COMPUTE_RETURN_ERROR_ON(!is_optimized_supported(input, weights, conv_info, depth_multiplier, dilation));
+ ARM_COMPUTE_RETURN_ERROR_ON(!is_optimized_supported(input, weights, info));
// Validate activation
- const bool is_relu = arm_compute::utils::info_helpers::is_relu(act_info);
- const bool is_relu6 = arm_compute::utils::info_helpers::is_relu6(act_info);
- ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled() && !(is_relu || is_relu6));
+ const bool is_relu = arm_compute::utils::info_helpers::is_relu(info.act_info);
+ const bool is_relu6 = arm_compute::utils::info_helpers::is_relu6(info.act_info);
+ ARM_COMPUTE_RETURN_ERROR_ON(info.act_info.enabled() && !(is_relu || is_relu6));
// Check bias
if(bias != nullptr)
@@ -433,7 +424,7 @@ Status NEDepthwiseConvolutionAssemblyDispatch::validate(const ITensorInfo
// Check output
if(output->total_size() != 0)
{
- const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
+ const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, info);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
}
@@ -451,11 +442,9 @@ Status NEDepthwiseConvolutionAssemblyDispatch::validate(const ITensorInfo
return Status{};
}
-bool NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(const ITensorInfo *input,
- const ITensorInfo *weights,
- PadStrideInfo conv_info,
- unsigned int depth_multiplier,
- const Size2D &dilation)
+bool CpuDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(const ITensorInfo *input,
+ const ITensorInfo *weights,
+ const ConvolutionInfo &info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights);
@@ -486,84 +475,90 @@ bool NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(const ITenso
bool weights_supported = (kernel_w == kernel_h) && (supported_kernel_sizes.count(kernel_w) != 0);
// Check for supported strides
- const auto &strides = conv_info.stride();
+ const auto &strides = info.pad_stride_info.stride();
bool supported_strides = (strides.first == strides.second) && ((strides.first == 1) || (strides.first == 2));
// Check for supported padding
- const auto pad_top = conv_info.pad_top();
- const auto pad_right = conv_info.pad_right();
- const auto pad_bottom = conv_info.pad_bottom();
- const auto pad_left = conv_info.pad_left();
- PadStrideInfo same_pad = calculate_same_pad(in_shape, TensorShape(kernel_w, kernel_h), conv_info, DataLayout::NCHW, dilation);
+ const auto pad_top = info.pad_stride_info.pad_top();
+ const auto pad_right = info.pad_stride_info.pad_right();
+ const auto pad_bottom = info.pad_stride_info.pad_bottom();
+ const auto pad_left = info.pad_stride_info.pad_left();
+ PadStrideInfo same_pad = calculate_same_pad(in_shape, TensorShape(kernel_w, kernel_h), info.pad_stride_info, DataLayout::NCHW, info.dilation);
bool is_same_padding = (pad_top == same_pad.pad_top()) && (pad_right == same_pad.pad_right()) && (pad_bottom == same_pad.pad_bottom()) && (pad_left == same_pad.pad_left());
bool is_valid_padding = (pad_top == 0) && (pad_right == 0) && (pad_bottom == 0) && (pad_left == 0);
bool supported_padding = is_same_padding || is_valid_padding;
// TODO(COMPMID-2464): Enable once dilated conv with stride 2 is supported
- bool is_dilation_supported = ((dilation == Size2D(1U, 1U)) || ((dilation.x() == dilation.y()) && strides.first == 1));
+ bool is_dilation_supported = ((info.dilation == Size2D(1U, 1U)) || ((info.dilation.x() == info.dilation.y()) && strides.first == 1));
if(weights_type == DataType::QSYMM8_PER_CHANNEL)
{
- is_dilation_supported = is_dilation_supported && (dilation == Size2D(1U, 1U));
+ is_dilation_supported = is_dilation_supported && (info.dilation == Size2D(1U, 1U));
}
- return is_input_type_valid && is_weights_type_valid && weights_supported && supported_strides && supported_padding && (depth_multiplier == 1) && is_dilation_supported;
+ return is_input_type_valid && is_weights_type_valid && weights_supported && supported_strides && supported_padding && (info.depth_multiplier == 1) && is_dilation_supported;
}
-void NEDepthwiseConvolutionAssemblyDispatch::run()
+void CpuDepthwiseConvolutionAssemblyDispatch::run(ITensorPack &tensors)
{
// Prepare assembly kernel
- prepare();
+ prepare(tensors);
- MemoryGroupResourceScope scope_mg(_memory_group);
+ auto src = tensors.get_tensor(TensorType::ACL_SRC_0);
+ auto workspace = tensors.get_tensor(TensorType::ACL_INT_0);
+ auto dst = tensors.get_tensor(TensorType::ACL_DST);
// Setup inputs/outputs
- ARM_COMPUTE_ERROR_ON(_workspace.buffer() == nullptr);
- _pImpl->_dwc_assembly_kernel->set_working_space(static_cast<void *>(_workspace.buffer()));
-
- ARM_COMPUTE_ERROR_ON(_input->buffer() == nullptr);
- const int input_element_size = _input->info()->element_size();
- const int input_batch_stride = _input->info()->strides_in_bytes()[3] / input_element_size;
- const int input_row_stride = _input->info()->strides_in_bytes().z() / input_element_size;
- const int input_col_stride = _input->info()->strides_in_bytes().y() / input_element_size;
- const void *input_ptr = _input->buffer() + _input->info()->offset_first_element_in_bytes();
- _pImpl->_dwc_assembly_kernel->set_input(input_ptr, input_batch_stride, input_row_stride, input_col_stride);
-
- ARM_COMPUTE_ERROR_ON(_output->buffer() == nullptr);
- const int output_element_size = _output->info()->element_size();
- const int output_batch_stride = _output->info()->strides_in_bytes()[3] / output_element_size;
- const int output_row_stride = _output->info()->strides_in_bytes().z() / output_element_size;
- const int output_col_stride = _output->info()->strides_in_bytes().y() / output_element_size;
- void *output_ptr = _output->buffer() + _output->info()->offset_first_element_in_bytes();
- _pImpl->_dwc_assembly_kernel->set_output(output_ptr, output_batch_stride, output_row_stride, output_col_stride);
+ ARM_COMPUTE_ERROR_ON(workspace == nullptr || workspace->buffer() == nullptr);
+ _pImpl->dwc_assembly_kernel->set_working_space(static_cast<void *>(workspace->buffer()));
+
+ ARM_COMPUTE_ERROR_ON(src->buffer() == nullptr);
+ const int input_element_size = src->info()->element_size();
+ const int input_batch_stride = src->info()->strides_in_bytes()[3] / input_element_size;
+ const int input_row_stride = src->info()->strides_in_bytes().z() / input_element_size;
+ const int input_col_stride = src->info()->strides_in_bytes().y() / input_element_size;
+ const void *input_ptr = src->buffer() + src->info()->offset_first_element_in_bytes();
+ _pImpl->dwc_assembly_kernel->set_input(input_ptr, input_batch_stride, input_row_stride, input_col_stride);
+
+ ARM_COMPUTE_ERROR_ON(dst->buffer() == nullptr);
+ const int output_element_size = dst->info()->element_size();
+ const int output_batch_stride = dst->info()->strides_in_bytes()[3] / output_element_size;
+ const int output_row_stride = dst->info()->strides_in_bytes().z() / output_element_size;
+ const int output_col_stride = dst->info()->strides_in_bytes().y() / output_element_size;
+ void *output_ptr = dst->buffer() + dst->info()->offset_first_element_in_bytes();
+ _pImpl->dwc_assembly_kernel->set_output(output_ptr, output_batch_stride, output_row_stride, output_col_stride);
// Schedule assembly kernel
- NEScheduler::get().schedule(&_pImpl->_dwc_acl_kernel, Window::DimX);
+ NEScheduler::get().schedule(&_pImpl->dwc_acl_kernel, Window::DimX);
}
-void NEDepthwiseConvolutionAssemblyDispatch::prepare()
+void CpuDepthwiseConvolutionAssemblyDispatch::prepare(ITensorPack &tensors)
{
- if(!_is_prepared)
+ if(!_pImpl->is_prepared)
{
- _packed_weights.allocator()->allocate();
- ARM_COMPUTE_ERROR_ON(_packed_weights.buffer() == nullptr);
+ auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
+ auto bias = tensors.get_const_tensor(TensorType::ACL_SRC_2);
+ auto packed_weights = tensors.get_tensor(TensorType::ACL_INT_1);
+
+ ARM_COMPUTE_ERROR_ON(packed_weights->buffer() == nullptr);
// Pack weights and bias
- const int weights_element_size = _weights->info()->element_size();
- const int weights_row_stride = _weights->info()->strides_in_bytes().z() / weights_element_size;
- const int weights_col_stride = _weights->info()->strides_in_bytes().y() / weights_element_size;
- _pImpl->_dwc_assembly_kernel->pack_params(_packed_weights.buffer(),
- _weights->buffer() + _weights->info()->offset_first_element_in_bytes(),
- weights_row_stride,
- weights_col_stride,
- (_bias != nullptr) ? _bias->buffer() : nullptr);
- _pImpl->_dwc_assembly_kernel->set_packed_params_buffer(_packed_weights.buffer());
-
- _weights->mark_as_unused();
- if(_bias != nullptr)
+ const int weights_element_size = weights->info()->element_size();
+ const int weights_row_stride = weights->info()->strides_in_bytes().z() / weights_element_size;
+ const int weights_col_stride = weights->info()->strides_in_bytes().y() / weights_element_size;
+ _pImpl->dwc_assembly_kernel->pack_params(packed_weights->buffer(),
+ weights->buffer() + weights->info()->offset_first_element_in_bytes(),
+ weights_row_stride,
+ weights_col_stride,
+ (bias != nullptr) ? bias->buffer() : nullptr);
+ _pImpl->dwc_assembly_kernel->set_packed_params_buffer(packed_weights->buffer());
+
+ weights->mark_as_unused();
+ if(bias != nullptr)
{
- _bias->mark_as_unused();
+ bias->mark_as_unused();
}
- _is_prepared = true;
+ _pImpl->is_prepared = true;
}
}
+} // namespace cpu
} // namespace arm_compute
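Workspace sketch (not part of the patch): the dispatch no longer owns a MemoryGroup; it publishes its scratch needs (ACL_INT_0 workspace, ACL_INT_1 packed weights) through workspace() and expects the caller to provide matching tensors in the run/prepare pack. The helper below is a sketch under the assumption that experimental::MemoryInfo exposes slot, size and alignment fields, matching the aggregate initialisation used in configure() above.

    // Hypothetical helper, shown for illustration only.
    #include <memory>
    #include <vector>
    #include "arm_compute/runtime/Tensor.h"
    #include "src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.h"

    using namespace arm_compute;

    void bind_workspace(cpu::CpuDepthwiseConvolutionAssemblyDispatch &dwc, ITensorPack &pack,
                        std::vector<std::unique_ptr<Tensor>> &storage)
    {
        for(const auto &req : dwc.workspace())
        {
            auto t = std::make_unique<Tensor>();
            // One 1D S8 buffer per request, mirroring how the old code
            // initialised its internal workspace and packed-weights tensors.
            t->allocator()->init(TensorInfo(TensorShape{ req.size }, 1, DataType::S8), req.alignment);
            t->allocator()->allocate();
            pack.add_tensor(req.slot, t.get()); // e.g. ACL_INT_0 / ACL_INT_1
            storage.emplace_back(std::move(t));
        }
    }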
diff --git a/src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.h b/src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.h
new file mode 100644
index 0000000000..6aac74c3ef
--- /dev/null
+++ b/src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2019-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CPU_DEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H
+#define ARM_COMPUTE_CPU_DEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H
+
+#include "src/runtime/cpu/ICpuOperator.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+/** Depthwise convolution assembly kernel glue */
+class CpuDepthwiseConvolutionAssemblyDispatch : public ICpuOperator
+{
+public:
+ /** Default constructor */
+ CpuDepthwiseConvolutionAssemblyDispatch();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CpuDepthwiseConvolutionAssemblyDispatch(const CpuDepthwiseConvolutionAssemblyDispatch &) = delete;
+ /** Default move constructor */
+ CpuDepthwiseConvolutionAssemblyDispatch(CpuDepthwiseConvolutionAssemblyDispatch &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CpuDepthwiseConvolutionAssemblyDispatch &operator=(const CpuDepthwiseConvolutionAssemblyDispatch &) = delete;
+ /** Default move assignment operator */
+ CpuDepthwiseConvolutionAssemblyDispatch &operator=(CpuDepthwiseConvolutionAssemblyDispatch &&) = default;
+ /** Default destructor */
+ ~CpuDepthwiseConvolutionAssemblyDispatch();
+ /** Initialize the function's source, destination and convolution meta-data.
+ *
+ * @note Supports only NHWC format
+ *
+ * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
+ * @param[in] weights Weights tensor info. These are 3D tensors with shape [W, H, IFM].
+ * Data type supported: Same as @p input or QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+ * @param[in] bias (Optional) Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input.
+ * @param[out] output Destination tensor info. Data type supported: same as @p input.
+ * @param[in] info Depthwise convolution meta-data.
+ */
+ void configure(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, ITensorInfo *output, const ConvolutionInfo &info);
+ /** Static function to check if given info will lead to a valid configuration of @ref CpuDepthwiseConvolutionAssemblyDispatch
+ *
+ * @note Supports only NHWC format
+ *
+ * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
+ * @param[in] weights Weights tensor info. These are 3D tensors with shape [W, H, IFM].
+ * Data type supported: Same as @p input or QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+ * @param[in] bias (Optional) Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input.
+ * @param[in] output Destination tensor info. Data type supported: same as @p input.
+ * @param[in] info Depthwise convolution meta-data.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const ConvolutionInfo &info);
+ /** Check if the optimized kernel can be used for the given kernel sizes and strides
+ *
+ * @warning Even if this returns true, the inputs and outputs might still need to be permuted as the only layout supported is NHWC
+ *
+ * @param[in] input Input tensor info.
+ * @param[in] weights Weights tensor info.
+ * @param[in] info Depthwise convolution meta-data.
+ *
+ * @return True if the assembly kernel could be used, else false. Note that transformations of input/output could be needed.
+ */
+ static bool is_optimized_supported(const ITensorInfo *input, const ITensorInfo *weights, const ConvolutionInfo &info);
+
+ // Inherited methods overridden:
+ void run(ITensorPack &tensors) override;
+ void prepare(ITensorPack &tensors) override;
+ experimental::MemoryRequirements workspace() const override;
+
+private:
+ struct LocalImpl;
+ std::unique_ptr<LocalImpl> _pImpl;
+};
+} // namespace cpu
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CPU_DEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H */
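Selection sketch (not part of the patch): is_optimized_supported() is what lets the composite operator pick a path up front. The function below is an assumption about how CpuDepthwiseConvolution maps that check onto the DepthwiseConvolutionFunction enum stored in _depth_conv_func.

    // Hypothetical selection helper, shown for illustration only.
    #include "arm_compute/core/Types.h"
    #include "src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.h"

    using namespace arm_compute;

    DepthwiseConvolutionFunction choose_path(const ITensorInfo *input, const ITensorInfo *weights,
                                             const ConvolutionInfo &info)
    {
        // The assembly path only covers square kernels from a small supported set,
        // strides of 1 or 2, SAME/VALID padding, depth multiplier 1 and limited dilation.
        if(cpu::CpuDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(input, weights, info))
        {
            return DepthwiseConvolutionFunction::OPTIMIZED;
        }
        return DepthwiseConvolutionFunction::GENERIC;
    }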
diff --git a/tests/NEON/Helper.h b/tests/NEON/Helper.h
index 714152ebcd..c9e53d11b0 100644
--- a/tests/NEON/Helper.h
+++ b/tests/NEON/Helper.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,7 +27,9 @@
#include "arm_compute/runtime/Array.h"
#include "arm_compute/runtime/NEON/INESimpleFunction.h"
#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/runtime/cpu/ICpuOperator.h"
#include "tests/Globals.h"
#include <algorithm>
@@ -104,7 +106,7 @@ public:
/** As above but this also setups a Zero border on the input tensor of the kernel's bordersize */
template <typename K>
-class NESynthetizeFunctionWithZeroConstantKernelBorder : public INESimpleFunction
+class NESynthetizeFunctionWithZeroConstantKernelBorder : public cpu::ICpuOperator
{
public:
/** Configure the kernel.
@@ -123,6 +125,15 @@ public:
b->configure(first, BorderSize(_kernel->border_size()), BorderMode::CONSTANT, PixelValue());
_border_handler = std::move(b);
}
+
+ void run(ITensorPack &tensors) override
+ {
+ NEScheduler::get().schedule(_border_handler.get(), Window::DimZ);
+ NEScheduler::get().schedule_op(_kernel.get(), Window::DimY, _kernel->window(), tensors);
+ }
+
+private:
+ std::unique_ptr<INEKernel> _border_handler{ nullptr };
};
} // namespace test
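Test-side sketch (not part of the patch): since the wrapper now derives from cpu::ICpuOperator, a test drives it like any other operator. Tensor creation and filling are elided; the configure signature is assumed to forward to CpuDepthwiseConvolutionNativeKernel::configure as used in the test file below.

    // Hypothetical test helper, shown for illustration only.
    using namespace arm_compute;
    using namespace arm_compute::test;

    using DwcNativeOp = NESynthetizeFunctionWithZeroConstantKernelBorder<cpu::kernels::CpuDepthwiseConvolutionNativeKernel>;

    void run_native(Tensor &src, Tensor &weights, Tensor &biases, Tensor &dst, const ConvolutionInfo &info)
    {
        DwcNativeOp dwc;
        dwc.configure(src.info(), weights.info(), biases.info(), dst.info(), info);

        ITensorPack pack;
        pack.add_const_tensor(TensorType::ACL_SRC_0, &src);
        pack.add_const_tensor(TensorType::ACL_SRC_1, &weights);
        pack.add_const_tensor(TensorType::ACL_SRC_2, &biases);
        pack.add_tensor(TensorType::ACL_DST, &dst);
        dwc.run(pack); // fills the zero border, then schedules the kernel over the pack
    }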
diff --git a/tests/validation/NEON/DepthwiseConvolutionLayerNative.cpp b/tests/validation/NEON/DepthwiseConvolutionLayerNative.cpp
index 3314227bec..5c2ebaa51f 100644
--- a/tests/validation/NEON/DepthwiseConvolutionLayerNative.cpp
+++ b/tests/validation/NEON/DepthwiseConvolutionLayerNative.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
+#include "src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h"
#include "tests/NEON/Accessor.h"
#include "tests/NEON/Helper.h"
#include "tests/framework/Macros.h"
@@ -37,12 +37,12 @@ namespace validation
{
using namespace arm_compute::misc::shape_calculator;
-// Create function for NEDepthwiseConvolutionLayerKernel
-using NEDepthwiseConvolutionLayerNative = NESynthetizeFunctionWithZeroConstantKernelBorder<NEDepthwiseConvolutionLayerNativeKernel>;
+// Create function for CpuDepthwiseConvolutionNativeKernel
+using CpuDepthwiseConvolutionNative = NESynthetizeFunctionWithZeroConstantKernelBorder<cpu::kernels::CpuDepthwiseConvolutionNativeKernel>;
// Fixture for NEDepthwiseConvolutionLayerKernel
template <typename T>
-using NEDepthwiseConvolutionLayerNativeFixture = DepthwiseConvolutionLayerNativeValidationFixture<Tensor, Accessor, NEDepthwiseConvolutionLayerNative, T>;
+using CpuDepthwiseConvolutionNativeFixture = DepthwiseConvolutionLayerNativeValidationFixture<Tensor, Accessor, CpuDepthwiseConvolutionNative, T>;
namespace
{
@@ -124,8 +124,9 @@ TEST_CASE(ValidateNoPadding, framework::DatasetMode::ALL)
auto biases = create_tensor<Tensor>(bias_shape, data_type, 1, QuantizationInfo(), data_layout);
auto dst = create_tensor<Tensor>(TensorShape(), data_type, 1, QuantizationInfo(), data_layout);
- NEDepthwiseConvolutionLayerNativeKernel dwc;
- dwc.configure(&src, &weights, &biases, &dst, pad_stride_info);
+ cpu::kernels::CpuDepthwiseConvolutionNativeKernel dwc;
+ const ConvolutionInfo info{ pad_stride_info, 1, ActivationLayerInfo(), Size2D(1, 1) };
+ dwc.configure(src.info(), weights.info(), biases.info(), dst.info(), info);
ARM_COMPUTE_EXPECT(src.info()->padding().empty(), framework::LogLevel::ERRORS);
ARM_COMPUTE_EXPECT(weights.info()->padding().empty(), framework::LogLevel::ERRORS);
@@ -135,7 +136,7 @@ TEST_CASE(ValidateNoPadding, framework::DatasetMode::ALL)
TEST_SUITE(Float)
TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::ALL,
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CpuDepthwiseConvolutionNativeFixture<float>, framework::DatasetMode::ALL,
combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(width_values_precommit,
height_values_precommit),
channel_values_precommit),
@@ -152,7 +153,7 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, NEDepthwiseConvolutionLayerNativeFixture<fl
validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE_NEW(RunLarge, NEDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::NIGHTLY,
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CpuDepthwiseConvolutionNativeFixture<float>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(width_values_nightly,
height_values_nightly),
channel_values_nightly),
diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
index e87e31f97b..a0ff2c5ac9 100644
--- a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
@@ -78,9 +78,10 @@ public:
_weights_shape = TensorShape(kernel_size.width, kernel_size.height);
- const TensorInfo in_info(_input_shape, 1, _input_data_type);
- const TensorInfo we_info(_weights_shape, 1, _weights_data_type);
- _output_shape = compute_depthwise_convolution_shape(in_info, we_info, _pad_stride_info, _depth_multiplier, _dilation);
+ const TensorInfo in_info(_input_shape, 1, _input_data_type);
+ const TensorInfo we_info(_weights_shape, 1, _weights_data_type);
+ const ConvolutionInfo info{ _pad_stride_info, _depth_multiplier, _act_info, _dilation };
+ _output_shape = compute_depthwise_convolution_shape(in_info, we_info, info);
_weights_shape.set(2, _output_shape.z());
_biases_shape = TensorShape(_weights_shape[2]);
@@ -301,7 +302,11 @@ public:
_target = create_tensor<TensorType>(TensorShape(), _data_type, 1, QuantizationInfo(), _data_layout);
// Create Depthwise Convolution configure function
- _dwc.configure(&_src, &_weights, &_biases, &_target, _conv_info, _depth_multiplier, _dilation);
+ const ConvolutionInfo info
+ {
+ _conv_info, _depth_multiplier, ActivationLayerInfo(), _dilation
+ };
+ _dwc.configure(_src.info(), _weights.info(), _biases.info(), _target.info(), info);
ARM_COMPUTE_EXPECT(_src.info()->is_resizable(), framework::LogLevel::ERRORS);
ARM_COMPUTE_EXPECT(_weights.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -329,8 +334,14 @@ public:
fill(AccessorType(_weights), 1);
fill(AccessorType(_biases), 2);
+ arm_compute::ITensorPack pack;
+ pack.add_const_tensor(arm_compute::TensorType::ACL_SRC_0, &_src);
+ pack.add_const_tensor(arm_compute::TensorType::ACL_SRC_1, &_weights);
+ pack.add_const_tensor(arm_compute::TensorType::ACL_SRC_2, &_biases);
+ pack.add_tensor(arm_compute::TensorType::ACL_DST, &_target);
+
// Compute function
- _dwc.run();
+ _dwc.run(pack);
}
void compute_reference()
@@ -343,9 +354,9 @@ public:
fill(weights, 1);
fill(biases, 2);
- const TensorShape dst_shape = compute_depthwise_convolution_shape(TensorInfo(_input_shape, 1, _data_type), TensorInfo(_weights_shape, 1, _data_type), _conv_info,
- _depth_multiplier, _dilation);
- _reference = reference::depthwise_convolution(src, weights, biases, dst_shape, _conv_info, _depth_multiplier, _dilation);
+ const ConvolutionInfo info{ _conv_info, _depth_multiplier, ActivationLayerInfo(), _dilation };
+ const TensorShape dst_shape = compute_depthwise_convolution_shape(TensorInfo(_input_shape, 1, _data_type), TensorInfo(_weights_shape, 1, _data_type), info);
+ _reference = reference::depthwise_convolution(src, weights, biases, dst_shape, _conv_info, _depth_multiplier, _dilation);
}
protected:
@@ -485,9 +496,9 @@ public:
fill(weights, 1);
fill(biases, 2);
- const TensorShape dst_shape = compute_depthwise_convolution_shape(TensorInfo(_input_shape, 1, _data_type), TensorInfo(_weights_shape, 1, _data_type), _conv_info,
- _depth_multiplier, _dilation);
- _reference = reference::activation_layer(reference::depthwise_convolution(src, weights, biases, dst_shape, _conv_info, _depth_multiplier, _dilation), _act_info);
+ const ConvolutionInfo info{ _conv_info, _depth_multiplier, _act_info, _dilation };
+ const TensorShape dst_shape = compute_depthwise_convolution_shape(TensorInfo(_input_shape, 1, _data_type), TensorInfo(_weights_shape, 1, _data_type), info);
+ _reference = reference::activation_layer(reference::depthwise_convolution(src, weights, biases, dst_shape, _conv_info, _depth_multiplier, _dilation), _act_info);
}
protected: