aboutsummaryrefslogtreecommitdiff
path: root/src/core
diff options
context:
space:
mode:
authorMichalis Spyrou <michalis.spyrou@arm.com>2021-04-08 12:02:58 +0100
committerMichalis Spyrou <michalis.spyrou@arm.com>2021-04-19 13:45:08 +0000
commit60c3b0e6821a80d78ffca5be30e05d062d071cd2 (patch)
tree3e263a45aa9617cfd7704b2b33ea4337f1582321 /src/core
parent4f1650f0c9919f0bac5024b8e31c0f754d25aec3 (diff)
downloadComputeLibrary-60c3b0e6821a80d78ffca5be30e05d062d071cd2.tar.gz
Port DepthwiseConvolution to new API
Resolves: COMPMID-4185 Change-Id: Ib5f22356356a022d567bb18d44ea272b62d10ebf Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5424 Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core')
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp9
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp5
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp8
-rw-r--r--src/core/NEON/NEKernels.h1
-rw-r--r--src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h131
-rw-r--r--src/core/cpu/kernels/CpuActivationKernel.cpp2
-rw-r--r--src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.cpp (renamed from src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp)139
-rw-r--r--src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h117
8 files changed, 206 insertions, 206 deletions
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
index e8daa56f36..287a965f5b 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
@@ -105,7 +105,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
if(output->total_size() != 0)
{
- const TensorShape output_shape = compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
+ const ConvolutionInfo info{ conv_info, depth_multiplier, ActivationLayerInfo(), dilation };
+ const TensorShape output_shape = compute_depthwise_convolution_shape(*input, *weights, info);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
}
@@ -116,7 +117,11 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
unsigned int depth_multiplier, GPUTarget gpu_target, std::string &kernel_name, const Size2D dilation)
{
// Output auto inizialitation if not yet initialized
- const TensorShape output_shape = compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
+ const ConvolutionInfo info
+ {
+ conv_info, depth_multiplier, ActivationLayerInfo(), dilation
+ };
+ const TensorShape output_shape = compute_depthwise_convolution_shape(*input, *weights, info);
auto_init_if_empty(*output, input->clone()->set_tensor_shape(output_shape).set_quantization_info(output->quantization_info()));
const unsigned int conv_stride_x = conv_info.stride().first;
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
index 6400ba53e2..f7603e6397 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
@@ -65,8 +65,9 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
const size_t weights_width = 3;
const size_t weights_height = 3;
- const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_depthwise_convolution_shape(
- *input, TensorInfo(TensorShape(weights_width, weights_height), 1, weights->data_type()).set_data_layout(DataLayout::NCHW), conv_info, depth_multiplier, dilation);
+ const ConvolutionInfo info{ conv_info, depth_multiplier, ActivationLayerInfo(), dilation };
+ const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_depthwise_convolution_shape(
+ *input, TensorInfo(TensorShape(weights_width, weights_height), 1, weights->data_type()).set_data_layout(DataLayout::NCHW), info);
if(is_qasymm)
{
DepthwiseConvolutionReshapeInfo info;
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
index c34018a000..fcfa7f878d 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -58,7 +58,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
ARM_COMPUTE_UNUSED(idx_c);
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_c) != (input->dimension(idx_c) * depth_multiplier));
- const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
+ const ConvolutionInfo info{ conv_info, depth_multiplier, ActivationLayerInfo(), dilation };
+ const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, info);
const bool is_quantized = is_data_type_quantized(input->data_type());
@@ -156,7 +157,8 @@ void CLDepthwiseConvolutionLayerNativeKernel::configure(const CLCompileContext &
auto padding_info = get_padding_info({ input, output });
- const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_depthwise_convolution_shape(*(input->info()), *(weights->info()), conv_info, depth_multiplier, dilation);
+ const ConvolutionInfo info{ conv_info, depth_multiplier, ActivationLayerInfo(), dilation };
+ const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_depthwise_convolution_shape(*(input->info()), *(weights->info()), info);
auto_init_if_empty(*(output->info()), input->info()->clone()->set_tensor_shape(output_shape).set_quantization_info(output->info()->quantization_info()));
_input = input;
diff --git a/src/core/NEON/NEKernels.h b/src/core/NEON/NEKernels.h
index 264f521be2..e982470402 100644
--- a/src/core/NEON/NEKernels.h
+++ b/src/core/NEON/NEKernels.h
@@ -38,7 +38,6 @@
#include "src/core/NEON/kernels/NECropKernel.h"
#include "src/core/NEON/kernels/NEDepthConvertLayerKernel.h"
#include "src/core/NEON/kernels/NEDepthToSpaceLayerKernel.h"
-#include "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h"
#include "src/core/NEON/kernels/NEFFTRadixStageKernel.h"
#include "src/core/NEON/kernels/NEFFTScaleKernel.h"
diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h
deleted file mode 100644
index 713cdcd9d9..0000000000
--- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H
-#define ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H
-
-#include "arm_compute/core/utils/misc/Traits.h"
-#include "src/core/NEON/INEKernel.h"
-#include "support/Requires.h"
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#include <arm_neon.h>
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the kernel to run a depthwise convolution native on a tensor. */
-class NEDepthwiseConvolutionLayerNativeKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEDepthwiseConvolutionLayerNativeKernel";
- }
- /** Default constructor */
- NEDepthwiseConvolutionLayerNativeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionLayerNativeKernel(const NEDepthwiseConvolutionLayerNativeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEDepthwiseConvolutionLayerNativeKernel &operator=(const NEDepthwiseConvolutionLayerNativeKernel &) = delete;
- /** Default Move Constructor. */
- NEDepthwiseConvolutionLayerNativeKernel(NEDepthwiseConvolutionLayerNativeKernel &&) = default;
- /** Default move assignment operator */
- NEDepthwiseConvolutionLayerNativeKernel &operator=(NEDepthwiseConvolutionLayerNativeKernel &&) = default;
- /** Default destructor */
- ~NEDepthwiseConvolutionLayerNativeKernel() = default;
- /** Initialize the function's source, destination and parameters.
- *
- * @note Supported data layouts: NHWC
- *
- * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [IFM, W, H].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- */
- void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
- const Size2D &dilation = Size2D(1U, 1U));
- /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerNativeKernel
- *
- * @note Supported data layouts: NHWC
- *
- * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor info. This is a 3D tensor with dimensions [IFM, W, H].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] output Destination tensor info. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
- const Size2D &dilation = Size2D(1U, 1U));
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- template <typename T>
- using FloatEnalber = typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, int>::type;
-
- template <typename T, typename TW, FloatEnalber<T> = 0>
- void run_depthwise(const Window &window, bool has_biases);
-
- template <typename T>
- using Quantized8bitEnalber = typename std::enable_if < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int >::type;
-
- template <typename T, typename TW, Quantized8bitEnalber<T> = 0>
- void run_depthwise(const Window &window, bool has_biases);
-
- /** Common signature for all the specialised depthwise convolution native functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using DepthwiseFunctionPtr = void (NEDepthwiseConvolutionLayerNativeKernel::*)(const Window &window, bool has_biases);
-
- DepthwiseFunctionPtr _func;
- const ITensor *_input;
- const ITensor *_weights;
- const ITensor *_biases;
- ITensor *_output;
- PadStrideInfo _conv_info;
- unsigned int _depth_multiplier;
- Size2D _dilation;
- std::vector<int> _output_multiplier;
- std::vector<int> _output_shift;
- bool _has_biases;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H */
diff --git a/src/core/cpu/kernels/CpuActivationKernel.cpp b/src/core/cpu/kernels/CpuActivationKernel.cpp
index 761258941d..eb38c18cff 100644
--- a/src/core/cpu/kernels/CpuActivationKernel.cpp
+++ b/src/core/cpu/kernels/CpuActivationKernel.cpp
@@ -205,7 +205,7 @@ std::pair<Status, Window> validate_and_configure_window(const ITensorInfo *src,
void CpuActivationKernel::configure(const ITensorInfo *src, ITensorInfo *dst, ActivationLayerInfo activation_info)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src);
_act_info = activation_info;
diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.cpp
index 24fd01fee1..a5d1b61c08 100644
--- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp
+++ b/src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.cpp
@@ -21,8 +21,10 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
+#include "src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "src/core/CPP/Validate.h"
@@ -35,6 +37,10 @@
namespace arm_compute
{
+namespace cpu
+{
+namespace kernels
+{
namespace
{
constexpr auto data_layout = DataLayout::NHWC;
@@ -716,19 +722,18 @@ void depthwise_loop_pow2_quantized_per_tensor(const ITensor *input, const ITenso
input_it, weights_it, biases_it, output_it);
}
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,
- const Size2D &dilation)
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON(depth_multiplier == 0);
- ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(1) + (weights->dimension(1) - 1) * (dilation.x() - 1) > input->dimension(1) + conv_info.pad_left() + conv_info.pad_right());
- ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(2) + (weights->dimension(2) - 1) * (dilation.y() - 1) > input->dimension(2) + conv_info.pad_top() + conv_info.pad_bottom());
- ARM_COMPUTE_RETURN_ERROR_ON((input->dimension(0) * depth_multiplier) != weights->dimension(0));
- ARM_COMPUTE_RETURN_ERROR_ON((dilation.x() < 1) || (dilation.y() < 1));
- ARM_COMPUTE_RETURN_ERROR_ON((conv_info.stride().first < 1) || (conv_info.stride().second < 1));
+ ARM_COMPUTE_RETURN_ERROR_ON(info.depth_multiplier == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(1) + (weights->dimension(1) - 1) * (info.dilation.x() - 1) > input->dimension(1) + info.pad_stride_info.pad_left() + info.pad_stride_info.pad_right());
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(2) + (weights->dimension(2) - 1) * (info.dilation.y() - 1) > input->dimension(2) + info.pad_stride_info.pad_top() + info.pad_stride_info.pad_bottom());
+ ARM_COMPUTE_RETURN_ERROR_ON((input->dimension(0) * info.depth_multiplier) != weights->dimension(0));
+ ARM_COMPUTE_RETURN_ERROR_ON((info.dilation.x() < 1) || (info.dilation.y() < 1));
+ ARM_COMPUTE_RETURN_ERROR_ON((info.pad_stride_info.stride().first < 1) || (info.pad_stride_info.stride().second < 1));
if(is_data_type_quantized_per_channel(weights->data_type()))
{
@@ -757,7 +762,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
if(output->total_size() != 0)
{
- const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier, dilation);
+ const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, info);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
}
@@ -766,35 +771,30 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
}
} // namespace
-NEDepthwiseConvolutionLayerNativeKernel::NEDepthwiseConvolutionLayerNativeKernel()
- : _func(), _input(), _weights(), _biases(), _output(), _conv_info(), _depth_multiplier(1), _dilation(), _output_multiplier(), _output_shift(), _has_biases()
+CpuDepthwiseConvolutionNativeKernel::CpuDepthwiseConvolutionNativeKernel()
+ : _func(), _conv_info(), _depth_multiplier(1), _dilation(), _output_multiplier(), _output_shift(), _has_biases()
{
}
-void NEDepthwiseConvolutionLayerNativeKernel::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output,
- const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation)
+void CpuDepthwiseConvolutionNativeKernel::configure(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), conv_info, depth_multiplier, dilation));
-
- _input = input;
- _weights = weights;
- _biases = biases;
- _output = output;
- _conv_info = conv_info;
- _depth_multiplier = depth_multiplier;
- _dilation = dilation;
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, weights, (biases != nullptr) ? biases : nullptr, output, info));
+
+ _conv_info = info.pad_stride_info;
+ _depth_multiplier = info.depth_multiplier;
+ _dilation = info.dilation;
_has_biases = (biases != nullptr);
- if(is_data_type_quantized(_input->info()->data_type()))
+ if(is_data_type_quantized(input->data_type()))
{
- const auto input_scale = input->info()->quantization_info().uniform().scale;
- const auto output_scale = output->info()->quantization_info().uniform().scale;
+ const auto input_scale = input->quantization_info().uniform().scale;
+ const auto output_scale = output->quantization_info().uniform().scale;
- auto weights_scale = weights->info()->quantization_info().scale();
- if(!is_data_type_quantized_per_channel(_weights->info()->data_type()))
+ auto weights_scale = weights->quantization_info().scale();
+ if(!is_data_type_quantized_per_channel(weights->data_type()))
{
- for(size_t i = 1; i < _weights->info()->dimension(channel_idx); ++i)
+ for(size_t i = 1; i < weights->dimension(channel_idx); ++i)
{
weights_scale.push_back(weights_scale.front());
}
@@ -812,100 +812,107 @@ void NEDepthwiseConvolutionLayerNativeKernel::configure(const ITensor *input, co
}
}
- switch(_weights->info()->data_type())
+ switch(weights->data_type())
{
case DataType::QASYMM8:
- _func = &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<uint8_t, uint8_t>;
+ _func = &CpuDepthwiseConvolutionNativeKernel::run_depthwise<uint8_t, uint8_t>;
break;
case DataType::QASYMM8_SIGNED:
- _func = &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<int8_t, int8_t>;
+ _func = &CpuDepthwiseConvolutionNativeKernel::run_depthwise<int8_t, int8_t>;
break;
case DataType::QSYMM8_PER_CHANNEL:
- if(_input->info()->data_type() == DataType::QASYMM8)
+ if(input->data_type() == DataType::QASYMM8)
{
- _func = &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<uint8_t, int8_t>;
+ _func = &CpuDepthwiseConvolutionNativeKernel::run_depthwise<uint8_t, int8_t>;
}
else
{
- _func = &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<int8_t, int8_t>;
+ _func = &CpuDepthwiseConvolutionNativeKernel::run_depthwise<int8_t, int8_t>;
}
break;
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
- _func = &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<float16_t, float16_t>;
+ _func = &CpuDepthwiseConvolutionNativeKernel::run_depthwise<float16_t, float16_t>;
break;
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F32:
- _func = &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<float, float>;
+ _func = &CpuDepthwiseConvolutionNativeKernel::run_depthwise<float, float>;
break;
default:
ARM_COMPUTE_ERROR("Data type not supported");
break;
}
- const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info, depth_multiplier, dilation);
- auto_init_if_empty(*output->info(), input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape).set_quantization_info(output->info()->quantization_info()));
+ const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, info);
+ auto_init_if_empty(*output, input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape).set_quantization_info(output->quantization_info()));
- Window win = calculate_max_window(*output->info(), Steps());
- INEKernel::configure(win);
+ Window win = calculate_max_window(*output, Steps());
+ ICpuKernel::configure(win);
}
-Status NEDepthwiseConvolutionLayerNativeKernel::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier,
- const Size2D &dilation)
+Status CpuDepthwiseConvolutionNativeKernel::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, biases, output, conv_info, depth_multiplier, dilation));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, biases, output, info));
return Status{};
}
-void NEDepthwiseConvolutionLayerNativeKernel::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
-
- (this->*_func)(window, _has_biases);
-}
-
-template <typename T, typename TW, NEDepthwiseConvolutionLayerNativeKernel::FloatEnalber<T>>
-void NEDepthwiseConvolutionLayerNativeKernel::run_depthwise(const Window &window, bool has_biases)
+template <typename T, typename TW, CpuDepthwiseConvolutionNativeKernel::FloatEnalber<T>>
+void CpuDepthwiseConvolutionNativeKernel::run_depthwise(const ITensor *src, const ITensor *weights, const ITensor *biases,
+ ITensor *dst, const Window &window, bool has_biases)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
if(_depth_multiplier == 1)
{
- depthwise_loop_multiplier1_fp<T>(_input, _weights, _biases, _output, _conv_info, _dilation, window, has_biases);
+ depthwise_loop_multiplier1_fp<T>(src, weights, biases, dst, _conv_info, _dilation, window, has_biases);
}
else
{
- depthwise_loop_generic_fp<T>(_input, _weights, _biases, _output, _conv_info, _dilation, _depth_multiplier, window, has_biases);
+ depthwise_loop_generic_fp<T>(src, weights, biases, dst, _conv_info, _dilation, _depth_multiplier, window, has_biases);
}
}
-template <typename T, typename TW, NEDepthwiseConvolutionLayerNativeKernel::Quantized8bitEnalber<T>>
-void NEDepthwiseConvolutionLayerNativeKernel::run_depthwise(const Window &window, bool has_biases)
+template <typename T, typename TW, CpuDepthwiseConvolutionNativeKernel::Quantized8bitEnalber<T>>
+void CpuDepthwiseConvolutionNativeKernel::run_depthwise(const ITensor *src, const ITensor *weights, const ITensor *biases,
+ ITensor *dst, const Window &window, bool has_biases)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
if(_depth_multiplier == 1)
{
- depthwise_loop_multiplier1_quantized<T, TW>(_input, _weights, _biases, _output, _conv_info, _dilation, _output_multiplier, _output_shift, window, has_biases);
+ depthwise_loop_multiplier1_quantized<T, TW>(src, weights, biases, dst, _conv_info, _dilation, _output_multiplier, _output_shift, window, has_biases);
}
else
{
const bool is_pow2 = ((_depth_multiplier & (_depth_multiplier - 1)) == 0);
- const bool is_quantized_per_tensor = !(is_data_type_quantized_per_channel(_weights->info()->data_type()));
+ const bool is_quantized_per_tensor = !(is_data_type_quantized_per_channel(weights->info()->data_type()));
if(is_pow2 && is_quantized_per_tensor && _depth_multiplier >= 8)
{
- depthwise_loop_pow2_quantized_per_tensor<T, TW>(_input, _weights, _biases, _output, _conv_info, _dilation, _depth_multiplier, _output_multiplier, _output_shift, window, has_biases);
+ depthwise_loop_pow2_quantized_per_tensor<T, TW>(src, weights, biases, dst, _conv_info, _dilation, _depth_multiplier, _output_multiplier, _output_shift, window, has_biases);
}
else
{
- depthwise_loop_generic_quantized<T, TW>(_input, _weights, _biases, _output, _conv_info, _dilation, _depth_multiplier, _output_multiplier, _output_shift, window, has_biases);
+ depthwise_loop_generic_quantized<T, TW>(src, weights, biases, dst, _conv_info, _dilation, _depth_multiplier, _output_multiplier, _output_shift, window, has_biases);
}
}
}
+
+void CpuDepthwiseConvolutionNativeKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
+{
+ ARM_COMPUTE_UNUSED(info);
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
+ ARM_COMPUTE_ERROR_ON(_func == nullptr);
+
+ const auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
+ const auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
+ const auto biases = tensors.get_const_tensor(TensorType::ACL_SRC_2);
+ auto dst = tensors.get_tensor(TensorType::ACL_DST);
+ (this->*_func)(src, weights, biases, dst, window, _has_biases);
+}
+} // namespace kernels
+} // namespace cpu
} // namespace arm_compute
diff --git a/src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h b/src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h
new file mode 100644
index 0000000000..242536d441
--- /dev/null
+++ b/src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2019-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CPU_DEPTHWISECONVOLUTIONNATIVEKERNEL_H
+#define ARM_COMPUTE_CPU_DEPTHWISECONVOLUTIONNATIVEKERNEL_H
+
+#include "arm_compute/core/utils/misc/Traits.h"
+#include "src/core/common/Macros.h"
+#include "src/core/cpu/ICpuKernel.h"
+#include "support/Requires.h"
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#include <arm_neon.h>
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+namespace arm_compute
+{
+namespace cpu
+{
+namespace kernels
+{
+/** Interface for the kernel to run a depthwise convolution native on a tensor. */
+class CpuDepthwiseConvolutionNativeKernel : public ICpuKernel
+{
+public:
+ const char *name() const override
+ {
+ return "CpuDepthwiseConvolutionNativeKernel";
+ }
+ /** Default constructor */
+ CpuDepthwiseConvolutionNativeKernel();
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuDepthwiseConvolutionNativeKernel);
+
+ /** Initialize the function's source, destination and parameters.
+ *
+ * @note Supported data layouts: NHWC
+ *
+ * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [IFM, W, H].
+ * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] info Depthwise convolution meta-data.
+ *
+ */
+ void configure(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info);
+ /** Static function to check if given info will lead to a valid configuration of @ref CpuDepthwiseConvolutionNativeKernel
+ *
+ * @note Supported data layouts: NHWC
+ *
+ * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] weights Weights tensor info. This is a 3D tensor with dimensions [IFM, W, H].
+ * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] output Destination tensor info. Data type supported: Same as @p input.
+ * @param[in] info Depthwise convolution meta-data.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+
+private:
+ template <typename T>
+ using FloatEnalber = typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, int>::type;
+
+ template <typename T, typename TW, FloatEnalber<T> = 0>
+ void run_depthwise(const ITensor *src, const ITensor *weights, const ITensor *bias, ITensor *dst, const Window &window, bool has_biases);
+
+ template <typename T>
+ using Quantized8bitEnalber = typename std::enable_if < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int >::type;
+
+ template <typename T, typename TW, Quantized8bitEnalber<T> = 0>
+ void run_depthwise(const ITensor *src, const ITensor *weights, const ITensor *bias, ITensor *dst, const Window &window, bool has_biases);
+
+ /** Common signature for all the specialised depthwise convolution native functions
+ *
+ * @param[in] window Region on which to execute the kernel.
+ */
+ using DepthwiseFunctionPtr = void (CpuDepthwiseConvolutionNativeKernel::*)(const ITensor *src, const ITensor *weights, const ITensor *bias, ITensor *dst, const Window &window, bool has_biases);
+
+ DepthwiseFunctionPtr _func;
+ PadStrideInfo _conv_info;
+ unsigned int _depth_multiplier;
+ Size2D _dilation;
+ std::vector<int> _output_multiplier;
+ std::vector<int> _output_shift;
+ bool _has_biases;
+};
+} // namespace kernels
+} // namespace cpu
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CPU_DEPTHWISECONVOLUTIONNATIVEKERNEL_H */