about | summary | refs | log | tree | commit | diff
path: root/src/runtime
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime')
-rw-r--r-- src/runtime/CL/functions/CLDirectConvolutionLayer.cpp | 16
-rw-r--r-- src/runtime/CL/functions/CLPoolingLayer.cpp | 14
-rw-r--r-- src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp | 66
-rw-r--r-- src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp | 16
-rw-r--r-- src/runtime/NEON/functions/NEPoolingLayer.cpp | 16
-rw-r--r-- src/runtime/cpu/operators/CpuDepthwiseConv2d.cpp (renamed from src/runtime/cpu/operators/CpuDepthwiseConvolution.cpp) | 162
-rw-r--r-- src/runtime/cpu/operators/CpuDepthwiseConv2d.h | 213
-rw-r--r-- src/runtime/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp (renamed from src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.cpp) | 108
-rw-r--r-- src/runtime/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.h (renamed from src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.h) | 53
-rw-r--r-- src/runtime/cpu/operators/CpuDepthwiseConvolution.h | 230
-rw-r--r-- src/runtime/cpu/operators/CpuDirectConv2d.cpp (renamed from src/runtime/cpu/operators/CpuDirectConvolution.cpp) | 22
-rw-r--r-- src/runtime/cpu/operators/CpuDirectConv2d.h (renamed from src/runtime/cpu/operators/CpuDirectConvolution.h) | 58
-rw-r--r-- src/runtime/cpu/operators/CpuPool2d.cpp (renamed from src/runtime/cpu/operators/CpuPooling.cpp) | 28
-rw-r--r-- src/runtime/cpu/operators/CpuPool2d.h (renamed from src/runtime/cpu/operators/CpuPooling.h) | 38
-rw-r--r-- src/runtime/gpu/cl/operators/ClDirectConv2d.cpp (renamed from src/runtime/gpu/cl/operators/ClDirectConvolution.cpp) | 18
-rw-r--r-- src/runtime/gpu/cl/operators/ClDirectConv2d.h (renamed from src/runtime/gpu/cl/operators/ClDirectConvolution.h) | 25
-rw-r--r-- src/runtime/gpu/cl/operators/ClPool2d.cpp (renamed from src/runtime/gpu/cl/operators/ClPooling.cpp) | 14
-rw-r--r-- src/runtime/gpu/cl/operators/ClPool2d.h (renamed from src/runtime/gpu/cl/operators/ClPooling.h) | 19
18 files changed, 525 insertions, 591 deletions
diff --git a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
index 74867ff64f..907e69d8d7 100644
--- a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
@@ -29,17 +29,17 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/runtime/gpu/cl/operators/ClActivation.h"
-#include "src/runtime/gpu/cl/operators/ClDirectConvolution.h"
+#include "src/runtime/gpu/cl/operators/ClDirectConv2d.h"
namespace arm_compute
{
struct CLDirectConvolutionLayer::Impl
{
- const ICLTensor *src{ nullptr };
- const ICLTensor *weights{ nullptr };
- const ICLTensor *biases{ nullptr };
- ICLTensor *dst{ nullptr };
- std::unique_ptr<opencl::ClDirectConvolution> op{ nullptr };
+ const ICLTensor *src{ nullptr };
+ const ICLTensor *weights{ nullptr };
+ const ICLTensor *biases{ nullptr };
+ ICLTensor *dst{ nullptr };
+ std::unique_ptr<opencl::ClDirectConv2d> op{ nullptr };
};
CLDirectConvolutionLayer::CLDirectConvolutionLayer()
@@ -65,14 +65,14 @@ void CLDirectConvolutionLayer::configure(const CLCompileContext &compile_context
_impl->biases = biases;
_impl->dst = output;
- _impl->op = std::make_unique<opencl::ClDirectConvolution>();
+ _impl->op = std::make_unique<opencl::ClDirectConv2d>();
_impl->op->configure(compile_context, input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), conv_info, act_info);
}
Status CLDirectConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
const ActivationLayerInfo &act_info)
{
- return opencl::ClDirectConvolution::validate(input, weights, biases, output, conv_info, act_info);
+ return opencl::ClDirectConv2d::validate(input, weights, biases, output, conv_info, act_info);
}
void CLDirectConvolutionLayer::run()
diff --git a/src/runtime/CL/functions/CLPoolingLayer.cpp b/src/runtime/CL/functions/CLPoolingLayer.cpp
index fbaec1d2d9..7ba911c342 100644
--- a/src/runtime/CL/functions/CLPoolingLayer.cpp
+++ b/src/runtime/CL/functions/CLPoolingLayer.cpp
@@ -26,16 +26,16 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "src/core/CL/ICLKernel.h"
-#include "src/runtime/gpu/cl/operators/ClPooling.h"
+#include "src/runtime/gpu/cl/operators/ClPool2d.h"
namespace arm_compute
{
struct CLPoolingLayer::Impl
{
- const ICLTensor *src{ nullptr };
- ICLTensor *dst{ nullptr };
- ICLTensor *indices{ nullptr };
- std::unique_ptr<opencl::ClPooling> op{ nullptr };
+ const ICLTensor *src{ nullptr };
+ ICLTensor *dst{ nullptr };
+ ICLTensor *indices{ nullptr };
+ std::unique_ptr<opencl::ClPool2d> op{ nullptr };
};
CLPoolingLayer::CLPoolingLayer()
@@ -55,13 +55,13 @@ void CLPoolingLayer::configure(const CLCompileContext &compile_context, ICLTenso
_impl->dst = output;
_impl->indices = indices;
- _impl->op = std::make_unique<opencl::ClPooling>();
+ _impl->op = std::make_unique<opencl::ClPool2d>();
_impl->op->configure(compile_context, input->info(), output->info(), pool_info, (indices) ? indices->info() : nullptr);
}
Status CLPoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
{
- return opencl::ClPooling::validate(input, output, pool_info, indices);
+ return opencl::ClPool2d::validate(input, output, pool_info, indices);
}
void CLPoolingLayer::run()
diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
index da9610ef42..a561b88058 100644
--- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
@@ -27,7 +27,7 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "src/runtime/cpu/operators/CpuDepthwiseConvolution.h"
+#include "src/runtime/cpu/operators/CpuDepthwiseConv2d.h"
using namespace arm_compute::misc;
using namespace arm_compute::misc::shape_calculator;
@@ -47,15 +47,15 @@ struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal
const ITensor *biases
{
nullptr
- }; // SRC_2
- Tensor permuted_input{}; // INT_0
- Tensor permuted_weights{}; // INT_1
- Tensor permuted_output{}; // INT_2
- Tensor workspace{}; // INT_3
- Tensor packed_weights{}; // INT_4
- std::shared_ptr<cpu::CpuDepthwiseConvolution> op{ nullptr };
- bool is_prepared{ false };
- bool permute{ false };
+ }; // SRC_2
+ Tensor permuted_input{}; // INT_0
+ Tensor permuted_weights{}; // INT_1
+ Tensor permuted_output{}; // INT_2
+ Tensor workspace{}; // INT_3
+ Tensor packed_weights{}; // INT_4
+ std::shared_ptr<cpu::CpuDepthwiseConv2d> op{ nullptr };
+ bool is_prepared{ false };
+ bool permute{ false };
};
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager)
@@ -80,7 +80,7 @@ void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::
_impl->dst = output;
_impl->permute = is_nhwc;
- _impl->op = std::make_unique<cpu::CpuDepthwiseConvolution>();
+ _impl->op = std::make_unique<cpu::CpuDepthwiseConv2d>();
ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
_impl->op->configure(_impl->src->info(), _impl->weights->info(), _impl->biases == nullptr ? nullptr : _impl->biases->info(),
_impl->dst->info(), info);
@@ -97,7 +97,7 @@ void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::
}
info = ConvolutionInfo{ conv_info, depth_multiplier, act_info_to_use, dilation };
- auto dwc_optimized_func = std::make_unique<cpu::CpuDepthwiseConvolutionAssemblyDispatch>();
+ auto dwc_optimized_func = std::make_unique<cpu::CpuDepthwiseConv2dAssemblyDispatch>();
if(is_nhwc)
{
@@ -154,7 +154,7 @@ Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal
const Size2D &dilation)
{
ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
- return cpu::CpuDepthwiseConvolution::validate(input, weights, biases, output, info);
+ return cpu::CpuDepthwiseConv2d::validate(input, weights, biases, output, info);
}
void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::run()
@@ -197,17 +197,17 @@ void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::
struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::Impl
{
- Tensor permuted_input{};
- Tensor permuted_weights{};
- Tensor permuted_output{};
- bool is_prepared{ false };
- bool is_nchw{ false };
- bool is_activationlayer_enabled{ false };
- const ITensor *weights{ nullptr };
- const ITensor *biases{ nullptr };
- const ITensor *src{ nullptr };
- ITensor *dst{ nullptr };
- std::shared_ptr<cpu::CpuDepthwiseConvolution> op{ nullptr };
+ Tensor permuted_input{};
+ Tensor permuted_weights{};
+ Tensor permuted_output{};
+ bool is_prepared{ false };
+ bool is_nchw{ false };
+ bool is_activationlayer_enabled{ false };
+ const ITensor *weights{ nullptr };
+ const ITensor *biases{ nullptr };
+ const ITensor *src{ nullptr };
+ ITensor *dst{ nullptr };
+ std::shared_ptr<cpu::CpuDepthwiseConv2d> op{ nullptr };
};
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::NEDepthwiseConvolutionLayerGeneric()
@@ -223,7 +223,7 @@ void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::configure(
output->info(), conv_info, depth_multiplier, act_info, dilation));
const ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
- _impl->op = std::make_unique<cpu::CpuDepthwiseConvolution>();
+ _impl->op = std::make_unique<cpu::CpuDepthwiseConv2d>();
_impl->op->configure(input->info(), weights->info(), biases == nullptr ? nullptr : biases->info(), output->info(), info);
_impl->src = input;
@@ -253,7 +253,7 @@ void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::configure(
output_to_use = &_impl->permuted_output;
}
- auto depthwise_conv_kernel = std::make_unique<cpu::kernels::CpuDepthwiseConvolutionNativeKernel>();
+ auto depthwise_conv_kernel = std::make_unique<cpu::kernels::CpuDepthwiseConv2dNativeKernel>();
depthwise_conv_kernel->configure(input_to_use->info(), weights_to_use->info(), biases == nullptr ? nullptr : biases->info(), output_to_use->info(), info);
if(_impl->is_nchw)
@@ -273,7 +273,7 @@ Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::validate
unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
- return cpu::CpuDepthwiseConvolution::validate(input, weights, biases, output, info);
+ return cpu::CpuDepthwiseConv2d::validate(input, weights, biases, output, info);
}
void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::run()
@@ -298,10 +298,10 @@ NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer(std::shared_ptr<IMemory
#ifndef DOXYGEN_SKIP_THIS
struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer::Impl
{
- DepthwiseConvolutionFunction depth_conv_func{ DepthwiseConvolutionFunction::OPTIMIZED };
- NEDepthwiseConvolutionLayerOptimizedInternal func_optimized{ nullptr };
- NEDepthwiseConvolutionLayerGeneric func_generic{};
- std::shared_ptr<cpu::CpuDepthwiseConvolution> op{ nullptr };
+ DepthwiseConvolutionFunction depth_conv_func{ DepthwiseConvolutionFunction::OPTIMIZED };
+ NEDepthwiseConvolutionLayerOptimizedInternal func_optimized{ nullptr };
+ NEDepthwiseConvolutionLayerGeneric func_generic{};
+ std::shared_ptr<cpu::CpuDepthwiseConv2d> op{ nullptr };
};
#endif // DOXYGEN_SKIP_THIS
@@ -309,7 +309,7 @@ void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weigh
const ActivationLayerInfo &act_info, const Size2D &dilation)
{
const ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
- _impl->op = std::make_shared<cpu::CpuDepthwiseConvolution>();
+ _impl->op = std::make_shared<cpu::CpuDepthwiseConv2d>();
_impl->depth_conv_func = _impl->op->get_depthwiseconvolution_function(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(),
info);
switch(_impl->depth_conv_func)
@@ -329,7 +329,7 @@ Status NEDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITe
unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation };
- return cpu::CpuDepthwiseConvolution::validate(input, weights, biases, output, info);
+ return cpu::CpuDepthwiseConv2d::validate(input, weights, biases, output, info);
}
void NEDepthwiseConvolutionLayer::run()
diff --git a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
index 73834381c6..58530e4a8f 100644
--- a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
@@ -27,17 +27,17 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "src/runtime/cpu/operators/CpuDirectConvolution.h"
+#include "src/runtime/cpu/operators/CpuDirectConv2d.h"
namespace arm_compute
{
struct NEDirectConvolutionLayer::Impl
{
- ITensor *src{ nullptr };
- const ITensor *weights{ nullptr };
- const ITensor *bias{ nullptr };
- ITensor *dst{ nullptr };
- std::unique_ptr<cpu::CpuDirectConvolution> op{ nullptr };
+ ITensor *src{ nullptr };
+ const ITensor *weights{ nullptr };
+ const ITensor *bias{ nullptr };
+ ITensor *dst{ nullptr };
+ std::unique_ptr<cpu::CpuDirectConv2d> op{ nullptr };
};
NEDirectConvolutionLayer::NEDirectConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
@@ -52,14 +52,14 @@ void NEDirectConvolutionLayer::configure(ITensor *input, const ITensor *weights,
_impl->weights = weights;
_impl->bias = bias;
_impl->dst = output;
- _impl->op = std::make_unique<cpu::CpuDirectConvolution>(_memory_manager);
+ _impl->op = std::make_unique<cpu::CpuDirectConv2d>(_memory_manager);
_impl->op->configure(input->info(), weights->info(), (bias != nullptr ? bias->info() : nullptr), output->info(), conv_info, act_info);
}
Status NEDirectConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &conv_info,
const ActivationLayerInfo &act_info)
{
- return cpu::CpuDirectConvolution::validate(input, weights, bias, output, conv_info, act_info);
+ return cpu::CpuDirectConv2d::validate(input, weights, bias, output, conv_info, act_info);
}
void NEDirectConvolutionLayer::run()
diff --git a/src/runtime/NEON/functions/NEPoolingLayer.cpp b/src/runtime/NEON/functions/NEPoolingLayer.cpp
index 1570cdeedc..bbf3e7cc4e 100644
--- a/src/runtime/NEON/functions/NEPoolingLayer.cpp
+++ b/src/runtime/NEON/functions/NEPoolingLayer.cpp
@@ -26,17 +26,17 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/Tensor.h"
-#include "src/runtime/cpu/operators/CpuPooling.h"
+#include "src/runtime/cpu/operators/CpuPool2d.h"
namespace arm_compute
{
struct NEPoolingLayer::Impl
{
- ITensor *src{ nullptr };
- ITensor *dst{ nullptr };
- ITensor *indices{ nullptr };
- Tensor workspace{ nullptr };
- std::unique_ptr<cpu::CpuPooling> op{ nullptr };
+ ITensor *src{ nullptr };
+ ITensor *dst{ nullptr };
+ ITensor *indices{ nullptr };
+ Tensor workspace{ nullptr };
+ std::unique_ptr<cpu::CpuPool2d> op{ nullptr };
};
NEPoolingLayer::~NEPoolingLayer() = default;
@@ -51,7 +51,7 @@ void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLay
_impl->src = input;
_impl->dst = output;
_impl->indices = indices;
- _impl->op = std::make_unique<cpu::CpuPooling>();
+ _impl->op = std::make_unique<cpu::CpuPool2d>();
_impl->op->configure(input->info(), output->info(), pool_info, (indices) ? indices->info() : nullptr);
// Allocate workspace based on kernel's memory requirements
@@ -66,7 +66,7 @@ void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLay
Status NEPoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
{
- return cpu::CpuPooling::validate(input, output, pool_info, indices);
+ return cpu::CpuPool2d::validate(input, output, pool_info, indices);
}
void NEPoolingLayer::run()
diff --git a/src/runtime/cpu/operators/CpuDepthwiseConvolution.cpp b/src/runtime/cpu/operators/CpuDepthwiseConv2d.cpp
index 6d097280e0..160a9fd70b 100644
--- a/src/runtime/cpu/operators/CpuDepthwiseConvolution.cpp
+++ b/src/runtime/cpu/operators/CpuDepthwiseConv2d.cpp
@@ -21,14 +21,14 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/runtime/cpu/operators/CpuDepthwiseConvolution.h"
+#include "src/runtime/cpu/operators/CpuDepthwiseConv2d.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/InfoHelpers.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h"
+#include "src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h"
namespace arm_compute
{
@@ -36,61 +36,61 @@ namespace cpu
{
namespace
{
-Status validate_arguments_optimized(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info)
+Status validate_arguments_optimized(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info)
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
if(!is_data_type_quantized_per_channel(weights->data_type()))
{
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, weights);
}
- ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->data_layout() == DataLayout::UNKNOWN);
ARM_COMPUTE_RETURN_ERROR_ON(info.dilation.x() < 1 || info.dilation.y() < 1);
- const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
- const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
- ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (info.dilation.x() - 1) > input->dimension(idx_w) + info.pad_stride_info.pad_left() +
+ const size_t idx_w = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH);
+ const size_t idx_h = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT);
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (info.dilation.x() - 1) > src->dimension(idx_w) + info.pad_stride_info.pad_left() +
info.pad_stride_info.pad_right());
- ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (info.dilation.y() - 1) > input->dimension(idx_h) + info.pad_stride_info.pad_top() +
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (info.dilation.y() - 1) > src->dimension(idx_h) + info.pad_stride_info.pad_top() +
info.pad_stride_info.pad_bottom());
if(biases != nullptr)
{
- const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
+ const unsigned int channel_idx = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::CHANNEL);
ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(channel_idx));
}
- ARM_COMPUTE_RETURN_ON_ERROR(CpuDepthwiseConvolutionAssemblyDispatch::validate(input, weights, biases, output, info));
+ ARM_COMPUTE_RETURN_ON_ERROR(CpuDepthwiseConv2dAssemblyDispatch::validate(src, weights, biases, dst, info));
//Validate Activation Layer
if(info.act_info.enabled())
{
- ARM_COMPUTE_RETURN_ON_ERROR(CpuActivation::validate(output, nullptr, info.act_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(CpuActivation::validate(dst, nullptr, info.act_info));
}
return Status{};
}
} // namespace
-CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::CpuDepthwiseConvolutionOptimizedInternal()
+CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::CpuDepthwiseConv2dOptimizedInternal()
: _dwc_optimized_func(nullptr), _permute_input(nullptr), _permute_weights(nullptr), _permute_output(nullptr), _activationlayer_function(nullptr), _has_bias(false), _is_quantized(false),
_is_nchw(true), _permute(false), _is_activationlayer_enabled(false), _is_prepared(false)
{
}
-void CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::configure(ITensorInfo *input,
- const ITensorInfo *weights,
- const ITensorInfo *biases,
- ITensorInfo *output,
- const ConvolutionInfo &info)
+void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::configure(ITensorInfo *src,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ ITensorInfo *dst,
+ const ConvolutionInfo &info)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
// Perform validation step
- ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConvolutionOptimizedInternal::validate(input, weights, (biases == nullptr) ? nullptr : biases,
- output, info));
+ ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConv2dOptimizedInternal::validate(src, weights, (biases == nullptr) ? nullptr : biases,
+ dst, info));
- _is_quantized = is_data_type_quantized_asymmetric(input->data_type());
+ _is_quantized = is_data_type_quantized_asymmetric(src->data_type());
_has_bias = biases != nullptr;
- _is_nchw = input->data_layout() == DataLayout::NCHW;
+ _is_nchw = src->data_layout() == DataLayout::NCHW;
_permute = _is_nchw;
_is_prepared = false;
@@ -105,7 +105,7 @@ void CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::configur
act_info_to_use = info.act_info;
}
- _dwc_optimized_func = std::make_unique<CpuDepthwiseConvolutionAssemblyDispatch>();
+ _dwc_optimized_func = std::make_unique<CpuDepthwiseConv2dAssemblyDispatch>();
if(_is_nchw)
{
_permute_input = std::make_unique<cpu::CpuPermute>();
@@ -117,7 +117,7 @@ void CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::configur
auto output_perm = std::make_unique<TensorInfo>();
// Configure the function to transform the input tensor from NCHW -> NHWC
- _permute_input->configure(input, input_perm.get(), PermutationVector(2U, 0U, 1U));
+ _permute_input->configure(src, input_perm.get(), PermutationVector(2U, 0U, 1U));
input_perm->set_data_layout(DataLayout::NHWC);
// Configure the function to transform the weights tensor from IHW -> HWI
@@ -125,38 +125,38 @@ void CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::configur
weights_perm->set_data_layout(DataLayout::NHWC);
output_perm->set_data_layout(DataLayout::NHWC);
- output_perm->set_quantization_info(output->quantization_info());
+ output_perm->set_quantization_info(dst->quantization_info());
// Configure optimized depthwise
_dwc_optimized_func->configure(input_perm.get(), weights_perm.get(), biases, output_perm.get(), info);
// Configure the function to transform the convoluted output to ACL's native ordering format NCHW
output_perm->set_data_layout(DataLayout::NHWC);
- _permute_output->configure(output_perm.get(), output, PermutationVector(1U, 2U, 0U));
+ _permute_output->configure(output_perm.get(), dst, PermutationVector(1U, 2U, 0U));
}
else
{
- _dwc_optimized_func->configure(input, weights, biases, output, info);
+ _dwc_optimized_func->configure(src, weights, biases, dst, info);
}
// Configure activation
if(_is_activationlayer_enabled)
{
_activationlayer_function = std::make_unique<cpu::CpuActivation>();
- _activationlayer_function->configure(output, nullptr, info.act_info);
+ _activationlayer_function->configure(dst, nullptr, info.act_info);
}
}
-Status CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::validate(const ITensorInfo *input,
- const ITensorInfo *weights,
- const ITensorInfo *biases,
- const ITensorInfo *output,
- const ConvolutionInfo &info)
+Status CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::validate(const ITensorInfo *src,
+ const ITensorInfo *weights,
+ const ITensorInfo *biases,
+ const ITensorInfo *dst,
+ const ConvolutionInfo &info)
{
- return validate_arguments_optimized(input, weights, biases, output, info);
+ return validate_arguments_optimized(src, weights, biases, dst, info);
}
-void CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::run(ITensorPack &tensors)
+void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::run(ITensorPack &tensors)
{
ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
prepare(tensors);
@@ -229,7 +229,7 @@ void CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::run(ITen
}
}
-void CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::prepare(ITensorPack &tensors)
+void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::prepare(ITensorPack &tensors)
{
if(!_is_prepared)
{
@@ -272,35 +272,35 @@ void CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::prepare(
}
}
-CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::CpuDepthwiseConvolutionGeneric()
+CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::CpuDepthwiseConv2dGeneric()
: _depthwise_conv_kernel(nullptr), _permute_input(nullptr), _permute_weights(nullptr), _permute_output(nullptr), _activationlayer_function(nullptr), _is_nchw(true), _is_prepared(false),
_is_activationlayer_enabled(false)
{
}
-void CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::configure(ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info)
+void CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::configure(ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
- ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConvolution::validate(input, weights, (biases == nullptr) ? nullptr : biases,
- output, info));
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
+ ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConv2d::validate(src, weights, (biases == nullptr) ? nullptr : biases,
+ dst, info));
- _is_nchw = input->data_layout() == DataLayout::NCHW;
+ _is_nchw = src->data_layout() == DataLayout::NCHW;
_is_prepared = !_is_nchw;
- ITensorInfo *input_to_use = input;
+ ITensorInfo *input_to_use = src;
const ITensorInfo *weights_to_use = weights;
- ITensorInfo *output_to_use = output;
+ ITensorInfo *output_to_use = dst;
auto input_perm = std::make_unique<TensorInfo>();
auto weights_perm = std::make_unique<TensorInfo>();
- auto output_perm = std::make_unique<TensorInfo>(output->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(TensorShape()));
+ auto output_perm = std::make_unique<TensorInfo>(dst->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(TensorShape()));
if(_is_nchw)
{
_permute_input = std::make_unique<cpu::CpuPermute>();
_permute_weights = std::make_unique<cpu::CpuPermute>();
- _permute_input->configure(input, input_perm.get(), PermutationVector(2U, 0U, 1U));
+ _permute_input->configure(src, input_perm.get(), PermutationVector(2U, 0U, 1U));
input_perm->set_data_layout(DataLayout::NHWC);
input_to_use = input_perm.get();
@@ -311,13 +311,13 @@ void CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::configure(ITensorI
output_to_use = output_perm.get();
}
- _depthwise_conv_kernel = std::make_unique<cpu::kernels::CpuDepthwiseConvolutionNativeKernel>();
+ _depthwise_conv_kernel = std::make_unique<cpu::kernels::CpuDepthwiseConv2dNativeKernel>();
_depthwise_conv_kernel->configure(input_to_use, weights_to_use, biases, output_to_use, info);
if(_is_nchw)
{
_permute_output = std::make_unique<cpu::CpuPermute>();
- _permute_output->configure(output_perm.get(), output, PermutationVector(1U, 2U, 0U));
+ _permute_output->configure(output_perm.get(), dst, PermutationVector(1U, 2U, 0U));
output_perm->set_data_layout(DataLayout::NHWC);
}
@@ -326,48 +326,48 @@ void CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::configure(ITensorI
if(_is_activationlayer_enabled)
{
_activationlayer_function = std::make_unique<cpu::CpuActivation>();
- _activationlayer_function->configure(output, nullptr, info.act_info);
+ _activationlayer_function->configure(dst, nullptr, info.act_info);
}
}
-Status CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
- const ConvolutionInfo &info)
+Status CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
+ const ConvolutionInfo &info)
{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
- if(input->data_layout() == DataLayout::NCHW)
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
+ if(src->data_layout() == DataLayout::NCHW)
{
- TensorShape permuted_input_shape = input->tensor_shape();
+ TensorShape permuted_input_shape = src->tensor_shape();
TensorShape permuted_weights_shape = weights->tensor_shape();
- TensorShape permuted_output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, info);
+ TensorShape permuted_output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *weights, info);
permute(permuted_input_shape, PermutationVector(2U, 0U, 1U));
permute(permuted_weights_shape, PermutationVector(2U, 0U, 1U));
permute(permuted_output_shape, PermutationVector(2U, 0U, 1U));
- const TensorInfo permuted_input = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NHWC));
+ const TensorInfo permuted_input = TensorInfo(src->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NHWC));
const TensorInfo permuted_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(DataLayout::NHWC));
- const TensorInfo permuted_output = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_output_shape).set_data_layout(DataLayout::NCHW));
+ const TensorInfo permuted_output = TensorInfo(dst->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_output_shape).set_data_layout(DataLayout::NCHW));
- ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(input, &permuted_input, PermutationVector(2U, 0U, 1U)));
+ ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(src, &permuted_input, PermutationVector(2U, 0U, 1U)));
ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(weights, &permuted_weights, PermutationVector(2U, 0U, 1U)));
- ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(&permuted_output, output, PermutationVector(1U, 2U, 0U)));
+ ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(&permuted_output, dst, PermutationVector(1U, 2U, 0U)));
- ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuDepthwiseConvolutionNativeKernel::validate(&permuted_input, &permuted_weights, biases, &permuted_output, info));
+ ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuDepthwiseConv2dNativeKernel::validate(&permuted_input, &permuted_weights, biases, &permuted_output, info));
}
else
{
- ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuDepthwiseConvolutionNativeKernel::validate(input, weights, biases, output, info));
+ ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuDepthwiseConv2dNativeKernel::validate(src, weights, biases, dst, info));
}
// Validate Activation Layer
if(info.act_info.enabled())
{
- ARM_COMPUTE_RETURN_ON_ERROR(CpuActivation::validate(output, nullptr, info.act_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(CpuActivation::validate(dst, nullptr, info.act_info));
}
return Status{};
}
-void CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::run(ITensorPack &tensors)
+void CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::run(ITensorPack &tensors)
{
auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
@@ -421,7 +421,7 @@ void CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::run(ITensorPack &t
}
}
-void CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::prepare(ITensorPack &tensors)
+void CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::prepare(ITensorPack &tensors)
{
if(!_is_prepared)
{
@@ -440,47 +440,47 @@ void CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::prepare(ITensorPac
}
}
-CpuDepthwiseConvolution::CpuDepthwiseConvolution()
+CpuDepthwiseConv2d::CpuDepthwiseConv2d()
: _depth_conv_func(DepthwiseConvolutionFunction::GENERIC), _func_optimized(), _func_generic()
{
}
-void CpuDepthwiseConvolution::configure(ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info)
+void CpuDepthwiseConv2d::configure(ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info)
{
- _depth_conv_func = get_depthwiseconvolution_function(input, weights, (biases != nullptr) ? biases : nullptr, output, info);
+ _depth_conv_func = get_depthwiseconvolution_function(src, weights, (biases != nullptr) ? biases : nullptr, dst, info);
switch(_depth_conv_func)
{
case DepthwiseConvolutionFunction::OPTIMIZED:
- _func_optimized.configure(input, weights, biases, output, info);
+ _func_optimized.configure(src, weights, biases, dst, info);
break;
case DepthwiseConvolutionFunction::GENERIC:
- _func_generic.configure(input, weights, biases, output, info);
+ _func_generic.configure(src, weights, biases, dst, info);
break;
default:
ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
}
}
-Status CpuDepthwiseConvolution::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info)
+Status CpuDepthwiseConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info)
{
- DepthwiseConvolutionFunction depth_conv_func = get_depthwiseconvolution_function(input, weights, biases, output, info);
+ DepthwiseConvolutionFunction depth_conv_func = get_depthwiseconvolution_function(src, weights, biases, dst, info);
switch(depth_conv_func)
{
case DepthwiseConvolutionFunction::OPTIMIZED:
- return CpuDepthwiseConvolutionOptimizedInternal::validate(input, weights, biases, output, info);
+ return CpuDepthwiseConv2dOptimizedInternal::validate(src, weights, biases, dst, info);
break;
case DepthwiseConvolutionFunction::GENERIC:
- return CpuDepthwiseConvolutionGeneric::validate(input, weights, biases, output, info);
+ return CpuDepthwiseConv2dGeneric::validate(src, weights, biases, dst, info);
break;
default:
ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
}
}
-DepthwiseConvolutionFunction CpuDepthwiseConvolution::get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
- const ConvolutionInfo &info)
+DepthwiseConvolutionFunction CpuDepthwiseConv2d::get_depthwiseconvolution_function(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
+ const ConvolutionInfo &info)
{
- if(bool(CpuDepthwiseConvolutionOptimizedInternal::validate(input, weights, biases, output, info)))
+ if(bool(CpuDepthwiseConv2dOptimizedInternal::validate(src, weights, biases, dst, info)))
{
return DepthwiseConvolutionFunction::OPTIMIZED;
}
@@ -490,7 +490,7 @@ DepthwiseConvolutionFunction CpuDepthwiseConvolution::get_depthwiseconvolution_f
}
}
-void CpuDepthwiseConvolution::run(ITensorPack &tensors)
+void CpuDepthwiseConv2d::run(ITensorPack &tensors)
{
switch(_depth_conv_func)
{
@@ -505,7 +505,7 @@ void CpuDepthwiseConvolution::run(ITensorPack &tensors)
}
}
-void CpuDepthwiseConvolution::prepare(ITensorPack &tensors)
+void CpuDepthwiseConv2d::prepare(ITensorPack &tensors)
{
switch(_depth_conv_func)
{
diff --git a/src/runtime/cpu/operators/CpuDepthwiseConv2d.h b/src/runtime/cpu/operators/CpuDepthwiseConv2d.h
new file mode 100644
index 0000000000..049397fe60
--- /dev/null
+++ b/src/runtime/cpu/operators/CpuDepthwiseConv2d.h
@@ -0,0 +1,213 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CPU_DEPTHWISECONV2D_H
+#define ARM_COMPUTE_CPU_DEPTHWISECONV2D_H
+
+#include "arm_compute/core/ITensorInfo.h"
+#include "arm_compute/core/experimental/Types.h"
+#include "src/core/cpu/ICpuKernel.h"
+#include "src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h"
+#include "src/runtime/cpu/ICpuOperator.h"
+#include "src/runtime/cpu/operators/CpuActivation.h"
+#include "src/runtime/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.h"
+#include "src/runtime/cpu/operators/CpuPermute.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+namespace cpu
+{
+/** Function to execute a depthwise convolution.
+ */
+class CpuDepthwiseConv2d : public ICpuOperator
+{
+public:
+ /** Default constructor */
+ CpuDepthwiseConv2d();
+ /** Initialize the function's source, destination, weights and convolution information.
+ *
+ * @param[in, out] src Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * @param[out] dst Destination tensor info. Data type supported: same as @p src.
+ * @param[in] weights Weights tensor info. These are 3D tensor infos with shape [kernel_x, kernel_y, IFM].
+ * Data type supported: Same as @p src or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p src is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p src, S32 when src is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] info Depthwise convolution meta-data.
+ */
+ void configure(ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info);
+
+ /** Static function to check if given info will lead to a valid configuration
+ *
+ * Similar to CpuDepthwiseConv2d::configure()
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info);
+
+ /** Static function to choose the best depthwise convolution function for @ref CpuDepthwiseConv2d
+ *
+ * @param[in] src Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
+ * Data type supported: Same as @p src or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p src is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p src, S32 when src is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] dst Destination tensor info. Data type supported: same as @p src.
+ * @param[in] info Depthwise convolution meta-data.
+ *
+ * @return a Depthwise Convolution Function
+ */
+ static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
+ const ConvolutionInfo &info);
+
+ // Inherited methods overridden:
+ void run(ITensorPack &tensors) override;
+ void prepare(ITensorPack &tensors) override;
+
+private:
+ /** Basic function to execute optimized depthwise convolution routines. This function calls the following kernels:
+ *
+ * @note At the moment 3x3 and 5x5 convolution of stride 1, 2 are supported
+ *
+ * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) and no assembly kernel implementation is present
+ * -# @ref CpuDepthwiseConv2d3x3Kernel if 3x3 and no assembly kernel implementation is present
+ * -# @ref CpuDepthwiseConv2dAssemblyDispatch if assembly kernel implementation is present
+ * -# @ref NEDirectConvolutionLayerOutputStageKernel if re-quantization of dst is required
+ * -# @ref CpuActivation if fused activation is required
+ *
+ */
+ class CpuDepthwiseConv2dOptimizedInternal : public ICpuOperator
+ {
+ public:
+ /** Default constructor */
+ CpuDepthwiseConv2dOptimizedInternal();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CpuDepthwiseConv2dOptimizedInternal(const CpuDepthwiseConv2dOptimizedInternal &) = delete;
+ /** Default move constructor */
+ CpuDepthwiseConv2dOptimizedInternal(CpuDepthwiseConv2dOptimizedInternal &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CpuDepthwiseConv2dOptimizedInternal &operator=(const CpuDepthwiseConv2dOptimizedInternal &) = delete;
+ /** Default move assignment operator */
+ CpuDepthwiseConv2dOptimizedInternal &operator=(CpuDepthwiseConv2dOptimizedInternal &&) = default;
+ /** Default destructor */
+ ~CpuDepthwiseConv2dOptimizedInternal() = default;
+ /** Initialize the function's source, destination, kernels and border_size.
+ *
+ * @param[in, out] src Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
+ * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p src.
+ * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p src, S32 when src is QASYMM8/QASYMM8_SIGNED.
+ * @param[out] dst Destination tensor info. Data type supported: same as @p src.
+ * @param[in] info Depthwise convolution meta-data.
+ */
+ void configure(ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info);
+
+ /** Static function to check if given info will lead to a valid configuration
+ *
+ * Similar to CpuDepthwiseConv2dOptimizedInternal::configure()
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info);
+
+ // Inherited methods overridden:
+ void run(ITensorPack &tensors) override;
+ void prepare(ITensorPack &tensors) override;
+
+ private:
+ std::unique_ptr<CpuDepthwiseConv2dAssemblyDispatch> _dwc_optimized_func{ nullptr };
+ std::unique_ptr<CpuPermute> _permute_input{ nullptr };
+ std::unique_ptr<CpuPermute> _permute_weights{ nullptr };
+ std::unique_ptr<CpuPermute> _permute_output{ nullptr };
+ std::unique_ptr<CpuActivation> _activationlayer_function{ nullptr };
+ bool _has_bias{ false };
+ bool _is_quantized{ false };
+ bool _is_nchw{ true };
+ bool _permute{ false };
+ bool _is_activationlayer_enabled{ false };
+ bool _is_prepared{ false };
+ };
+
+ /** Basic function to execute a generic depthwise convolution. This function calls the following kernel:
+ *
+ * -# @ref CpuDepthwiseConv2dNativeKernel
+ *
+ */
+ class CpuDepthwiseConv2dGeneric : public ICpuOperator
+ {
+ public:
+ /** Default constructor */
+ CpuDepthwiseConv2dGeneric();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CpuDepthwiseConv2dGeneric(const CpuDepthwiseConv2dGeneric &) = delete;
+ /** Default move constructor */
+ CpuDepthwiseConv2dGeneric(CpuDepthwiseConv2dGeneric &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CpuDepthwiseConv2dGeneric &operator=(const CpuDepthwiseConv2dGeneric &) = delete;
+ /** Default move assignment operator */
+ CpuDepthwiseConv2dGeneric &operator=(CpuDepthwiseConv2dGeneric &&) = default;
+ /** Default destructor */
+ ~CpuDepthwiseConv2dGeneric() = default;
+ /** Initialize the function's source, destination, weights and convolution information.
+ *
+ * @param[in, out] src Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
+ * @param[out] dst Destination tensor info. Data type supported: same as @p src.
+ * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
+ * Data type supported: Same as @p src or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p src is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p src, S32 when src is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] info Depthwise convolution meta-data.
+ */
+ void configure(ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info);
+
+ /** Static function to check if given info will lead to a valid configuration
+ *
+ * Similar to CpuDepthwiseConv2dGeneric::configure()
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info);
+
+ // Inherited methods overridden:
+ void run(ITensorPack &tensors) override;
+ void prepare(ITensorPack &tensors) override;
+
+ private:
+ std::unique_ptr<kernels::CpuDepthwiseConv2dNativeKernel> _depthwise_conv_kernel{ nullptr };
+ std::unique_ptr<CpuPermute> _permute_input{ nullptr };
+ std::unique_ptr<CpuPermute> _permute_weights{ nullptr };
+ std::unique_ptr<CpuPermute> _permute_output{ nullptr };
+ std::unique_ptr<CpuActivation> _activationlayer_function{ nullptr };
+ bool _is_nchw{ true };
+ bool _is_prepared{ false };
+ bool _is_activationlayer_enabled{ false };
+ };
+
+ DepthwiseConvolutionFunction _depth_conv_func;
+ CpuDepthwiseConv2dOptimizedInternal _func_optimized;
+ CpuDepthwiseConv2dGeneric _func_generic;
+};
+} // namespace cpu
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CPU_DEPTHWISECONV2D_H */
diff --git a/src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.cpp b/src/runtime/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp
index 039714abb1..a36ee1d45b 100644
--- a/src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.cpp
+++ b/src/runtime/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp
@@ -22,7 +22,7 @@
* SOFTWARE.
*/
-#include "src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.h"
+#include "src/runtime/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Utils.h"
@@ -211,13 +211,13 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> get_fp32_convolver(int kernel_
}
}
-std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensorInfo *input,
+std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensorInfo *src,
const ITensorInfo *weights,
ITensorInfo *output,
const ConvolutionInfo &info)
{
- const DataType data_type = input->data_type();
- const TensorShape shape = input->tensor_shape();
+ const DataType data_type = src->data_type();
+ const TensorShape shape = src->tensor_shape();
const int n_batches = shape[3];
const int in_rows = shape.z();
@@ -249,7 +249,7 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensor
// Create quantized convolver
if(is_uniform_quantized)
{
- const UniformQuantizationInfo input_qinfo = input->quantization_info().uniform();
+ const UniformQuantizationInfo input_qinfo = src->quantization_info().uniform();
const UniformQuantizationInfo weights_qinfo = weights->quantization_info().uniform();
const UniformQuantizationInfo output_qinfo = output->quantization_info().uniform();
@@ -273,7 +273,7 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensor
}
else if(is_perchannel_quantized)
{
- const UniformQuantizationInfo input_qinfo = input->quantization_info().uniform();
+ const UniformQuantizationInfo input_qinfo = src->quantization_info().uniform();
const QuantizationInfo weights_qinfo = weights->quantization_info();
const UniformQuantizationInfo output_qinfo = output->quantization_info().uniform();
@@ -327,7 +327,7 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensor
}
} // namespace
-struct CpuDepthwiseConvolutionAssemblyDispatch::LocalImpl
+struct CpuDepthwiseConv2dAssemblyDispatch::LocalImpl
{
std::unique_ptr<depthwise::IDepthwiseConvolution> dwc_assembly_kernel{ nullptr };
NEDepthwiseConvolutionAssemblyKernelWrapper dwc_acl_kernel{};
@@ -336,36 +336,36 @@ struct CpuDepthwiseConvolutionAssemblyDispatch::LocalImpl
};
#ifndef DOXYGEN_SKIP_THIS
-CpuDepthwiseConvolutionAssemblyDispatch::CpuDepthwiseConvolutionAssemblyDispatch()
+CpuDepthwiseConv2dAssemblyDispatch::CpuDepthwiseConv2dAssemblyDispatch()
: _pImpl(std::make_unique<LocalImpl>())
{
}
#endif /* DOXYGEN_SKIP_THIS */
-CpuDepthwiseConvolutionAssemblyDispatch::~CpuDepthwiseConvolutionAssemblyDispatch() = default;
+CpuDepthwiseConv2dAssemblyDispatch::~CpuDepthwiseConv2dAssemblyDispatch() = default;
-void CpuDepthwiseConvolutionAssemblyDispatch::configure(const ITensorInfo *input,
- const ITensorInfo *weights,
- const ITensorInfo *bias,
- ITensorInfo *output,
- const ConvolutionInfo &info)
+void CpuDepthwiseConv2dAssemblyDispatch::configure(const ITensorInfo *src,
+ const ITensorInfo *weights,
+ const ITensorInfo *bias,
+ ITensorInfo *dst,
+ const ConvolutionInfo &info)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
ARM_COMPUTE_UNUSED(bias);
- ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConvolutionAssemblyDispatch::validate(input,
- weights,
- bias != nullptr ? bias : nullptr,
- output,
- info));
+ ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConv2dAssemblyDispatch::validate(src,
+ weights,
+ bias != nullptr ? bias : nullptr,
+ dst,
+ info));
// Output auto inizialitation if not yet initialized
- const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, info);
- auto_init_if_empty(*output, input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape).set_quantization_info(output->quantization_info()));
+ const TensorShape dst_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *weights, info);
+ auto_init_if_empty(*dst, src->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(dst_shape).set_quantization_info(dst->quantization_info()));
_pImpl->is_prepared = false;
// Create convolver
- _pImpl->dwc_assembly_kernel = create_convolver(input, weights, output, info);
+ _pImpl->dwc_assembly_kernel = create_convolver(src, weights, dst, info);
ARM_COMPUTE_ERROR_ON(_pImpl->dwc_assembly_kernel == nullptr);
// Create assembly kernel wrapper
@@ -386,27 +386,27 @@ void CpuDepthwiseConvolutionAssemblyDispatch::configure(const ITensorInfo *i
_pImpl->mem_req.push_back({ TensorType::ACL_INT_1, pack_tensor_size, alignment });
}
-experimental::MemoryRequirements CpuDepthwiseConvolutionAssemblyDispatch::workspace() const
+experimental::MemoryRequirements CpuDepthwiseConv2dAssemblyDispatch::workspace() const
{
return _pImpl->mem_req;
}
-Status CpuDepthwiseConvolutionAssemblyDispatch::validate(const ITensorInfo *input,
- const ITensorInfo *weights,
- const ITensorInfo *bias,
- const ITensorInfo *output,
- const ConvolutionInfo &info)
+Status CpuDepthwiseConv2dAssemblyDispatch::validate(const ITensorInfo *src,
+ const ITensorInfo *weights,
+ const ITensorInfo *bias,
+ const ITensorInfo *dst,
+ const ConvolutionInfo &info)
{
- ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
if(weights->data_type() != DataType::QSYMM8_PER_CHANNEL)
{
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, weights);
}
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, weights);
// Validate convolver
- ARM_COMPUTE_RETURN_ERROR_ON(!is_optimized_supported(input, weights, info));
+ ARM_COMPUTE_RETURN_ERROR_ON(!is_optimized_supported(src, weights, info));
// Validate activation
const bool is_relu = arm_compute::utils::info_helpers::is_relu(info.act_info);
@@ -416,50 +416,50 @@ Status CpuDepthwiseConvolutionAssemblyDispatch::validate(const ITensorInfo *
// Check bias
if(bias != nullptr)
{
- unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
+ unsigned int channel_idx = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::CHANNEL);
ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() > 1);
ARM_COMPUTE_RETURN_ERROR_ON(bias->dimension(0) != weights->dimension(channel_idx));
}
// Check output
- if(output->total_size() != 0)
+ if(dst->total_size() != 0)
{
- const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, info);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+ const TensorShape dst_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *weights, info);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(), dst_shape);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
}
// The uniform quantization case will only have 1 scale value in the weights quantization info
- const UniformQuantizationInfo input_qinfo = input->quantization_info().uniform();
+ const UniformQuantizationInfo src_qinfo = src->quantization_info().uniform();
const QuantizationInfo weights_qinfo = weights->quantization_info();
- const UniformQuantizationInfo output_qinfo = output->quantization_info().uniform();
+ const UniformQuantizationInfo dst_qinfo = dst->quantization_info().uniform();
for(auto const s : weights_qinfo.scale())
{
- const float fmultipler = input_qinfo.scale * s / output_qinfo.scale;
+ const float fmultipler = src_qinfo.scale * s / dst_qinfo.scale;
ARM_COMPUTE_RETURN_ERROR_ON(fmultipler > 1.f);
}
return Status{};
}
-bool CpuDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(const ITensorInfo *input,
- const ITensorInfo *weights,
- const ConvolutionInfo &info)
+bool CpuDepthwiseConv2dAssemblyDispatch::is_optimized_supported(const ITensorInfo *src,
+ const ITensorInfo *weights,
+ const ConvolutionInfo &info)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights);
// Reshape input shape if in NHWC format
- const DataLayout data_layout = input->data_layout();
- TensorShape in_shape{ input->tensor_shape() };
+ const DataLayout data_layout = src->data_layout();
+ TensorShape in_shape{ src->tensor_shape() };
if(data_layout == DataLayout::NHWC)
{
- in_shape.set(Window::DimX, input->tensor_shape().y());
- in_shape.set(Window::DimY, input->tensor_shape().z());
- in_shape.set(Window::DimZ, input->tensor_shape().x());
+ in_shape.set(Window::DimX, src->tensor_shape().y());
+ in_shape.set(Window::DimY, src->tensor_shape().z());
+ in_shape.set(Window::DimZ, src->tensor_shape().x());
}
// Check data type
- const DataType input_type = input->data_type();
+ const DataType input_type = src->data_type();
const bool is_input_type_valid = is_data_type_float(input_type) || input_type == DataType::QASYMM8;
const DataType weights_type = weights->data_type();
const bool is_weights_type_valid = is_data_type_float(weights_type) || weights_type == DataType::QASYMM8 || weights_type == DataType::QASYMM8_SIGNED
@@ -497,7 +497,7 @@ bool CpuDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(const ITens
return is_input_type_valid && is_weights_type_valid && weights_supported && supported_strides && supported_padding && (info.depth_multiplier == 1) && is_dilation_supported;
}
-void CpuDepthwiseConvolutionAssemblyDispatch::run(ITensorPack &tensors)
+void CpuDepthwiseConv2dAssemblyDispatch::run(ITensorPack &tensors)
{
// Prepare assembly kernel
prepare(tensors);
@@ -530,7 +530,7 @@ void CpuDepthwiseConvolutionAssemblyDispatch::run(ITensorPack &tensors)
NEScheduler::get().schedule(&_pImpl->dwc_acl_kernel, Window::DimX);
}
-void CpuDepthwiseConvolutionAssemblyDispatch::prepare(ITensorPack &tensors)
+void CpuDepthwiseConv2dAssemblyDispatch::prepare(ITensorPack &tensors)
{
if(!_pImpl->is_prepared)
{
diff --git a/src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.h b/src/runtime/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.h
index 6aac74c3ef..195942b7fd 100644
--- a/src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.h
+++ b/src/runtime/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.h
@@ -21,9 +21,10 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_CPU_DEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H
-#define ARM_COMPUTE_CPU_DEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H
+#ifndef ARM_COMPUTE_CPU_DEPTHWISECONV2DASSEMBLYDISPATCH_H
+#define ARM_COMPUTE_CPU_DEPTHWISECONV2DASSEMBLYDISPATCH_H
+#include "src/core/common/Macros.h"
#include "src/runtime/cpu/ICpuOperator.h"
namespace arm_compute
@@ -31,57 +32,45 @@ namespace arm_compute
namespace cpu
{
/** Depthwise convolution assembly kernel glue */
-class CpuDepthwiseConvolutionAssemblyDispatch : public ICpuOperator
+class CpuDepthwiseConv2dAssemblyDispatch : public ICpuOperator
{
public:
- CpuDepthwiseConvolutionAssemblyDispatch();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CpuDepthwiseConvolutionAssemblyDispatch(const CpuDepthwiseConvolutionAssemblyDispatch &) = delete;
- /** Default move constructor */
- CpuDepthwiseConvolutionAssemblyDispatch(CpuDepthwiseConvolutionAssemblyDispatch &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CpuDepthwiseConvolutionAssemblyDispatch &operator=(const CpuDepthwiseConvolutionAssemblyDispatch &) = delete;
- /** Default move assignment operator */
- CpuDepthwiseConvolutionAssemblyDispatch &operator=(CpuDepthwiseConvolutionAssemblyDispatch &&) = default;
+ /** Default constructor */
+ CpuDepthwiseConv2dAssemblyDispatch();
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuDepthwiseConv2dAssemblyDispatch);
/** Default destructor */
- ~CpuDepthwiseConvolutionAssemblyDispatch();
+ ~CpuDepthwiseConv2dAssemblyDispatch();
+
/** Initialize the function's source, destination, kernels and border_size.
*
* @note Supports only NHWC format
*
- * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
- * @param[in] weights Weights tensor info. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
+ * @param[in] src Source tensor info. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
+ * @param[in] weights Weights tensor info. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p src.
* @param[in] bias (Optional) Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input.
- * @param[out] output Destination tensor info. Data type supported: same as @p input.
+ * Data type supported: Same as @p src.
+ * @param[out] dst Destination tensor info. Data type supported: same as @p src.
* @param[in] info Depthwise convolution meta-data.
*/
- void configure(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, ITensorInfo *output, const ConvolutionInfo &info);
- /** Static function to check if given info will lead to a valid configuration of @ref CpuDepthwiseConvolutionAssemblyDispatch
- *
- * @note Supports only NHWC format
+ void configure(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *bias, ITensorInfo *dst, const ConvolutionInfo &info);
+ /** Static function to check if given info will lead to a valid configuration
*
- * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
- * @param[in] weights Weights tensor info. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input.
- * @param[in] bias (Optional) Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input.
- * @param[out] output Destination tensor info. Data type supported: same as @p input.
- * @param[in] info Depthwise convolution meta-data.
+ * Similar to CpuDepthwiseConv2dAssemblyDispatch::configure()
*
- * @return An error status
+ * @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const ConvolutionInfo &info);
+ static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *dst, const ConvolutionInfo &info);
/** Check if the optimized kernel can be used for the given kernel sizes and strides
*
* @warning Even if this return true the inputs and outputs might need to get permuted as the only layout supported is NHWC
*
- * @param[in] input Input tensor info.
+ * @param[in] src Input tensor info.
* @param[in] weights Weights tensor info.
* @param[in] info Depthwise convolution meta-data.
*
* @return True if the assembly kernel could be used else false. Note that transformations of input/output could be needed.
*/
- static bool is_optimized_supported(const ITensorInfo *input, const ITensorInfo *weights, const ConvolutionInfo &info);
+ static bool is_optimized_supported(const ITensorInfo *src, const ITensorInfo *weights, const ConvolutionInfo &info);
// Inherited methods overridden:
void run(ITensorPack &tensors) override;
@@ -94,4 +83,4 @@ private:
};
} // namespace cpu
} // namespace arm_compute
-#endif /* ARM_COMPUTE_CPU_DEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H */
+#endif /* ARM_COMPUTE_CPU_DEPTHWISECONV2DASSEMBLYDISPATCH_H */
diff --git a/src/runtime/cpu/operators/CpuDepthwiseConvolution.h b/src/runtime/cpu/operators/CpuDepthwiseConvolution.h
deleted file mode 100644
index e39cb7db4d..0000000000
--- a/src/runtime/cpu/operators/CpuDepthwiseConvolution.h
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CPU_DEQUANTIZATION_H
-#define ARM_COMPUTE_CPU_DEQUANTIZATION_H
-
-#include "arm_compute/core/ITensorInfo.h"
-#include "arm_compute/core/experimental/Types.h"
-#include "src/core/cpu/ICpuKernel.h"
-#include "src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h"
-#include "src/runtime/cpu/ICpuOperator.h"
-#include "src/runtime/cpu/operators/CpuActivation.h"
-#include "src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.h"
-#include "src/runtime/cpu/operators/CpuPermute.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-namespace cpu
-{
-/** Function to execute a depthwise convolution.
- */
-class CpuDepthwiseConvolution : public ICpuOperator
-{
-public:
- /** Default constructor */
- CpuDepthwiseConvolution();
- /** Initialize the function's source, destination, weights and convolution information.
- *
- * @param[in, out] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[out] output Destination tensor info. Data type supported: same as @p input.
- * @param[in] weights Weights tensor info. These are 3D tensor infos with shape [kernel_x, kernel_y, IFM].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] info Depthwise convolution meta-data.
- */
- void configure(ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CpuDepthwiseConvolution
- *
- * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[in] output Destination tensor info. Data type supported: same as @p input.
- * @param[in] weights Weights tensor info. These are 3D tensors info with shape [kernel_x, kernel_y, IFM].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] info Depthwise convolution meta-data.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info);
-
- /** Static function to choose the best depthwise convolution function for @ref CpuDepthwiseConvolution
- *
- * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] output Destination tensor. Data type supported: same as @p input.
- * @param[in] info Depthwise convolution meta-data.
- *
- * @return a Depthwise Convolution Function
- */
- static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
- const ConvolutionInfo &info);
-
- // Inherited methods overriden:
- void run(ITensorPack &tensors) override;
- void prepare(ITensorPack &tensors) override;
-
-private:
- /** Basic function to execute optimized depthwise convolution routines. This function calls the following kernels:
- *
- * @note At the moment 3x3 and 5x5 convolution of stride 1, 2 are supported
- *
- * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) and no assembly kernel implementation is present
- * -# @ref CpuDepthwiseConvolution3x3Kernel if 3x3 and no assembly kernel implementation is present
- * -# @ref NEDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present
- * -# @ref NEDirectConvolutionLayerOutputStageKernel if re-quantization of output is required
- * -# @ref NEActivationLayer if fused activation is required
- *
- */
- class CpuDepthwiseConvolutionOptimizedInternal : public ICpuOperator
- {
- public:
- /** Default constructor */
- CpuDepthwiseConvolutionOptimizedInternal();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CpuDepthwiseConvolutionOptimizedInternal(const CpuDepthwiseConvolutionOptimizedInternal &) = delete;
- /** Default move constructor */
- CpuDepthwiseConvolutionOptimizedInternal(CpuDepthwiseConvolutionOptimizedInternal &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CpuDepthwiseConvolutionOptimizedInternal &operator=(const CpuDepthwiseConvolutionOptimizedInternal &) = delete;
- /** Default move assignment operator */
- CpuDepthwiseConvolutionOptimizedInternal &operator=(CpuDepthwiseConvolutionOptimizedInternal &&) = default;
- /** Default destructor */
- ~CpuDepthwiseConvolutionOptimizedInternal() = default;
- /** Initialize the function's source, destination, kernels and border_size.
- *
- * @param[in, out] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
- * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
- * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor info. Data type supported: same as @p input.
- * @param[in] info Depthwise convolution meta-data.
- */
- void configure(ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CpuDepthwiseConvolution3x3
- *
- * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
- * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
- * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] output Destination tensor info. Data type supported: same as @p input.
- * @param[in] info Depthwise convolution meta-data.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info);
-
- // Inherited methods overriden:
- void run(ITensorPack &tensors) override;
- void prepare(ITensorPack &tensors) override;
-
- private:
- std::unique_ptr<CpuDepthwiseConvolutionAssemblyDispatch> _dwc_optimized_func{ nullptr };
- std::unique_ptr<CpuPermute> _permute_input{ nullptr };
- std::unique_ptr<CpuPermute> _permute_weights{ nullptr };
- std::unique_ptr<CpuPermute> _permute_output{ nullptr };
- std::unique_ptr<CpuActivation> _activationlayer_function{ nullptr };
- bool _has_bias{ false };
- bool _is_quantized{ false };
- bool _is_nchw{ true };
- bool _permute{ false };
- bool _is_activationlayer_enabled{ false };
- bool _is_prepared{ false };
- };
-
- /** Basic function to execute a generic depthwise convolution. This function calls the following kernel:
- *
- * -# @ref CpuDepthwiseConvolutionNativeKernel
- *
- */
- class CpuDepthwiseConvolutionGeneric : public ICpuOperator
- {
- public:
- /** Default constructor */
- CpuDepthwiseConvolutionGeneric();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CpuDepthwiseConvolutionGeneric(const CpuDepthwiseConvolutionGeneric &) = delete;
- /** Default move constructor */
- CpuDepthwiseConvolutionGeneric(CpuDepthwiseConvolutionGeneric &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CpuDepthwiseConvolutionGeneric &operator=(const CpuDepthwiseConvolutionGeneric &) = delete;
- /** Default move assignment operator */
- CpuDepthwiseConvolutionGeneric &operator=(CpuDepthwiseConvolutionGeneric &&) = default;
- /** Default destructor */
- ~CpuDepthwiseConvolutionGeneric() = default;
- /** Initialize the function's source, destination, weights and convolution information.
- *
- * @param[in, out] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
- * @param[out] output Destination tensor info. Data type supported: same as @p input.
- * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] info Depthwise convolution meta-data.
- */
- void configure(ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CpuDepthwiseConvolutionGeneric
- *
- * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling).
- * @param[in] output Destination tensor info. Data type supported: same as @p input.
- * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] info Depthwise convolution meta-data.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info);
-
- // Inherited methods overridden:
- void run(ITensorPack &tensors) override;
- void prepare(ITensorPack &tensors) override;
-
- private:
- std::unique_ptr<kernels::CpuDepthwiseConvolutionNativeKernel> _depthwise_conv_kernel{ nullptr };
- std::unique_ptr<CpuPermute> _permute_input{ nullptr };
- std::unique_ptr<CpuPermute> _permute_weights{ nullptr };
- std::unique_ptr<CpuPermute> _permute_output{ nullptr };
- std::unique_ptr<CpuActivation> _activationlayer_function{ nullptr };
- bool _is_nchw{ true };
- bool _is_prepared{ false };
- bool _is_activationlayer_enabled{ false };
- };
-
- DepthwiseConvolutionFunction _depth_conv_func;
- CpuDepthwiseConvolutionOptimizedInternal _func_optimized;
- CpuDepthwiseConvolutionGeneric _func_generic;
-};
-} // namespace cpu
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CPU_DEQUANTIZATION_H */
diff --git a/src/runtime/cpu/operators/CpuDirectConvolution.cpp b/src/runtime/cpu/operators/CpuDirectConv2d.cpp
index 33f79603e8..8812b777a3 100644
--- a/src/runtime/cpu/operators/CpuDirectConvolution.cpp
+++ b/src/runtime/cpu/operators/CpuDirectConv2d.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/runtime/cpu/operators/CpuDirectConvolution.h"
+#include "src/runtime/cpu/operators/CpuDirectConv2d.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Utils.h"
@@ -32,19 +32,19 @@ namespace arm_compute
{
namespace cpu
{
-CpuDirectConvolution::~CpuDirectConvolution() = default;
+CpuDirectConv2d::~CpuDirectConv2d() = default;
-CpuDirectConvolution::CpuDirectConvolution(std::shared_ptr<IMemoryManager> memory_manager)
+CpuDirectConv2d::CpuDirectConv2d(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _output_stage_kernel(), _conv_kernel(), _input_border_handler(), _activationlayer_function(), _accumulator(), _has_bias(false),
_is_activationlayer_enabled(false), _dim_split(Window::DimZ), _is_padding_required()
{
}
-void CpuDirectConvolution::configure(ITensorInfo *src, ITensorInfo *weights, const ITensorInfo *bias, ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
+void CpuDirectConv2d::configure(ITensorInfo *src, ITensorInfo *weights, const ITensorInfo *bias, ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_ERROR_ON(src->data_layout() == DataLayout::UNKNOWN);
- _output_stage_kernel = std::make_unique<kernels::CpuDirectConvolutionOutputStageKernel>();
- _conv_kernel = std::make_unique<kernels::CpuDirectConvolutionKernel>();
+ _output_stage_kernel = std::make_unique<kernels::CpuDirectConv2dOutputStageKernel>();
+ _conv_kernel = std::make_unique<kernels::CpuDirectConv2dKernel>();
_input_border_handler = std::make_unique<NEFillBorderKernel>();
// Free accumulator
@@ -80,8 +80,8 @@ void CpuDirectConvolution::configure(ITensorInfo *src, ITensorInfo *weights, con
}
}
-Status CpuDirectConvolution::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *dst, const PadStrideInfo &conv_info,
- const ActivationLayerInfo &act_info)
+Status CpuDirectConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *dst, const PadStrideInfo &conv_info,
+ const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
@@ -90,7 +90,7 @@ Status CpuDirectConvolution::validate(const ITensorInfo *src, const ITensorInfo
TensorInfo accumulator(dst->clone()->set_is_resizable(true).reset_padding().set_data_type(data_type));
// Validate Convolution kernel
- ARM_COMPUTE_RETURN_ON_ERROR(kernels::CpuDirectConvolutionKernel::validate(src, weights, &accumulator, conv_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(kernels::CpuDirectConv2dKernel::validate(src, weights, &accumulator, conv_info));
if(bias != nullptr)
{
@@ -101,7 +101,7 @@ Status CpuDirectConvolution::validate(const ITensorInfo *src, const ITensorInfo
}
// Validate bias kernel
- ARM_COMPUTE_RETURN_ON_ERROR(kernels::CpuDirectConvolutionOutputStageKernel::validate(&accumulator, bias, dst));
+ ARM_COMPUTE_RETURN_ON_ERROR(kernels::CpuDirectConv2dOutputStageKernel::validate(&accumulator, bias, dst));
if(act_info.enabled())
{
@@ -111,7 +111,7 @@ Status CpuDirectConvolution::validate(const ITensorInfo *src, const ITensorInfo
return Status{};
}
-void CpuDirectConvolution::run(ITensorPack &tensors)
+void CpuDirectConv2d::run(ITensorPack &tensors)
{
MemoryGroupResourceScope scope_mg(_memory_group);
diff --git a/src/runtime/cpu/operators/CpuDirectConvolution.h b/src/runtime/cpu/operators/CpuDirectConv2d.h
index 0635e087fd..9e584b9c49 100644
--- a/src/runtime/cpu/operators/CpuDirectConvolution.h
+++ b/src/runtime/cpu/operators/CpuDirectConv2d.h
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_CPU_DIRECTCONVOLUTION_H
-#define ARM_COMPUTE_CPU_DIRECTCONVOLUTION_H
+#ifndef ARM_COMPUTE_CPU_DIRECTCONV2D_H
+#define ARM_COMPUTE_CPU_DIRECTCONV2D_H
#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/Types.h"
@@ -33,8 +33,8 @@
#include "arm_compute/runtime/Tensor.h"
#include "src/core/NEON/kernels/NEFillBorderKernel.h"
#include "src/core/cpu/ICpuKernel.h"
-#include "src/core/cpu/kernels/CpuDirectConvolutionKernel.h"
-#include "src/core/cpu/kernels/CpuDirectConvolutionOutputStageKernel.h"
+#include "src/core/cpu/kernels/CpuDirectConv2dKernel.h"
+#include "src/core/cpu/kernels/CpuDirectConv2dOutputStageKernel.h"
#include "src/runtime/cpu/ICpuOperator.h"
#include "src/runtime/cpu/operators/CpuActivation.h"
@@ -49,16 +49,16 @@ namespace cpu
* This function calls the following kernels:
*
* -# @ref NEFillBorderKernel for the input
- * -# @ref kernels::CpuDirectConvolutionOutputStageKernel
- * -# @ref kernels::CpuDirectConvolutionKernel
+ * -# @ref kernels::CpuDirectConv2dOutputStageKernel
+ * -# @ref kernels::CpuDirectConv2dKernel
*/
-class CpuDirectConvolution : public ICpuOperator
+class CpuDirectConv2d : public ICpuOperator
{
public:
/** Constructor */
- CpuDirectConvolution(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ CpuDirectConv2d(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Destructor */
- ~CpuDirectConvolution();
+ ~CpuDirectConv2d();
/** Set the input, weights, biases and output tensors.
*
* @note: DirectConvolution only works in the following configurations:
@@ -78,23 +78,9 @@ public:
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
*/
void configure(ITensorInfo *src, ITensorInfo *weights, const ITensorInfo *bias, ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayer
+ /** Static function to check if given info will lead to a valid configuration
*
- * @note: DirectConvolution only works in the following configurations:
- * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F16/F32
- * 3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F16/F32
- * 5x5 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F32
- *
- * @param[in] src Input tensor info. Data types supported: F16/F32.
- * @param[in] weights Set of kernels to convolve the input volume.
- * Supported sizes: 1x1, 3x3 and 5x5.
- * The 3rd dimension must be the same as the input's volume 3rd dimension.
- * Data type supported: Same as @p src.
- * @param[in] bias Set of biases. Can be nullptr. Data type supported: Same as @p src.
- * @param[in] dst Output tensor info.
- * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * Similar to CpuDirectConv2d::configure()
*
* @return a status
*/
@@ -105,17 +91,17 @@ public:
void run(ITensorPack &tensors) override;
private:
- MemoryGroup _memory_group;
- std::unique_ptr<kernels::CpuDirectConvolutionOutputStageKernel> _output_stage_kernel;
- std::unique_ptr<kernels::CpuDirectConvolutionKernel> _conv_kernel;
- std::unique_ptr<NEFillBorderKernel> _input_border_handler;
- std::unique_ptr<CpuActivation> _activationlayer_function;
- Tensor _accumulator;
- bool _has_bias{ false };
- bool _is_activationlayer_enabled{ false };
- unsigned int _dim_split{ 0 };
- bool _is_padding_required{ false };
+ MemoryGroup _memory_group;
+ std::unique_ptr<kernels::CpuDirectConv2dOutputStageKernel> _output_stage_kernel;
+ std::unique_ptr<kernels::CpuDirectConv2dKernel> _conv_kernel;
+ std::unique_ptr<NEFillBorderKernel> _input_border_handler;
+ std::unique_ptr<CpuActivation> _activationlayer_function;
+ Tensor _accumulator;
+ bool _has_bias{ false };
+ bool _is_activationlayer_enabled{ false };
+ unsigned int _dim_split{ 0 };
+ bool _is_padding_required{ false };
};
} // namespace cpu
} // namespace arm_compute
-#endif /* ARM_COMPUTE_CPU_DIRECTCONVOLUTION_H */
+#endif /* ARM_COMPUTE_CPU_DIRECTCONV2D_H */
diff --git a/src/runtime/cpu/operators/CpuPooling.cpp b/src/runtime/cpu/operators/CpuPool2d.cpp
index 3a6ac24a74..b225199c40 100644
--- a/src/runtime/cpu/operators/CpuPooling.cpp
+++ b/src/runtime/cpu/operators/CpuPool2d.cpp
@@ -21,20 +21,20 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/runtime/cpu/operators/CpuPooling.h"
+#include "src/runtime/cpu/operators/CpuPool2d.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "src/core/NEON/kernels/NEFillBorderKernel.h"
-#include "src/core/cpu/kernels/CpuPoolingAssemblyWrapperKernel.h"
-#include "src/core/cpu/kernels/CpuPoolingKernel.h"
+#include "src/core/cpu/kernels/CpuPool2dKernel.h"
+#include "src/core/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h"
namespace arm_compute
{
namespace cpu
{
-CpuPooling::CpuPooling()
+CpuPool2d::CpuPool2d()
: _pooling_layer_kernel(),
_border_handler(),
_asm_glue(),
@@ -44,12 +44,12 @@ CpuPooling::CpuPooling()
{
}
-CpuPooling::~CpuPooling() = default;
+CpuPool2d::~CpuPool2d() = default;
-void CpuPooling::configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &pool_info, ITensorInfo *indices)
+void CpuPool2d::configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &pool_info, ITensorInfo *indices)
{
// Check if we can run assembly kernels. Currently, indices are not supported by those kernels
- const bool run_optimised = bool(kernels::CpuPoolingAssemblyWrapperKernel::validate(src, dst, pool_info)) && (indices == nullptr);
+ const bool run_optimised = bool(kernels::CpuPool2dAssemblyWrapperKernel::validate(src, dst, pool_info)) && (indices == nullptr);
// Get data layout
_data_layout = pool_info.data_layout == DataLayout::UNKNOWN ? src->data_layout() : pool_info.data_layout;
@@ -64,7 +64,7 @@ void CpuPooling::configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLaye
const CPUInfo &ci = NEScheduler::get().cpu_info();
const unsigned int num_threads = NEScheduler::get().num_threads();
- auto pooling_wrapper = std::make_unique<kernels::CpuPoolingAssemblyWrapperKernel>();
+ auto pooling_wrapper = std::make_unique<kernels::CpuPool2dAssemblyWrapperKernel>();
ARM_COMPUTE_ERROR_ON(pooling_wrapper == nullptr);
pooling_wrapper->configure(src, dst, pool_info, ci);
@@ -78,7 +78,7 @@ void CpuPooling::configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLaye
else
{
// Configure pooling kernel
- auto k = std::make_unique<kernels::CpuPoolingKernel>();
+ auto k = std::make_unique<kernels::CpuPool2dKernel>();
k->configure(src, dst, pool_info, indices);
_pooling_layer_kernel = std::move(k);
@@ -106,19 +106,19 @@ void CpuPooling::configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLaye
}
}
-Status CpuPooling::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
+Status CpuPool2d::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
{
- const bool run_optimised = bool(kernels::CpuPoolingAssemblyWrapperKernel::validate(src, dst, pool_info)) && (indices == nullptr);
+ const bool run_optimised = bool(kernels::CpuPool2dAssemblyWrapperKernel::validate(src, dst, pool_info)) && (indices == nullptr);
if(run_optimised)
{
return Status{};
}
- return kernels::CpuPoolingKernel::validate(src, dst, pool_info, indices);
+ return kernels::CpuPool2dKernel::validate(src, dst, pool_info, indices);
}
-void CpuPooling::run(ITensorPack &tensors)
+void CpuPool2d::run(ITensorPack &tensors)
{
ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No tensors provided");
@@ -148,7 +148,7 @@ void CpuPooling::run(ITensorPack &tensors)
}
}
-experimental::MemoryRequirements CpuPooling::workspace() const
+experimental::MemoryRequirements CpuPool2d::workspace() const
{
return _mem_req;
}
diff --git a/src/runtime/cpu/operators/CpuPooling.h b/src/runtime/cpu/operators/CpuPool2d.h
index bc30adf762..ae3d115dfc 100644
--- a/src/runtime/cpu/operators/CpuPooling.h
+++ b/src/runtime/cpu/operators/CpuPool2d.h
@@ -21,12 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_CPU_POOLING_H
-#define ARM_COMPUTE_CPU_POOLING_H
-
-#include "src/runtime/cpu/ICpuOperator.h"
+#ifndef ARM_COMPUTE_CPU_POOL2D_H
+#define ARM_COMPUTE_CPU_POOL2D_H
#include "arm_compute/core/experimental/Types.h"
+#include "src/core/common/Macros.h"
+#include "src/runtime/cpu/ICpuOperator.h"
#include <memory>
@@ -40,24 +40,17 @@ namespace cpu
/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following kernels:
*
* -# @ref NEFillBorderKernel (executed if padding size is different from zero)
- * -# @ref kernels::CpuPoolingKernel
- * -# @ref kernels::CpuPoolingAssemblyWrapperKernel
+ * -# @ref kernels::CpuPool2dKernel
+ * -# @ref kernels::CpuPool2dAssemblyWrapperKernel
*/
-class CpuPooling : public ICpuOperator
+class CpuPool2d : public ICpuOperator
{
public:
/** Constructor */
- CpuPooling();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CpuPooling(const CpuPooling &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CpuPooling &operator=(const CpuPooling &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- CpuPooling(CpuPooling &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- CpuPooling &operator=(CpuPooling &&) = delete;
+ CpuPool2d();
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuPool2d);
/** Default destructor */
- ~CpuPooling();
+ ~CpuPool2d();
/** Set the src and dst tensors.
*
* @note F16 is supported for pool sizes 2 and 3 only
@@ -68,14 +61,9 @@ public:
* @param[out] indices (optional) The indices of the maximal values. Data type supported: U32.
*/
void configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &pool_info, ITensorInfo *indices = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref CpuPooling
- *
- * @note F16 is supported for pool sizes 2 and 3 only
+ /** Static function to check if given info will lead to a valid configuration
*
- * @param[in] src Source tensor info. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] dst Destination tensor info. Data types supported: same as @p src.
- * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
- * @param[in] indices (optional) Tensor info of the indices of the maximal values. Data type supported: U32.
+ * Similar to CpuPool2d::configure()
*
* @return a status
*/
@@ -96,4 +84,4 @@ private:
};
} // namespace cpu
} // namespace arm_compute
-#endif /* ARM_COMPUTE_CPU_POOLING_H */
+#endif /* ARM_COMPUTE_CPU_POOL2D_H */
diff --git a/src/runtime/gpu/cl/operators/ClDirectConvolution.cpp b/src/runtime/gpu/cl/operators/ClDirectConv2d.cpp
index 3382a6c3c5..527b3a65f9 100644
--- a/src/runtime/gpu/cl/operators/ClDirectConvolution.cpp
+++ b/src/runtime/gpu/cl/operators/ClDirectConv2d.cpp
@@ -21,13 +21,13 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/runtime/gpu/cl/operators/ClDirectConvolution.h"
+#include "src/runtime/gpu/cl/operators/ClDirectConv2d.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "src/core/gpu/cl/ClCompileContext.h"
#include "src/core/gpu/cl/kernels/ClActivationKernel.h"
-#include "src/core/gpu/cl/kernels/ClDirectConvolutionKernel.h"
+#include "src/core/gpu/cl/kernels/ClDirectConv2dKernel.h"
namespace arm_compute
{
@@ -44,11 +44,11 @@ ITensorPack select_activation_src_dst(ITensorPack &tensors)
}
} // namespace
-void ClDirectConvolution::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst,
- const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
+void ClDirectConv2d::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst,
+ const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
{
// Configure direct convolution kernel
- auto k = std::make_unique<kernels::ClDirectConvolutionKernel>();
+ auto k = std::make_unique<kernels::ClDirectConv2dKernel>();
k->set_target(CLScheduler::get().target());
k->configure(compile_context, src, weights, biases, dst, conv_info);
_direct_conv_kernel = std::move(k);
@@ -74,10 +74,10 @@ void ClDirectConvolution::configure(const CLCompileContext &compile_context, ITe
CLScheduler::get().tune_kernel_static(*_direct_conv_kernel);
}
-Status ClDirectConvolution::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
- const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
+Status ClDirectConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
+ const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
{
- ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClDirectConvolutionKernel::validate(src, weights, biases, dst, conv_info, CLScheduler::get().target()));
+ ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClDirectConv2dKernel::validate(src, weights, biases, dst, conv_info, CLScheduler::get().target()));
if(act_info.enabled())
{
ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClActivationKernel::validate(dst, dst, act_info));
@@ -85,7 +85,7 @@ Status ClDirectConvolution::validate(const ITensorInfo *src, const ITensorInfo *
return Status{};
}
-void ClDirectConvolution::run(ITensorPack &tensors)
+void ClDirectConv2d::run(ITensorPack &tensors)
{
// Run border handler
CLScheduler::get().enqueue_op(*_src_border_handler.get(), tensors, false);
diff --git a/src/runtime/gpu/cl/operators/ClDirectConvolution.h b/src/runtime/gpu/cl/operators/ClDirectConv2d.h
index e7ad927b0b..e069733fab 100644
--- a/src/runtime/gpu/cl/operators/ClDirectConvolution.h
+++ b/src/runtime/gpu/cl/operators/ClDirectConv2d.h
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_CL_DIRECT_CONVOLUTION_H
-#define ARM_COMPUTE_CL_DIRECT_CONVOLUTION_H
+#ifndef ARM_COMPUTE_CL_DIRECT_CONV2D_H
+#define ARM_COMPUTE_CL_DIRECT_CONV2D_H
#include "src/core/gpu/cl/ClCompileContext.h"
#include "src/core/gpu/cl/IClKernel.h"
@@ -37,13 +37,13 @@ namespace opencl
/** Basic function to simulate a direct convolution layer. This function calls the following OpenCL kernels:
*
* -# @ref CLFillBorderKernel (executed if padding size is different from zero)
- * -# @ref opencl::ClDirectConvolution
+ * -# @ref opencl::ClDirectConv2d
*/
-class ClDirectConvolution : public IClOperator
+class ClDirectConv2d : public IClOperator
{
public:
/** Constructor */
- ClDirectConvolution() = default;
+ ClDirectConv2d() = default;
/** Set the src and dst tensors.
*
* @param[in] compile_context The compile context to be used.
@@ -61,18 +61,9 @@ public:
*/
void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info,
const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref ClDirectConvolution
+ /** Static function to check if given info will lead to a valid configuration
*
- * @param[in] src Source tensor. 3 lower dimensions represent a single src [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of srcs.
- * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p src.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p src data type, except for src of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type.
- * @param[in] dst Destination tensor. 3 lower dimensions represent a single dst [width, height, OFM], while the rest represent batch of dsts.
- * Data types supported: Same as @p src.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * Similar to ClDirectConv2d::configure()
*
* @return a status
*/
@@ -89,4 +80,4 @@ private:
};
} // namespace opencl
} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_DIRECT_CONVOLUTION_H */ \ No newline at end of file
+#endif /* ARM_COMPUTE_CL_DIRECT_CONV2D_H */ \ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClPooling.cpp b/src/runtime/gpu/cl/operators/ClPool2d.cpp
index 8610eb9842..40c2b0a8ba 100644
--- a/src/runtime/gpu/cl/operators/ClPooling.cpp
+++ b/src/runtime/gpu/cl/operators/ClPool2d.cpp
@@ -21,23 +21,23 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/runtime/gpu/cl/operators/ClPooling.h"
+#include "src/runtime/gpu/cl/operators/ClPool2d.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "src/core/gpu/cl/ClCompileContext.h"
-#include "src/core/gpu/cl/kernels/ClPoolingKernel.h"
+#include "src/core/gpu/cl/kernels/ClPool2dKernel.h"
namespace arm_compute
{
namespace opencl
{
-void ClPooling::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, ITensorInfo *indices)
+void ClPool2d::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, ITensorInfo *indices)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src);
// Configure pooling kernel
- auto k = std::make_unique<kernels::ClPoolingKernel>();
+ auto k = std::make_unique<kernels::ClPool2dKernel>();
k->set_target(CLScheduler::get().target());
k->configure(compile_context, src, dst, info, indices);
_pooling = std::move(k);
@@ -85,12 +85,12 @@ void ClPooling::configure(const ClCompileContext &compile_context, ITensorInfo *
CLScheduler::get().tune_kernel_static(*_pooling);
}
-Status ClPooling::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &info, const ITensorInfo *indices)
+Status ClPool2d::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &info, const ITensorInfo *indices)
{
- return kernels::ClPoolingKernel::validate(src, dst, info, indices);
+ return kernels::ClPool2dKernel::validate(src, dst, info, indices);
}
-void ClPooling::run(ITensorPack &tensors)
+void ClPool2d::run(ITensorPack &tensors)
{
ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
diff --git a/src/runtime/gpu/cl/operators/ClPooling.h b/src/runtime/gpu/cl/operators/ClPool2d.h
index 99de6d0dcf..8ac386a64b 100644
--- a/src/runtime/gpu/cl/operators/ClPooling.h
+++ b/src/runtime/gpu/cl/operators/ClPool2d.h
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_CL_POOLING_H
-#define ARM_COMPUTE_CL_POOLING_H
+#ifndef ARM_COMPUTE_CL_POOL2D_H
+#define ARM_COMPUTE_CL_POOL2D_H
#include "src/core/gpu/cl/ClCompileContext.h"
#include "src/runtime/gpu/cl/IClOperator.h"
@@ -36,13 +36,13 @@ namespace opencl
/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following OpenCL kernels:
*
* -# @ref CLFillBorderKernel (executed if padding size is different from zero)
- * -# @ref opencl::ClPooling
+ * -# @ref opencl::ClPool2d
*/
-class ClPooling : public IClOperator
+class ClPool2d : public IClOperator
{
public:
/** Constructor */
- ClPooling() = default;
+ ClPool2d() = default;
/** Configure operator for a given list of arguments
*
* @param[in] compile_context The compile context to be used.
@@ -52,12 +52,9 @@ public:
* @param[out] indices (optional) The indices info of the maximal values. Data type supported: U32.
*/
void configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, ITensorInfo *indices = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref ClPooling
+ /** Static function to check if given info will lead to a valid configuration
*
- * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[out] dst Destination tensor info. Data type supported: same as @p src
- * @param[in] info Pooling layer parameters.
- * @param[out] indices (optional) The indices info of the maximal values. Data type supported: U32.
+ * Similar to ClPool2d::configure()
*
* @return a status
*/
@@ -72,4 +69,4 @@ private:
};
} // namespace opencl
} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_POOLING_H */
+#endif /* ARM_COMPUTE_CL_POOL2D_H */