Diffstat (limited to 'src')
-rw-r--r-- | src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h | 4
-rw-r--r-- | src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h | 2
-rw-r--r-- | src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp (renamed from src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.cpp) | 147
-rw-r--r-- | src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h (renamed from src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h) | 40
-rw-r--r-- | src/core/cpu/kernels/CpuDirectConv2dKernel.cpp (renamed from src/core/cpu/kernels/CpuDirectConvolutionKernel.cpp) | 16
-rw-r--r-- | src/core/cpu/kernels/CpuDirectConv2dKernel.h (renamed from src/core/cpu/kernels/CpuDirectConvolutionKernel.h) | 25
-rw-r--r-- | src/core/cpu/kernels/CpuDirectConv2dOutputStageKernel.cpp (renamed from src/core/cpu/kernels/CpuDirectConvolutionOutputStageKernel.cpp) | 16
-rw-r--r-- | src/core/cpu/kernels/CpuDirectConv2dOutputStageKernel.h (renamed from src/core/cpu/kernels/CpuDirectConvolutionOutputStageKernel.h) | 26
-rw-r--r-- | src/core/cpu/kernels/CpuPool2dKernel.cpp (renamed from src/core/cpu/kernels/CpuPoolingKernel.cpp) | 14
-rw-r--r-- | src/core/cpu/kernels/CpuPool2dKernel.h (renamed from src/core/cpu/kernels/CpuPoolingKernel.h) | 21
-rw-r--r-- | src/core/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp (renamed from src/core/cpu/kernels/CpuPoolingAssemblyWrapperKernel.cpp) | 16
-rw-r--r-- | src/core/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h (renamed from src/core/cpu/kernels/CpuPoolingAssemblyWrapperKernel.h) | 26
-rw-r--r-- | src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp (renamed from src/core/gpu/cl/kernels/ClDirectConvolutionKernel.cpp) | 14
-rw-r--r-- | src/core/gpu/cl/kernels/ClDirectConv2dKernel.h (renamed from src/core/gpu/cl/kernels/ClDirectConvolutionKernel.h) | 26
-rw-r--r-- | src/core/gpu/cl/kernels/ClPool2dKernel.cpp (renamed from src/core/gpu/cl/kernels/ClPoolingKernel.cpp) | 12
-rw-r--r-- | src/core/gpu/cl/kernels/ClPool2dKernel.h (renamed from src/core/gpu/cl/kernels/ClPoolingKernel.h) | 19
-rw-r--r-- | src/runtime/CL/functions/CLDirectConvolutionLayer.cpp | 16
-rw-r--r-- | src/runtime/CL/functions/CLPoolingLayer.cpp | 14
-rw-r--r-- | src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp | 66
-rw-r--r-- | src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp | 16
-rw-r--r-- | src/runtime/NEON/functions/NEPoolingLayer.cpp | 16
-rw-r--r-- | src/runtime/cpu/operators/CpuDepthwiseConv2d.cpp (renamed from src/runtime/cpu/operators/CpuDepthwiseConvolution.cpp) | 162
-rw-r--r-- | src/runtime/cpu/operators/CpuDepthwiseConv2d.h | 213
-rw-r--r-- | src/runtime/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp (renamed from src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.cpp) | 108
-rw-r--r-- | src/runtime/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.h (renamed from src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.h) | 53
-rw-r--r-- | src/runtime/cpu/operators/CpuDepthwiseConvolution.h | 230
-rw-r--r-- | src/runtime/cpu/operators/CpuDirectConv2d.cpp (renamed from src/runtime/cpu/operators/CpuDirectConvolution.cpp) | 22
-rw-r--r-- | src/runtime/cpu/operators/CpuDirectConv2d.h (renamed from src/runtime/cpu/operators/CpuDirectConvolution.h) | 58
-rw-r--r-- | src/runtime/cpu/operators/CpuPool2d.cpp (renamed from src/runtime/cpu/operators/CpuPooling.cpp) | 28
-rw-r--r-- | src/runtime/cpu/operators/CpuPool2d.h (renamed from src/runtime/cpu/operators/CpuPooling.h) | 38
-rw-r--r-- | src/runtime/gpu/cl/operators/ClDirectConv2d.cpp (renamed from src/runtime/gpu/cl/operators/ClDirectConvolution.cpp) | 18
-rw-r--r-- | src/runtime/gpu/cl/operators/ClDirectConv2d.h (renamed from src/runtime/gpu/cl/operators/ClDirectConvolution.h) | 25
-rw-r--r-- | src/runtime/gpu/cl/operators/ClPool2d.cpp (renamed from src/runtime/gpu/cl/operators/ClPooling.cpp) | 14
-rw-r--r-- | src/runtime/gpu/cl/operators/ClPool2d.h (renamed from src/runtime/gpu/cl/operators/ClPooling.h) | 19 |
34 files changed, 716 insertions, 824 deletions
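
This patch is a pure rename of the internal CPU/GPU convolution and pooling operators and kernels to the Conv2d/Pool2d naming scheme (for example, CpuDepthwiseConvolutionNativeKernel becomes CpuDepthwiseConv2dNativeKernel, ClPooling becomes ClPool2d); the public functions such as NEDepthwiseConvolutionLayer and CLPoolingLayer keep their names and are only updated to call the renamed internals. As a rough, non-authoritative illustration of the renamed kernel interface shown in the hunks below, a minimal validate/configure sketch might look as follows; the tensor shapes and ConvolutionInfo values are assumptions chosen for illustration and are not part of the change.

// Minimal sketch (not part of this patch): exercising the renamed
// CpuDepthwiseConv2dNativeKernel interface from the hunks below.
// Shapes and ConvolutionInfo values are illustrative assumptions.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h"

using namespace arm_compute;

void depthwise_conv2d_native_example()
{
    // NHWC tensor infos: src is [C, W, H, N], weights is [IFM, W, H] as documented in the kernel header
    TensorInfo src(TensorShape(16U, 32U, 32U, 1U), 1, DataType::F32);
    TensorInfo weights(TensorShape(16U, 3U, 3U), 1, DataType::F32);
    TensorInfo dst{}; // left empty; configure() auto-initialises it
    src.set_data_layout(DataLayout::NHWC);
    weights.set_data_layout(DataLayout::NHWC);

    ConvolutionInfo info{};
    info.pad_stride_info  = PadStrideInfo(1 /* stride_x */, 1 /* stride_y */, 1 /* pad_x */, 1 /* pad_y */);
    info.depth_multiplier = 1;
    info.dilation         = Size2D(1, 1);

    // Static validation, then configuration, using the renamed kernel class
    cpu::kernels::CpuDepthwiseConv2dNativeKernel dwc;
    const Status status = cpu::kernels::CpuDepthwiseConv2dNativeKernel::validate(&src, &weights, nullptr, &dst, info);
    if(status.error_code() == ErrorCode::OK)
    {
        dwc.configure(&src, &weights, nullptr, &dst, info);
    }
}
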
diff --git a/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h index cc96cf1a1f..45481d0507 100644 --- a/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h +++ b/src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h @@ -53,7 +53,7 @@ public: * @param[in] compile_context The compile context to be used. * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] indices Tensor containing the offset to store the input elements in the output tensor. - * @ref opencl::ClPooling with indices should precede this function in order to + * @ref CLPoolingLayer with indices should precede this function in order to * properly reconstruct the output tensor. * The tensor shape of this tensor has to be equal to the input tensor shape. Data type supported: U32. * @param[out] output Destination tensor. Data types supported: Same as @p input. @@ -65,7 +65,7 @@ public: * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] output Destination tensor info. Data types supported: Same as @p input. * @param[in] indices TensorInfo associated to the tensor containing the offset to store the input elements in the output tensor. - * @ref opencl::ClPooling with indices should precede this function in order to + * @ref CLPoolingLayer with indices should precede this function in order to * properly reconstruct the output tensor. * The tensor shape of this tensor has to be equal to the input tensor shape. Data type supported: U32. * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. diff --git a/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h b/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h index f42272826c..ecc116e585 100644 --- a/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h +++ b/src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h @@ -56,7 +56,7 @@ public: * * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] indices Tensor containing the offset to store the input elements in the output tensor. - * @ref cpu::kernels::CpuPoolingKernel with indices should precede this function in order to + * @ref NEPoolingLayer with indices should precede this function in order to * properly reconstruct the output tensor. * The tensor shape of this tensor has to be equal to the input tensor shape. Data type supported: U32. * @param[out] output Destination tensor. Data types supported: Same as @p input. diff --git a/src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.cpp b/src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp index a5d1b61c08..4ddb35f2d5 100644 --- a/src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.cpp +++ b/src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h" +#include "src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/ITensorInfo.h" @@ -74,7 +74,7 @@ struct DepthwiseConvolutionRunInfo const size_t input_width; const size_t input_depth; - DepthwiseConvolutionRunInfo(const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &conv_info, const Window &w, uint32_t depth_multiplier = 1) + DepthwiseConvolutionRunInfo(const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &conv_info, const Window &w, uint32_t depth_multiplier = 1) // NOLINT : num_read_elements_per_iteration((depth_multiplier == 1 ? (vector_size / element_size_from_data_type(input.data_type())) : 1)), x_start(w.x().start()), x_end(w.x().end()), @@ -110,14 +110,14 @@ inline bool is_valid_input_region(int32_t base_w, uint32_t base_h, uint32_t w, u } template <typename T> -void depthwise_loop_multiplier1_fp(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, +void depthwise_loop_multiplier1_fp(const ITensor *src, const ITensor *weights, const ITensor *biases, ITensor *dst, const PadStrideInfo &conv_info, const Size2D &dilation, const Window &window, bool has_biases) { constexpr auto element_per_vector = vector_size / sizeof(T); using VectorType = typename wrapper::traits::neon_vector<T, element_per_vector>::type; using TagType = typename wrapper::traits::neon_vector<T, element_per_vector>::tag_type; - const auto run_info = DepthwiseConvolutionRunInfo(*input->info(), *weights->info(), conv_info, window); + const auto run_info = DepthwiseConvolutionRunInfo(*src->info(), *weights->info(), conv_info, window); const VectorType zero_vector = wrapper::vdup_n(static_cast<T>(0), TagType{}); @@ -135,9 +135,9 @@ void depthwise_loop_multiplier1_fp(const ITensor *input, const ITensor *weights, Window win_output = window; win_output.set(Window::DimX, dim_manual_loop); - Iterator input_it(input, win_input); + Iterator input_it(src, win_input); Iterator weights_it(weights, win_weights); - Iterator output_it(output, win_output); + Iterator output_it(dst, win_output); Iterator biases_it{}; if(has_biases) @@ -224,10 +224,10 @@ void depthwise_loop_multiplier1_fp(const ITensor *input, const ITensor *weights, } template <typename T> -void depthwise_loop_generic_fp(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, +void depthwise_loop_generic_fp(const ITensor *src, const ITensor *weights, const ITensor *biases, ITensor *dst, const PadStrideInfo &conv_info, const Size2D &dilation, unsigned int depth_multiplier, const Window &window, bool has_biases) { - const auto run_info = DepthwiseConvolutionRunInfo(*input->info(), *weights->info(), conv_info, window, depth_multiplier); + const auto run_info = DepthwiseConvolutionRunInfo(*src->info(), *weights->info(), conv_info, window, depth_multiplier); Window execution_window = window; execution_window.set(Window::DimX, Window::Dimension(0, run_info.input_depth, 1)); @@ -246,9 +246,9 @@ void depthwise_loop_generic_fp(const ITensor *input, const ITensor *weights, con Window win_output = window; win_output.set_dimension_step(Window::DimX, run_info.x_step); - Iterator input_it(input, win_input); + Iterator input_it(src, win_input); Iterator weights_it(weights, win_weights); - Iterator output_it(output, win_output); + Iterator output_it(dst, win_output); Iterator 
biases_it{}; if(has_biases) @@ -306,23 +306,24 @@ void depthwise_loop_generic_fp(const ITensor *input, const ITensor *weights, con } template <typename T, typename TW> -void depthwise_loop_multiplier1_quantized(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, - const Size2D &dilation, std::vector<int> output_multiplier, std::vector<int> output_shift, const Window &window, bool has_biases) +void depthwise_loop_multiplier1_quantized(const ITensor *src, const ITensor *weights, const ITensor *biases, ITensor *dst, const PadStrideInfo &conv_info, + const Size2D &dilation, std::vector<int> output_multiplier, std::vector<int> output_shift, const Window &window, bool has_biases) // NOLINT { + ARM_COMPUTE_UNUSED(output_multiplier, output_shift); constexpr auto element_per_vector = vector_size / sizeof(T); using VectorType = typename wrapper::traits::neon_vector<T, element_per_vector>::type; using TagType = typename wrapper::traits::neon_vector<T, element_per_vector>::tag_type; using AccType = int32_t; using AccArrayType = std::array<AccType, element_per_vector>; - const auto out_of_bound_value = PixelValue(static_cast<uint64_t>(0), input->info()->data_type(), input->info()->quantization_info()).get<T>(); + const auto out_of_bound_value = PixelValue(static_cast<uint64_t>(0), src->info()->data_type(), src->info()->quantization_info()).get<T>(); const auto out_of_bound_vector = wrapper::vdup_n(static_cast<T>(out_of_bound_value), TagType{}); - const auto run_info = DepthwiseConvolutionRunInfo(*input->info(), *weights->info(), conv_info, window); + const auto run_info = DepthwiseConvolutionRunInfo(*src->info(), *weights->info(), conv_info, window); - const int32_t input_qoffset = input->info()->quantization_info().uniform().offset; + const int32_t input_qoffset = src->info()->quantization_info().uniform().offset; const int32_t weights_qoffset = weights->info()->quantization_info().uniform().offset; - const int32_t output_qoffset = output->info()->quantization_info().uniform().offset; + const int32_t output_qoffset = dst->info()->quantization_info().uniform().offset; const int32_t k_offset = run_info.weights_width * run_info.weights_height * input_qoffset * weights_qoffset; Window execution_window = window; @@ -339,9 +340,9 @@ void depthwise_loop_multiplier1_quantized(const ITensor *input, const ITensor *w Window win_output = window; win_output.set(Window::DimX, dim_manual_loop); - Iterator input_it(input, win_input); + Iterator input_it(src, win_input); Iterator weights_it(weights, win_weights); - Iterator output_it(output, win_output); + Iterator output_it(dst, win_output); Iterator biases_it{}; if(has_biases) @@ -482,18 +483,18 @@ void depthwise_loop_multiplier1_quantized(const ITensor *input, const ITensor *w } template <typename T, typename TW> -void depthwise_loop_generic_quantized(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, - const Size2D &dilation, unsigned int depth_multiplier, std::vector<int> output_multiplier, std::vector<int> output_shift, const Window &window, bool has_biases) +void depthwise_loop_generic_quantized(const ITensor *src, const ITensor *weights, const ITensor *biases, ITensor *dst, const PadStrideInfo &conv_info, + const Size2D &dilation, unsigned int depth_multiplier, std::vector<int> output_multiplier, std::vector<int> output_shift, const Window &window, bool has_biases) // NOLINT { using AccType = int32_t; - const auto run_info = 
DepthwiseConvolutionRunInfo(*input->info(), *weights->info(), conv_info, window, depth_multiplier); + const auto run_info = DepthwiseConvolutionRunInfo(*src->info(), *weights->info(), conv_info, window, depth_multiplier); - const auto out_of_bound_value = PixelValue(static_cast<uint64_t>(0), input->info()->data_type(), input->info()->quantization_info()).get<T>(); + const auto out_of_bound_value = PixelValue(static_cast<uint64_t>(0), src->info()->data_type(), src->info()->quantization_info()).get<T>(); - const int32_t input_qoffset = input->info()->quantization_info().uniform().offset; + const int32_t input_qoffset = src->info()->quantization_info().uniform().offset; const int32_t weights_qoffset = weights->info()->quantization_info().uniform().offset; - const int32_t output_qoffset = output->info()->quantization_info().uniform().offset; + const int32_t output_qoffset = dst->info()->quantization_info().uniform().offset; const int32_t k_offset = run_info.weights_width * run_info.weights_height * input_qoffset * weights_qoffset; Window execution_window = window; @@ -512,9 +513,9 @@ void depthwise_loop_generic_quantized(const ITensor *input, const ITensor *weigh Window win_output = window; win_output.set_dimension_step(Window::DimX, run_info.x_step); - Iterator input_it(input, win_input); + Iterator input_it(src, win_input); Iterator weights_it(weights, win_weights); - Iterator output_it(output, win_output); + Iterator output_it(dst, win_output); Iterator biases_it{}; if(has_biases) @@ -585,8 +586,8 @@ void depthwise_loop_generic_quantized(const ITensor *input, const ITensor *weigh } template <typename T, typename TW> -void depthwise_loop_pow2_quantized_per_tensor(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, - const Size2D &dilation, unsigned int depth_multiplier, std::vector<int> output_multiplier, std::vector<int> output_shift, const Window &window, bool has_biases) +void depthwise_loop_pow2_quantized_per_tensor(const ITensor *src, const ITensor *weights, const ITensor *biases, ITensor *dst, const PadStrideInfo &conv_info, + const Size2D &dilation, unsigned int depth_multiplier, std::vector<int> output_multiplier, std::vector<int> output_shift, const Window &window, bool has_biases) // NOLINT { constexpr int half_vec = vector_size / 2; @@ -595,11 +596,11 @@ void depthwise_loop_pow2_quantized_per_tensor(const ITensor *input, const ITenso using AccVectorTagType = typename wrapper::traits::neon_vector<AccType, half_vec>::tag_type; using TagType = typename wrapper::traits::neon_vector<T, vector_size>::tag_type; - const auto run_info = DepthwiseConvolutionRunInfo(*input->info(), *weights->info(), conv_info, window, depth_multiplier); + const auto run_info = DepthwiseConvolutionRunInfo(*src->info(), *weights->info(), conv_info, window, depth_multiplier); - const auto input_qoffset_vec = wrapper::vreinterpret(wrapper::vmovl(wrapper::vdup_n(static_cast<T>(input->info()->quantization_info().uniform().offset), TagType{}))); + const auto input_qoffset_vec = wrapper::vreinterpret(wrapper::vmovl(wrapper::vdup_n(static_cast<T>(src->info()->quantization_info().uniform().offset), TagType{}))); const auto weights_qoffset_vec = wrapper::vreinterpret(wrapper::vmovl(wrapper::vdup_n(static_cast<TW>(weights->info()->quantization_info().uniform().offset), TagType{}))); - const auto output_qoffset_vec = wrapper::vdup_n(output->info()->quantization_info().uniform().offset, arm_compute::wrapper::traits::vector_128_tag{}); + const auto 
output_qoffset_vec = wrapper::vdup_n(dst->info()->quantization_info().uniform().offset, arm_compute::wrapper::traits::vector_128_tag{}); const auto lower = wrapper::vdup_n(static_cast<AccType>(std::numeric_limits<T>::lowest()), AccVectorTagType{}); const auto upper = wrapper::vdup_n(static_cast<AccType>(std::numeric_limits<T>::max()), AccVectorTagType{}); @@ -624,9 +625,9 @@ void depthwise_loop_pow2_quantized_per_tensor(const ITensor *input, const ITenso Window win_output = window; win_output.set_dimension_step(Window::DimX, run_info.x_step); - Iterator input_it(input, win_input); + Iterator input_it(src, win_input); Iterator weights_it(weights, win_weights); - Iterator output_it(output, win_output); + Iterator output_it(dst, win_output); Iterator biases_it{}; if(has_biases) @@ -722,16 +723,16 @@ void depthwise_loop_pow2_quantized_per_tensor(const ITensor *input, const ITenso input_it, weights_it, biases_it, output_it); } -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info) +Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst); + ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src); + ARM_COMPUTE_RETURN_ERROR_ON(src->data_layout() == DataLayout::UNKNOWN); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON(info.depth_multiplier == 0); - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(1) + (weights->dimension(1) - 1) * (info.dilation.x() - 1) > input->dimension(1) + info.pad_stride_info.pad_left() + info.pad_stride_info.pad_right()); - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(2) + (weights->dimension(2) - 1) * (info.dilation.y() - 1) > input->dimension(2) + info.pad_stride_info.pad_top() + info.pad_stride_info.pad_bottom()); - ARM_COMPUTE_RETURN_ERROR_ON((input->dimension(0) * info.depth_multiplier) != weights->dimension(0)); + ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(1) + (weights->dimension(1) - 1) * (info.dilation.x() - 1) > src->dimension(1) + info.pad_stride_info.pad_left() + info.pad_stride_info.pad_right()); + ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(2) + (weights->dimension(2) - 1) * (info.dilation.y() - 1) > src->dimension(2) + info.pad_stride_info.pad_top() + info.pad_stride_info.pad_bottom()); + ARM_COMPUTE_RETURN_ERROR_ON((src->dimension(0) * info.depth_multiplier) != weights->dimension(0)); ARM_COMPUTE_RETURN_ERROR_ON((info.dilation.x() < 1) || (info.dilation.y() < 1)); ARM_COMPUTE_RETURN_ERROR_ON((info.pad_stride_info.stride().first < 1) || (info.pad_stride_info.stride().second < 1)); @@ -742,7 +743,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, } else { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, weights); } if(biases != nullptr) @@ -750,7 +751,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo 
*weights, ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1); ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(0)); - if(is_data_type_quantized_asymmetric(input->data_type())) + if(is_data_type_quantized_asymmetric(src->data_type())) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32); } @@ -760,36 +761,36 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, } } - if(output->total_size() != 0) + if(dst->total_size() != 0) { - const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, info); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *weights, info); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(), output_shape); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst); } return Status{}; } } // namespace -CpuDepthwiseConvolutionNativeKernel::CpuDepthwiseConvolutionNativeKernel() +CpuDepthwiseConv2dNativeKernel::CpuDepthwiseConv2dNativeKernel() : _func(), _conv_info(), _depth_multiplier(1), _dilation(), _output_multiplier(), _output_shift(), _has_biases() { } -void CpuDepthwiseConvolutionNativeKernel::configure(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info) +void CpuDepthwiseConv2dNativeKernel::configure(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, weights, (biases != nullptr) ? biases : nullptr, output, info)); + ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, weights, (biases != nullptr) ? 
biases : nullptr, dst, info)); _conv_info = info.pad_stride_info; _depth_multiplier = info.depth_multiplier; _dilation = info.dilation; _has_biases = (biases != nullptr); - if(is_data_type_quantized(input->data_type())) + if(is_data_type_quantized(src->data_type())) { - const auto input_scale = input->quantization_info().uniform().scale; - const auto output_scale = output->quantization_info().uniform().scale; + const auto input_scale = src->quantization_info().uniform().scale; + const auto output_scale = dst->quantization_info().uniform().scale; auto weights_scale = weights->quantization_info().scale(); if(!is_data_type_quantized_per_channel(weights->data_type())) @@ -815,50 +816,50 @@ void CpuDepthwiseConvolutionNativeKernel::configure(const ITensorInfo *input, co switch(weights->data_type()) { case DataType::QASYMM8: - _func = &CpuDepthwiseConvolutionNativeKernel::run_depthwise<uint8_t, uint8_t>; + _func = &CpuDepthwiseConv2dNativeKernel::run_depthwise<uint8_t, uint8_t>; break; case DataType::QASYMM8_SIGNED: - _func = &CpuDepthwiseConvolutionNativeKernel::run_depthwise<int8_t, int8_t>; + _func = &CpuDepthwiseConv2dNativeKernel::run_depthwise<int8_t, int8_t>; break; case DataType::QSYMM8_PER_CHANNEL: - if(input->data_type() == DataType::QASYMM8) + if(src->data_type() == DataType::QASYMM8) { - _func = &CpuDepthwiseConvolutionNativeKernel::run_depthwise<uint8_t, int8_t>; + _func = &CpuDepthwiseConv2dNativeKernel::run_depthwise<uint8_t, int8_t>; } else { - _func = &CpuDepthwiseConvolutionNativeKernel::run_depthwise<int8_t, int8_t>; + _func = &CpuDepthwiseConv2dNativeKernel::run_depthwise<int8_t, int8_t>; } break; #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: - _func = &CpuDepthwiseConvolutionNativeKernel::run_depthwise<float16_t, float16_t>; + _func = &CpuDepthwiseConv2dNativeKernel::run_depthwise<float16_t, float16_t>; break; #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F32: - _func = &CpuDepthwiseConvolutionNativeKernel::run_depthwise<float, float>; + _func = &CpuDepthwiseConv2dNativeKernel::run_depthwise<float, float>; break; default: ARM_COMPUTE_ERROR("Data type not supported"); break; } - const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, info); - auto_init_if_empty(*output, input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape).set_quantization_info(output->quantization_info())); + const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *weights, info); + auto_init_if_empty(*dst, src->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape).set_quantization_info(dst->quantization_info())); - Window win = calculate_max_window(*output, Steps()); + Window win = calculate_max_window(*dst, Steps()); ICpuKernel::configure(win); } -Status CpuDepthwiseConvolutionNativeKernel::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info) +Status CpuDepthwiseConv2dNativeKernel::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info) { - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, biases, output, info)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, weights, biases, dst, info)); return Status{}; } -template <typename T, typename TW, CpuDepthwiseConvolutionNativeKernel::FloatEnalber<T>> -void 
CpuDepthwiseConvolutionNativeKernel::run_depthwise(const ITensor *src, const ITensor *weights, const ITensor *biases, - ITensor *dst, const Window &window, bool has_biases) +template <typename T, typename TW, CpuDepthwiseConv2dNativeKernel::FloatEnalber<T>> +void CpuDepthwiseConv2dNativeKernel::run_depthwise(const ITensor *src, const ITensor *weights, const ITensor *biases, + ITensor *dst, const Window &window, bool has_biases) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window); @@ -873,9 +874,9 @@ void CpuDepthwiseConvolutionNativeKernel::run_depthwise(const ITensor *src, cons } } -template <typename T, typename TW, CpuDepthwiseConvolutionNativeKernel::Quantized8bitEnalber<T>> -void CpuDepthwiseConvolutionNativeKernel::run_depthwise(const ITensor *src, const ITensor *weights, const ITensor *biases, - ITensor *dst, const Window &window, bool has_biases) +template <typename T, typename TW, CpuDepthwiseConv2dNativeKernel::Quantized8bitEnalber<T>> +void CpuDepthwiseConv2dNativeKernel::run_depthwise(const ITensor *src, const ITensor *weights, const ITensor *biases, + ITensor *dst, const Window &window, bool has_biases) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window); @@ -900,7 +901,7 @@ void CpuDepthwiseConvolutionNativeKernel::run_depthwise(const ITensor *src, cons } } -void CpuDepthwiseConvolutionNativeKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) +void CpuDepthwiseConv2dNativeKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); diff --git a/src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h b/src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h index 242536d441..559c46dc93 100644 --- a/src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h +++ b/src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CPU_DEPTHWISECONVOLUTIONNATIVEKERNEL_H -#define ARM_COMPUTE_CPU_DEPTHWISECONVOLUTIONNATIVEKERNEL_H +#ifndef ARM_COMPUTE_CPU_DEPTHWISECONV2DNATIVEKERNEL_H +#define ARM_COMPUTE_CPU_DEPTHWISECONV2DNATIVEKERNEL_H #include "arm_compute/core/utils/misc/Traits.h" #include "src/core/common/Macros.h" @@ -40,46 +40,38 @@ namespace cpu namespace kernels { /** Interface for the kernel to run a depthwise convolution native on a tensor. */ -class CpuDepthwiseConvolutionNativeKernel : public ICpuKernel +class CpuDepthwiseConv2dNativeKernel : public ICpuKernel { public: const char *name() const override { - return "CpuDepthwiseConvolutionNativeKernel"; + return "CpuDepthwiseConv2dNativeKernel"; } /** Default constructor */ - CpuDepthwiseConvolutionNativeKernel(); - ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuDepthwiseConvolutionNativeKernel); + CpuDepthwiseConv2dNativeKernel(); + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuDepthwiseConv2dNativeKernel); /** Initialize the function's source, destination and parameters. * * @note Supported data layouts: NHWC * - * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] src Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [IFM, W, H]. 
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. + * Data type supported: Same as @p src or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p src is QASYMM8/QASYMM8_SIGNED. * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[out] output Destination tensor. Data type supported: Same as @p input. + * Data type supported: Same as @p src, S32 when src is QASYMM8/QASYMM8_SIGNED. + * @param[out] dst Destination tensor. Data type supported: Same as @p src. * @param[in] info Depthwise convolution meta-data. * */ - void configure(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info); - /** Static function to check if given info will lead to a valid configuration of @ref CpuDepthwiseConvolutionNativeKernel + void configure(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info); + /** Static function to check if given info will lead to a valid configuration * - * @note Supported data layouts: NHWC - * - * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor info. This is a 3D tensor with dimensions [IFM, W, H]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[in] output Destination tensor info. Data type supported: Same as @p input. - * @param[in] info Depthwise convolution meta-data. + * Similar to CpuDepthwiseConv2dNativeKernel::configure() * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info); + static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info); // Inherited methods overridden: void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; @@ -101,7 +93,7 @@ private: * * @param[in] window Region on which to execute the kernel. 
*/ - using DepthwiseFunctionPtr = void (CpuDepthwiseConvolutionNativeKernel::*)(const ITensor *src, const ITensor *weights, const ITensor *bias, ITensor *dst, const Window &window, bool has_biases); + using DepthwiseFunctionPtr = void (CpuDepthwiseConv2dNativeKernel::*)(const ITensor *src, const ITensor *weights, const ITensor *bias, ITensor *dst, const Window &window, bool has_biases); DepthwiseFunctionPtr _func; PadStrideInfo _conv_info; @@ -114,4 +106,4 @@ private: } // namespace kernels } // namespace cpu } // namespace arm_compute -#endif /* ARM_COMPUTE_CPU_DEPTHWISECONVOLUTIONNATIVEKERNEL_H */ +#endif /* ARM_COMPUTE_CPU_DEPTHWISECONV2DNATIVEKERNEL_H */ diff --git a/src/core/cpu/kernels/CpuDirectConvolutionKernel.cpp b/src/core/cpu/kernels/CpuDirectConv2dKernel.cpp index 4f46eb2bf6..c0fc41525e 100644 --- a/src/core/cpu/kernels/CpuDirectConvolutionKernel.cpp +++ b/src/core/cpu/kernels/CpuDirectConv2dKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "src/core/cpu/kernels/CpuDirectConvolutionKernel.h" +#include "src/core/cpu/kernels/CpuDirectConv2dKernel.h" #include "src/core/NEON/kernels/detail/NEDirectConvolutionDetail.h" #include "src/core/NEON/wrapper/wrapper.h" @@ -995,7 +995,7 @@ bool have_zero_x_internal_padding(ITensorInfo *src, ITensorInfo *weights) } // namespace template <typename T> -void CpuDirectConvolutionKernel::convolve_nhwc_optimized(const Window &window, const ITensor *src, const ITensor *weights, ITensor *dst) +void CpuDirectConv2dKernel::convolve_nhwc_optimized(const Window &window, const ITensor *src, const ITensor *weights, ITensor *dst) { // This function assumes that input and weights have not padding in channel @@ -1116,7 +1116,7 @@ void CpuDirectConvolutionKernel::convolve_nhwc_optimized(const Window &window, c } template <typename T> -void CpuDirectConvolutionKernel::convolve_nhwc(const Window &window, const ITensor *src, const ITensor *weights, ITensor *dst) +void CpuDirectConv2dKernel::convolve_nhwc(const Window &window, const ITensor *src, const ITensor *weights, ITensor *dst) { // Declare useful types using vtype = wrapper::traits::neon_bitvector<T, wrapper::traits::BitWidth::W128>; @@ -1219,12 +1219,12 @@ void CpuDirectConvolutionKernel::convolve_nhwc(const Window &window, const ITens out); } -BorderSize CpuDirectConvolutionKernel::border_size() const +BorderSize CpuDirectConv2dKernel::border_size() const { return _border_size; } -void CpuDirectConvolutionKernel::configure(ITensorInfo *src, ITensorInfo *weights, ITensorInfo *dst, const PadStrideInfo &conv_info) +void CpuDirectConv2dKernel::configure(ITensorInfo *src, ITensorInfo *weights, ITensorInfo *dst, const PadStrideInfo &conv_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst); @@ -1263,7 +1263,7 @@ void CpuDirectConvolutionKernel::configure(ITensorInfo *src, ITensorInfo *weight ICpuKernel::configure(win_config.second); } -Status CpuDirectConvolutionKernel::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const PadStrideInfo &conv_info) +Status CpuDirectConv2dKernel::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const PadStrideInfo &conv_info) { unsigned int num_weight_elems_read_per_row = 0; unsigned int num_elems_read_per_iteration = 0; @@ -1283,7 +1283,7 @@ Status CpuDirectConvolutionKernel::validate(const ITensorInfo *src, const ITenso return Status{}; } -void CpuDirectConvolutionKernel::run_op(ITensorPack &tensors, const Window 
&window, const ThreadInfo &info) +void CpuDirectConv2dKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); @@ -1376,7 +1376,7 @@ void CpuDirectConvolutionKernel::run_op(ITensorPack &tensors, const Window &wind } } } -const char *CpuDirectConvolutionKernel::name() const +const char *CpuDirectConv2dKernel::name() const { return "CpuDirectConvolutionLayerKernel"; } diff --git a/src/core/cpu/kernels/CpuDirectConvolutionKernel.h b/src/core/cpu/kernels/CpuDirectConv2dKernel.h index fb8218394b..62ed96f255 100644 --- a/src/core/cpu/kernels/CpuDirectConvolutionKernel.h +++ b/src/core/cpu/kernels/CpuDirectConv2dKernel.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CPU_DIRECTCONVOLUTION_KERNEL_H -#define ARM_COMPUTE_CPU_DIRECTCONVOLUTION_KERNEL_H +#ifndef ARM_COMPUTE_CPU_DIRECTCONV2D_KERNEL_H +#define ARM_COMPUTE_CPU_DIRECTCONV2D_KERNEL_H #include "src/core/common/Macros.h" #include "src/core/cpu/ICpuKernel.h" @@ -35,13 +35,13 @@ namespace cpu namespace kernels { /** Interface for the kernel to perform Direct Convolution Layer. */ -class CpuDirectConvolutionKernel : public ICpuKernel +class CpuDirectConv2dKernel : public ICpuKernel { public: /** Default constructor */ - CpuDirectConvolutionKernel() = default; - ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuDirectConvolutionKernel); - /** Set the input, weights, and output tensors. + CpuDirectConv2dKernel() = default; + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuDirectConv2dKernel); + /** Set the src, weights, and dst tensors. * * @note: DirectConvolution only works in the following configurations: * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 @@ -57,16 +57,9 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. */ void configure(ITensorInfo *src, ITensorInfo *weights, ITensorInfo *dst, const PadStrideInfo &conv_info); - /** Static function to check if given info will lead to a valid configuration of @ref CpuDirectConvolutionKernel + /** Static function to check if given info will lead to a valid configuration * - * @param[in] src The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * The 3rd dimension must be the same as the input's volume 3rd dimension. - * Data type supported:Same as @p input. - * @param[in] dst Output tensor. - * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: F16/F32 - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
+ * Similar to CpuDirectConv2dKernel::configure() * * @return a status */ @@ -97,4 +90,4 @@ private: } // namespace kernels } // namespace cpu } // namespace arm_compute -#endif /*ARM_COMPUTE_CPU_DIRECTCONVOLUTION_KERNEL_H */ +#endif /*ARM_COMPUTE_CPU_DIRECTCONV2D_KERNEL_H */ diff --git a/src/core/cpu/kernels/CpuDirectConvolutionOutputStageKernel.cpp b/src/core/cpu/kernels/CpuDirectConv2dOutputStageKernel.cpp index 5f7a574e5a..662d052941 100644 --- a/src/core/cpu/kernels/CpuDirectConvolutionOutputStageKernel.cpp +++ b/src/core/cpu/kernels/CpuDirectConv2dOutputStageKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "src/core/cpu/kernels/CpuDirectConvolutionOutputStageKernel.h" +#include "src/core/cpu/kernels/CpuDirectConv2dOutputStageKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" @@ -384,8 +384,8 @@ void output_stage_nhwc(ITensor *src, const ITensor *bias, const Window &window, } } // namespace -void CpuDirectConvolutionOutputStageKernel::configure(ITensorInfo *src, const ITensorInfo *bias, ITensorInfo *dst, - const DirectConvolutionLayerOutputStageKernelInfo &info) +void CpuDirectConv2dOutputStageKernel::configure(ITensorInfo *src, const ITensorInfo *bias, ITensorInfo *dst, + const DirectConvolutionLayerOutputStageKernelInfo &info) { ARM_COMPUTE_UNUSED(bias); // Perform validation step @@ -483,14 +483,14 @@ void CpuDirectConvolutionOutputStageKernel::configure(ITensorInfo *src, const IT } } -Status CpuDirectConvolutionOutputStageKernel::validate(const ITensorInfo *src, const ITensorInfo *bias, const ITensorInfo *dst, - const DirectConvolutionLayerOutputStageKernelInfo &info) +Status CpuDirectConv2dOutputStageKernel::validate(const ITensorInfo *src, const ITensorInfo *bias, const ITensorInfo *dst, + const DirectConvolutionLayerOutputStageKernelInfo &info) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, bias, dst, info)); return Status{}; } -void CpuDirectConvolutionOutputStageKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) +void CpuDirectConv2dOutputStageKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); @@ -504,9 +504,9 @@ void CpuDirectConvolutionOutputStageKernel::run_op(ITensorPack &tensors, const W (*_func)(src, bias, window, dst, _result_fixedpoint_multiplier, _result_shift, _result_offset_after_shift); } -const char *CpuDirectConvolutionOutputStageKernel::name() const +const char *CpuDirectConv2dOutputStageKernel::name() const { - return "CpuDirectConvolutionOutputStageKernel"; + return "CpuDirectConv2dOutputStageKernel"; } } // namespace kernels } // namespace cpu diff --git a/src/core/cpu/kernels/CpuDirectConvolutionOutputStageKernel.h b/src/core/cpu/kernels/CpuDirectConv2dOutputStageKernel.h index 9eeab194cb..62bc5d41c9 100644 --- a/src/core/cpu/kernels/CpuDirectConvolutionOutputStageKernel.h +++ b/src/core/cpu/kernels/CpuDirectConv2dOutputStageKernel.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_CPU_DIRECTCONVOLUTION_OUTPUTSTAGE_KERNEL_H -#define ARM_COMPUTE_CPU_DIRECTCONVOLUTION_OUTPUTSTAGE_KERNEL_H +#ifndef ARM_COMPUTE_CPU_DIRECTCONV2D_OUTPUTSTAGE_KERNEL_H +#define ARM_COMPUTE_CPU_DIRECTCONV2D_OUTPUTSTAGE_KERNEL_H #include "arm_compute/core/KernelDescriptors.h" #include "src/core/common/Macros.h" @@ -41,33 +41,27 @@ namespace kernels * @note For quantized computations (i.e. @p src of S32 type) the output data type for auto-initialization must be passed as part * of the @ref DirectConvolutionLayerOutputStageKernelInfo. */ -class CpuDirectConvolutionOutputStageKernel : public ICpuKernel +class CpuDirectConv2dOutputStageKernel : public ICpuKernel { public: /** Default constructor */ - CpuDirectConvolutionOutputStageKernel() = default; - ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuDirectConvolutionOutputStageKernel); + CpuDirectConv2dOutputStageKernel() = default; + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuDirectConv2dOutputStageKernel); /** Set the accumulate buffer and the biases of the kernel. * - * @param[in, out] src Input to add the bias to. If @p output is not specified then accumulation is done in-place. + * @param[in, out] src Input to add the bias to. If @p dst is not specified then accumulation is done in-place. * Data type supported: F16/F32/S32 * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p src - * @param[out] dst (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr) + * @param[out] dst (Optional) If the dst tensor is specified the accumulation is done out-of-place. (Defaults to nullptr) * Note that in-place computation is only supported for F16/F32. For S32 this must not be nullptr. * Data type supported: F16/F32 or QASYMM8/QASYMM8_SIGNED if @p src is S32 * @param[in] info (Optional) DirectConvolutionLayerOutputStageKernel descriptor metadata */ void configure(ITensorInfo *src, const ITensorInfo *bias = nullptr, ITensorInfo *dst = nullptr, const DirectConvolutionLayerOutputStageKernelInfo &info = DirectConvolutionLayerOutputStageKernelInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref CpuDirectConvolutionOutputStageKernel + /** Static function to check if given info will lead to a valid configuration * - * @param[in] src Input to add the bias to. If @p output is not specified then accumulation is done in-place. - * Data type supported: F16/F32/S32 - * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p src - * @param[in] dst (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr) - * Note that in-place computation is only supported for F16/F32. For S32 this must not be nullptr. 
- * Data type supported: F16/F32 or QASYMM8/QASYMM8_SIGNED if @p src is S32 - * @param[in] info (Optional) DirectConvolutionLayerOutputStageKernel descriptor metadata + * Similar to CpuDirectConv2dOutputStageKernel::configure() * * @return a status */ @@ -90,4 +84,4 @@ private: } // namespace kernels } // namespace cpu } // namespace arm_compute -#endif /*ARM_COMPUTE_CPU_DIRECTCONVOLUTION_OUTPUTSTAGE_KERNEL_H */ +#endif /*ARM_COMPUTE_CPU_DIRECTCONV2D_OUTPUTSTAGE_KERNEL_H */ diff --git a/src/core/cpu/kernels/CpuPoolingKernel.cpp b/src/core/cpu/kernels/CpuPool2dKernel.cpp index a55f60d7ad..e6f5890685 100644 --- a/src/core/cpu/kernels/CpuPoolingKernel.cpp +++ b/src/core/cpu/kernels/CpuPool2dKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "src/core/cpu/kernels/CpuPoolingKernel.h" +#include "src/core/cpu/kernels/CpuPool2dKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" @@ -374,12 +374,12 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *src, ITenso } } // namespace -BorderSize CpuPoolingKernel::border_size() const +BorderSize CpuPool2dKernel::border_size() const { return _border_size; } -void CpuPoolingKernel::configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &pool_info, ITensorInfo *indices) +void CpuPool2dKernel::configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &pool_info, ITensorInfo *indices) { ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); const PadStrideInfo pad_stride_info = pool_info.pad_stride_info; @@ -420,7 +420,7 @@ void CpuPoolingKernel::configure(ITensorInfo *src, ITensorInfo *dst, const Pooli } } -Status CpuPoolingKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &pool_info, const ITensorInfo *indices) +Status CpuPool2dKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &pool_info, const ITensorInfo *indices) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); @@ -446,7 +446,7 @@ Status CpuPoolingKernel::validate(const ITensorInfo *src, const ITensorInfo *dst return Status{}; } -void CpuPoolingKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) +void CpuPool2dKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); @@ -505,9 +505,9 @@ void CpuPoolingKernel::run_op(ITensorPack &tensors, const Window &window, const uk->ukernel(src, dst, indices, _pool_info, window_src, window); } -const char *CpuPoolingKernel::name() const +const char *CpuPool2dKernel::name() const { - return "CpuPoolingKernel"; + return "CpuPool2dKernel"; } } // namespace kernels } // namespace cpu diff --git a/src/core/cpu/kernels/CpuPoolingKernel.h b/src/core/cpu/kernels/CpuPool2dKernel.h index 87d8f67119..95298004e9 100644 --- a/src/core/cpu/kernels/CpuPoolingKernel.h +++ b/src/core/cpu/kernels/CpuPool2dKernel.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_CPU_POOLING_KERNEL_H -#define ARM_COMPUTE_CPU_POOLING_KERNEL_H +#ifndef ARM_COMPUTE_CPU_POOL2D_KERNEL_H +#define ARM_COMPUTE_CPU_POOL2D_KERNEL_H #include "arm_compute/core/Types.h" #include "src/core/common/Macros.h" @@ -35,12 +35,12 @@ namespace cpu namespace kernels { /** Interface for the pooling layer kernel */ -class CpuPoolingKernel : public ICpuKernel +class CpuPool2dKernel : public ICpuKernel { public: /** Default constructor */ - CpuPoolingKernel() = default; - ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuPoolingKernel); + CpuPool2dKernel() = default; + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuPool2dKernel); /** Configure kernel for a given list of arguments * * @note F16 are supported for pool sizes 2 and 3 only @@ -51,14 +51,9 @@ public: * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32. */ void configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &pool_info, ITensorInfo *indices = nullptr); - /** Static function to check if given info will lead to a valid configuration of @ref CpuPoolingKernel + /** Static function to check if given info will lead to a valid configuration * - * @note F16 are supported for pool sizes 2 and 3 only - * - * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] dst Destination tensor info. Data types supported: Same as @p src. - * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. - * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32. + * Similar to CpuPool2dKernel::configure() * * @return a status */ @@ -80,4 +75,4 @@ private: } // namespace kernels } // namespace cpu } // namespace arm_compute -#endif /*ARM_COMPUTE_CPU_POOLING_KERNEL_H */ +#endif /*ARM_COMPUTE_CPU_POOL2D_KERNEL_H */ diff --git a/src/core/cpu/kernels/CpuPoolingAssemblyWrapperKernel.cpp b/src/core/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp index ccf73883f0..c78ffb9848 100644 --- a/src/core/cpu/kernels/CpuPoolingAssemblyWrapperKernel.cpp +++ b/src/core/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "src/core/cpu/kernels/CpuPoolingAssemblyWrapperKernel.h" +#include "src/core/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" @@ -41,7 +41,7 @@ namespace kernels { using namespace arm_compute::misc::shape_calculator; -void CpuPoolingAssemblyWrapperKernel::configure(const ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, const CPUInfo &cpu_info) +void CpuPool2dAssemblyWrapperKernel::configure(const ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, const CPUInfo &cpu_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); @@ -88,7 +88,7 @@ void CpuPoolingAssemblyWrapperKernel::configure(const ITensorInfo *src, ITensorI INEKernel::configure(win); } -Status CpuPoolingAssemblyWrapperKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &info) +Status CpuPool2dAssemblyWrapperKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); @@ -136,7 +136,7 @@ Status CpuPoolingAssemblyWrapperKernel::validate(const ITensorInfo *src, const I return Status{}; } -void CpuPoolingAssemblyWrapperKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) +void CpuPool2dAssemblyWrapperKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_ERROR_ON_NULLPTR(_kernel_asm.get()); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); @@ -170,18 +170,18 @@ void CpuPoolingAssemblyWrapperKernel::run_op(ITensorPack &tensors, const Window working_space, info.thread_id, info.num_threads); } -size_t CpuPoolingAssemblyWrapperKernel::get_working_size(unsigned int num_threads) const +size_t CpuPool2dAssemblyWrapperKernel::get_working_size(unsigned int num_threads) const { return _kernel_asm->get_working_size(num_threads); } -bool CpuPoolingAssemblyWrapperKernel::is_configured() const +bool CpuPool2dAssemblyWrapperKernel::is_configured() const { return _kernel_asm != nullptr; } template <typename Typesrc, typename Typedst> -void CpuPoolingAssemblyWrapperKernel::create_arm_pooling(const ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, const CPUInfo &cpu_info) +void CpuPool2dAssemblyWrapperKernel::create_arm_pooling(const ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, const CPUInfo &cpu_info) { const arm_conv::pooling::PoolingType pool_type = (info.pool_type == PoolingType::AVG) ? arm_conv::pooling::PoolingType::AVERAGE : arm_conv::pooling::PoolingType::MAX; @@ -220,7 +220,7 @@ void CpuPoolingAssemblyWrapperKernel::create_arm_pooling(const ITensorInfo *src, } template <typename Typesrc, typename Typedst> -void CpuPoolingAssemblyWrapperKernel::create_arm_pooling_requant(const ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, const CPUInfo &cpu_info) +void CpuPool2dAssemblyWrapperKernel::create_arm_pooling_requant(const ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, const CPUInfo &cpu_info) { const arm_conv::pooling::PoolingType pool_type = (info.pool_type == PoolingType::AVG) ? 
arm_conv::pooling::PoolingType::AVERAGE : arm_conv::pooling::PoolingType::MAX; diff --git a/src/core/cpu/kernels/CpuPoolingAssemblyWrapperKernel.h b/src/core/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h index 34ec452deb..3afa4c16a4 100644 --- a/src/core/cpu/kernels/CpuPoolingAssemblyWrapperKernel.h +++ b/src/core/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CPU_POOLING_ASSEMBLY_WRAPPER_KERNEL_H -#define ARM_COMPUTE_CPU_POOLING_ASSEMBLY_WRAPPER_KERNEL_H +#ifndef ARM_COMPUTE_CPU_POOL2D_ASSEMBLY_WRAPPER_KERNEL_H +#define ARM_COMPUTE_CPU_POOL2D_ASSEMBLY_WRAPPER_KERNEL_H #include "arm_compute/core/Types.h" #include "src/core/NEON/kernels/assembly/pooling.hpp" @@ -41,23 +41,21 @@ namespace kernels * * Some kernels were written in assembly and highly optimised for specific * CPUs like A53 or A55. The arm compute library creates an instance of - * CpuPoolingAssemblyWrapperKernel and other auxiliary data structures to + * CpuPool2dAssemblyWrapperKernel and other auxiliary data structures to * execute a single assembly kernel in the context of an NEFunction. * */ -class CpuPoolingAssemblyWrapperKernel final : public ICpuKernel +class CpuPool2dAssemblyWrapperKernel final : public ICpuKernel { public: /** Constructor */ - CpuPoolingAssemblyWrapperKernel() = default; - CpuPoolingAssemblyWrapperKernel(CpuPoolingAssemblyWrapperKernel &) = delete; - CpuPoolingAssemblyWrapperKernel(CpuPoolingAssemblyWrapperKernel &&) = default; - CpuPoolingAssemblyWrapperKernel &operator=(CpuPoolingAssemblyWrapperKernel &) = delete; + CpuPool2dAssemblyWrapperKernel() = default; + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuPool2dAssemblyWrapperKernel); const char *name() const override { - return "CpuPoolingAssemblyWrapperKernel"; + return "CpuPool2dAssemblyWrapperKernel"; } /** Initialise the kernel's src and dst. @@ -69,13 +67,11 @@ public: */ void configure(const ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, const CPUInfo &cpu_info); - /** Indicates whether or not this function can be used to process the given parameters. + /** Static function to check if given info will lead to a valid configuration * - * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] dst Destination tensor to store the result of pooling. Data types supported: same as @p src. - * @param[in] info Pooling meta-data + * Similar to CpuPool2dAssemblyWrapperKernel::configure() * - * @return a status. + * @return a status */ static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &info); @@ -120,4 +116,4 @@ private: } // namespace kernels } // namespace cpu } // namespace arm_compute -#endif /* ARM_COMPUTE_CPU_POOLING_ASSEMBLY_WRAPPER_KERNEL_H */ +#endif /* ARM_COMPUTE_CPU_POOL2D_ASSEMBLY_WRAPPER_KERNEL_H */ diff --git a/src/core/gpu/cl/kernels/ClDirectConvolutionKernel.cpp b/src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp index 0a5101f564..2c9a4f301b 100644 --- a/src/core/gpu/cl/kernels/ClDirectConvolutionKernel.cpp +++ b/src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "src/core/gpu/cl/kernels/ClDirectConvolutionKernel.h" +#include "src/core/gpu/cl/kernels/ClDirectConv2dKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" @@ -369,13 +369,13 @@ bool export_to_cl_image_support(ITensorInfo *tensor, GPUTarget gpu_target, DataL } // namespace -BorderSize ClDirectConvolutionKernel::border_size() const +BorderSize ClDirectConv2dKernel::border_size() const { return _border_size; } -void ClDirectConvolutionKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, - const PadStrideInfo &conv_info) +void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, + const PadStrideInfo &conv_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst); @@ -564,8 +564,8 @@ void ClDirectConvolutionKernel::configure(const CLCompileContext &compile_contex _config_id += lower_string(string_from_data_layout(_data_layout)); } -Status ClDirectConvolutionKernel::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info, - const GPUTarget target) +Status ClDirectConv2dKernel::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info, + const GPUTarget target) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, weights, biases, dst, conv_info)); ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(src->clone().get(), weights->clone().get(), dst->clone().get(), conv_info, target).first); @@ -573,7 +573,7 @@ Status ClDirectConvolutionKernel::validate(const ITensorInfo *src, const ITensor return Status{}; } -void ClDirectConvolutionKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) +void ClDirectConv2dKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); diff --git a/src/core/gpu/cl/kernels/ClDirectConvolutionKernel.h b/src/core/gpu/cl/kernels/ClDirectConv2dKernel.h index 384b561003..ec76624e5c 100644 --- a/src/core/gpu/cl/kernels/ClDirectConvolutionKernel.h +++ b/src/core/gpu/cl/kernels/ClDirectConv2dKernel.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CL_DIRECT_CONVOLUTION_KERNEL_H -#define ARM_COMPUTE_CL_DIRECT_CONVOLUTION_KERNEL_H +#ifndef ARM_COMPUTE_CL_DIRECT_CONV2D_KERNEL_H +#define ARM_COMPUTE_CL_DIRECT_CONV2D_KERNEL_H #include "src/core/common/Macros.h" #include "src/core/gpu/cl/ClCompileContext.h" @@ -36,11 +36,11 @@ namespace kernels { /** Interface for the direct convolution kernel. */ -class ClDirectConvolutionKernel : public IClKernel +class ClDirectConv2dKernel : public IClKernel { public: - ClDirectConvolutionKernel() = default; - ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClDirectConvolutionKernel); + ClDirectConv2dKernel() = default; + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClDirectConv2dKernel); /** Set the src, weights, biases and dst tensors info. * * @note: Due to set_valid_region(), thus src/weights/biases cannot be const. Need to change this once the set_valid_region() is removed. @@ -64,19 +64,9 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
*/ void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info); - /** Static function to check if given info will lead to a valid configuration of @ref ClDirectConvolutionKernel + /** Static function to check if given info will lead to a valid configuration * - * @param[in] src The src tensor info to convolve. 3 lower dimensions represent a single src [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * The 3rd dimension must be the same as the src's volume 3rd dimension. - * Data type supported:Same as @p src. - * @param[in] biases Biases tensor info. Biases are 1D tensor with dimension [OFM]. - * Data type supported: Should match @p src data type, except for src of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type. - * @param[in] dst Output tensor info. - * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p src. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] target Target GPU architecture. + * Similar to ClDirectConv2dKernel::configure() * * @return a status */ @@ -94,4 +84,4 @@ public: } // namespace kernels } // namespace opencl } // namespace arm_compute -#endif /*ARM_COMPUTE_CL_DIRECT_CONVOLUTION_KERNEL_H */ +#endif /*ARM_COMPUTE_CL_DIRECT_CONV2D_KERNEL_H */ diff --git a/src/core/gpu/cl/kernels/ClPoolingKernel.cpp b/src/core/gpu/cl/kernels/ClPool2dKernel.cpp index 08a3ce3784..0e15bffd14 100644 --- a/src/core/gpu/cl/kernels/ClPoolingKernel.cpp +++ b/src/core/gpu/cl/kernels/ClPool2dKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "src/core/gpu/cl/kernels/ClPoolingKernel.h" +#include "src/core/gpu/cl/kernels/ClPool2dKernel.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" @@ -202,17 +202,17 @@ std::tuple<Status, Window, ClPoolingConfig> validate_and_configure_window(ITenso } } // namespace -ClPoolingKernel::ClPoolingKernel() +ClPool2dKernel::ClPool2dKernel() : _pool_info(), _data_layout(DataLayout::UNKNOWN), _border_size(0), _num_elems_processed_per_iteration(1) { } -BorderSize ClPoolingKernel::border_size() const +BorderSize ClPool2dKernel::border_size() const { return _border_size; } -void ClPoolingKernel::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &pool_info, ITensorInfo *indices) +void ClPool2dKernel::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &pool_info, ITensorInfo *indices) { ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); @@ -422,7 +422,7 @@ void ClPoolingKernel::configure(const ClCompileContext &compile_context, ITensor ARM_COMPUTE_ERROR_ON(src->data_layout() == DataLayout::NHWC && has_padding_changed(padding_info)); } -Status ClPoolingKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &pool_info, const ITensorInfo *indices) +Status ClPool2dKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &pool_info, const ITensorInfo *indices) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst, pool_info, indices)); ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(src->clone().get(), dst->clone().get(), pool_info))); @@ -430,7 +430,7 @@ Status ClPoolingKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, return Status{}; } -void ClPoolingKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) +void ClPool2dKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); diff --git a/src/core/gpu/cl/kernels/ClPoolingKernel.h b/src/core/gpu/cl/kernels/ClPool2dKernel.h index c1ce859e2c..8ecb8eb7b7 100644 --- a/src/core/gpu/cl/kernels/ClPoolingKernel.h +++ b/src/core/gpu/cl/kernels/ClPool2dKernel.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CL_POOLING_KERNEL_H -#define ARM_COMPUTE_CL_POOLING_KERNEL_H +#ifndef ARM_COMPUTE_CL_POOL2D_KERNEL_H +#define ARM_COMPUTE_CL_POOL2D_KERNEL_H #include "src/core/common/Macros.h" #include "src/core/gpu/cl/ClCompileContext.h" @@ -35,12 +35,12 @@ namespace opencl namespace kernels { /** Interface for the pooling layer kernel */ -class ClPoolingKernel : public IClKernel +class ClPool2dKernel : public IClKernel { public: /** Default constructor */ - ClPoolingKernel(); - ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClPoolingKernel); + ClPool2dKernel(); + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClPool2dKernel); /** Configure kernel for a given list of arguments * @@ -52,12 +52,9 @@ public: * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32. 
*/ void configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &pool_info, ITensorInfo *indices = nullptr); - /** Static function to check if given info will lead to a valid configuration of @ref ClPoolingKernel + /** Static function to check if given info will lead to a valid configuration * - * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] dst Destination tensor info. Data types supported: same as @p src. - * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. - * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32. + * Similar to ClPool2dKernel::configure() * * @return a status */ @@ -76,4 +73,4 @@ public: } // namespace kernels } // namespace opencl } // namespace arm_compute -#endif /*ARM_COMPUTE_CL_POOLING_KERNEL_H */ +#endif /* ARM_COMPUTE_CL_POOL2D_KERNEL_H */ diff --git a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp index 74867ff64f..907e69d8d7 100644 --- a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp @@ -29,17 +29,17 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "src/runtime/gpu/cl/operators/ClActivation.h" -#include "src/runtime/gpu/cl/operators/ClDirectConvolution.h" +#include "src/runtime/gpu/cl/operators/ClDirectConv2d.h" namespace arm_compute { struct CLDirectConvolutionLayer::Impl { - const ICLTensor *src{ nullptr }; - const ICLTensor *weights{ nullptr }; - const ICLTensor *biases{ nullptr }; - ICLTensor *dst{ nullptr }; - std::unique_ptr<opencl::ClDirectConvolution> op{ nullptr }; + const ICLTensor *src{ nullptr }; + const ICLTensor *weights{ nullptr }; + const ICLTensor *biases{ nullptr }; + ICLTensor *dst{ nullptr }; + std::unique_ptr<opencl::ClDirectConv2d> op{ nullptr }; }; CLDirectConvolutionLayer::CLDirectConvolutionLayer() @@ -65,14 +65,14 @@ void CLDirectConvolutionLayer::configure(const CLCompileContext &compile_context _impl->biases = biases; _impl->dst = output; - _impl->op = std::make_unique<opencl::ClDirectConvolution>(); + _impl->op = std::make_unique<opencl::ClDirectConv2d>(); _impl->op->configure(compile_context, input->info(), weights->info(), (biases != nullptr) ? 
biases->info() : nullptr, output->info(), conv_info, act_info); } Status CLDirectConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info) { - return opencl::ClDirectConvolution::validate(input, weights, biases, output, conv_info, act_info); + return opencl::ClDirectConv2d::validate(input, weights, biases, output, conv_info, act_info); } void CLDirectConvolutionLayer::run() diff --git a/src/runtime/CL/functions/CLPoolingLayer.cpp b/src/runtime/CL/functions/CLPoolingLayer.cpp index fbaec1d2d9..7ba911c342 100644 --- a/src/runtime/CL/functions/CLPoolingLayer.cpp +++ b/src/runtime/CL/functions/CLPoolingLayer.cpp @@ -26,16 +26,16 @@ #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" #include "src/core/CL/ICLKernel.h" -#include "src/runtime/gpu/cl/operators/ClPooling.h" +#include "src/runtime/gpu/cl/operators/ClPool2d.h" namespace arm_compute { struct CLPoolingLayer::Impl { - const ICLTensor *src{ nullptr }; - ICLTensor *dst{ nullptr }; - ICLTensor *indices{ nullptr }; - std::unique_ptr<opencl::ClPooling> op{ nullptr }; + const ICLTensor *src{ nullptr }; + ICLTensor *dst{ nullptr }; + ICLTensor *indices{ nullptr }; + std::unique_ptr<opencl::ClPool2d> op{ nullptr }; }; CLPoolingLayer::CLPoolingLayer() @@ -55,13 +55,13 @@ void CLPoolingLayer::configure(const CLCompileContext &compile_context, ICLTenso _impl->dst = output; _impl->indices = indices; - _impl->op = std::make_unique<opencl::ClPooling>(); + _impl->op = std::make_unique<opencl::ClPool2d>(); _impl->op->configure(compile_context, input->info(), output->info(), pool_info, (indices) ? indices->info() : nullptr); } Status CLPoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices) { - return opencl::ClPooling::validate(input, output, pool_info, indices); + return opencl::ClPool2d::validate(input, output, pool_info, indices); } void CLPoolingLayer::run() diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp index da9610ef42..a561b88058 100644 --- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp @@ -27,7 +27,7 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/NEON/NEScheduler.h" -#include "src/runtime/cpu/operators/CpuDepthwiseConvolution.h" +#include "src/runtime/cpu/operators/CpuDepthwiseConv2d.h" using namespace arm_compute::misc; using namespace arm_compute::misc::shape_calculator; @@ -47,15 +47,15 @@ struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal const ITensor *biases { nullptr - }; // SRC_2 - Tensor permuted_input{}; // INT_0 - Tensor permuted_weights{}; // INT_1 - Tensor permuted_output{}; // INT_2 - Tensor workspace{}; // INT_3 - Tensor packed_weights{}; // INT_4 - std::shared_ptr<cpu::CpuDepthwiseConvolution> op{ nullptr }; - bool is_prepared{ false }; - bool permute{ false }; + }; // SRC_2 + Tensor permuted_input{}; // INT_0 + Tensor permuted_weights{}; // INT_1 + Tensor permuted_output{}; // INT_2 + Tensor workspace{}; // INT_3 + Tensor packed_weights{}; // INT_4 + std::shared_ptr<cpu::CpuDepthwiseConv2d> op{ nullptr }; + bool is_prepared{ false }; + bool permute{ false }; }; 
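
Note for reviewers: the Impl struct above now holds the renamed cpu::CpuDepthwiseConv2d operator. As a minimal sketch (not part of this patch) of how a function-level wrapper typically drives such an operator, the snippet below configures on ITensorInfo metadata and runs with an ITensorPack. The helper name run_depthwise_example is hypothetical, and the pack slot mapping (ACL_SRC_0/1/2 for src/weights/biases, ACL_DST_0 for dst) is assumed from the generic path shown later in this diff; the actual functions in this patch manage workspaces and permutes on top of this.

// Sketch only, under the assumptions stated above.
#include "arm_compute/core/Error.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/experimental/Types.h"
#include "src/runtime/cpu/operators/CpuDepthwiseConv2d.h"

#include <memory>

using namespace arm_compute;

void run_depthwise_example(ITensor *src, const ITensor *weights, const ITensor *biases, ITensor *dst, const ConvolutionInfo &info)
{
    // Operators are configured and validated on tensor metadata only.
    ARM_COMPUTE_ERROR_THROW_ON(cpu::CpuDepthwiseConv2d::validate(src->info(), weights->info(),
                                                                 (biases != nullptr) ? biases->info() : nullptr,
                                                                 dst->info(), info));
    auto op = std::make_unique<cpu::CpuDepthwiseConv2d>();
    op->configure(src->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, dst->info(), info);

    // At run time the concrete tensors are handed over through an ITensorPack.
    ITensorPack pack;
    pack.add_tensor(TensorType::ACL_SRC_0, src);
    pack.add_const_tensor(TensorType::ACL_SRC_1, weights);
    pack.add_const_tensor(TensorType::ACL_SRC_2, biases);
    pack.add_tensor(TensorType::ACL_DST_0, dst);
    op->prepare(pack); // one-off weight packing / permutes
    op->run(pack);
}

This mirrors the stateless operator pattern the rename standardises: configure/validate take ITensorInfo, while run/prepare receive the tensors per call.
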
NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::NEDepthwiseConvolutionLayerOptimizedInternal(std::shared_ptr<IMemoryManager> memory_manager) @@ -80,7 +80,7 @@ void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal:: _impl->dst = output; _impl->permute = is_nhwc; - _impl->op = std::make_unique<cpu::CpuDepthwiseConvolution>(); + _impl->op = std::make_unique<cpu::CpuDepthwiseConv2d>(); ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation }; _impl->op->configure(_impl->src->info(), _impl->weights->info(), _impl->biases == nullptr ? nullptr : _impl->biases->info(), _impl->dst->info(), info); @@ -97,7 +97,7 @@ void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal:: } info = ConvolutionInfo{ conv_info, depth_multiplier, act_info_to_use, dilation }; - auto dwc_optimized_func = std::make_unique<cpu::CpuDepthwiseConvolutionAssemblyDispatch>(); + auto dwc_optimized_func = std::make_unique<cpu::CpuDepthwiseConv2dAssemblyDispatch>(); if(is_nhwc) { @@ -154,7 +154,7 @@ Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal const Size2D &dilation) { ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation }; - return cpu::CpuDepthwiseConvolution::validate(input, weights, biases, output, info); + return cpu::CpuDepthwiseConv2d::validate(input, weights, biases, output, info); } void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal::run() @@ -197,17 +197,17 @@ void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal:: struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::Impl { - Tensor permuted_input{}; - Tensor permuted_weights{}; - Tensor permuted_output{}; - bool is_prepared{ false }; - bool is_nchw{ false }; - bool is_activationlayer_enabled{ false }; - const ITensor *weights{ nullptr }; - const ITensor *biases{ nullptr }; - const ITensor *src{ nullptr }; - ITensor *dst{ nullptr }; - std::shared_ptr<cpu::CpuDepthwiseConvolution> op{ nullptr }; + Tensor permuted_input{}; + Tensor permuted_weights{}; + Tensor permuted_output{}; + bool is_prepared{ false }; + bool is_nchw{ false }; + bool is_activationlayer_enabled{ false }; + const ITensor *weights{ nullptr }; + const ITensor *biases{ nullptr }; + const ITensor *src{ nullptr }; + ITensor *dst{ nullptr }; + std::shared_ptr<cpu::CpuDepthwiseConv2d> op{ nullptr }; }; NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::NEDepthwiseConvolutionLayerGeneric() @@ -223,7 +223,7 @@ void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::configure( output->info(), conv_info, depth_multiplier, act_info, dilation)); const ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation }; - _impl->op = std::make_unique<cpu::CpuDepthwiseConvolution>(); + _impl->op = std::make_unique<cpu::CpuDepthwiseConv2d>(); _impl->op->configure(input->info(), weights->info(), biases == nullptr ? nullptr : biases->info(), output->info(), info); _impl->src = input; @@ -253,7 +253,7 @@ void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::configure( output_to_use = &_impl->permuted_output; } - auto depthwise_conv_kernel = std::make_unique<cpu::kernels::CpuDepthwiseConvolutionNativeKernel>(); + auto depthwise_conv_kernel = std::make_unique<cpu::kernels::CpuDepthwiseConv2dNativeKernel>(); depthwise_conv_kernel->configure(input_to_use->info(), weights_to_use->info(), biases == nullptr ? 
nullptr : biases->info(), output_to_use->info(), info); if(_impl->is_nchw) @@ -273,7 +273,7 @@ Status NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::validate unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation) { ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation }; - return cpu::CpuDepthwiseConvolution::validate(input, weights, biases, output, info); + return cpu::CpuDepthwiseConv2d::validate(input, weights, biases, output, info); } void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerGeneric::run() @@ -298,10 +298,10 @@ NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer(std::shared_ptr<IMemory #ifndef DOXYGEN_SKIP_THIS struct NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer::Impl { - DepthwiseConvolutionFunction depth_conv_func{ DepthwiseConvolutionFunction::OPTIMIZED }; - NEDepthwiseConvolutionLayerOptimizedInternal func_optimized{ nullptr }; - NEDepthwiseConvolutionLayerGeneric func_generic{}; - std::shared_ptr<cpu::CpuDepthwiseConvolution> op{ nullptr }; + DepthwiseConvolutionFunction depth_conv_func{ DepthwiseConvolutionFunction::OPTIMIZED }; + NEDepthwiseConvolutionLayerOptimizedInternal func_optimized{ nullptr }; + NEDepthwiseConvolutionLayerGeneric func_generic{}; + std::shared_ptr<cpu::CpuDepthwiseConv2d> op{ nullptr }; }; #endif // DOXYGEN_SKIP_THIS @@ -309,7 +309,7 @@ void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weigh const ActivationLayerInfo &act_info, const Size2D &dilation) { const ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation }; - _impl->op = std::make_shared<cpu::CpuDepthwiseConvolution>(); + _impl->op = std::make_shared<cpu::CpuDepthwiseConv2d>(); _impl->depth_conv_func = _impl->op->get_depthwiseconvolution_function(input->info(), weights->info(), (biases != nullptr) ? 
biases->info() : nullptr, output->info(), info); switch(_impl->depth_conv_func) @@ -329,7 +329,7 @@ Status NEDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITe unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation) { ConvolutionInfo info{ conv_info, depth_multiplier, act_info, dilation }; - return cpu::CpuDepthwiseConvolution::validate(input, weights, biases, output, info); + return cpu::CpuDepthwiseConv2d::validate(input, weights, biases, output, info); } void NEDepthwiseConvolutionLayer::run() diff --git a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp index 73834381c6..58530e4a8f 100644 --- a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp @@ -27,17 +27,17 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" -#include "src/runtime/cpu/operators/CpuDirectConvolution.h" +#include "src/runtime/cpu/operators/CpuDirectConv2d.h" namespace arm_compute { struct NEDirectConvolutionLayer::Impl { - ITensor *src{ nullptr }; - const ITensor *weights{ nullptr }; - const ITensor *bias{ nullptr }; - ITensor *dst{ nullptr }; - std::unique_ptr<cpu::CpuDirectConvolution> op{ nullptr }; + ITensor *src{ nullptr }; + const ITensor *weights{ nullptr }; + const ITensor *bias{ nullptr }; + ITensor *dst{ nullptr }; + std::unique_ptr<cpu::CpuDirectConv2d> op{ nullptr }; }; NEDirectConvolutionLayer::NEDirectConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager) @@ -52,14 +52,14 @@ void NEDirectConvolutionLayer::configure(ITensor *input, const ITensor *weights, _impl->weights = weights; _impl->bias = bias; _impl->dst = output; - _impl->op = std::make_unique<cpu::CpuDirectConvolution>(_memory_manager); + _impl->op = std::make_unique<cpu::CpuDirectConv2d>(_memory_manager); _impl->op->configure(input->info(), weights->info(), (bias != nullptr ? 
bias->info() : nullptr), output->info(), conv_info, act_info); } Status NEDirectConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info) { - return cpu::CpuDirectConvolution::validate(input, weights, bias, output, conv_info, act_info); + return cpu::CpuDirectConv2d::validate(input, weights, bias, output, conv_info, act_info); } void NEDirectConvolutionLayer::run() diff --git a/src/runtime/NEON/functions/NEPoolingLayer.cpp b/src/runtime/NEON/functions/NEPoolingLayer.cpp index 1570cdeedc..bbf3e7cc4e 100644 --- a/src/runtime/NEON/functions/NEPoolingLayer.cpp +++ b/src/runtime/NEON/functions/NEPoolingLayer.cpp @@ -26,17 +26,17 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/Tensor.h" -#include "src/runtime/cpu/operators/CpuPooling.h" +#include "src/runtime/cpu/operators/CpuPool2d.h" namespace arm_compute { struct NEPoolingLayer::Impl { - ITensor *src{ nullptr }; - ITensor *dst{ nullptr }; - ITensor *indices{ nullptr }; - Tensor workspace{ nullptr }; - std::unique_ptr<cpu::CpuPooling> op{ nullptr }; + ITensor *src{ nullptr }; + ITensor *dst{ nullptr }; + ITensor *indices{ nullptr }; + Tensor workspace{ nullptr }; + std::unique_ptr<cpu::CpuPool2d> op{ nullptr }; }; NEPoolingLayer::~NEPoolingLayer() = default; @@ -51,7 +51,7 @@ void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLay _impl->src = input; _impl->dst = output; _impl->indices = indices; - _impl->op = std::make_unique<cpu::CpuPooling>(); + _impl->op = std::make_unique<cpu::CpuPool2d>(); _impl->op->configure(input->info(), output->info(), pool_info, (indices) ? indices->info() : nullptr); // Allocate workspace based on kernel's memory requirements @@ -66,7 +66,7 @@ void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLay Status NEPoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices) { - return cpu::CpuPooling::validate(input, output, pool_info, indices); + return cpu::CpuPool2d::validate(input, output, pool_info, indices); } void NEPoolingLayer::run() diff --git a/src/runtime/cpu/operators/CpuDepthwiseConvolution.cpp b/src/runtime/cpu/operators/CpuDepthwiseConv2d.cpp index 6d097280e0..160a9fd70b 100644 --- a/src/runtime/cpu/operators/CpuDepthwiseConvolution.cpp +++ b/src/runtime/cpu/operators/CpuDepthwiseConv2d.cpp @@ -21,14 +21,14 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "src/runtime/cpu/operators/CpuDepthwiseConvolution.h" +#include "src/runtime/cpu/operators/CpuDepthwiseConv2d.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/InfoHelpers.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" -#include "src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h" +#include "src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h" namespace arm_compute { @@ -36,61 +36,61 @@ namespace cpu { namespace { -Status validate_arguments_optimized(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info) +Status validate_arguments_optimized(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); if(!is_data_type_quantized_per_channel(weights->data_type())) { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, weights); } - ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN); + ARM_COMPUTE_RETURN_ERROR_ON(src->data_layout() == DataLayout::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON(info.dilation.x() < 1 || info.dilation.y() < 1); - const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH); - const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT); - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (info.dilation.x() - 1) > input->dimension(idx_w) + info.pad_stride_info.pad_left() + + const size_t idx_w = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT); + ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (info.dilation.x() - 1) > src->dimension(idx_w) + info.pad_stride_info.pad_left() + info.pad_stride_info.pad_right()); - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (info.dilation.y() - 1) > input->dimension(idx_h) + info.pad_stride_info.pad_top() + + ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (info.dilation.y() - 1) > src->dimension(idx_h) + info.pad_stride_info.pad_top() + info.pad_stride_info.pad_bottom()); if(biases != nullptr) { - const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL); + const unsigned int channel_idx = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::CHANNEL); ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1); ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(channel_idx)); } - ARM_COMPUTE_RETURN_ON_ERROR(CpuDepthwiseConvolutionAssemblyDispatch::validate(input, weights, biases, output, info)); + 
ARM_COMPUTE_RETURN_ON_ERROR(CpuDepthwiseConv2dAssemblyDispatch::validate(src, weights, biases, dst, info)); //Validate Activation Layer if(info.act_info.enabled()) { - ARM_COMPUTE_RETURN_ON_ERROR(CpuActivation::validate(output, nullptr, info.act_info)); + ARM_COMPUTE_RETURN_ON_ERROR(CpuActivation::validate(dst, nullptr, info.act_info)); } return Status{}; } } // namespace -CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::CpuDepthwiseConvolutionOptimizedInternal() +CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::CpuDepthwiseConv2dOptimizedInternal() : _dwc_optimized_func(nullptr), _permute_input(nullptr), _permute_weights(nullptr), _permute_output(nullptr), _activationlayer_function(nullptr), _has_bias(false), _is_quantized(false), _is_nchw(true), _permute(false), _is_activationlayer_enabled(false), _is_prepared(false) { } -void CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::configure(ITensorInfo *input, - const ITensorInfo *weights, - const ITensorInfo *biases, - ITensorInfo *output, - const ConvolutionInfo &info) +void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::configure(ITensorInfo *src, + const ITensorInfo *weights, + const ITensorInfo *biases, + ITensorInfo *dst, + const ConvolutionInfo &info) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); + ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst); // Perform validation step - ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConvolutionOptimizedInternal::validate(input, weights, (biases == nullptr) ? nullptr : biases, - output, info)); + ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConv2dOptimizedInternal::validate(src, weights, (biases == nullptr) ? nullptr : biases, + dst, info)); - _is_quantized = is_data_type_quantized_asymmetric(input->data_type()); + _is_quantized = is_data_type_quantized_asymmetric(src->data_type()); _has_bias = biases != nullptr; - _is_nchw = input->data_layout() == DataLayout::NCHW; + _is_nchw = src->data_layout() == DataLayout::NCHW; _permute = _is_nchw; _is_prepared = false; @@ -105,7 +105,7 @@ void CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::configur act_info_to_use = info.act_info; } - _dwc_optimized_func = std::make_unique<CpuDepthwiseConvolutionAssemblyDispatch>(); + _dwc_optimized_func = std::make_unique<CpuDepthwiseConv2dAssemblyDispatch>(); if(_is_nchw) { _permute_input = std::make_unique<cpu::CpuPermute>(); @@ -117,7 +117,7 @@ void CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::configur auto output_perm = std::make_unique<TensorInfo>(); // Configure the function to transform the input tensor from NCHW -> NHWC - _permute_input->configure(input, input_perm.get(), PermutationVector(2U, 0U, 1U)); + _permute_input->configure(src, input_perm.get(), PermutationVector(2U, 0U, 1U)); input_perm->set_data_layout(DataLayout::NHWC); // Configure the function to transform the weights tensor from IHW -> HWI @@ -125,38 +125,38 @@ void CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::configur weights_perm->set_data_layout(DataLayout::NHWC); output_perm->set_data_layout(DataLayout::NHWC); - output_perm->set_quantization_info(output->quantization_info()); + output_perm->set_quantization_info(dst->quantization_info()); // Configure optimized depthwise _dwc_optimized_func->configure(input_perm.get(), weights_perm.get(), biases, output_perm.get(), info); // Configure the function to transform the convoluted output to ACL's native ordering format NCHW output_perm->set_data_layout(DataLayout::NHWC); - 
_permute_output->configure(output_perm.get(), output, PermutationVector(1U, 2U, 0U)); + _permute_output->configure(output_perm.get(), dst, PermutationVector(1U, 2U, 0U)); } else { - _dwc_optimized_func->configure(input, weights, biases, output, info); + _dwc_optimized_func->configure(src, weights, biases, dst, info); } // Configure activation if(_is_activationlayer_enabled) { _activationlayer_function = std::make_unique<cpu::CpuActivation>(); - _activationlayer_function->configure(output, nullptr, info.act_info); + _activationlayer_function->configure(dst, nullptr, info.act_info); } } -Status CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::validate(const ITensorInfo *input, - const ITensorInfo *weights, - const ITensorInfo *biases, - const ITensorInfo *output, - const ConvolutionInfo &info) +Status CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::validate(const ITensorInfo *src, + const ITensorInfo *weights, + const ITensorInfo *biases, + const ITensorInfo *dst, + const ConvolutionInfo &info) { - return validate_arguments_optimized(input, weights, biases, output, info); + return validate_arguments_optimized(src, weights, biases, dst, info); } -void CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::run(ITensorPack &tensors) +void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::run(ITensorPack &tensors) { ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided"); prepare(tensors); @@ -229,7 +229,7 @@ void CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::run(ITen } } -void CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::prepare(ITensorPack &tensors) +void CpuDepthwiseConv2d::CpuDepthwiseConv2dOptimizedInternal::prepare(ITensorPack &tensors) { if(!_is_prepared) { @@ -272,35 +272,35 @@ void CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::prepare( } } -CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::CpuDepthwiseConvolutionGeneric() +CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::CpuDepthwiseConv2dGeneric() : _depthwise_conv_kernel(nullptr), _permute_input(nullptr), _permute_weights(nullptr), _permute_output(nullptr), _activationlayer_function(nullptr), _is_nchw(true), _is_prepared(false), _is_activationlayer_enabled(false) { } -void CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::configure(ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info) +void CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::configure(ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConvolution::validate(input, weights, (biases == nullptr) ? nullptr : biases, - output, info)); + ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst); + ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConv2d::validate(src, weights, (biases == nullptr) ? 
nullptr : biases, + dst, info)); - _is_nchw = input->data_layout() == DataLayout::NCHW; + _is_nchw = src->data_layout() == DataLayout::NCHW; _is_prepared = !_is_nchw; - ITensorInfo *input_to_use = input; + ITensorInfo *input_to_use = src; const ITensorInfo *weights_to_use = weights; - ITensorInfo *output_to_use = output; + ITensorInfo *output_to_use = dst; auto input_perm = std::make_unique<TensorInfo>(); auto weights_perm = std::make_unique<TensorInfo>(); - auto output_perm = std::make_unique<TensorInfo>(output->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(TensorShape())); + auto output_perm = std::make_unique<TensorInfo>(dst->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(TensorShape())); if(_is_nchw) { _permute_input = std::make_unique<cpu::CpuPermute>(); _permute_weights = std::make_unique<cpu::CpuPermute>(); - _permute_input->configure(input, input_perm.get(), PermutationVector(2U, 0U, 1U)); + _permute_input->configure(src, input_perm.get(), PermutationVector(2U, 0U, 1U)); input_perm->set_data_layout(DataLayout::NHWC); input_to_use = input_perm.get(); @@ -311,13 +311,13 @@ void CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::configure(ITensorI output_to_use = output_perm.get(); } - _depthwise_conv_kernel = std::make_unique<cpu::kernels::CpuDepthwiseConvolutionNativeKernel>(); + _depthwise_conv_kernel = std::make_unique<cpu::kernels::CpuDepthwiseConv2dNativeKernel>(); _depthwise_conv_kernel->configure(input_to_use, weights_to_use, biases, output_to_use, info); if(_is_nchw) { _permute_output = std::make_unique<cpu::CpuPermute>(); - _permute_output->configure(output_perm.get(), output, PermutationVector(1U, 2U, 0U)); + _permute_output->configure(output_perm.get(), dst, PermutationVector(1U, 2U, 0U)); output_perm->set_data_layout(DataLayout::NHWC); } @@ -326,48 +326,48 @@ void CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::configure(ITensorI if(_is_activationlayer_enabled) { _activationlayer_function = std::make_unique<cpu::CpuActivation>(); - _activationlayer_function->configure(output, nullptr, info.act_info); + _activationlayer_function->configure(dst, nullptr, info.act_info); } } -Status CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, - const ConvolutionInfo &info) +Status CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, + const ConvolutionInfo &info) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); - if(input->data_layout() == DataLayout::NCHW) + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst); + if(src->data_layout() == DataLayout::NCHW) { - TensorShape permuted_input_shape = input->tensor_shape(); + TensorShape permuted_input_shape = src->tensor_shape(); TensorShape permuted_weights_shape = weights->tensor_shape(); - TensorShape permuted_output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, info); + TensorShape permuted_output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *weights, info); permute(permuted_input_shape, PermutationVector(2U, 0U, 1U)); permute(permuted_weights_shape, PermutationVector(2U, 0U, 1U)); permute(permuted_output_shape, PermutationVector(2U, 0U, 1U)); - const TensorInfo permuted_input = 
TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NHWC)); + const TensorInfo permuted_input = TensorInfo(src->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NHWC)); const TensorInfo permuted_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(DataLayout::NHWC)); - const TensorInfo permuted_output = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_output_shape).set_data_layout(DataLayout::NCHW)); + const TensorInfo permuted_output = TensorInfo(dst->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_output_shape).set_data_layout(DataLayout::NCHW)); - ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(input, &permuted_input, PermutationVector(2U, 0U, 1U))); + ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(src, &permuted_input, PermutationVector(2U, 0U, 1U))); ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(weights, &permuted_weights, PermutationVector(2U, 0U, 1U))); - ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(&permuted_output, output, PermutationVector(1U, 2U, 0U))); + ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(&permuted_output, dst, PermutationVector(1U, 2U, 0U))); - ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuDepthwiseConvolutionNativeKernel::validate(&permuted_input, &permuted_weights, biases, &permuted_output, info)); + ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuDepthwiseConv2dNativeKernel::validate(&permuted_input, &permuted_weights, biases, &permuted_output, info)); } else { - ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuDepthwiseConvolutionNativeKernel::validate(input, weights, biases, output, info)); + ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuDepthwiseConv2dNativeKernel::validate(src, weights, biases, dst, info)); } // Validate Activation Layer if(info.act_info.enabled()) { - ARM_COMPUTE_RETURN_ON_ERROR(CpuActivation::validate(output, nullptr, info.act_info)); + ARM_COMPUTE_RETURN_ON_ERROR(CpuActivation::validate(dst, nullptr, info.act_info)); } return Status{}; } -void CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::run(ITensorPack &tensors) +void CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::run(ITensorPack &tensors) { auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0); auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1); @@ -421,7 +421,7 @@ void CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::run(ITensorPack &t } } -void CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::prepare(ITensorPack &tensors) +void CpuDepthwiseConv2d::CpuDepthwiseConv2dGeneric::prepare(ITensorPack &tensors) { if(!_is_prepared) { @@ -440,47 +440,47 @@ void CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::prepare(ITensorPac } } -CpuDepthwiseConvolution::CpuDepthwiseConvolution() +CpuDepthwiseConv2d::CpuDepthwiseConv2d() : _depth_conv_func(DepthwiseConvolutionFunction::GENERIC), _func_optimized(), _func_generic() { } -void CpuDepthwiseConvolution::configure(ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info) +void CpuDepthwiseConv2d::configure(ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info) { - _depth_conv_func = get_depthwiseconvolution_function(input, weights, (biases != nullptr) ? 
biases : nullptr, output, info); + _depth_conv_func = get_depthwiseconvolution_function(src, weights, (biases != nullptr) ? biases : nullptr, dst, info); switch(_depth_conv_func) { case DepthwiseConvolutionFunction::OPTIMIZED: - _func_optimized.configure(input, weights, biases, output, info); + _func_optimized.configure(src, weights, biases, dst, info); break; case DepthwiseConvolutionFunction::GENERIC: - _func_generic.configure(input, weights, biases, output, info); + _func_generic.configure(src, weights, biases, dst, info); break; default: ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction"); } } -Status CpuDepthwiseConvolution::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info) +Status CpuDepthwiseConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info) { - DepthwiseConvolutionFunction depth_conv_func = get_depthwiseconvolution_function(input, weights, biases, output, info); + DepthwiseConvolutionFunction depth_conv_func = get_depthwiseconvolution_function(src, weights, biases, dst, info); switch(depth_conv_func) { case DepthwiseConvolutionFunction::OPTIMIZED: - return CpuDepthwiseConvolutionOptimizedInternal::validate(input, weights, biases, output, info); + return CpuDepthwiseConv2dOptimizedInternal::validate(src, weights, biases, dst, info); break; case DepthwiseConvolutionFunction::GENERIC: - return CpuDepthwiseConvolutionGeneric::validate(input, weights, biases, output, info); + return CpuDepthwiseConv2dGeneric::validate(src, weights, biases, dst, info); break; default: ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction"); } } -DepthwiseConvolutionFunction CpuDepthwiseConvolution::get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, - const ConvolutionInfo &info) +DepthwiseConvolutionFunction CpuDepthwiseConv2d::get_depthwiseconvolution_function(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, + const ConvolutionInfo &info) { - if(bool(CpuDepthwiseConvolutionOptimizedInternal::validate(input, weights, biases, output, info))) + if(bool(CpuDepthwiseConv2dOptimizedInternal::validate(src, weights, biases, dst, info))) { return DepthwiseConvolutionFunction::OPTIMIZED; } @@ -490,7 +490,7 @@ DepthwiseConvolutionFunction CpuDepthwiseConvolution::get_depthwiseconvolution_f } } -void CpuDepthwiseConvolution::run(ITensorPack &tensors) +void CpuDepthwiseConv2d::run(ITensorPack &tensors) { switch(_depth_conv_func) { @@ -505,7 +505,7 @@ void CpuDepthwiseConvolution::run(ITensorPack &tensors) } } -void CpuDepthwiseConvolution::prepare(ITensorPack &tensors) +void CpuDepthwiseConv2d::prepare(ITensorPack &tensors) { switch(_depth_conv_func) { diff --git a/src/runtime/cpu/operators/CpuDepthwiseConv2d.h b/src/runtime/cpu/operators/CpuDepthwiseConv2d.h new file mode 100644 index 0000000000..049397fe60 --- /dev/null +++ b/src/runtime/cpu/operators/CpuDepthwiseConv2d.h @@ -0,0 +1,213 @@ +/* + * Copyright (c) 2021 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_CPU_DEPTHWISECONV2D_H +#define ARM_COMPUTE_CPU_DEPTHWISECONV2D_H + +#include "arm_compute/core/ITensorInfo.h" +#include "arm_compute/core/experimental/Types.h" +#include "src/core/cpu/ICpuKernel.h" +#include "src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h" +#include "src/runtime/cpu/ICpuOperator.h" +#include "src/runtime/cpu/operators/CpuActivation.h" +#include "src/runtime/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.h" +#include "src/runtime/cpu/operators/CpuPermute.h" + +#include <memory> + +namespace arm_compute +{ +namespace cpu +{ +/** Function to execute a depthwise convolution. + */ +class CpuDepthwiseConv2d : public ICpuOperator +{ +public: + /** Default constructor */ + CpuDepthwiseConv2d(); + /** Initialize the function's source, destination, weights and convolution information. + * + * @param[in, out] src Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[out] dst Destination tensor info. Data type supported: same as @p src. + * @param[in] weights Weights tensor info. These are 3D tensor infos with shape [kernel_x, kernel_y, IFM]. + * Data type supported: Same as @p src or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p src is QASYMM8/QASYMM8_SIGNED. + * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p src, S32 when src is QASYMM8/QASYMM8_SIGNED. + * @param[in] info Depthwise convolution meta-data. + */ + void configure(ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info); + + /** Static function to check if given info will lead to a valid configuration + * + * Similar to CpuDepthwiseConv2d::configure() + * + * @return a status + */ + static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info); + + /** Static function to choose the best depthwise convolution function for @ref CpuDepthwiseConv2d + * + * @param[in] src Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32 + * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. + * Data type supported: Same as @p src or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p src is QASYMM8/QASYMM8_SIGNED. + * @param[in] biases Biases tensor info. 
A 1D tensor with shape [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p src, S32 when src is QASYMM8/QASYMM8_SIGNED. + * @param[in] dst Destination tensor. Data type supported: same as @p src. + * @param[in] info Depthwise convolution meta-data. + * + * @return a Depthwise Convolution Function + */ + static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, + const ConvolutionInfo &info); + + // Inherited methods overriden: + void run(ITensorPack &tensors) override; + void prepare(ITensorPack &tensors) override; + +private: + /** Basic function to execute optimized depthwise convolution routines. This function calls the following kernels: + * + * @note At the moment 3x3 and 5x5 convolution of stride 1, 2 are supported + * + * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) and no assembly kernel implementation is present + * -# @ref CpuDepthwiseConv2d3x3Kernel if 3x3 and no assembly kernel implementation is present + * -# @ref NEDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present + * -# @ref NEDirectConvolutionLayerOutputStageKernel if re-quantization of dst is required + * -# @ref NEActivationLayer if fused activation is required + * + */ + class CpuDepthwiseConv2dOptimizedInternal : public ICpuOperator + { + public: + /** Default constructor */ + CpuDepthwiseConv2dOptimizedInternal(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CpuDepthwiseConv2dOptimizedInternal(const CpuDepthwiseConv2dOptimizedInternal &) = delete; + /** Default move constructor */ + CpuDepthwiseConv2dOptimizedInternal(CpuDepthwiseConv2dOptimizedInternal &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CpuDepthwiseConv2dOptimizedInternal &operator=(const CpuDepthwiseConv2dOptimizedInternal &) = delete; + /** Default move assignment operator */ + CpuDepthwiseConv2dOptimizedInternal &operator=(CpuDepthwiseConv2dOptimizedInternal &&) = default; + /** Default destructor */ + ~CpuDepthwiseConv2dOptimizedInternal() = default; + /** Initialize the function's source, destination, kernels and border_size. + * + * @param[in, out] src Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling). + * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p src. + * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p src, S32 when src is QASYMM8/QASYMM8_SIGNED. + * @param[out] dst Destination tensor info. Data type supported: same as @p src. + * @param[in] info Depthwise convolution meta-data. 
+ */ + void configure(ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info); + + /** Static function to check if given info will lead to a valid configuration + * + * Similar to CpuDepthwiseConv2dOptimizedInternal::configure() + * + * @return a status + */ + static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info); + + // Inherited methods overriden: + void run(ITensorPack &tensors) override; + void prepare(ITensorPack &tensors) override; + + private: + std::unique_ptr<CpuDepthwiseConv2dAssemblyDispatch> _dwc_optimized_func{ nullptr }; + std::unique_ptr<CpuPermute> _permute_input{ nullptr }; + std::unique_ptr<CpuPermute> _permute_weights{ nullptr }; + std::unique_ptr<CpuPermute> _permute_output{ nullptr }; + std::unique_ptr<CpuActivation> _activationlayer_function{ nullptr }; + bool _has_bias{ false }; + bool _is_quantized{ false }; + bool _is_nchw{ true }; + bool _permute{ false }; + bool _is_activationlayer_enabled{ false }; + bool _is_prepared{ false }; + }; + + /** Basic function to execute a generic depthwise convolution. This function calls the following kernel: + * + * -# @ref CpuDepthwiseConv2dNativeKernel + * + */ + class CpuDepthwiseConv2dGeneric : public ICpuOperator + { + public: + /** Default constructor */ + CpuDepthwiseConv2dGeneric(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CpuDepthwiseConv2dGeneric(const CpuDepthwiseConv2dGeneric &) = delete; + /** Default move constructor */ + CpuDepthwiseConv2dGeneric(CpuDepthwiseConv2dGeneric &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CpuDepthwiseConv2dGeneric &operator=(const CpuDepthwiseConv2dGeneric &) = delete; + /** Default move assignment operator */ + CpuDepthwiseConv2dGeneric &operator=(CpuDepthwiseConv2dGeneric &&) = default; + /** Default destructor */ + ~CpuDepthwiseConv2dGeneric() = default; + /** Initialize the function's source, destination, weights and convolution information. + * + * @param[in, out] src Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling). + * @param[out] dst Destination tensor info. Data type supported: same as @p src. + * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. + * Data type supported: Same as @p src or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p src is QASYMM8/QASYMM8_SIGNED. + * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p src, S32 when src is QASYMM8/QASYMM8_SIGNED. + * @param[in] info Depthwise convolution meta-data. 
+ */ + void configure(ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info); + + /** Static function to check if given info will lead to a valid configuration + * + * Similar to CpuDepthwiseConv2dGeneric::configure() + * + * @return a status + */ + static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info); + + // Inherited methods overridden: + void run(ITensorPack &tensors) override; + void prepare(ITensorPack &tensors) override; + + private: + std::unique_ptr<kernels::CpuDepthwiseConv2dNativeKernel> _depthwise_conv_kernel{ nullptr }; + std::unique_ptr<CpuPermute> _permute_input{ nullptr }; + std::unique_ptr<CpuPermute> _permute_weights{ nullptr }; + std::unique_ptr<CpuPermute> _permute_output{ nullptr }; + std::unique_ptr<CpuActivation> _activationlayer_function{ nullptr }; + bool _is_nchw{ true }; + bool _is_prepared{ false }; + bool _is_activationlayer_enabled{ false }; + }; + + DepthwiseConvolutionFunction _depth_conv_func; + CpuDepthwiseConv2dOptimizedInternal _func_optimized; + CpuDepthwiseConv2dGeneric _func_generic; +}; +} // namespace cpu +} // namespace arm_compute +#endif /* ARM_COMPUTE_CPU_DEPTHWISECONV2D_H */ diff --git a/src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.cpp b/src/runtime/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp index 039714abb1..a36ee1d45b 100644 --- a/src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.cpp +++ b/src/runtime/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp @@ -22,7 +22,7 @@ * SOFTWARE. */ -#include "src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.h" +#include "src/runtime/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Utils.h" @@ -211,13 +211,13 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> get_fp32_convolver(int kernel_ } } -std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensorInfo *input, +std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensorInfo *src, const ITensorInfo *weights, ITensorInfo *output, const ConvolutionInfo &info) { - const DataType data_type = input->data_type(); - const TensorShape shape = input->tensor_shape(); + const DataType data_type = src->data_type(); + const TensorShape shape = src->tensor_shape(); const int n_batches = shape[3]; const int in_rows = shape.z(); @@ -249,7 +249,7 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensor // Create quantized convolver if(is_uniform_quantized) { - const UniformQuantizationInfo input_qinfo = input->quantization_info().uniform(); + const UniformQuantizationInfo input_qinfo = src->quantization_info().uniform(); const UniformQuantizationInfo weights_qinfo = weights->quantization_info().uniform(); const UniformQuantizationInfo output_qinfo = output->quantization_info().uniform(); @@ -273,7 +273,7 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> create_convolver(const ITensor } else if(is_perchannel_quantized) { - const UniformQuantizationInfo input_qinfo = input->quantization_info().uniform(); + const UniformQuantizationInfo input_qinfo = src->quantization_info().uniform(); const QuantizationInfo weights_qinfo = weights->quantization_info(); const UniformQuantizationInfo output_qinfo = output->quantization_info().uniform(); @@ -327,7 +327,7 @@ std::unique_ptr<depthwise::IDepthwiseConvolution> 
create_convolver(const ITensor } } // namespace -struct CpuDepthwiseConvolutionAssemblyDispatch::LocalImpl +struct CpuDepthwiseConv2dAssemblyDispatch::LocalImpl { std::unique_ptr<depthwise::IDepthwiseConvolution> dwc_assembly_kernel{ nullptr }; NEDepthwiseConvolutionAssemblyKernelWrapper dwc_acl_kernel{}; @@ -336,36 +336,36 @@ struct CpuDepthwiseConvolutionAssemblyDispatch::LocalImpl }; #ifndef DOXYGEN_SKIP_THIS -CpuDepthwiseConvolutionAssemblyDispatch::CpuDepthwiseConvolutionAssemblyDispatch() +CpuDepthwiseConv2dAssemblyDispatch::CpuDepthwiseConv2dAssemblyDispatch() : _pImpl(std::make_unique<LocalImpl>()) { } #endif /* DOXYGEN_SKIP_THIS */ -CpuDepthwiseConvolutionAssemblyDispatch::~CpuDepthwiseConvolutionAssemblyDispatch() = default; +CpuDepthwiseConv2dAssemblyDispatch::~CpuDepthwiseConv2dAssemblyDispatch() = default; -void CpuDepthwiseConvolutionAssemblyDispatch::configure(const ITensorInfo *input, - const ITensorInfo *weights, - const ITensorInfo *bias, - ITensorInfo *output, - const ConvolutionInfo &info) +void CpuDepthwiseConv2dAssemblyDispatch::configure(const ITensorInfo *src, + const ITensorInfo *weights, + const ITensorInfo *bias, + ITensorInfo *dst, + const ConvolutionInfo &info) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); + ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst); ARM_COMPUTE_UNUSED(bias); - ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConvolutionAssemblyDispatch::validate(input, - weights, - bias != nullptr ? bias : nullptr, - output, - info)); + ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConv2dAssemblyDispatch::validate(src, + weights, + bias != nullptr ? bias : nullptr, + dst, + info)); // Output auto inizialitation if not yet initialized - const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, info); - auto_init_if_empty(*output, input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape).set_quantization_info(output->quantization_info())); + const TensorShape dst_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *weights, info); + auto_init_if_empty(*dst, src->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(dst_shape).set_quantization_info(dst->quantization_info())); _pImpl->is_prepared = false; // Create convolver - _pImpl->dwc_assembly_kernel = create_convolver(input, weights, output, info); + _pImpl->dwc_assembly_kernel = create_convolver(src, weights, dst, info); ARM_COMPUTE_ERROR_ON(_pImpl->dwc_assembly_kernel == nullptr); // Create assembly kernel wrapper @@ -386,27 +386,27 @@ void CpuDepthwiseConvolutionAssemblyDispatch::configure(const ITensorInfo *i _pImpl->mem_req.push_back({ TensorType::ACL_INT_1, pack_tensor_size, alignment }); } -experimental::MemoryRequirements CpuDepthwiseConvolutionAssemblyDispatch::workspace() const +experimental::MemoryRequirements CpuDepthwiseConv2dAssemblyDispatch::workspace() const { return _pImpl->mem_req; } -Status CpuDepthwiseConvolutionAssemblyDispatch::validate(const ITensorInfo *input, - const ITensorInfo *weights, - const ITensorInfo *bias, - const ITensorInfo *output, - const ConvolutionInfo &info) +Status CpuDepthwiseConv2dAssemblyDispatch::validate(const ITensorInfo *src, + const ITensorInfo *weights, + const ITensorInfo *bias, + const ITensorInfo *dst, + const ConvolutionInfo &info) { - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); + 
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::F16, DataType::F32); if(weights->data_type() != DataType::QSYMM8_PER_CHANNEL) { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, weights); } - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, weights); // Validate convolver - ARM_COMPUTE_RETURN_ERROR_ON(!is_optimized_supported(input, weights, info)); + ARM_COMPUTE_RETURN_ERROR_ON(!is_optimized_supported(src, weights, info)); // Validate activation const bool is_relu = arm_compute::utils::info_helpers::is_relu(info.act_info); @@ -416,50 +416,50 @@ Status CpuDepthwiseConvolutionAssemblyDispatch::validate(const ITensorInfo * // Check bias if(bias != nullptr) { - unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL); + unsigned int channel_idx = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::CHANNEL); ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() > 1); ARM_COMPUTE_RETURN_ERROR_ON(bias->dimension(0) != weights->dimension(channel_idx)); } // Check output - if(output->total_size() != 0) + if(dst->total_size() != 0) { - const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, info); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + const TensorShape dst_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *weights, info); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(), dst_shape); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst); } // The uniform quantization case will only have 1 scale value in the weights quantization info - const UniformQuantizationInfo input_qinfo = input->quantization_info().uniform(); + const UniformQuantizationInfo src_qinfo = src->quantization_info().uniform(); const QuantizationInfo weights_qinfo = weights->quantization_info(); - const UniformQuantizationInfo output_qinfo = output->quantization_info().uniform(); + const UniformQuantizationInfo dst_qinfo = dst->quantization_info().uniform(); for(auto const s : weights_qinfo.scale()) { - const float fmultipler = input_qinfo.scale * s / output_qinfo.scale; + const float fmultipler = src_qinfo.scale * s / dst_qinfo.scale; ARM_COMPUTE_RETURN_ERROR_ON(fmultipler > 1.f); } return Status{}; } -bool CpuDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(const ITensorInfo *input, - const ITensorInfo *weights, - const ConvolutionInfo &info) +bool CpuDepthwiseConv2dAssemblyDispatch::is_optimized_supported(const ITensorInfo *src, + const ITensorInfo *weights, + const ConvolutionInfo &info) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights); + ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights); // Reshape input shape if in NHWC format - const DataLayout data_layout = input->data_layout(); - TensorShape in_shape{ input->tensor_shape() }; + const DataLayout data_layout = src->data_layout(); + TensorShape in_shape{ src->tensor_shape() }; if(data_layout == DataLayout::NHWC) { - in_shape.set(Window::DimX, input->tensor_shape().y()); - in_shape.set(Window::DimY, input->tensor_shape().z()); - in_shape.set(Window::DimZ, input->tensor_shape().x()); + in_shape.set(Window::DimX, 
src->tensor_shape().y()); + in_shape.set(Window::DimY, src->tensor_shape().z()); + in_shape.set(Window::DimZ, src->tensor_shape().x()); } // Check data type - const DataType input_type = input->data_type(); + const DataType input_type = src->data_type(); const bool is_input_type_valid = is_data_type_float(input_type) || input_type == DataType::QASYMM8; const DataType weights_type = weights->data_type(); const bool is_weights_type_valid = is_data_type_float(weights_type) || weights_type == DataType::QASYMM8 || weights_type == DataType::QASYMM8_SIGNED @@ -497,7 +497,7 @@ bool CpuDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(const ITens return is_input_type_valid && is_weights_type_valid && weights_supported && supported_strides && supported_padding && (info.depth_multiplier == 1) && is_dilation_supported; } -void CpuDepthwiseConvolutionAssemblyDispatch::run(ITensorPack &tensors) +void CpuDepthwiseConv2dAssemblyDispatch::run(ITensorPack &tensors) { // Prepare assembly kernel prepare(tensors); @@ -530,7 +530,7 @@ void CpuDepthwiseConvolutionAssemblyDispatch::run(ITensorPack &tensors) NEScheduler::get().schedule(&_pImpl->dwc_acl_kernel, Window::DimX); } -void CpuDepthwiseConvolutionAssemblyDispatch::prepare(ITensorPack &tensors) +void CpuDepthwiseConv2dAssemblyDispatch::prepare(ITensorPack &tensors) { if(!_pImpl->is_prepared) { diff --git a/src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.h b/src/runtime/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.h index 6aac74c3ef..195942b7fd 100644 --- a/src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.h +++ b/src/runtime/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.h @@ -21,9 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CPU_DEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H -#define ARM_COMPUTE_CPU_DEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H +#ifndef ARM_COMPUTE_CPU_DEPTHWISECONV2DASSEMBLYDISPATCH_H +#define ARM_COMPUTE_CPU_DEPTHWISECONV2DASSEMBLYDISPATCH_H +#include "src/core/common/Macros.h" #include "src/runtime/cpu/ICpuOperator.h" namespace arm_compute @@ -31,57 +32,45 @@ namespace arm_compute namespace cpu { /** Depthwise convolution assembly kernel glue */ -class CpuDepthwiseConvolutionAssemblyDispatch : public ICpuOperator +class CpuDepthwiseConv2dAssemblyDispatch : public ICpuOperator { public: - CpuDepthwiseConvolutionAssemblyDispatch(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CpuDepthwiseConvolutionAssemblyDispatch(const CpuDepthwiseConvolutionAssemblyDispatch &) = delete; - /** Default move constructor */ - CpuDepthwiseConvolutionAssemblyDispatch(CpuDepthwiseConvolutionAssemblyDispatch &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CpuDepthwiseConvolutionAssemblyDispatch &operator=(const CpuDepthwiseConvolutionAssemblyDispatch &) = delete; - /** Default move assignment operator */ - CpuDepthwiseConvolutionAssemblyDispatch &operator=(CpuDepthwiseConvolutionAssemblyDispatch &&) = default; + /** Default constructor */ + CpuDepthwiseConv2dAssemblyDispatch(); + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuDepthwiseConv2dAssemblyDispatch); /** Default destructor */ - ~CpuDepthwiseConvolutionAssemblyDispatch(); + ~CpuDepthwiseConv2dAssemblyDispatch(); + /** Initialize the function's source, destination, kernels and border_size. * * @note Supports only NHWC format * - * @param[in] input Source tensor info. 
Data type supported: QASYMM8/F16/F32. (Written to only for border filling). - * @param[in] weights Weights tensor info. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input. + * @param[in] src Source tensor info. Data type supported: QASYMM8/F16/F32. (Written to only for border filling). + * @param[in] weights Weights tensor info. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p src. * @param[in] bias (Optional) Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[out] output Destination tensor info. Data type supported: same as @p input. + * Data type supported: Same as @p src. + * @param[out] dst Destination tensor info. Data type supported: same as @p src. * @param[in] info Depthwise convolution meta-data. */ - void configure(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, ITensorInfo *output, const ConvolutionInfo &info); - /** Static function to check if given info will lead to a valid configuration of @ref CpuDepthwiseConvolutionAssemblyDispatch - * - * @note Supports only NHWC format + void configure(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *bias, ITensorInfo *dst, const ConvolutionInfo &info); + /** Static function to check if given info will lead to a valid configuration * - * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. (Written to only for border filling). - * @param[in] weights Weights tensor info. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input. - * @param[in] bias (Optional) Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[out] output Destination tensor info. Data type supported: same as @p input. - * @param[in] info Depthwise convolution meta-data. + * Similar to CpuDepthwiseConv2dAssemblyDispatch::configure() * - * @return An error status + * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const ConvolutionInfo &info); + static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *dst, const ConvolutionInfo &info); /** Check if the optimized kernel can be used for the given kernel sizes and strides * * @warning Even if this return true the inputs and outputs might need to get permuted as the only layout supported is NHWC * - * @param[in] input Input tensor info. + * @param[in] src Input tensor info. * @param[in] weights Weights tensor info. * @param[in] info Depthwise convolution meta-data. * * @return True if the assembly kernel could be used else false. Note that transformations of input/output could be needed. 
*/ - static bool is_optimized_supported(const ITensorInfo *input, const ITensorInfo *weights, const ConvolutionInfo &info); + static bool is_optimized_supported(const ITensorInfo *src, const ITensorInfo *weights, const ConvolutionInfo &info); // Inherited methods overridden: void run(ITensorPack &tensors) override; @@ -94,4 +83,4 @@ private: }; } // namespace cpu } // namespace arm_compute -#endif /* ARM_COMPUTE_CPU_DEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H */ +#endif /* ARM_COMPUTE_CPU_DEPTHWISECONV2DASSEMBLYDISPATCH_H */ diff --git a/src/runtime/cpu/operators/CpuDepthwiseConvolution.h b/src/runtime/cpu/operators/CpuDepthwiseConvolution.h deleted file mode 100644 index e39cb7db4d..0000000000 --- a/src/runtime/cpu/operators/CpuDepthwiseConvolution.h +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CPU_DEQUANTIZATION_H -#define ARM_COMPUTE_CPU_DEQUANTIZATION_H - -#include "arm_compute/core/ITensorInfo.h" -#include "arm_compute/core/experimental/Types.h" -#include "src/core/cpu/ICpuKernel.h" -#include "src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h" -#include "src/runtime/cpu/ICpuOperator.h" -#include "src/runtime/cpu/operators/CpuActivation.h" -#include "src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.h" -#include "src/runtime/cpu/operators/CpuPermute.h" - -#include <memory> - -namespace arm_compute -{ -namespace cpu -{ -/** Function to execute a depthwise convolution. - */ -class CpuDepthwiseConvolution : public ICpuOperator -{ -public: - /** Default constructor */ - CpuDepthwiseConvolution(); - /** Initialize the function's source, destination, weights and convolution information. - * - * @param[in, out] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[out] output Destination tensor info. Data type supported: same as @p input. - * @param[in] weights Weights tensor info. These are 3D tensor infos with shape [kernel_x, kernel_y, IFM]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[in] info Depthwise convolution meta-data. 
- */ - void configure(ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info); - - /** Static function to check if given info will lead to a valid configuration of @ref CpuDepthwiseConvolution - * - * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] output Destination tensor info. Data type supported: same as @p input. - * @param[in] weights Weights tensor info. These are 3D tensors info with shape [kernel_x, kernel_y, IFM]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[in] info Depthwise convolution meta-data. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info); - - /** Static function to choose the best depthwise convolution function for @ref CpuDepthwiseConvolution - * - * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[in] output Destination tensor. Data type supported: same as @p input. - * @param[in] info Depthwise convolution meta-data. - * - * @return a Depthwise Convolution Function - */ - static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, - const ConvolutionInfo &info); - - // Inherited methods overriden: - void run(ITensorPack &tensors) override; - void prepare(ITensorPack &tensors) override; - -private: - /** Basic function to execute optimized depthwise convolution routines. 
This function calls the following kernels: - * - * @note At the moment 3x3 and 5x5 convolution of stride 1, 2 are supported - * - * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) and no assembly kernel implementation is present - * -# @ref CpuDepthwiseConvolution3x3Kernel if 3x3 and no assembly kernel implementation is present - * -# @ref NEDepthwiseConvolutionAssemblyDispatch if assembly kernel implementation is present - * -# @ref NEDirectConvolutionLayerOutputStageKernel if re-quantization of output is required - * -# @ref NEActivationLayer if fused activation is required - * - */ - class CpuDepthwiseConvolutionOptimizedInternal : public ICpuOperator - { - public: - /** Default constructor */ - CpuDepthwiseConvolutionOptimizedInternal(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CpuDepthwiseConvolutionOptimizedInternal(const CpuDepthwiseConvolutionOptimizedInternal &) = delete; - /** Default move constructor */ - CpuDepthwiseConvolutionOptimizedInternal(CpuDepthwiseConvolutionOptimizedInternal &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CpuDepthwiseConvolutionOptimizedInternal &operator=(const CpuDepthwiseConvolutionOptimizedInternal &) = delete; - /** Default move assignment operator */ - CpuDepthwiseConvolutionOptimizedInternal &operator=(CpuDepthwiseConvolutionOptimizedInternal &&) = default; - /** Default destructor */ - ~CpuDepthwiseConvolutionOptimizedInternal() = default; - /** Initialize the function's source, destination, kernels and border_size. - * - * @param[in, out] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling). - * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input. - * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[out] output Destination tensor info. Data type supported: same as @p input. - * @param[in] info Depthwise convolution meta-data. - */ - void configure(ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info); - - /** Static function to check if given info will lead to a valid configuration of @ref CpuDepthwiseConvolution3x3 - * - * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling). - * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input. - * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[in] output Destination tensor info. Data type supported: same as @p input. - * @param[in] info Depthwise convolution meta-data. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info); - - // Inherited methods overriden: - void run(ITensorPack &tensors) override; - void prepare(ITensorPack &tensors) override; - - private: - std::unique_ptr<CpuDepthwiseConvolutionAssemblyDispatch> _dwc_optimized_func{ nullptr }; - std::unique_ptr<CpuPermute> _permute_input{ nullptr }; - std::unique_ptr<CpuPermute> _permute_weights{ nullptr }; - std::unique_ptr<CpuPermute> _permute_output{ nullptr }; - std::unique_ptr<CpuActivation> _activationlayer_function{ nullptr }; - bool _has_bias{ false }; - bool _is_quantized{ false }; - bool _is_nchw{ true }; - bool _permute{ false }; - bool _is_activationlayer_enabled{ false }; - bool _is_prepared{ false }; - }; - - /** Basic function to execute a generic depthwise convolution. This function calls the following kernel: - * - * -# @ref CpuDepthwiseConvolutionNativeKernel - * - */ - class CpuDepthwiseConvolutionGeneric : public ICpuOperator - { - public: - /** Default constructor */ - CpuDepthwiseConvolutionGeneric(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CpuDepthwiseConvolutionGeneric(const CpuDepthwiseConvolutionGeneric &) = delete; - /** Default move constructor */ - CpuDepthwiseConvolutionGeneric(CpuDepthwiseConvolutionGeneric &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CpuDepthwiseConvolutionGeneric &operator=(const CpuDepthwiseConvolutionGeneric &) = delete; - /** Default move assignment operator */ - CpuDepthwiseConvolutionGeneric &operator=(CpuDepthwiseConvolutionGeneric &&) = default; - /** Default destructor */ - ~CpuDepthwiseConvolutionGeneric() = default; - /** Initialize the function's source, destination, weights and convolution information. - * - * @param[in, out] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling). - * @param[out] output Destination tensor info. Data type supported: same as @p input. - * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[in] info Depthwise convolution meta-data. - */ - void configure(ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info); - - /** Static function to check if given info will lead to a valid configuration of @ref CpuDepthwiseConvolutionGeneric - * - * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling). - * @param[in] output Destination tensor info. Data type supported: same as @p input. - * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. 
- * @param[in] info Depthwise convolution meta-data. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info); - - // Inherited methods overridden: - void run(ITensorPack &tensors) override; - void prepare(ITensorPack &tensors) override; - - private: - std::unique_ptr<kernels::CpuDepthwiseConvolutionNativeKernel> _depthwise_conv_kernel{ nullptr }; - std::unique_ptr<CpuPermute> _permute_input{ nullptr }; - std::unique_ptr<CpuPermute> _permute_weights{ nullptr }; - std::unique_ptr<CpuPermute> _permute_output{ nullptr }; - std::unique_ptr<CpuActivation> _activationlayer_function{ nullptr }; - bool _is_nchw{ true }; - bool _is_prepared{ false }; - bool _is_activationlayer_enabled{ false }; - }; - - DepthwiseConvolutionFunction _depth_conv_func; - CpuDepthwiseConvolutionOptimizedInternal _func_optimized; - CpuDepthwiseConvolutionGeneric _func_generic; -}; -} // namespace cpu -} // namespace arm_compute -#endif /* ARM_COMPUTE_CPU_DEQUANTIZATION_H */ diff --git a/src/runtime/cpu/operators/CpuDirectConvolution.cpp b/src/runtime/cpu/operators/CpuDirectConv2d.cpp index 33f79603e8..8812b777a3 100644 --- a/src/runtime/cpu/operators/CpuDirectConvolution.cpp +++ b/src/runtime/cpu/operators/CpuDirectConv2d.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "src/runtime/cpu/operators/CpuDirectConvolution.h" +#include "src/runtime/cpu/operators/CpuDirectConv2d.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/Utils.h" @@ -32,19 +32,19 @@ namespace arm_compute { namespace cpu { -CpuDirectConvolution::~CpuDirectConvolution() = default; +CpuDirectConv2d::~CpuDirectConv2d() = default; -CpuDirectConvolution::CpuDirectConvolution(std::shared_ptr<IMemoryManager> memory_manager) +CpuDirectConv2d::CpuDirectConv2d(std::shared_ptr<IMemoryManager> memory_manager) : _memory_group(std::move(memory_manager)), _output_stage_kernel(), _conv_kernel(), _input_border_handler(), _activationlayer_function(), _accumulator(), _has_bias(false), _is_activationlayer_enabled(false), _dim_split(Window::DimZ), _is_padding_required() { } -void CpuDirectConvolution::configure(ITensorInfo *src, ITensorInfo *weights, const ITensorInfo *bias, ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info) +void CpuDirectConv2d::configure(ITensorInfo *src, ITensorInfo *weights, const ITensorInfo *bias, ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info) { ARM_COMPUTE_ERROR_ON(src->data_layout() == DataLayout::UNKNOWN); - _output_stage_kernel = std::make_unique<kernels::CpuDirectConvolutionOutputStageKernel>(); - _conv_kernel = std::make_unique<kernels::CpuDirectConvolutionKernel>(); + _output_stage_kernel = std::make_unique<kernels::CpuDirectConv2dOutputStageKernel>(); + _conv_kernel = std::make_unique<kernels::CpuDirectConv2dKernel>(); _input_border_handler = std::make_unique<NEFillBorderKernel>(); // Free accumulator @@ -80,8 +80,8 @@ void CpuDirectConvolution::configure(ITensorInfo *src, ITensorInfo *weights, con } } -Status CpuDirectConvolution::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *dst, const PadStrideInfo &conv_info, - const ActivationLayerInfo &act_info) +Status CpuDirectConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo 
*dst, const PadStrideInfo &conv_info, + const ActivationLayerInfo &act_info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst); @@ -90,7 +90,7 @@ Status CpuDirectConvolution::validate(const ITensorInfo *src, const ITensorInfo TensorInfo accumulator(dst->clone()->set_is_resizable(true).reset_padding().set_data_type(data_type)); // Validate Convolution kernel - ARM_COMPUTE_RETURN_ON_ERROR(kernels::CpuDirectConvolutionKernel::validate(src, weights, &accumulator, conv_info)); + ARM_COMPUTE_RETURN_ON_ERROR(kernels::CpuDirectConv2dKernel::validate(src, weights, &accumulator, conv_info)); if(bias != nullptr) { @@ -101,7 +101,7 @@ Status CpuDirectConvolution::validate(const ITensorInfo *src, const ITensorInfo } // Validate bias kernel - ARM_COMPUTE_RETURN_ON_ERROR(kernels::CpuDirectConvolutionOutputStageKernel::validate(&accumulator, bias, dst)); + ARM_COMPUTE_RETURN_ON_ERROR(kernels::CpuDirectConv2dOutputStageKernel::validate(&accumulator, bias, dst)); if(act_info.enabled()) { @@ -111,7 +111,7 @@ Status CpuDirectConvolution::validate(const ITensorInfo *src, const ITensorInfo return Status{}; } -void CpuDirectConvolution::run(ITensorPack &tensors) +void CpuDirectConv2d::run(ITensorPack &tensors) { MemoryGroupResourceScope scope_mg(_memory_group); diff --git a/src/runtime/cpu/operators/CpuDirectConvolution.h b/src/runtime/cpu/operators/CpuDirectConv2d.h index 0635e087fd..9e584b9c49 100644 --- a/src/runtime/cpu/operators/CpuDirectConvolution.h +++ b/src/runtime/cpu/operators/CpuDirectConv2d.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CPU_DIRECTCONVOLUTION_H -#define ARM_COMPUTE_CPU_DIRECTCONVOLUTION_H +#ifndef ARM_COMPUTE_CPU_DIRECTCONV2D_H +#define ARM_COMPUTE_CPU_DIRECTCONV2D_H #include "arm_compute/core/ITensorInfo.h" #include "arm_compute/core/Types.h" @@ -33,8 +33,8 @@ #include "arm_compute/runtime/Tensor.h" #include "src/core/NEON/kernels/NEFillBorderKernel.h" #include "src/core/cpu/ICpuKernel.h" -#include "src/core/cpu/kernels/CpuDirectConvolutionKernel.h" -#include "src/core/cpu/kernels/CpuDirectConvolutionOutputStageKernel.h" +#include "src/core/cpu/kernels/CpuDirectConv2dKernel.h" +#include "src/core/cpu/kernels/CpuDirectConv2dOutputStageKernel.h" #include "src/runtime/cpu/ICpuOperator.h" #include "src/runtime/cpu/operators/CpuActivation.h" @@ -49,16 +49,16 @@ namespace cpu * This function calls the following kernels: * * -# @ref NEFillBorderKernel for the input - * -# @ref kernels::CpuDirectConvolutionOutputStageKernel - * -# @ref kernels::CpuDirectConvolutionKernel + * -# @ref kernels::CpuDirectConv2dOutputStageKernel + * -# @ref kernels::CpuDirectConv2dKernel */ -class CpuDirectConvolution : public ICpuOperator +class CpuDirectConv2d : public ICpuOperator { public: /** Constructor */ - CpuDirectConvolution(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + CpuDirectConv2d(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Destructor */ - ~CpuDirectConvolution(); + ~CpuDirectConv2d(); /** Set the input, weights, biases and output tensors. * * @note: DirectConvolution only works in the following configurations: @@ -78,23 +78,9 @@ public: * @param[in] act_info (Optional) Activation layer information in case of a fused activation. 
*/ void configure(ITensorInfo *src, ITensorInfo *weights, const ITensorInfo *bias, ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayer + /** Static function to check if given info will lead to a valid configuration * - * @note: DirectConvolution only works in the following configurations: - * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F16/F32 - * 3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F16/F32 - * 5x5 convolution with stride_x = 1/2/3, stride_y = 1/2/3 data type = F32 - * - * @param[in] src Input tensor info. Data types supported: F16/F32. - * @param[in] weights Set of kernels to convolve the input volume. - * Supported sizes: 1x1, 3x3 and 5x5. - * The 3rd dimension must be the same as the input's volume 3rd dimension. - * Data type supported: Same as @p src. - * @param[in] bias Set of biases. Can be nullptr. Data type supported: Same as @p src. - * @param[in] dst Output tensor info. - * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * Similar to CpuDirectConv2d::configure() * * @return a status */ @@ -105,17 +91,17 @@ public: void run(ITensorPack &tensors) override; private: - MemoryGroup _memory_group; - std::unique_ptr<kernels::CpuDirectConvolutionOutputStageKernel> _output_stage_kernel; - std::unique_ptr<kernels::CpuDirectConvolutionKernel> _conv_kernel; - std::unique_ptr<NEFillBorderKernel> _input_border_handler; - std::unique_ptr<CpuActivation> _activationlayer_function; - Tensor _accumulator; - bool _has_bias{ false }; - bool _is_activationlayer_enabled{ false }; - unsigned int _dim_split{ 0 }; - bool _is_padding_required{ false }; + MemoryGroup _memory_group; + std::unique_ptr<kernels::CpuDirectConv2dOutputStageKernel> _output_stage_kernel; + std::unique_ptr<kernels::CpuDirectConv2dKernel> _conv_kernel; + std::unique_ptr<NEFillBorderKernel> _input_border_handler; + std::unique_ptr<CpuActivation> _activationlayer_function; + Tensor _accumulator; + bool _has_bias{ false }; + bool _is_activationlayer_enabled{ false }; + unsigned int _dim_split{ 0 }; + bool _is_padding_required{ false }; }; } // namespace cpu } // namespace arm_compute -#endif /* ARM_COMPUTE_CPU_DIRECTCONVOLUTION_H */ +#endif /* ARM_COMPUTE_CPU_DIRECTCONV2D_H */ diff --git a/src/runtime/cpu/operators/CpuPooling.cpp b/src/runtime/cpu/operators/CpuPool2d.cpp index 3a6ac24a74..b225199c40 100644 --- a/src/runtime/cpu/operators/CpuPooling.cpp +++ b/src/runtime/cpu/operators/CpuPool2d.cpp @@ -21,20 +21,20 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "src/runtime/cpu/operators/CpuPooling.h" +#include "src/runtime/cpu/operators/CpuPool2d.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "src/core/NEON/kernels/NEFillBorderKernel.h" -#include "src/core/cpu/kernels/CpuPoolingAssemblyWrapperKernel.h" -#include "src/core/cpu/kernels/CpuPoolingKernel.h" +#include "src/core/cpu/kernels/CpuPool2dKernel.h" +#include "src/core/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h" namespace arm_compute { namespace cpu { -CpuPooling::CpuPooling() +CpuPool2d::CpuPool2d() : _pooling_layer_kernel(), _border_handler(), _asm_glue(), @@ -44,12 +44,12 @@ CpuPooling::CpuPooling() { } -CpuPooling::~CpuPooling() = default; +CpuPool2d::~CpuPool2d() = default; -void CpuPooling::configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &pool_info, ITensorInfo *indices) +void CpuPool2d::configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &pool_info, ITensorInfo *indices) { // Check if we can run assembly kernels. Currently, indices are not supported by those kernels - const bool run_optimised = bool(kernels::CpuPoolingAssemblyWrapperKernel::validate(src, dst, pool_info)) && (indices == nullptr); + const bool run_optimised = bool(kernels::CpuPool2dAssemblyWrapperKernel::validate(src, dst, pool_info)) && (indices == nullptr); // Get data layout _data_layout = pool_info.data_layout == DataLayout::UNKNOWN ? src->data_layout() : pool_info.data_layout; @@ -64,7 +64,7 @@ void CpuPooling::configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLaye const CPUInfo &ci = NEScheduler::get().cpu_info(); const unsigned int num_threads = NEScheduler::get().num_threads(); - auto pooling_wrapper = std::make_unique<kernels::CpuPoolingAssemblyWrapperKernel>(); + auto pooling_wrapper = std::make_unique<kernels::CpuPool2dAssemblyWrapperKernel>(); ARM_COMPUTE_ERROR_ON(pooling_wrapper == nullptr); pooling_wrapper->configure(src, dst, pool_info, ci); @@ -78,7 +78,7 @@ void CpuPooling::configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLaye else { // Configure pooling kernel - auto k = std::make_unique<kernels::CpuPoolingKernel>(); + auto k = std::make_unique<kernels::CpuPool2dKernel>(); k->configure(src, dst, pool_info, indices); _pooling_layer_kernel = std::move(k); @@ -106,19 +106,19 @@ void CpuPooling::configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLaye } } -Status CpuPooling::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &pool_info, const ITensorInfo *indices) +Status CpuPool2d::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &pool_info, const ITensorInfo *indices) { - const bool run_optimised = bool(kernels::CpuPoolingAssemblyWrapperKernel::validate(src, dst, pool_info)) && (indices == nullptr); + const bool run_optimised = bool(kernels::CpuPool2dAssemblyWrapperKernel::validate(src, dst, pool_info)) && (indices == nullptr); if(run_optimised) { return Status{}; } - return kernels::CpuPoolingKernel::validate(src, dst, pool_info, indices); + return kernels::CpuPool2dKernel::validate(src, dst, pool_info, indices); } -void CpuPooling::run(ITensorPack &tensors) +void CpuPool2d::run(ITensorPack &tensors) { ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No tensors provided"); @@ -148,7 +148,7 @@ void CpuPooling::run(ITensorPack &tensors) } } -experimental::MemoryRequirements CpuPooling::workspace() const +experimental::MemoryRequirements CpuPool2d::workspace() const { return 
_mem_req; } diff --git a/src/runtime/cpu/operators/CpuPooling.h b/src/runtime/cpu/operators/CpuPool2d.h index bc30adf762..ae3d115dfc 100644 --- a/src/runtime/cpu/operators/CpuPooling.h +++ b/src/runtime/cpu/operators/CpuPool2d.h @@ -21,12 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CPU_POOLING_H -#define ARM_COMPUTE_CPU_POOLING_H - -#include "src/runtime/cpu/ICpuOperator.h" +#ifndef ARM_COMPUTE_CPU_POOL2D_H +#define ARM_COMPUTE_CPU_POOL2D_H #include "arm_compute/core/experimental/Types.h" +#include "src/core/common/Macros.h" +#include "src/runtime/cpu/ICpuOperator.h" #include <memory> @@ -40,24 +40,17 @@ namespace cpu /** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following kernels: * * -# @ref NEFillBorderKernel (executed if padding size is different from zero) - * -# @ref kernels::CpuPoolingKernel - * -# @ref kernels::CpuPoolingAssemblyWrapperKernel + * -# @ref kernels::CpuPool2dKernel + * -# @ref kernels::CpuPool2dAssemblyWrapperKernel */ -class CpuPooling : public ICpuOperator +class CpuPool2d : public ICpuOperator { public: /** Constructor */ - CpuPooling(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CpuPooling(const CpuPooling &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CpuPooling &operator=(const CpuPooling &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - CpuPooling(CpuPooling &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - CpuPooling &operator=(CpuPooling &&) = delete; + CpuPool2d(); + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuPool2d); /** Default destructor */ - ~CpuPooling(); + ~CpuPool2d(); /** Set the src and dst tensors. * * @note F16 is supported for pool sizes 2 and 3 only @@ -68,14 +61,9 @@ public: * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32. */ void configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &pool_info, ITensorInfo *indices = nullptr); - /** Static function to check if given info will lead to a valid configuration of @ref CpuPooling - * - * @note F16 is supported for pool sizes 2 and 3 only + /** Static function to check if given info will lead to a valid configuration * - * @param[in] src Source tensor info. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] dst Destination tensor info. Data types supported: same as @p src. - * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. - * @param[in] indices (optional) Tensor info of the indices of the maximal values. Data type supported: U32. + * Similar to CpuPool2d::configure() * * @return a status */ @@ -96,4 +84,4 @@ private: }; } // namespace cpu } // namespace arm_compute -#endif /* ARM_COMPUTE_CPU_POOLING_H */ +#endif /* ARM_COMPUTE_CPU_POOL2D_H */ diff --git a/src/runtime/gpu/cl/operators/ClDirectConvolution.cpp b/src/runtime/gpu/cl/operators/ClDirectConv2d.cpp index 3382a6c3c5..527b3a65f9 100644 --- a/src/runtime/gpu/cl/operators/ClDirectConvolution.cpp +++ b/src/runtime/gpu/cl/operators/ClDirectConv2d.cpp @@ -21,13 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#include "src/runtime/gpu/cl/operators/ClDirectConvolution.h" +#include "src/runtime/gpu/cl/operators/ClDirectConv2d.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "src/core/CL/kernels/CLFillBorderKernel.h" #include "src/core/gpu/cl/ClCompileContext.h" #include "src/core/gpu/cl/kernels/ClActivationKernel.h" -#include "src/core/gpu/cl/kernels/ClDirectConvolutionKernel.h" +#include "src/core/gpu/cl/kernels/ClDirectConv2dKernel.h" namespace arm_compute { @@ -44,11 +44,11 @@ ITensorPack select_activation_src_dst(ITensorPack &tensors) } } // namespace -void ClDirectConvolution::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, - const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info) +void ClDirectConv2d::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, + const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info) { // Configure direct convolution kernel - auto k = std::make_unique<kernels::ClDirectConvolutionKernel>(); + auto k = std::make_unique<kernels::ClDirectConv2dKernel>(); k->set_target(CLScheduler::get().target()); k->configure(compile_context, src, weights, biases, dst, conv_info); _direct_conv_kernel = std::move(k); @@ -74,10 +74,10 @@ void ClDirectConvolution::configure(const CLCompileContext &compile_context, ITe CLScheduler::get().tune_kernel_static(*_direct_conv_kernel); } -Status ClDirectConvolution::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, - const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info) +Status ClDirectConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, + const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info) { - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClDirectConvolutionKernel::validate(src, weights, biases, dst, conv_info, CLScheduler::get().target())); + ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClDirectConv2dKernel::validate(src, weights, biases, dst, conv_info, CLScheduler::get().target())); if(act_info.enabled()) { ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClActivationKernel::validate(dst, dst, act_info)); @@ -85,7 +85,7 @@ Status ClDirectConvolution::validate(const ITensorInfo *src, const ITensorInfo * return Status{}; } -void ClDirectConvolution::run(ITensorPack &tensors) +void ClDirectConv2d::run(ITensorPack &tensors) { // Run border handler CLScheduler::get().enqueue_op(*_src_border_handler.get(), tensors, false); diff --git a/src/runtime/gpu/cl/operators/ClDirectConvolution.h b/src/runtime/gpu/cl/operators/ClDirectConv2d.h index e7ad927b0b..e069733fab 100644 --- a/src/runtime/gpu/cl/operators/ClDirectConvolution.h +++ b/src/runtime/gpu/cl/operators/ClDirectConv2d.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CL_DIRECT_CONVOLUTION_H -#define ARM_COMPUTE_CL_DIRECT_CONVOLUTION_H +#ifndef ARM_COMPUTE_CL_DIRECT_CONV2D_H +#define ARM_COMPUTE_CL_DIRECT_CONV2D_H #include "src/core/gpu/cl/ClCompileContext.h" #include "src/core/gpu/cl/IClKernel.h" @@ -37,13 +37,13 @@ namespace opencl /** Basic function to simulate a directly convolution layer. 
This function calls the following OpenCL kernels: * * -# @ref CLFillBorderKernel (executed if padding size is different from zero) - * -# @ref opencl::ClDirectConvolution + * -# @ref opencl::ClDirectConv2d */ -class ClDirectConvolution : public IClOperator +class ClDirectConv2d : public IClOperator { public: /** Constructor */ - ClDirectConvolution() = default; + ClDirectConv2d() = default; /** Set the src and dst tensors. * * @param[in] compile_context The compile context to be used. @@ -61,18 +61,9 @@ public: */ void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref ClDirectConvolution + /** Static function to check if given info will lead to a valid configuration * - * @param[in] src Source tensor. 3 lower dimensions represent a single src [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of srcs. - * Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p src. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p src data type, except for src of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type. - * @param[in] dst Destination tensor. 3 lower dimensions represent a single dst [width, height, OFM], while the rest represent batch of dsts. - * Data types supported: Same as @p src. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * Similar to ClDirectConv2d::configure() * * @return a status */ @@ -89,4 +80,4 @@ private: }; } // namespace opencl } // namespace arm_compute -#endif /* ARM_COMPUTE_CL_DIRECT_CONVOLUTION_H */
\ No newline at end of file
+#endif /* ARM_COMPUTE_CL_DIRECT_CONV2D_H */
\ No newline at end of file diff --git a/src/runtime/gpu/cl/operators/ClPooling.cpp b/src/runtime/gpu/cl/operators/ClPool2d.cpp index 8610eb9842..40c2b0a8ba 100644 --- a/src/runtime/gpu/cl/operators/ClPooling.cpp +++ b/src/runtime/gpu/cl/operators/ClPool2d.cpp @@ -21,23 +21,23 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "src/runtime/gpu/cl/operators/ClPooling.h" +#include "src/runtime/gpu/cl/operators/ClPool2d.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "src/core/CL/kernels/CLFillBorderKernel.h" #include "src/core/gpu/cl/ClCompileContext.h" -#include "src/core/gpu/cl/kernels/ClPoolingKernel.h" +#include "src/core/gpu/cl/kernels/ClPool2dKernel.h" namespace arm_compute { namespace opencl { -void ClPooling::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, ITensorInfo *indices) +void ClPool2d::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, ITensorInfo *indices) { ARM_COMPUTE_ERROR_ON_NULLPTR(src); // Configure pooling kernel - auto k = std::make_unique<kernels::ClPoolingKernel>(); + auto k = std::make_unique<kernels::ClPool2dKernel>(); k->set_target(CLScheduler::get().target()); k->configure(compile_context, src, dst, info, indices); _pooling = std::move(k); @@ -85,12 +85,12 @@ void ClPooling::configure(const ClCompileContext &compile_context, ITensorInfo * CLScheduler::get().tune_kernel_static(*_pooling); } -Status ClPooling::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &info, const ITensorInfo *indices) +Status ClPool2d::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &info, const ITensorInfo *indices) { - return kernels::ClPoolingKernel::validate(src, dst, info, indices); + return kernels::ClPool2dKernel::validate(src, dst, info, indices); } -void ClPooling::run(ITensorPack &tensors) +void ClPool2d::run(ITensorPack &tensors) { ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided"); diff --git a/src/runtime/gpu/cl/operators/ClPooling.h b/src/runtime/gpu/cl/operators/ClPool2d.h index 99de6d0dcf..8ac386a64b 100644 --- a/src/runtime/gpu/cl/operators/ClPooling.h +++ b/src/runtime/gpu/cl/operators/ClPool2d.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CL_POOLING_H -#define ARM_COMPUTE_CL_POOLING_H +#ifndef ARM_COMPUTE_CL_POOL2D_H +#define ARM_COMPUTE_CL_POOL2D_H #include "src/core/gpu/cl/ClCompileContext.h" #include "src/runtime/gpu/cl/IClOperator.h" @@ -36,13 +36,13 @@ namespace opencl /** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following OpenCL kernels: * * -# @ref CLFillBorderKernel (executed if padding size is different from zero) - * -# @ref opencl::ClPooling + * -# @ref opencl::ClPool2d */ -class ClPooling : public IClOperator +class ClPool2d : public IClOperator { public: /** Constructor */ - ClPooling() = default; + ClPool2d() = default; /** Configure operator for a given list of arguments * * @param[in] compile_context The compile context to be used. @@ -52,12 +52,9 @@ public: * @param[out] indices (optional) The indices info of the maximal values. Data type supported: U32. 
*/ void configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, ITensorInfo *indices = nullptr); - /** Static function to check if given info will lead to a valid configuration of @ref ClPooling + /** Static function to check if given info will lead to a valid configuration * - * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] dst Destination tensor info. Data type supported: same as @p src - * @param[in] info Pooling layer parameters. - * @param[out] indices (optional) The indices info of the maximal values. Data type supported: U32. + * Similar to ClPool2d::configure() * * @return a status */ @@ -72,4 +69,4 @@ private: }; } // namespace opencl } // namespace arm_compute -#endif /* ARM_COMPUTE_CL_POOLING_H */ +#endif /* ARM_COMPUTE_CL_POOL2D_H */
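Usage sketch (not part of this patch): the renames above land on operators that already follow the stateless, ITensorInfo-based API, where configure() and validate() see only tensor descriptors and the backing memory, plus any scratch buffers reported by workspace(), is supplied to run() through an ITensorPack. The C++ sketch below shows that flow for the renamed cpu::CpuPool2d, roughly the way the updated runtime functions (e.g. NEPoolingLayer) drive it. The shape, pooling parameters and the simplified workspace allocation are illustrative assumptions; the MemoryInfo fields (slot, size, alignment) follow the aggregate initialisation visible earlier in this diff.

// Illustrative sketch only, not code from this change. It mirrors how a caller such as
// NEPoolingLayer might drive the stateless cpu::CpuPool2d operator introduced by the
// rename above. Shapes and pooling parameters are assumptions for the example.
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/Tensor.h"
#include "src/runtime/cpu/operators/CpuPool2d.h"

#include <memory>
#include <vector>

using namespace arm_compute;

void pool2d_example()
{
    // Describe a small NHWC F32 input; the destination info is left empty and is
    // auto-initialised by configure().
    TensorInfo src_info(TensorShape(32U, 32U, 16U), 1, DataType::F32);
    src_info.set_data_layout(DataLayout::NHWC);
    TensorInfo dst_info{};

    const PoolingLayerInfo pool_info(PoolingType::MAX, 2, DataLayout::NHWC);

    // Stateless operator: configuration works purely on tensor metadata.
    cpu::CpuPool2d pool;
    pool.configure(&src_info, &dst_info, pool_info);

    // Backing memory is owned by the caller and bound only at run time.
    Tensor src, dst;
    src.allocator()->init(src_info);
    dst.allocator()->init(dst_info);
    src.allocator()->allocate();
    dst.allocator()->allocate();

    ITensorPack pack;
    pack.add_tensor(TensorType::ACL_SRC, &src);
    pack.add_tensor(TensorType::ACL_DST, &dst);

    // If the assembly path was selected, workspace() reports extra ACL_INT_* buffers
    // that must also be present in the pack. A real caller would service these through
    // a memory manager; here they are allocated directly and alignment is ignored.
    std::vector<std::unique_ptr<Tensor>> workspace;
    for(const auto &req : pool.workspace())
    {
        auto ws = std::make_unique<Tensor>();
        ws->allocator()->init(TensorInfo(TensorShape(req.size), 1, DataType::U8));
        ws->allocator()->allocate();
        pack.add_tensor(req.slot, ws.get());
        workspace.emplace_back(std::move(ws));
    }

    pool.run(pack);
}

The same configure()/validate()/run(ITensorPack) pattern applies to cpu::CpuDepthwiseConv2d, cpu::CpuDirectConv2d and the opencl::ClPool2d / ClDirectConv2d operators touched above; the static validate() of each operator can be called up front with fully specified ITensorInfo objects to reject unsupported configurations before any memory is allocated.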