diff options
Diffstat (limited to 'src/core/cpu/kernels')
-rw-r--r-- | src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp (renamed from src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.cpp) | 147 | ||||
-rw-r--r-- | src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h (renamed from src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h) | 40 | ||||
-rw-r--r-- | src/core/cpu/kernels/CpuDirectConv2dKernel.cpp (renamed from src/core/cpu/kernels/CpuDirectConvolutionKernel.cpp) | 16 | ||||
-rw-r--r-- | src/core/cpu/kernels/CpuDirectConv2dKernel.h (renamed from src/core/cpu/kernels/CpuDirectConvolutionKernel.h) | 25 | ||||
-rw-r--r-- | src/core/cpu/kernels/CpuDirectConv2dOutputStageKernel.cpp (renamed from src/core/cpu/kernels/CpuDirectConvolutionOutputStageKernel.cpp) | 16 | ||||
-rw-r--r-- | src/core/cpu/kernels/CpuDirectConv2dOutputStageKernel.h (renamed from src/core/cpu/kernels/CpuDirectConvolutionOutputStageKernel.h) | 26 | ||||
-rw-r--r-- | src/core/cpu/kernels/CpuPool2dKernel.cpp (renamed from src/core/cpu/kernels/CpuPoolingKernel.cpp) | 14 | ||||
-rw-r--r-- | src/core/cpu/kernels/CpuPool2dKernel.h (renamed from src/core/cpu/kernels/CpuPoolingKernel.h) | 21 | ||||
-rw-r--r-- | src/core/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp (renamed from src/core/cpu/kernels/CpuPoolingAssemblyWrapperKernel.cpp) | 16 | ||||
-rw-r--r-- | src/core/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h (renamed from src/core/cpu/kernels/CpuPoolingAssemblyWrapperKernel.h) | 26 |
10 files changed, 159 insertions, 188 deletions
diff --git a/src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.cpp b/src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp index a5d1b61c08..4ddb35f2d5 100644 --- a/src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.cpp +++ b/src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h" +#include "src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/ITensorInfo.h" @@ -74,7 +74,7 @@ struct DepthwiseConvolutionRunInfo const size_t input_width; const size_t input_depth; - DepthwiseConvolutionRunInfo(const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &conv_info, const Window &w, uint32_t depth_multiplier = 1) + DepthwiseConvolutionRunInfo(const ITensorInfo &input, const ITensorInfo &weights, const PadStrideInfo &conv_info, const Window &w, uint32_t depth_multiplier = 1) // NOLINT : num_read_elements_per_iteration((depth_multiplier == 1 ? (vector_size / element_size_from_data_type(input.data_type())) : 1)), x_start(w.x().start()), x_end(w.x().end()), @@ -110,14 +110,14 @@ inline bool is_valid_input_region(int32_t base_w, uint32_t base_h, uint32_t w, u } template <typename T> -void depthwise_loop_multiplier1_fp(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, +void depthwise_loop_multiplier1_fp(const ITensor *src, const ITensor *weights, const ITensor *biases, ITensor *dst, const PadStrideInfo &conv_info, const Size2D &dilation, const Window &window, bool has_biases) { constexpr auto element_per_vector = vector_size / sizeof(T); using VectorType = typename wrapper::traits::neon_vector<T, element_per_vector>::type; using TagType = typename wrapper::traits::neon_vector<T, element_per_vector>::tag_type; - const auto run_info = DepthwiseConvolutionRunInfo(*input->info(), *weights->info(), conv_info, window); + const auto run_info = DepthwiseConvolutionRunInfo(*src->info(), *weights->info(), conv_info, window); const VectorType zero_vector = wrapper::vdup_n(static_cast<T>(0), TagType{}); @@ -135,9 +135,9 @@ void depthwise_loop_multiplier1_fp(const ITensor *input, const ITensor *weights, Window win_output = window; win_output.set(Window::DimX, dim_manual_loop); - Iterator input_it(input, win_input); + Iterator input_it(src, win_input); Iterator weights_it(weights, win_weights); - Iterator output_it(output, win_output); + Iterator output_it(dst, win_output); Iterator biases_it{}; if(has_biases) @@ -224,10 +224,10 @@ void depthwise_loop_multiplier1_fp(const ITensor *input, const ITensor *weights, } template <typename T> -void depthwise_loop_generic_fp(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, +void depthwise_loop_generic_fp(const ITensor *src, const ITensor *weights, const ITensor *biases, ITensor *dst, const PadStrideInfo &conv_info, const Size2D &dilation, unsigned int depth_multiplier, const Window &window, bool has_biases) { - const auto run_info = DepthwiseConvolutionRunInfo(*input->info(), *weights->info(), conv_info, window, depth_multiplier); + const auto run_info = DepthwiseConvolutionRunInfo(*src->info(), *weights->info(), conv_info, window, depth_multiplier); Window execution_window = window; execution_window.set(Window::DimX, Window::Dimension(0, run_info.input_depth, 1)); @@ -246,9 +246,9 @@ void depthwise_loop_generic_fp(const ITensor *input, const ITensor *weights, con Window win_output = window; win_output.set_dimension_step(Window::DimX, run_info.x_step); - Iterator input_it(input, win_input); + Iterator input_it(src, win_input); Iterator weights_it(weights, win_weights); - Iterator output_it(output, win_output); + Iterator output_it(dst, win_output); Iterator biases_it{}; if(has_biases) @@ -306,23 +306,24 @@ void depthwise_loop_generic_fp(const ITensor *input, const ITensor *weights, con } template <typename T, typename TW> -void depthwise_loop_multiplier1_quantized(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, - const Size2D &dilation, std::vector<int> output_multiplier, std::vector<int> output_shift, const Window &window, bool has_biases) +void depthwise_loop_multiplier1_quantized(const ITensor *src, const ITensor *weights, const ITensor *biases, ITensor *dst, const PadStrideInfo &conv_info, + const Size2D &dilation, std::vector<int> output_multiplier, std::vector<int> output_shift, const Window &window, bool has_biases) // NOLINT { + ARM_COMPUTE_UNUSED(output_multiplier, output_shift); constexpr auto element_per_vector = vector_size / sizeof(T); using VectorType = typename wrapper::traits::neon_vector<T, element_per_vector>::type; using TagType = typename wrapper::traits::neon_vector<T, element_per_vector>::tag_type; using AccType = int32_t; using AccArrayType = std::array<AccType, element_per_vector>; - const auto out_of_bound_value = PixelValue(static_cast<uint64_t>(0), input->info()->data_type(), input->info()->quantization_info()).get<T>(); + const auto out_of_bound_value = PixelValue(static_cast<uint64_t>(0), src->info()->data_type(), src->info()->quantization_info()).get<T>(); const auto out_of_bound_vector = wrapper::vdup_n(static_cast<T>(out_of_bound_value), TagType{}); - const auto run_info = DepthwiseConvolutionRunInfo(*input->info(), *weights->info(), conv_info, window); + const auto run_info = DepthwiseConvolutionRunInfo(*src->info(), *weights->info(), conv_info, window); - const int32_t input_qoffset = input->info()->quantization_info().uniform().offset; + const int32_t input_qoffset = src->info()->quantization_info().uniform().offset; const int32_t weights_qoffset = weights->info()->quantization_info().uniform().offset; - const int32_t output_qoffset = output->info()->quantization_info().uniform().offset; + const int32_t output_qoffset = dst->info()->quantization_info().uniform().offset; const int32_t k_offset = run_info.weights_width * run_info.weights_height * input_qoffset * weights_qoffset; Window execution_window = window; @@ -339,9 +340,9 @@ void depthwise_loop_multiplier1_quantized(const ITensor *input, const ITensor *w Window win_output = window; win_output.set(Window::DimX, dim_manual_loop); - Iterator input_it(input, win_input); + Iterator input_it(src, win_input); Iterator weights_it(weights, win_weights); - Iterator output_it(output, win_output); + Iterator output_it(dst, win_output); Iterator biases_it{}; if(has_biases) @@ -482,18 +483,18 @@ void depthwise_loop_multiplier1_quantized(const ITensor *input, const ITensor *w } template <typename T, typename TW> -void depthwise_loop_generic_quantized(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, - const Size2D &dilation, unsigned int depth_multiplier, std::vector<int> output_multiplier, std::vector<int> output_shift, const Window &window, bool has_biases) +void depthwise_loop_generic_quantized(const ITensor *src, const ITensor *weights, const ITensor *biases, ITensor *dst, const PadStrideInfo &conv_info, + const Size2D &dilation, unsigned int depth_multiplier, std::vector<int> output_multiplier, std::vector<int> output_shift, const Window &window, bool has_biases) // NOLINT { using AccType = int32_t; - const auto run_info = DepthwiseConvolutionRunInfo(*input->info(), *weights->info(), conv_info, window, depth_multiplier); + const auto run_info = DepthwiseConvolutionRunInfo(*src->info(), *weights->info(), conv_info, window, depth_multiplier); - const auto out_of_bound_value = PixelValue(static_cast<uint64_t>(0), input->info()->data_type(), input->info()->quantization_info()).get<T>(); + const auto out_of_bound_value = PixelValue(static_cast<uint64_t>(0), src->info()->data_type(), src->info()->quantization_info()).get<T>(); - const int32_t input_qoffset = input->info()->quantization_info().uniform().offset; + const int32_t input_qoffset = src->info()->quantization_info().uniform().offset; const int32_t weights_qoffset = weights->info()->quantization_info().uniform().offset; - const int32_t output_qoffset = output->info()->quantization_info().uniform().offset; + const int32_t output_qoffset = dst->info()->quantization_info().uniform().offset; const int32_t k_offset = run_info.weights_width * run_info.weights_height * input_qoffset * weights_qoffset; Window execution_window = window; @@ -512,9 +513,9 @@ void depthwise_loop_generic_quantized(const ITensor *input, const ITensor *weigh Window win_output = window; win_output.set_dimension_step(Window::DimX, run_info.x_step); - Iterator input_it(input, win_input); + Iterator input_it(src, win_input); Iterator weights_it(weights, win_weights); - Iterator output_it(output, win_output); + Iterator output_it(dst, win_output); Iterator biases_it{}; if(has_biases) @@ -585,8 +586,8 @@ void depthwise_loop_generic_quantized(const ITensor *input, const ITensor *weigh } template <typename T, typename TW> -void depthwise_loop_pow2_quantized_per_tensor(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, - const Size2D &dilation, unsigned int depth_multiplier, std::vector<int> output_multiplier, std::vector<int> output_shift, const Window &window, bool has_biases) +void depthwise_loop_pow2_quantized_per_tensor(const ITensor *src, const ITensor *weights, const ITensor *biases, ITensor *dst, const PadStrideInfo &conv_info, + const Size2D &dilation, unsigned int depth_multiplier, std::vector<int> output_multiplier, std::vector<int> output_shift, const Window &window, bool has_biases) // NOLINT { constexpr int half_vec = vector_size / 2; @@ -595,11 +596,11 @@ void depthwise_loop_pow2_quantized_per_tensor(const ITensor *input, const ITenso using AccVectorTagType = typename wrapper::traits::neon_vector<AccType, half_vec>::tag_type; using TagType = typename wrapper::traits::neon_vector<T, vector_size>::tag_type; - const auto run_info = DepthwiseConvolutionRunInfo(*input->info(), *weights->info(), conv_info, window, depth_multiplier); + const auto run_info = DepthwiseConvolutionRunInfo(*src->info(), *weights->info(), conv_info, window, depth_multiplier); - const auto input_qoffset_vec = wrapper::vreinterpret(wrapper::vmovl(wrapper::vdup_n(static_cast<T>(input->info()->quantization_info().uniform().offset), TagType{}))); + const auto input_qoffset_vec = wrapper::vreinterpret(wrapper::vmovl(wrapper::vdup_n(static_cast<T>(src->info()->quantization_info().uniform().offset), TagType{}))); const auto weights_qoffset_vec = wrapper::vreinterpret(wrapper::vmovl(wrapper::vdup_n(static_cast<TW>(weights->info()->quantization_info().uniform().offset), TagType{}))); - const auto output_qoffset_vec = wrapper::vdup_n(output->info()->quantization_info().uniform().offset, arm_compute::wrapper::traits::vector_128_tag{}); + const auto output_qoffset_vec = wrapper::vdup_n(dst->info()->quantization_info().uniform().offset, arm_compute::wrapper::traits::vector_128_tag{}); const auto lower = wrapper::vdup_n(static_cast<AccType>(std::numeric_limits<T>::lowest()), AccVectorTagType{}); const auto upper = wrapper::vdup_n(static_cast<AccType>(std::numeric_limits<T>::max()), AccVectorTagType{}); @@ -624,9 +625,9 @@ void depthwise_loop_pow2_quantized_per_tensor(const ITensor *input, const ITenso Window win_output = window; win_output.set_dimension_step(Window::DimX, run_info.x_step); - Iterator input_it(input, win_input); + Iterator input_it(src, win_input); Iterator weights_it(weights, win_weights); - Iterator output_it(output, win_output); + Iterator output_it(dst, win_output); Iterator biases_it{}; if(has_biases) @@ -722,16 +723,16 @@ void depthwise_loop_pow2_quantized_per_tensor(const ITensor *input, const ITenso input_it, weights_it, biases_it, output_it); } -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info) +Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst); + ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src); + ARM_COMPUTE_RETURN_ERROR_ON(src->data_layout() == DataLayout::UNKNOWN); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON(info.depth_multiplier == 0); - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(1) + (weights->dimension(1) - 1) * (info.dilation.x() - 1) > input->dimension(1) + info.pad_stride_info.pad_left() + info.pad_stride_info.pad_right()); - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(2) + (weights->dimension(2) - 1) * (info.dilation.y() - 1) > input->dimension(2) + info.pad_stride_info.pad_top() + info.pad_stride_info.pad_bottom()); - ARM_COMPUTE_RETURN_ERROR_ON((input->dimension(0) * info.depth_multiplier) != weights->dimension(0)); + ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(1) + (weights->dimension(1) - 1) * (info.dilation.x() - 1) > src->dimension(1) + info.pad_stride_info.pad_left() + info.pad_stride_info.pad_right()); + ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(2) + (weights->dimension(2) - 1) * (info.dilation.y() - 1) > src->dimension(2) + info.pad_stride_info.pad_top() + info.pad_stride_info.pad_bottom()); + ARM_COMPUTE_RETURN_ERROR_ON((src->dimension(0) * info.depth_multiplier) != weights->dimension(0)); ARM_COMPUTE_RETURN_ERROR_ON((info.dilation.x() < 1) || (info.dilation.y() < 1)); ARM_COMPUTE_RETURN_ERROR_ON((info.pad_stride_info.stride().first < 1) || (info.pad_stride_info.stride().second < 1)); @@ -742,7 +743,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, } else { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, weights); } if(biases != nullptr) @@ -750,7 +751,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1); ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(0)); - if(is_data_type_quantized_asymmetric(input->data_type())) + if(is_data_type_quantized_asymmetric(src->data_type())) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32); } @@ -760,36 +761,36 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, } } - if(output->total_size() != 0) + if(dst->total_size() != 0) { - const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, info); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *weights, info); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(), output_shape); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst); } return Status{}; } } // namespace -CpuDepthwiseConvolutionNativeKernel::CpuDepthwiseConvolutionNativeKernel() +CpuDepthwiseConv2dNativeKernel::CpuDepthwiseConv2dNativeKernel() : _func(), _conv_info(), _depth_multiplier(1), _dilation(), _output_multiplier(), _output_shift(), _has_biases() { } -void CpuDepthwiseConvolutionNativeKernel::configure(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info) +void CpuDepthwiseConv2dNativeKernel::configure(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, weights, (biases != nullptr) ? biases : nullptr, output, info)); + ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, weights, (biases != nullptr) ? biases : nullptr, dst, info)); _conv_info = info.pad_stride_info; _depth_multiplier = info.depth_multiplier; _dilation = info.dilation; _has_biases = (biases != nullptr); - if(is_data_type_quantized(input->data_type())) + if(is_data_type_quantized(src->data_type())) { - const auto input_scale = input->quantization_info().uniform().scale; - const auto output_scale = output->quantization_info().uniform().scale; + const auto input_scale = src->quantization_info().uniform().scale; + const auto output_scale = dst->quantization_info().uniform().scale; auto weights_scale = weights->quantization_info().scale(); if(!is_data_type_quantized_per_channel(weights->data_type())) @@ -815,50 +816,50 @@ void CpuDepthwiseConvolutionNativeKernel::configure(const ITensorInfo *input, co switch(weights->data_type()) { case DataType::QASYMM8: - _func = &CpuDepthwiseConvolutionNativeKernel::run_depthwise<uint8_t, uint8_t>; + _func = &CpuDepthwiseConv2dNativeKernel::run_depthwise<uint8_t, uint8_t>; break; case DataType::QASYMM8_SIGNED: - _func = &CpuDepthwiseConvolutionNativeKernel::run_depthwise<int8_t, int8_t>; + _func = &CpuDepthwiseConv2dNativeKernel::run_depthwise<int8_t, int8_t>; break; case DataType::QSYMM8_PER_CHANNEL: - if(input->data_type() == DataType::QASYMM8) + if(src->data_type() == DataType::QASYMM8) { - _func = &CpuDepthwiseConvolutionNativeKernel::run_depthwise<uint8_t, int8_t>; + _func = &CpuDepthwiseConv2dNativeKernel::run_depthwise<uint8_t, int8_t>; } else { - _func = &CpuDepthwiseConvolutionNativeKernel::run_depthwise<int8_t, int8_t>; + _func = &CpuDepthwiseConv2dNativeKernel::run_depthwise<int8_t, int8_t>; } break; #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: - _func = &CpuDepthwiseConvolutionNativeKernel::run_depthwise<float16_t, float16_t>; + _func = &CpuDepthwiseConv2dNativeKernel::run_depthwise<float16_t, float16_t>; break; #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F32: - _func = &CpuDepthwiseConvolutionNativeKernel::run_depthwise<float, float>; + _func = &CpuDepthwiseConv2dNativeKernel::run_depthwise<float, float>; break; default: ARM_COMPUTE_ERROR("Data type not supported"); break; } - const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, info); - auto_init_if_empty(*output, input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape).set_quantization_info(output->quantization_info())); + const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *weights, info); + auto_init_if_empty(*dst, src->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape).set_quantization_info(dst->quantization_info())); - Window win = calculate_max_window(*output, Steps()); + Window win = calculate_max_window(*dst, Steps()); ICpuKernel::configure(win); } -Status CpuDepthwiseConvolutionNativeKernel::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info) +Status CpuDepthwiseConv2dNativeKernel::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info) { - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, biases, output, info)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, weights, biases, dst, info)); return Status{}; } -template <typename T, typename TW, CpuDepthwiseConvolutionNativeKernel::FloatEnalber<T>> -void CpuDepthwiseConvolutionNativeKernel::run_depthwise(const ITensor *src, const ITensor *weights, const ITensor *biases, - ITensor *dst, const Window &window, bool has_biases) +template <typename T, typename TW, CpuDepthwiseConv2dNativeKernel::FloatEnalber<T>> +void CpuDepthwiseConv2dNativeKernel::run_depthwise(const ITensor *src, const ITensor *weights, const ITensor *biases, + ITensor *dst, const Window &window, bool has_biases) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window); @@ -873,9 +874,9 @@ void CpuDepthwiseConvolutionNativeKernel::run_depthwise(const ITensor *src, cons } } -template <typename T, typename TW, CpuDepthwiseConvolutionNativeKernel::Quantized8bitEnalber<T>> -void CpuDepthwiseConvolutionNativeKernel::run_depthwise(const ITensor *src, const ITensor *weights, const ITensor *biases, - ITensor *dst, const Window &window, bool has_biases) +template <typename T, typename TW, CpuDepthwiseConv2dNativeKernel::Quantized8bitEnalber<T>> +void CpuDepthwiseConv2dNativeKernel::run_depthwise(const ITensor *src, const ITensor *weights, const ITensor *biases, + ITensor *dst, const Window &window, bool has_biases) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window); @@ -900,7 +901,7 @@ void CpuDepthwiseConvolutionNativeKernel::run_depthwise(const ITensor *src, cons } } -void CpuDepthwiseConvolutionNativeKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) +void CpuDepthwiseConv2dNativeKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); diff --git a/src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h b/src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h index 242536d441..559c46dc93 100644 --- a/src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h +++ b/src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CPU_DEPTHWISECONVOLUTIONNATIVEKERNEL_H -#define ARM_COMPUTE_CPU_DEPTHWISECONVOLUTIONNATIVEKERNEL_H +#ifndef ARM_COMPUTE_CPU_DEPTHWISECONV2DNATIVEKERNEL_H +#define ARM_COMPUTE_CPU_DEPTHWISECONV2DNATIVEKERNEL_H #include "arm_compute/core/utils/misc/Traits.h" #include "src/core/common/Macros.h" @@ -40,46 +40,38 @@ namespace cpu namespace kernels { /** Interface for the kernel to run a depthwise convolution native on a tensor. */ -class CpuDepthwiseConvolutionNativeKernel : public ICpuKernel +class CpuDepthwiseConv2dNativeKernel : public ICpuKernel { public: const char *name() const override { - return "CpuDepthwiseConvolutionNativeKernel"; + return "CpuDepthwiseConv2dNativeKernel"; } /** Default constructor */ - CpuDepthwiseConvolutionNativeKernel(); - ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuDepthwiseConvolutionNativeKernel); + CpuDepthwiseConv2dNativeKernel(); + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuDepthwiseConv2dNativeKernel); /** Initialize the function's source, destination and parameters. * * @note Supported data layouts: NHWC * - * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] src Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [IFM, W, H]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. + * Data type supported: Same as @p src or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p src is QASYMM8/QASYMM8_SIGNED. * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[out] output Destination tensor. Data type supported: Same as @p input. + * Data type supported: Same as @p src, S32 when src is QASYMM8/QASYMM8_SIGNED. + * @param[out] dst Destination tensor. Data type supported: Same as @p src. * @param[in] info Depthwise convolution meta-data. * */ - void configure(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info); - /** Static function to check if given info will lead to a valid configuration of @ref CpuDepthwiseConvolutionNativeKernel + void configure(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info); + /** Static function to check if given info will lead to a valid configuration * - * @note Supported data layouts: NHWC - * - * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor info. This is a 3D tensor with dimensions [IFM, W, H]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[in] output Destination tensor info. Data type supported: Same as @p input. - * @param[in] info Depthwise convolution meta-data. + * Similar to CpuDepthwiseConv2dNativeKernel::configure() * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info); + static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ConvolutionInfo &info); // Inherited methods overridden: void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; @@ -101,7 +93,7 @@ private: * * @param[in] window Region on which to execute the kernel. */ - using DepthwiseFunctionPtr = void (CpuDepthwiseConvolutionNativeKernel::*)(const ITensor *src, const ITensor *weights, const ITensor *bias, ITensor *dst, const Window &window, bool has_biases); + using DepthwiseFunctionPtr = void (CpuDepthwiseConv2dNativeKernel::*)(const ITensor *src, const ITensor *weights, const ITensor *bias, ITensor *dst, const Window &window, bool has_biases); DepthwiseFunctionPtr _func; PadStrideInfo _conv_info; @@ -114,4 +106,4 @@ private: } // namespace kernels } // namespace cpu } // namespace arm_compute -#endif /* ARM_COMPUTE_CPU_DEPTHWISECONVOLUTIONNATIVEKERNEL_H */ +#endif /* ARM_COMPUTE_CPU_DEPTHWISECONV2DNATIVEKERNEL_H */ diff --git a/src/core/cpu/kernels/CpuDirectConvolutionKernel.cpp b/src/core/cpu/kernels/CpuDirectConv2dKernel.cpp index 4f46eb2bf6..c0fc41525e 100644 --- a/src/core/cpu/kernels/CpuDirectConvolutionKernel.cpp +++ b/src/core/cpu/kernels/CpuDirectConv2dKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "src/core/cpu/kernels/CpuDirectConvolutionKernel.h" +#include "src/core/cpu/kernels/CpuDirectConv2dKernel.h" #include "src/core/NEON/kernels/detail/NEDirectConvolutionDetail.h" #include "src/core/NEON/wrapper/wrapper.h" @@ -995,7 +995,7 @@ bool have_zero_x_internal_padding(ITensorInfo *src, ITensorInfo *weights) } // namespace template <typename T> -void CpuDirectConvolutionKernel::convolve_nhwc_optimized(const Window &window, const ITensor *src, const ITensor *weights, ITensor *dst) +void CpuDirectConv2dKernel::convolve_nhwc_optimized(const Window &window, const ITensor *src, const ITensor *weights, ITensor *dst) { // This function assumes that input and weights have not padding in channel @@ -1116,7 +1116,7 @@ void CpuDirectConvolutionKernel::convolve_nhwc_optimized(const Window &window, c } template <typename T> -void CpuDirectConvolutionKernel::convolve_nhwc(const Window &window, const ITensor *src, const ITensor *weights, ITensor *dst) +void CpuDirectConv2dKernel::convolve_nhwc(const Window &window, const ITensor *src, const ITensor *weights, ITensor *dst) { // Declare useful types using vtype = wrapper::traits::neon_bitvector<T, wrapper::traits::BitWidth::W128>; @@ -1219,12 +1219,12 @@ void CpuDirectConvolutionKernel::convolve_nhwc(const Window &window, const ITens out); } -BorderSize CpuDirectConvolutionKernel::border_size() const +BorderSize CpuDirectConv2dKernel::border_size() const { return _border_size; } -void CpuDirectConvolutionKernel::configure(ITensorInfo *src, ITensorInfo *weights, ITensorInfo *dst, const PadStrideInfo &conv_info) +void CpuDirectConv2dKernel::configure(ITensorInfo *src, ITensorInfo *weights, ITensorInfo *dst, const PadStrideInfo &conv_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst); @@ -1263,7 +1263,7 @@ void CpuDirectConvolutionKernel::configure(ITensorInfo *src, ITensorInfo *weight ICpuKernel::configure(win_config.second); } -Status CpuDirectConvolutionKernel::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const PadStrideInfo &conv_info) +Status CpuDirectConv2dKernel::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const PadStrideInfo &conv_info) { unsigned int num_weight_elems_read_per_row = 0; unsigned int num_elems_read_per_iteration = 0; @@ -1283,7 +1283,7 @@ Status CpuDirectConvolutionKernel::validate(const ITensorInfo *src, const ITenso return Status{}; } -void CpuDirectConvolutionKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) +void CpuDirectConv2dKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); @@ -1376,7 +1376,7 @@ void CpuDirectConvolutionKernel::run_op(ITensorPack &tensors, const Window &wind } } } -const char *CpuDirectConvolutionKernel::name() const +const char *CpuDirectConv2dKernel::name() const { return "CpuDirectConvolutionLayerKernel"; } diff --git a/src/core/cpu/kernels/CpuDirectConvolutionKernel.h b/src/core/cpu/kernels/CpuDirectConv2dKernel.h index fb8218394b..62ed96f255 100644 --- a/src/core/cpu/kernels/CpuDirectConvolutionKernel.h +++ b/src/core/cpu/kernels/CpuDirectConv2dKernel.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CPU_DIRECTCONVOLUTION_KERNEL_H -#define ARM_COMPUTE_CPU_DIRECTCONVOLUTION_KERNEL_H +#ifndef ARM_COMPUTE_CPU_DIRECTCONV2D_KERNEL_H +#define ARM_COMPUTE_CPU_DIRECTCONV2D_KERNEL_H #include "src/core/common/Macros.h" #include "src/core/cpu/ICpuKernel.h" @@ -35,13 +35,13 @@ namespace cpu namespace kernels { /** Interface for the kernel to perform Direct Convolution Layer. */ -class CpuDirectConvolutionKernel : public ICpuKernel +class CpuDirectConv2dKernel : public ICpuKernel { public: /** Default constructor */ - CpuDirectConvolutionKernel() = default; - ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuDirectConvolutionKernel); - /** Set the input, weights, and output tensors. + CpuDirectConv2dKernel() = default; + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuDirectConv2dKernel); + /** Set the src, weights, and dst tensors. * * @note: DirectConvolution only works in the following configurations: * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 @@ -57,16 +57,9 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. */ void configure(ITensorInfo *src, ITensorInfo *weights, ITensorInfo *dst, const PadStrideInfo &conv_info); - /** Static function to check if given info will lead to a valid configuration of @ref CpuDirectConvolutionKernel + /** Static function to check if given info will lead to a valid configuration * - * @param[in] src The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * The 3rd dimension must be the same as the input's volume 3rd dimension. - * Data type supported:Same as @p input. - * @param[in] dst Output tensor. - * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: F16/F32 - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * Similar to CpuDirectConv2dKernel::configure() * * @return a status */ @@ -97,4 +90,4 @@ private: } // namespace kernels } // namespace cpu } // namespace arm_compute -#endif /*ARM_COMPUTE_CPU_DIRECTCONVOLUTION_KERNEL_H */ +#endif /*ARM_COMPUTE_CPU_DIRECTCONV2D_KERNEL_H */ diff --git a/src/core/cpu/kernels/CpuDirectConvolutionOutputStageKernel.cpp b/src/core/cpu/kernels/CpuDirectConv2dOutputStageKernel.cpp index 5f7a574e5a..662d052941 100644 --- a/src/core/cpu/kernels/CpuDirectConvolutionOutputStageKernel.cpp +++ b/src/core/cpu/kernels/CpuDirectConv2dOutputStageKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "src/core/cpu/kernels/CpuDirectConvolutionOutputStageKernel.h" +#include "src/core/cpu/kernels/CpuDirectConv2dOutputStageKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" @@ -384,8 +384,8 @@ void output_stage_nhwc(ITensor *src, const ITensor *bias, const Window &window, } } // namespace -void CpuDirectConvolutionOutputStageKernel::configure(ITensorInfo *src, const ITensorInfo *bias, ITensorInfo *dst, - const DirectConvolutionLayerOutputStageKernelInfo &info) +void CpuDirectConv2dOutputStageKernel::configure(ITensorInfo *src, const ITensorInfo *bias, ITensorInfo *dst, + const DirectConvolutionLayerOutputStageKernelInfo &info) { ARM_COMPUTE_UNUSED(bias); // Perform validation step @@ -483,14 +483,14 @@ void CpuDirectConvolutionOutputStageKernel::configure(ITensorInfo *src, const IT } } -Status CpuDirectConvolutionOutputStageKernel::validate(const ITensorInfo *src, const ITensorInfo *bias, const ITensorInfo *dst, - const DirectConvolutionLayerOutputStageKernelInfo &info) +Status CpuDirectConv2dOutputStageKernel::validate(const ITensorInfo *src, const ITensorInfo *bias, const ITensorInfo *dst, + const DirectConvolutionLayerOutputStageKernelInfo &info) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, bias, dst, info)); return Status{}; } -void CpuDirectConvolutionOutputStageKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) +void CpuDirectConv2dOutputStageKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); @@ -504,9 +504,9 @@ void CpuDirectConvolutionOutputStageKernel::run_op(ITensorPack &tensors, const W (*_func)(src, bias, window, dst, _result_fixedpoint_multiplier, _result_shift, _result_offset_after_shift); } -const char *CpuDirectConvolutionOutputStageKernel::name() const +const char *CpuDirectConv2dOutputStageKernel::name() const { - return "CpuDirectConvolutionOutputStageKernel"; + return "CpuDirectConv2dOutputStageKernel"; } } // namespace kernels } // namespace cpu diff --git a/src/core/cpu/kernels/CpuDirectConvolutionOutputStageKernel.h b/src/core/cpu/kernels/CpuDirectConv2dOutputStageKernel.h index 9eeab194cb..62bc5d41c9 100644 --- a/src/core/cpu/kernels/CpuDirectConvolutionOutputStageKernel.h +++ b/src/core/cpu/kernels/CpuDirectConv2dOutputStageKernel.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CPU_DIRECTCONVOLUTION_OUTPUTSTAGE_KERNEL_H -#define ARM_COMPUTE_CPU_DIRECTCONVOLUTION_OUTPUTSTAGE_KERNEL_H +#ifndef ARM_COMPUTE_CPU_DIRECTCONV2D_OUTPUTSTAGE_KERNEL_H +#define ARM_COMPUTE_CPU_DIRECTCONV2D_OUTPUTSTAGE_KERNEL_H #include "arm_compute/core/KernelDescriptors.h" #include "src/core/common/Macros.h" @@ -41,33 +41,27 @@ namespace kernels * @note For quantized computations (i.e. @p src of S32 type) the output data type for auto-initialization must be passed as part * of the @ref DirectConvolutionLayerOutputStageKernelInfo. */ -class CpuDirectConvolutionOutputStageKernel : public ICpuKernel +class CpuDirectConv2dOutputStageKernel : public ICpuKernel { public: /** Default constructor */ - CpuDirectConvolutionOutputStageKernel() = default; - ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuDirectConvolutionOutputStageKernel); + CpuDirectConv2dOutputStageKernel() = default; + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuDirectConv2dOutputStageKernel); /** Set the accumulate buffer and the biases of the kernel. * - * @param[in, out] src Input to add the bias to. If @p output is not specified then accumulation is done in-place. + * @param[in, out] src Input to add the bias to. If @p dst is not specified then accumulation is done in-place. * Data type supported: F16/F32/S32 * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p src - * @param[out] dst (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr) + * @param[out] dst (Optional) If the dst tensor is specified the accumulation is done out-of-place. (Defaults to nullptr) * Note that in-place computation is only supported for F16/F32. For S32 this must not be nullptr. * Data type supported: F16/F32 or QASYMM8/QASYMM8_SIGNED if @p src is S32 * @param[in] info (Optional) DirectConvolutionLayerOutputStageKernel descriptor metadata */ void configure(ITensorInfo *src, const ITensorInfo *bias = nullptr, ITensorInfo *dst = nullptr, const DirectConvolutionLayerOutputStageKernelInfo &info = DirectConvolutionLayerOutputStageKernelInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref CpuDirectConvolutionOutputStageKernel + /** Static function to check if given info will lead to a valid configuration * - * @param[in] src Input to add the bias to. If @p output is not specified then accumulation is done in-place. - * Data type supported: F16/F32/S32 - * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p src - * @param[in] dst (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr) - * Note that in-place computation is only supported for F16/F32. For S32 this must not be nullptr. - * Data type supported: F16/F32 or QASYMM8/QASYMM8_SIGNED if @p src is S32 - * @param[in] info (Optional) DirectConvolutionLayerOutputStageKernel descriptor metadata + * Similar to CpuDirectConv2dOutputStageKernel::configure() * * @return a status */ @@ -90,4 +84,4 @@ private: } // namespace kernels } // namespace cpu } // namespace arm_compute -#endif /*ARM_COMPUTE_CPU_DIRECTCONVOLUTION_OUTPUTSTAGE_KERNEL_H */ +#endif /*ARM_COMPUTE_CPU_DIRECTCONV2D_OUTPUTSTAGE_KERNEL_H */ diff --git a/src/core/cpu/kernels/CpuPoolingKernel.cpp b/src/core/cpu/kernels/CpuPool2dKernel.cpp index a55f60d7ad..e6f5890685 100644 --- a/src/core/cpu/kernels/CpuPoolingKernel.cpp +++ b/src/core/cpu/kernels/CpuPool2dKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "src/core/cpu/kernels/CpuPoolingKernel.h" +#include "src/core/cpu/kernels/CpuPool2dKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" @@ -374,12 +374,12 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *src, ITenso } } // namespace -BorderSize CpuPoolingKernel::border_size() const +BorderSize CpuPool2dKernel::border_size() const { return _border_size; } -void CpuPoolingKernel::configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &pool_info, ITensorInfo *indices) +void CpuPool2dKernel::configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &pool_info, ITensorInfo *indices) { ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); const PadStrideInfo pad_stride_info = pool_info.pad_stride_info; @@ -420,7 +420,7 @@ void CpuPoolingKernel::configure(ITensorInfo *src, ITensorInfo *dst, const Pooli } } -Status CpuPoolingKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &pool_info, const ITensorInfo *indices) +Status CpuPool2dKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &pool_info, const ITensorInfo *indices) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); @@ -446,7 +446,7 @@ Status CpuPoolingKernel::validate(const ITensorInfo *src, const ITensorInfo *dst return Status{}; } -void CpuPoolingKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) +void CpuPool2dKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); @@ -505,9 +505,9 @@ void CpuPoolingKernel::run_op(ITensorPack &tensors, const Window &window, const uk->ukernel(src, dst, indices, _pool_info, window_src, window); } -const char *CpuPoolingKernel::name() const +const char *CpuPool2dKernel::name() const { - return "CpuPoolingKernel"; + return "CpuPool2dKernel"; } } // namespace kernels } // namespace cpu diff --git a/src/core/cpu/kernels/CpuPoolingKernel.h b/src/core/cpu/kernels/CpuPool2dKernel.h index 87d8f67119..95298004e9 100644 --- a/src/core/cpu/kernels/CpuPoolingKernel.h +++ b/src/core/cpu/kernels/CpuPool2dKernel.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CPU_POOLING_KERNEL_H -#define ARM_COMPUTE_CPU_POOLING_KERNEL_H +#ifndef ARM_COMPUTE_CPU_POOL2D_KERNEL_H +#define ARM_COMPUTE_CPU_POOL2D_KERNEL_H #include "arm_compute/core/Types.h" #include "src/core/common/Macros.h" @@ -35,12 +35,12 @@ namespace cpu namespace kernels { /** Interface for the pooling layer kernel */ -class CpuPoolingKernel : public ICpuKernel +class CpuPool2dKernel : public ICpuKernel { public: /** Default constructor */ - CpuPoolingKernel() = default; - ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuPoolingKernel); + CpuPool2dKernel() = default; + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuPool2dKernel); /** Configure kernel for a given list of arguments * * @note F16 are supported for pool sizes 2 and 3 only @@ -51,14 +51,9 @@ public: * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32. */ void configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &pool_info, ITensorInfo *indices = nullptr); - /** Static function to check if given info will lead to a valid configuration of @ref CpuPoolingKernel + /** Static function to check if given info will lead to a valid configuration * - * @note F16 are supported for pool sizes 2 and 3 only - * - * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] dst Destination tensor info. Data types supported: Same as @p src. - * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. - * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32. + * Similar to CpuPool2dKernel::configure() * * @return a status */ @@ -80,4 +75,4 @@ private: } // namespace kernels } // namespace cpu } // namespace arm_compute -#endif /*ARM_COMPUTE_CPU_POOLING_KERNEL_H */ +#endif /*ARM_COMPUTE_CPU_POOL2D_KERNEL_H */ diff --git a/src/core/cpu/kernels/CpuPoolingAssemblyWrapperKernel.cpp b/src/core/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp index ccf73883f0..c78ffb9848 100644 --- a/src/core/cpu/kernels/CpuPoolingAssemblyWrapperKernel.cpp +++ b/src/core/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "src/core/cpu/kernels/CpuPoolingAssemblyWrapperKernel.h" +#include "src/core/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" @@ -41,7 +41,7 @@ namespace kernels { using namespace arm_compute::misc::shape_calculator; -void CpuPoolingAssemblyWrapperKernel::configure(const ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, const CPUInfo &cpu_info) +void CpuPool2dAssemblyWrapperKernel::configure(const ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, const CPUInfo &cpu_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst); @@ -88,7 +88,7 @@ void CpuPoolingAssemblyWrapperKernel::configure(const ITensorInfo *src, ITensorI INEKernel::configure(win); } -Status CpuPoolingAssemblyWrapperKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &info) +Status CpuPool2dAssemblyWrapperKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); @@ -136,7 +136,7 @@ Status CpuPoolingAssemblyWrapperKernel::validate(const ITensorInfo *src, const I return Status{}; } -void CpuPoolingAssemblyWrapperKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) +void CpuPool2dAssemblyWrapperKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) { ARM_COMPUTE_ERROR_ON_NULLPTR(_kernel_asm.get()); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); @@ -170,18 +170,18 @@ void CpuPoolingAssemblyWrapperKernel::run_op(ITensorPack &tensors, const Window working_space, info.thread_id, info.num_threads); } -size_t CpuPoolingAssemblyWrapperKernel::get_working_size(unsigned int num_threads) const +size_t CpuPool2dAssemblyWrapperKernel::get_working_size(unsigned int num_threads) const { return _kernel_asm->get_working_size(num_threads); } -bool CpuPoolingAssemblyWrapperKernel::is_configured() const +bool CpuPool2dAssemblyWrapperKernel::is_configured() const { return _kernel_asm != nullptr; } template <typename Typesrc, typename Typedst> -void CpuPoolingAssemblyWrapperKernel::create_arm_pooling(const ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, const CPUInfo &cpu_info) +void CpuPool2dAssemblyWrapperKernel::create_arm_pooling(const ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, const CPUInfo &cpu_info) { const arm_conv::pooling::PoolingType pool_type = (info.pool_type == PoolingType::AVG) ? arm_conv::pooling::PoolingType::AVERAGE : arm_conv::pooling::PoolingType::MAX; @@ -220,7 +220,7 @@ void CpuPoolingAssemblyWrapperKernel::create_arm_pooling(const ITensorInfo *src, } template <typename Typesrc, typename Typedst> -void CpuPoolingAssemblyWrapperKernel::create_arm_pooling_requant(const ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, const CPUInfo &cpu_info) +void CpuPool2dAssemblyWrapperKernel::create_arm_pooling_requant(const ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, const CPUInfo &cpu_info) { const arm_conv::pooling::PoolingType pool_type = (info.pool_type == PoolingType::AVG) ? arm_conv::pooling::PoolingType::AVERAGE : arm_conv::pooling::PoolingType::MAX; diff --git a/src/core/cpu/kernels/CpuPoolingAssemblyWrapperKernel.h b/src/core/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h index 34ec452deb..3afa4c16a4 100644 --- a/src/core/cpu/kernels/CpuPoolingAssemblyWrapperKernel.h +++ b/src/core/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CPU_POOLING_ASSEMBLY_WRAPPER_KERNEL_H -#define ARM_COMPUTE_CPU_POOLING_ASSEMBLY_WRAPPER_KERNEL_H +#ifndef ARM_COMPUTE_CPU_POOL2D_ASSEMBLY_WRAPPER_KERNEL_H +#define ARM_COMPUTE_CPU_POOL2D_ASSEMBLY_WRAPPER_KERNEL_H #include "arm_compute/core/Types.h" #include "src/core/NEON/kernels/assembly/pooling.hpp" @@ -41,23 +41,21 @@ namespace kernels * * Some kernels were written in assembly and highly optimised for specific * CPUs like A53 or A55. The arm compute library creates an instance of - * CpuPoolingAssemblyWrapperKernel and other auxiliary data structures to + * CpuPool2dAssemblyWrapperKernel and other auxiliary data structures to * execute a single assembly kernel in the context of an NEFunction. * */ -class CpuPoolingAssemblyWrapperKernel final : public ICpuKernel +class CpuPool2dAssemblyWrapperKernel final : public ICpuKernel { public: /** Constructor */ - CpuPoolingAssemblyWrapperKernel() = default; - CpuPoolingAssemblyWrapperKernel(CpuPoolingAssemblyWrapperKernel &) = delete; - CpuPoolingAssemblyWrapperKernel(CpuPoolingAssemblyWrapperKernel &&) = default; - CpuPoolingAssemblyWrapperKernel &operator=(CpuPoolingAssemblyWrapperKernel &) = delete; + CpuPool2dAssemblyWrapperKernel() = default; + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuPool2dAssemblyWrapperKernel); const char *name() const override { - return "CpuPoolingAssemblyWrapperKernel"; + return "CpuPool2dAssemblyWrapperKernel"; } /** Initialise the kernel's src and dst. @@ -69,13 +67,11 @@ public: */ void configure(const ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, const CPUInfo &cpu_info); - /** Indicates whether or not this function can be used to process the given parameters. + /** Static function to check if given info will lead to a valid configuration * - * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] dst Destination tensor to store the result of pooling. Data types supported: same as @p src. - * @param[in] info Pooling meta-data + * Similar to CpuPool2dAssemblyWrapperKernel::configure() * - * @return a status. + * @return a status */ static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const PoolingLayerInfo &info); @@ -120,4 +116,4 @@ private: } // namespace kernels } // namespace cpu } // namespace arm_compute -#endif /* ARM_COMPUTE_CPU_POOLING_ASSEMBLY_WRAPPER_KERNEL_H */ +#endif /* ARM_COMPUTE_CPU_POOL2D_ASSEMBLY_WRAPPER_KERNEL_H */ |