From 95abfddfa08ab85d4f88c6f4d2e077969178f2d5 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Wed, 28 Nov 2018 14:59:47 +0000 Subject: COMPMID-1815 Remove templates from NEFillborderKernel and create INESimpleFunctionNoBorder Change-Id: Ia9fdc75b23e9a6208058f8406fb7b5fcd917de2c Reviewed-on: https://review.mlplatform.org/311 Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio --- src/core/NEON/kernels/NEFillBorderKernel.cpp | 160 ++++++++----------------- src/runtime/NEON/INESimpleFunctionNoBorder.cpp | 39 ++++++ 2 files changed, 86 insertions(+), 113 deletions(-) create mode 100644 src/runtime/NEON/INESimpleFunctionNoBorder.cpp (limited to 'src') diff --git a/src/core/NEON/kernels/NEFillBorderKernel.cpp b/src/core/NEON/kernels/NEFillBorderKernel.cpp index aef4d4865a..39bcd996f9 100644 --- a/src/core/NEON/kernels/NEFillBorderKernel.cpp +++ b/src/core/NEON/kernels/NEFillBorderKernel.cpp @@ -34,15 +34,12 @@ #include #include -using namespace arm_compute; - +namespace arm_compute +{ +class Coordinates; namespace { -template -void fill_constant_value_single_channel_special(ITensor *tensor, const Window &window, unsigned int right, unsigned int bottom, const PixelValue &constant_border_value); - -template <> -inline void fill_constant_value_single_channel_special(ITensor *tensor, const Window &window, unsigned int right, unsigned int bottom, const PixelValue &constant_border_value) +inline void fill_constant_value_single_channel_special(ITensor *tensor, const Window &window, unsigned int right, unsigned int bottom, const PixelValue &constant_border_value) { float border_value; constant_border_value.get(border_value); @@ -93,11 +90,6 @@ inline void fill_constant_value_single_channel_special(ITensor *t } } // namespace -namespace arm_compute -{ -class Coordinates; -} // namespace arm_compute - NEFillBorderKernel::NEFillBorderKernel() : _tensor(nullptr), _border_size(0), _mode(BorderMode::UNDEFINED), _constant_border_value(static_cast(0.f)) { @@ -142,81 +134,19 @@ void NEFillBorderKernel::run(const Window &window, const ThreadInfo &info) { case BorderMode::CONSTANT: { - switch(_tensor->info()->data_type()) + if(_border_size.left == 1 && _border_size.top == 1 && _tensor->info()->data_type() == DataType::F32) { - case DataType::QASYMM8: - case DataType::U8: - fill_constant_value_single_channel(window); - break; - case DataType::S8: - fill_constant_value_single_channel(window); - break; - case DataType::U16: - fill_constant_value_single_channel(window); - break; - case DataType::S16: - fill_constant_value_single_channel(window); - break; - case DataType::U32: - fill_constant_value_single_channel(window); - break; - case DataType::S32: - fill_constant_value_single_channel(window); - break; - case DataType::F16: - static_assert(sizeof(half) == 2, "Float16_t must be 16 bit"); - fill_constant_value_single_channel(window); - break; - case DataType::F32: - static_assert(sizeof(float) == 4, "Float must be 32 bit"); - if(_border_size.left == 1 && _border_size.top == 1) - { - fill_constant_value_single_channel_special(_tensor, window, _border_size.right, _border_size.bottom, _constant_border_value); - } - else - { - fill_constant_value_single_channel(window); - } - break; - default: - ARM_COMPUTE_ERROR("Not handled"); + fill_constant_value_single_channel_special(_tensor, window, _border_size.right, _border_size.bottom, _constant_border_value); + } + else + { + fill_constant_value_single_channel(window); } break; } case BorderMode::REPLICATE: { - switch(_tensor->info()->data_type()) - { - case DataType::QASYMM8: - case DataType::U8: - fill_replicate_single_channel(window); - break; - case DataType::S8: - fill_replicate_single_channel(window); - break; - case DataType::U16: - fill_replicate_single_channel(window); - break; - case DataType::S16: - fill_replicate_single_channel(window); - break; - case DataType::U32: - fill_replicate_single_channel(window); - break; - case DataType::S32: - fill_replicate_single_channel(window); - break; - case DataType::F16: - static_assert(sizeof(half) == 2, "Float16_t must be 16 bit"); - fill_replicate_single_channel(window); - break; - case DataType::F32: - static_assert(sizeof(float) == 4, "Float must be 32 bit"); - fill_replicate_single_channel(window); - break; - default: - ARM_COMPUTE_ERROR("Not handled"); - } + fill_replicate_single_channel(window); break; } case BorderMode::UNDEFINED: @@ -226,13 +156,12 @@ void NEFillBorderKernel::run(const Window &window, const ThreadInfo &info) } } -template void NEFillBorderKernel::fill_replicate_single_channel(const Window &window) { uint8_t *const start_valid_region = _tensor->ptr_to_element(_tensor->info()->valid_region().anchor); const size_t width = _tensor->info()->valid_region().shape[0]; const size_t height = _tensor->info()->valid_region().shape[1]; - + const size_t element_size = _tensor->info()->element_size(); // Left and right border Window vertical(window); vertical.set(Window::DimY, Window::Dimension(0, height, 1)); @@ -241,13 +170,17 @@ void NEFillBorderKernel::fill_replicate_single_channel(const Window &window) execute_window_loop(vertical, [&](const Coordinates & id) { - const auto row_start = reinterpret_cast(start_valid_region + vertical_it.offset()); - const auto left_val = *reinterpret_cast(vertical_it.ptr()); - const auto right_val = *(reinterpret_cast(vertical_it.ptr()) + width - 1); - + uint8_t *base_addr = start_valid_region + vertical_it.offset(); // Fill left and right borders - std::fill_n(row_start - _border_size.left, _border_size.left, left_val); - std::fill_n(row_start + width, _border_size.right, right_val); + for(unsigned int i = 0; i < _border_size.left; ++i) + { + std::memcpy(base_addr + static_cast(i - _border_size.left) * element_size, vertical_it.ptr(), element_size); + } + + for(unsigned int i = 0; i < _border_size.right; ++i) + { + std::memcpy(base_addr + (width + i) * element_size, vertical_it.ptr() + (width - 1) * element_size, element_size); + } }, vertical_it); @@ -257,41 +190,33 @@ void NEFillBorderKernel::fill_replicate_single_channel(const Window &window) // Iterate over all XY planes execute_window_loop(window, [&](const Coordinates & id) { - const auto first_row = reinterpret_cast(start_valid_region + plane_it.offset()); - + uint8_t *base_addr = start_valid_region + plane_it.offset(); // Top border for(int i = -_border_size.top; i < 0; ++i) { - const auto row_start = reinterpret_cast(start_valid_region + plane_it.offset() + i * _tensor->info()->strides_in_bytes()[1]); - // Copy top rows including left/right borders - std::copy_n(first_row - _border_size.left, _border_size.left + width + _border_size.right, row_start - _border_size.left); + std::memcpy(base_addr + i * _tensor->info()->strides_in_bytes()[1] - _border_size.left * element_size, + base_addr - _border_size.left * element_size, (_border_size.left + width + _border_size.right) * element_size); } - const auto last_row = reinterpret_cast(start_valid_region + plane_it.offset() + (height - 1) * _tensor->info()->strides_in_bytes()[1]); - // Bottom border for(unsigned int i = height; i < height + _border_size.bottom; ++i) { - const auto row_start = reinterpret_cast(start_valid_region + plane_it.offset() + i * _tensor->info()->strides_in_bytes()[1]); - // Copy bottom rows including left/right borders - std::copy_n(last_row - _border_size.left, _border_size.left + width + _border_size.right, row_start - _border_size.left); + std::memcpy(base_addr + i * _tensor->info()->strides_in_bytes()[1] - _border_size.left * element_size, + base_addr + (height - 1) * _tensor->info()->strides_in_bytes()[1] - _border_size.left * element_size, (_border_size.left + width + _border_size.right) * element_size); } }, plane_it); } -template void NEFillBorderKernel::fill_constant_value_single_channel(const Window &window) { - T constant_border_value; - _constant_border_value.get(constant_border_value); - uint8_t *const start_valid_region = _tensor->ptr_to_element(_tensor->info()->valid_region().anchor); const size_t width = _tensor->info()->valid_region().shape[0]; const size_t height = _tensor->info()->valid_region().shape[1]; const int stridey = _tensor->info()->strides_in_bytes()[1]; + const size_t element_size = _tensor->info()->element_size(); // Left and right border Window vertical(window); @@ -301,11 +226,17 @@ void NEFillBorderKernel::fill_constant_value_single_channel(const Window &window execute_window_loop(vertical, [&](const Coordinates & id) { - const auto row_start = reinterpret_cast(start_valid_region + vertical_it.offset()); - + uint8_t *base_addr = start_valid_region + vertical_it.offset(); // Fill left and right borders - std::fill_n(row_start - _border_size.left, _border_size.left, constant_border_value); - std::fill_n(row_start + width, _border_size.right, constant_border_value); + for(unsigned int i = 0; i < _border_size.left; ++i) + { + std::memcpy(base_addr + static_cast(i - _border_size.left) * element_size, &_constant_border_value, element_size); + } + + for(unsigned int i = 0; i < _border_size.right; ++i) + { + std::memcpy(base_addr + (width + i) * element_size, &_constant_border_value, element_size); + } }, vertical_it); @@ -319,21 +250,24 @@ void NEFillBorderKernel::fill_constant_value_single_channel(const Window &window // Top border for(int i = -_border_size.top; i < 0; ++i) { - const auto row_start = reinterpret_cast(base_addr + i * stridey); - // Fill top rows including left/right borders - std::fill_n(row_start - _border_size.left, _border_size.left + width + _border_size.right, constant_border_value); + for(unsigned int j = 0; j < (_border_size.left + width + _border_size.right); ++j) + { + std::memcpy(base_addr + i * stridey + static_cast(j - _border_size.left) * element_size, &_constant_border_value, element_size); + } } // Bottom border const unsigned low_border_size = height + _border_size.bottom; for(unsigned int i = height; i < low_border_size; ++i) { - const auto row_start = reinterpret_cast(base_addr + i * stridey); - // Fill bottom rows including left/right borders - std::fill_n(row_start - _border_size.left, _border_size.left + width + _border_size.right, constant_border_value); + for(unsigned int j = 0; j < (_border_size.left + width + _border_size.right); ++j) + { + std::memcpy(base_addr + i * stridey + static_cast(j - _border_size.left) * element_size, &_constant_border_value, element_size); + } } }, plane_it); } +} // namespace arm_compute diff --git a/src/runtime/NEON/INESimpleFunctionNoBorder.cpp b/src/runtime/NEON/INESimpleFunctionNoBorder.cpp new file mode 100644 index 0000000000..12872048c7 --- /dev/null +++ b/src/runtime/NEON/INESimpleFunctionNoBorder.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" + +#include "arm_compute/runtime/NEON/NEScheduler.h" + +namespace arm_compute +{ +INESimpleFunctionNoBorder::INESimpleFunctionNoBorder() // NOLINT + : _kernel() +{ +} + +void INESimpleFunctionNoBorder::run() +{ + NEScheduler::get().schedule(_kernel.get(), Window::DimY); +} +} // namespace arm_compute -- cgit v1.2.1