diff options
Diffstat (limited to 'src/cpu/kernels/norm_layer')
-rw-r--r-- | src/cpu/kernels/norm_layer/generic/neon/fp16.cpp | 67 | ||||
-rw-r--r-- | src/cpu/kernels/norm_layer/generic/neon/fp32.cpp | 63 | ||||
-rw-r--r-- | src/cpu/kernels/norm_layer/generic/neon/impl.h | 177 | ||||
-rw-r--r-- | src/cpu/kernels/norm_layer/generic/neon/list.h | 49 |
4 files changed, 356 insertions, 0 deletions
diff --git a/src/cpu/kernels/norm_layer/generic/neon/fp16.cpp b/src/cpu/kernels/norm_layer/generic/neon/fp16.cpp new file mode 100644 index 0000000000..f85fe7a31a --- /dev/null +++ b/src/cpu/kernels/norm_layer/generic/neon/fp16.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) + +#include "src/core/NEON/wrapper/wrapper.h" +#include "src/cpu/CpuTypes.h" +#include "src/cpu/kernels/norm_layer/generic/neon/impl.h" + +namespace arm_compute +{ +namespace cpu +{ + +void neon_normalize_float16_8_0_2D( + const Window &window, const ITensor *in, const ITensor *in_squared, ITensor *out, NormalizationLayerInfo ninfo) +{ + arm_compute::normalize_float<float16_t, 8, 0, true>(window, in, in_squared, out, ninfo); +} + +void neon_normalize_float16_8_0( + const Window &window, const ITensor *in, const ITensor *in_squared, ITensor *out, NormalizationLayerInfo ninfo) +{ + arm_compute::normalize_float<float16_t, 8, 0, false>(window, in, in_squared, out, ninfo); +} + +void neon_normalize_float16_8_1_2D( + const Window &window, const ITensor *in, const ITensor *in_squared, ITensor *out, NormalizationLayerInfo ninfo) +{ + arm_compute::normalize_float<float16_t, 8, 1, true>(window, in, in_squared, out, ninfo); +} + +void neon_normalize_float16_8_1( + const Window &window, const ITensor *in, const ITensor *in_squared, ITensor *out, NormalizationLayerInfo ninfo) +{ + arm_compute::normalize_float<float16_t, 8, 1, false>(window, in, in_squared, out, ninfo); +} + +void neon_normalize_float16_8_2( + const Window &window, const ITensor *in, const ITensor *in_squared, ITensor *out, NormalizationLayerInfo ninfo) +{ + arm_compute::normalize_float<float16_t, 8, 2, false>(window, in, in_squared, out, ninfo); +} + +} // namespace cpu +} // namespace arm_compute +#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */ diff --git a/src/cpu/kernels/norm_layer/generic/neon/fp32.cpp b/src/cpu/kernels/norm_layer/generic/neon/fp32.cpp new file mode 100644 index 0000000000..0b64f46956 --- /dev/null +++ b/src/cpu/kernels/norm_layer/generic/neon/fp32.cpp @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "src/core/NEON/wrapper/wrapper.h" +#include "src/cpu/CpuTypes.h" +#include "src/cpu/kernels/norm_layer/generic/neon/impl.h" + +namespace arm_compute +{ +namespace cpu +{ +void neon_normalize_float32_4_0_2D( + const Window &window, const ITensor *in, const ITensor *in_squared, ITensor *out, NormalizationLayerInfo ninfo) +{ + arm_compute::normalize_float<float, 4, 0, true>(window, in, in_squared, out, ninfo); +} + +void neon_normalize_float32_4_0( + const Window &window, const ITensor *in, const ITensor *in_squared, ITensor *out, NormalizationLayerInfo ninfo) +{ + arm_compute::normalize_float<float, 4, 0, false>(window, in, in_squared, out, ninfo); +} + +void neon_normalize_float32_4_1_2D( + const Window &window, const ITensor *in, const ITensor *in_squared, ITensor *out, NormalizationLayerInfo ninfo) +{ + arm_compute::normalize_float<float, 4, 1, true>(window, in, in_squared, out, ninfo); +} + +void neon_normalize_float32_4_1( + const Window &window, const ITensor *in, const ITensor *in_squared, ITensor *out, NormalizationLayerInfo ninfo) +{ + arm_compute::normalize_float<float, 4, 1, false>(window, in, in_squared, out, ninfo); +} + +void neon_normalize_float32_4_2( + const Window &window, const ITensor *in, const ITensor *in_squared, ITensor *out, NormalizationLayerInfo ninfo) +{ + arm_compute::normalize_float<float, 4, 2, false>(window, in, in_squared, out, ninfo); +} +} // namespace cpu +} // namespace arm_compute diff --git a/src/cpu/kernels/norm_layer/generic/neon/impl.h b/src/cpu/kernels/norm_layer/generic/neon/impl.h new file mode 100644 index 0000000000..6103165679 --- /dev/null +++ b/src/cpu/kernels/norm_layer/generic/neon/impl.h @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2017-2021, 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_SRC_CPU_KERNELS_NORM_LAYER_GENERIC_NEON_IMPL_H +#define ACL_SRC_CPU_KERNELS_NORM_LAYER_GENERIC_NEON_IMPL_H + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include "src/core/helpers/NormalizationHelpers.h" +#include "src/core/helpers/WindowHelpers.h" +#include "src/core/NEON/NEMath.h" +#include "src/core/NEON/wrapper/wrapper.h" + +namespace arm_compute +{ +/** Function to perform normalization depending on the given template + * dimension. The second template parameter specifies whether the + * normalization has to be 1D or 2D. + * + * @note Only supported normalizations are: + * - 1D over X or Z + * - 2D over X and Y + * + * @param[in] window Region on which to execute the kernel. + * @param[in] in Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: FP16/F32. Data layouts supported: NCHW/NHWC. + * @param[in] in_squared Source with each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM], + * Data type and layout supported: same as @p input. + * @param[in] out Destination tensor. Output will have the same number of dimensions as input. Data type and layout supported: same as @p input. + * @param[in] ninfo Normalization layer information like the normalization type, normalization size and other parameters. + */ +template <typename T, unsigned int S, unsigned int dim, bool do_2D_norm> +void normalize_float( + const Window &window, const ITensor *in, const ITensor *in_squared, ITensor *out, NormalizationLayerInfo ninfo) +{ + /** SIMD vector tag type. */ + using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type; + + Window win(window); + win.set(Window::DimX, Window::Dimension(0, 1, 1)); + + const auto window_start_x = static_cast<int>(window.x().start()); + const auto window_end_x = static_cast<int>(window.x().end()); + const int window_step_x = S; + + Iterator input(in, win); + Iterator input_squared(in_squared, win); + Iterator output(out, win); + + const int dim_y = in->info()->data_layout() == DataLayout::NCHW ? 1 : 2; + const int radius = ninfo.norm_size() / 2; + const int input_squared_stride_x = in_squared->info()->strides_in_bytes()[0]; + const int input_squared_stride_slice = in_squared->info()->strides_in_bytes()[dim]; + const int input_squared_stride_row = in_squared->info()->strides_in_bytes()[dim_y]; + + const int max_right = in->info()->dimension(dim) - 1; + const int max_bottom = in->info()->dimension(dim_y) - 1; + + const auto coeff_vec = wrapper::vdup_n(static_cast<T>(ninfo.scale_coeff()), ExactTagType{}); + const auto beta_vec = wrapper::vdup_n(static_cast<T>(ninfo.beta()), ExactTagType{}); + const auto kappa_vec = wrapper::vdup_n(static_cast<T>(ninfo.kappa()), ExactTagType{}); + + auto sequential_normalization = [&](const int x, const Coordinates &id, const int current_row, const int first_row, + const int last_row, const T *input_ptr, const uint8_t *input_squared_start_ptr, + T *output_ptr) + { + const int current_slice = dim == 0 ? x : id[dim]; + const int first_slice = std::max(current_slice - radius, 0); + const int last_slice = std::min(current_slice + radius, max_right); + + const uint8_t *const input_squared_x_ptr = input_squared_start_ptr + x * input_squared_stride_x; + // Accumulate 2D In-Map values + auto accu = static_cast<T>(0.f); + for (int j = first_row; j <= last_row; ++j) + { + // Compute row displacement + const uint8_t *const input_squared_ptr = input_squared_x_ptr + (j - current_row) * input_squared_stride_row; + for (int i = first_slice; i <= last_slice; ++i) + { + accu += + *reinterpret_cast<const T *>(input_squared_ptr + (i - current_slice) * input_squared_stride_slice); + } + } + + // Normalize + const auto normalized = + std::pow(accu * static_cast<T>(ninfo.scale_coeff()) + static_cast<T>(ninfo.kappa()), ninfo.beta()); + const auto normalized_pixel = (*(input_ptr + x)) / normalized; + *(output_ptr + x) = normalized_pixel; + }; + + execute_window_loop( + win, + [&](const Coordinates &id) + { + const auto input_ptr = reinterpret_cast<const T *>(input.ptr()); + auto output_ptr = reinterpret_cast<T *>(output.ptr()); + + // Get range to normalize + const int current_row = do_2D_norm ? id[dim_y] : 0; + const int first_row = do_2D_norm ? std::max(current_row - radius, 0) : 0; + const int last_row = do_2D_norm ? std::min(current_row + radius, max_bottom) : 0; + + int x = window_start_x; + // Compute serially starting elements for the case x dimension is width + for (; x < radius && x < window_end_x && dim == 0; ++x) + { + sequential_normalization(x, id, current_row, first_row, last_row, input_ptr, input_squared.ptr(), + output_ptr); + } + + // Compute vectorized + for (; x <= window_end_x - window_step_x - radius; x += window_step_x) + { + const int current_slice = dim == 0 ? x : id[dim]; + const int first_slice = std::max(current_slice - radius, 0); + const int last_slice = std::min(current_slice + radius, max_right); + + const uint8_t *const input_squared_x_ptr = input_squared.ptr() + x * input_squared_stride_x; + // Accumulate 2D In-Map values + auto accu = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{}); + for (int j = first_row; j <= last_row; ++j) + { + // Compute row displacement + const uint8_t *const input_squared_ptr = + input_squared_x_ptr + (j - current_row) * input_squared_stride_row; + for (int i = first_slice; i <= last_slice; ++i) + { + accu = wrapper::vadd( + accu, wrapper::vloadq(reinterpret_cast<const T *>( + input_squared_ptr + (i - current_slice) * input_squared_stride_slice))); + } + } + + // Normalize + const auto normalized = wrapper::vpow(wrapper::vmla(kappa_vec, coeff_vec, accu), beta_vec); + const auto normalized_pixel = wrapper::vmul(wrapper::vloadq(input_ptr + x), wrapper::vinv(normalized)); + wrapper::vstore(reinterpret_cast<T *>(output_ptr + x), normalized_pixel); + } + + // Compute left-over elements + for (; x < window_end_x; ++x) + { + sequential_normalization(x, id, current_row, first_row, last_row, input_ptr, input_squared.ptr(), + output_ptr); + } + }, + input, input_squared, output); +} + +} // namespace arm_compute +#endif // ACL_SRC_CPU_KERNELS_NORM_LAYER_GENERIC_NEON_IMPL_H diff --git a/src/cpu/kernels/norm_layer/generic/neon/list.h b/src/cpu/kernels/norm_layer/generic/neon/list.h new file mode 100644 index 0000000000..f2e83d7af1 --- /dev/null +++ b/src/cpu/kernels/norm_layer/generic/neon/list.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_SRC_CPU_KERNELS_NORM_LAYER_GENERIC_NEON_LIST_H +#define ACL_SRC_CPU_KERNELS_NORM_LAYER_GENERIC_NEON_LIST_H +namespace arm_compute +{ +namespace cpu +{ + +#define DECLARE_NORMALIZATION_KERNEL(func_name) \ + void func_name(const Window &window, const ITensor *in, const ITensor *in_squared, ITensor *out, \ + NormalizationLayerInfo ninfo) + +DECLARE_NORMALIZATION_KERNEL(neon_normalize_float32_4_0_2D); +DECLARE_NORMALIZATION_KERNEL(neon_normalize_float32_4_0); +DECLARE_NORMALIZATION_KERNEL(neon_normalize_float32_4_1_2D); +DECLARE_NORMALIZATION_KERNEL(neon_normalize_float32_4_1); +DECLARE_NORMALIZATION_KERNEL(neon_normalize_float32_4_2); +DECLARE_NORMALIZATION_KERNEL(neon_normalize_float16_8_0_2D); +DECLARE_NORMALIZATION_KERNEL(neon_normalize_float16_8_0); +DECLARE_NORMALIZATION_KERNEL(neon_normalize_float16_8_1_2D); +DECLARE_NORMALIZATION_KERNEL(neon_normalize_float16_8_1); +DECLARE_NORMALIZATION_KERNEL(neon_normalize_float16_8_2); + +#undef DECLARE_NORMALIZATION_KERNEL +} // namespace cpu +} // namespace arm_compute +#endif // ACL_SRC_CPU_KERNELS_NORM_LAYER_GENERIC_NEON_LIST_H |