From 2897e61e8fe04aaf95540f4525c3dd3f7f46ebfa Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Tue, 20 Nov 2018 18:38:29 +0000 Subject: COMPMID-1645 NEL2Normalization for FP32/FP16 & NHWC Change-Id: I29e35024e29781a6b943b568abec9c73649215e6 --- .../core/NEON/kernels/NEL2NormalizeLayerKernel.h | 8 +- .../core/NEON/wrapper/intrinsics/intrinsics.h | 1 + arm_compute/core/NEON/wrapper/intrinsics/invsqrt.h | 53 ++++++++ .../runtime/NEON/functions/NEL2NormalizeLayer.h | 8 +- src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp | 2 +- src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp | 139 +++++++++++++++++++-- .../NEON/kernels/NEReductionOperationKernel.cpp | 12 +- src/runtime/CL/functions/CLL2NormalizeLayer.cpp | 5 +- src/runtime/NEON/functions/NEL2NormalizeLayer.cpp | 9 +- .../NEON/functions/NEReductionOperation.cpp | 2 +- tests/validation/CL/L2NormalizeLayer.cpp | 4 +- tests/validation/NEON/L2NormalizeLayer.cpp | 43 +++++-- 12 files changed, 243 insertions(+), 43 deletions(-) create mode 100644 arm_compute/core/NEON/wrapper/intrinsics/invsqrt.h diff --git a/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h b/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h index 0de07fdab7..f893c4ae6b 100644 --- a/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h @@ -52,24 +52,24 @@ public: ~NEL2NormalizeLayerKernel() = default; /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: F32. Data layouts supported: NCHW. + * @param[in] input Source tensor. Data types supported: F16/F32. * @param[in] sum Sum values tensor. Data types supported: same as @p input. * Sum will have the same number of dimensions as input. * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. * Output will have the same number of dimensions as input. - * @param[in] axis Dimension along which to reduce. 
Supported reduction axis : 0 + * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0, 1, 2 * @param[in] epsilon Lower bound value for the normalization. */ void configure(const ITensor *input, const ITensor *sum, ITensor *output, unsigned int axis, float epsilon); /** Static function to check if given info will lead to a valid configuration of @ref NEL2NormalizeLayerKernel. * - * @param[in] input Source tensor info. Data types supported: F32. Data layouts supported: NCHW. + * @param[in] input Source tensor info. Data types supported: F16/F32. * @param[in] sum Sum values tensor info. Data types supported: same as @p input. * Sum will have the same number of dimensions as input. * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. * Output will have the same number of dimensions as input. - * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0 + * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0, 1, 2 * @param[in] epsilon Lower bound value for the normalization. 
* * @return a status diff --git a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h index 2e6fd75005..7ea0aba565 100644 --- a/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h +++ b/arm_compute/core/NEON/wrapper/intrinsics/intrinsics.h @@ -32,6 +32,7 @@ #include "arm_compute/core/NEON/wrapper/intrinsics/getlane.h" #include "arm_compute/core/NEON/wrapper/intrinsics/getlow.h" #include "arm_compute/core/NEON/wrapper/intrinsics/inv.h" +#include "arm_compute/core/NEON/wrapper/intrinsics/invsqrt.h" #include "arm_compute/core/NEON/wrapper/intrinsics/load.h" #include "arm_compute/core/NEON/wrapper/intrinsics/max.h" #include "arm_compute/core/NEON/wrapper/intrinsics/min.h" diff --git a/arm_compute/core/NEON/wrapper/intrinsics/invsqrt.h b/arm_compute/core/NEON/wrapper/intrinsics/invsqrt.h new file mode 100644 index 0000000000..0bbf49b5c0 --- /dev/null +++ b/arm_compute/core/NEON/wrapper/intrinsics/invsqrt.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_WRAPPER_INVSQRT_H__
+#define __ARM_COMPUTE_WRAPPER_INVSQRT_H__
+
+#include "arm_compute/core/NEON/NEMath.h"
+#include <arm_neon.h>
+
+namespace arm_compute
+{
+namespace wrapper
+{
+#define VINVSQRT_IMPL(stype, vtype, prefix, postfix) \
+    inline vtype vinvsqrt(const vtype &a)            \
+    {                                                \
+        return prefix##_##postfix(a);                \
+    }
+
+VINVSQRT_IMPL(float, float32x2_t, vinvsqrt, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VINVSQRT_IMPL(float16_t, float16x4_t, vinvsqrt, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+VINVSQRT_IMPL(float, float32x4_t, vinvsqrtq, f32)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+VINVSQRT_IMPL(float16_t, float16x8_t, vinvsqrtq, f16)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#undef VINVSQRT_IMPL
+} // namespace wrapper
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_WRAPPER_INVSQRT_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h
index c089856fcd..ba506fa9ab 100644
--- a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h
@@ -50,18 +50,18 @@ public:
     NEL2NormalizeLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
     /** Set the input and output tensors.
      *
-     * @param[in, out] input   Source tensor. Data types supported: F32. Data layouts supported: NCHW. (Written to only for border_size != 0)
+     * @param[in, out] input   Source tensor. Data types supported: F16/F32. (Written to only for border_size != 0)
      * @param[out]     output  Destination tensor. Data types and data layouts supported: same as @p input.
-     * @param[in]      axis    Dimension along which to reduce. 
Supported reduction axis : 0 + * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0, 1, 2 * @param[in] epsilon (Optional) Lower bound value for the normalization. */ void configure(ITensor *input, ITensor *output, unsigned int axis, float epsilon = 1e-12f); /** Static function to check if given info will lead to a valid configuration of @ref NEL2NormalizeLayer. * - * @param[in] input Source tensor info. Data types supported: F32. Data layouts supported: NCHW. (Written to only for border_size != 0) + * @param[in] input Source tensor info. Data types supported: F16/F32. (Written to only for border_size != 0) * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. - * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0 + * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0, 1, 2 * @param[in] epsilon (Optional) Lower bound value for the normalization. * * @return a status diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp index 97dd919d08..4f36046b28 100644 --- a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp +++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp @@ -50,7 +50,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *sum, cons ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, sum, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, sum); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 2, "Axis greater than 2 is not supported"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions"); // Reduce shape on axis diff --git a/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp 
b/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp
index ed037832af..cda041de66 100644
--- a/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp
@@ -32,15 +32,20 @@
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/core/Window.h"
+#include "arm_compute/core/NEON/wrapper/wrapper.h"
 #include <arm_neon.h>
 #include <cmath>
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 namespace
 {
+template <typename T, int S>
 void l2_normalize_X(const ITensor *in, const ITensor *sum, ITensor *out, float epsilon, const Window &window)
 {
+    /** NEON vector tag type. */
+    using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
+
     Window window_sum(window);
     window_sum.set(Window::DimX, Window::Dimension(0, 0, 0));
 
@@ -53,30 +58,97 @@ void l2_normalize_X(const ITensor *in, const ITensor *sum, ITensor *out, float e
         Iterator sum_it(sum, sum_slice);
         Iterator output_it(out, in_slice);
 
-        const float       sum_value           = *reinterpret_cast<const float *>(sum_it.ptr());
-        const float32x4_t vec_normalize_value = vdupq_n_f32(1.f / std::sqrt(std::max(sum_value, epsilon)));
+        const auto sum_value           = *reinterpret_cast<const T *>(sum_it.ptr());
+        const auto vec_normalize_value = wrapper::vdup_n(static_cast<T>(1.f / std::sqrt(std::max(sum_value, static_cast<T>(epsilon)))), ExactTagType{});
 
         execute_window_loop(in_slice, [&](const Coordinates & id)
         {
-            const auto in_ptr  = reinterpret_cast<const float *>(input_it.ptr());
-            const auto out_ptr = reinterpret_cast<float *>(output_it.ptr());
+            const auto in_ptr  = reinterpret_cast<const T *>(input_it.ptr());
+            const auto out_ptr = reinterpret_cast<T *>(output_it.ptr());
 
-            vst1q_f32(out_ptr, vmulq_f32(vld1q_f32(in_ptr), vec_normalize_value));
+            wrapper::vstore(out_ptr, wrapper::vmul(wrapper::vloadq(in_ptr), vec_normalize_value));
         },
         input_it, output_it);
     }
     while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(sum_slice));
 }
 
+template <typename T, int S>
+void l2_normalize_Y(const ITensor *in, const ITensor *sum, ITensor *out, float epsilon, const Window &window)
+{
+    /** NEON vector tag 
type. */
+    using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
+
+    Window window_sum(window);
+    window_sum.set(Window::DimY, Window::Dimension(0, 0, 0));
+
+    Window in_slice  = window.first_slice_window_2D();
+    Window sum_slice = window_sum.first_slice_window_2D();
+
+    do
+    {
+        Iterator input_it(in, in_slice);
+        Iterator sum_it(sum, sum_slice);
+        Iterator output_it(out, in_slice);
+
+        auto eps = wrapper::vdup_n(static_cast<T>(epsilon), ExactTagType{});
+
+        execute_window_loop(in_slice, [&](const Coordinates & id)
+        {
+            const auto in_ptr  = reinterpret_cast<const T *>(input_it.ptr());
+            const auto sum_ptr = reinterpret_cast<const T *>(sum_it.ptr());
+            const auto out_ptr = reinterpret_cast<T *>(output_it.ptr());
+
+            const auto vec_normalize_value = wrapper::vinvsqrt(wrapper::vmax(wrapper::vloadq(sum_ptr), eps));
+            wrapper::vstore(out_ptr, wrapper::vmul(wrapper::vloadq(in_ptr), vec_normalize_value));
+        },
+        input_it, sum_it, output_it);
+    }
+    while(window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(sum_slice));
+}
+
+template <typename T, int S>
+void l2_normalize_Z(const ITensor *in, const ITensor *sum, ITensor *out, float epsilon, const Window &window)
+{
+    /** NEON vector tag type. 
*/
+    using ExactTagType = typename wrapper::traits::neon_vector<T, S>::tag_type;
+
+    Window window_sum(window);
+    window_sum.set(Window::DimZ, Window::Dimension(0, 0, 0));
+
+    Window in_slice  = window.first_slice_window_3D();
+    Window sum_slice = window_sum.first_slice_window_3D();
+
+    do
+    {
+        Iterator input_it(in, in_slice);
+        Iterator sum_it(sum, sum_slice);
+        Iterator output_it(out, in_slice);
+
+        auto eps = wrapper::vdup_n(static_cast<T>(epsilon), ExactTagType{});
+
+        execute_window_loop(in_slice, [&](const Coordinates & id)
+        {
+            const auto in_ptr  = reinterpret_cast<const T *>(input_it.ptr());
+            const auto sum_ptr = reinterpret_cast<const T *>(sum_it.ptr());
+            const auto out_ptr = reinterpret_cast<T *>(output_it.ptr());
+
+            const auto vec_normalize_value = wrapper::vinvsqrt(wrapper::vmax(wrapper::vloadq(sum_ptr), eps));
+            wrapper::vstore(out_ptr, wrapper::vmul(wrapper::vloadq(in_ptr), vec_normalize_value));
+        },
+        input_it, sum_it, output_it);
+    }
+    while(window.slide_window_slice_3D(in_slice) && window.slide_window_slice_3D(sum_slice));
+}
+
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, unsigned int axis, float epsilon)
 {
     ARM_COMPUTE_UNUSED(epsilon);
 
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, sum, output);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, sum);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
-    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() != DataLayout::NCHW);
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 0, "Unsupported normalization axis, Supported axis is 0");
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 2, "Axis greater than 2 is not supported");
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Normalization axis greater than max number of dimensions");
 
     // Reduce shape on axis
@@ -89,7 +161,7 @@ Status validate_arguments(const ITensorInfo 
*sum, cons
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(input->tensor_shape(), output->tensor_shape());
-        ARM_COMPUTE_RETURN_ERROR_ON(output->data_layout() != DataLayout::NCHW);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
     }
 
     return Status{};
@@ -158,9 +230,52 @@ void NEL2NormalizeLayerKernel::run(const Window &window, const ThreadInfo &info)
     switch(_axis)
     {
         case 0:
-            l2_normalize_X(_input, _sum, _output, _epsilon, window);
+            switch(_input->info()->data_type())
+            {
+                case DataType::F32:
+                    l2_normalize_X<float, 4>(_input, _sum, _output, _epsilon, window);
+                    break;
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+                case DataType::F16:
+                    l2_normalize_X<float16_t, 8>(_input, _sum, _output, _epsilon, window);
+                    break;
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+                default:
+                    ARM_COMPUTE_ERROR("Not implemented");
+            }
+            break;
+        case 1:
+            switch(_input->info()->data_type())
+            {
+                case DataType::F32:
+                    l2_normalize_Y<float, 4>(_input, _sum, _output, _epsilon, window);
+                    break;
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+                case DataType::F16:
+                    l2_normalize_Y<float16_t, 8>(_input, _sum, _output, _epsilon, window);
+                    break;
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+                default:
+                    ARM_COMPUTE_ERROR("Not implemented");
+            }
+            break;
+        case 2:
+            switch(_input->info()->data_type())
+            {
+                case DataType::F32:
+                    l2_normalize_Z<float, 4>(_input, _sum, _output, _epsilon, window);
+                    break;
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+                case DataType::F16:
+                    l2_normalize_Z<float16_t, 8>(_input, _sum, _output, _epsilon, window);
+                    break;
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+                default:
+                    ARM_COMPUTE_ERROR("Not implemented");
+            }
             break;
         default:
             ARM_COMPUTE_ERROR("Unsupported normalization axis");
     }
 }
+} // namespace arm_compute
diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
index 182e93d177..9306e0303d 100644
--- 
a/src/core/NEON/kernels/NEReductionOperationKernel.cpp +++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp @@ -66,12 +66,14 @@ public: { // Set in window Window in_window(window); + Window out_window(window); in_window.set(Window::DimY, Window::Dimension(0, 1, 1)); + out_window.set(Window::DimY, Window::Dimension(0, output->info()->dimension(1), output->info()->dimension(1))); // Get first input and output slices Window in_slice = in_window.first_slice_window_2D(); - Window out_slice = window.first_slice_window_2D(); + Window out_slice = out_window.first_slice_window_2D(); do { @@ -80,18 +82,20 @@ public: f(in, out, in_slice, out_slice, *input->info(), 1); } - while(in_window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(out_slice)); + while(in_window.slide_window_slice_2D(in_slice) && out_window.slide_window_slice_2D(out_slice)); } static void reduceZ(const Window &window, const ITensor *input, ITensor *output, F f) { // Set in window Window in_window(window); + Window out_window(window); in_window.set(Window::DimZ, Window::Dimension(0, 1, 1)); + out_window.set(Window::DimZ, Window::Dimension(0, output->info()->dimension(2), output->info()->dimension(2))); // Get first input and output slices Window in_slice = in_window.first_slice_window_3D(); - Window out_slice = window.first_slice_window_3D(); + Window out_slice = out_window.first_slice_window_3D(); do { @@ -100,7 +104,7 @@ public: f(in, out, in_slice, out_slice, *input->info(), 2); } - while(in_window.slide_window_slice_3D(in_slice) && window.slide_window_slice_3D(out_slice)); + while(in_window.slide_window_slice_3D(in_slice) && out_window.slide_window_slice_3D(out_slice)); } static void reduceW(const Window &window, const ITensor *input, ITensor *output, F f) { diff --git a/src/runtime/CL/functions/CLL2NormalizeLayer.cpp b/src/runtime/CL/functions/CLL2NormalizeLayer.cpp index 4f709d561d..2e3c6d7763 100644 --- a/src/runtime/CL/functions/CLL2NormalizeLayer.cpp +++ 
b/src/runtime/CL/functions/CLL2NormalizeLayer.cpp
@@ -32,8 +32,8 @@
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "support/ToolchainSupport.h"
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 CLL2NormalizeLayer::CLL2NormalizeLayer(std::shared_ptr<IMemoryManager> memory_manager)
     : _memory_group(std::move(memory_manager)), _reduce_func(), _normalize_kernel(), _sumsq()
 {
@@ -81,3 +81,4 @@ void CLL2NormalizeLayer::run()
 
     _memory_group.release();
 }
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp b/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp
index d0b80fb1b8..56da966abf 100644
--- a/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp
+++ b/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp
@@ -26,8 +26,8 @@
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/runtime/NEON/NEScheduler.h"
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 NEL2NormalizeLayer::NEL2NormalizeLayer(std::shared_ptr<IMemoryManager> memory_manager)
     : _memory_group(std::move(memory_manager)), _reduce_func(), _normalize_kernel(), _sumsq()
 {
@@ -57,8 +57,8 @@ Status NEL2NormalizeLayer::validate(const ITensorInfo *input, const ITensorInfo
 
     ARM_COMPUTE_RETURN_ON_ERROR(NEReductionOperation::validate(input, &sum_sq, axis, ReductionOperation::SUM_SQUARE));
 
-    // Reduce shape on axis (supported axis is 0)
-    shape.set(0, 1);
+    // Reduce shape on axis
+    shape.set(axis, 1);
     sum_sq.set_tensor_shape(shape);
 
     ARM_COMPUTE_RETURN_ON_ERROR(NEL2NormalizeLayerKernel::validate(input, &sum_sq, output, axis, epsilon));
@@ -75,3 +75,4 @@ void NEL2NormalizeLayer::run()
 
     _memory_group.release();
 }
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEReductionOperation.cpp b/src/runtime/NEON/functions/NEReductionOperation.cpp
index 188c2bbb18..bb27b5d47a 100644
--- a/src/runtime/NEON/functions/NEReductionOperation.cpp
+++ b/src/runtime/NEON/functions/NEReductionOperation.cpp
@@ -86,7 +86,7 @@ void NEReductionOperation::configure(ITensor *input, ITensor *output, 
unsigned i if(axis == 0) { // Configure fill border kernel - BorderSize fill_border_size = (axis == 0) ? _reduction_kernel.border_size() : BorderSize(); + BorderSize fill_border_size = _reduction_kernel.border_size(); BorderMode fill_border_mode = reduction_operation_border_mode(op); _fill_border_kernel.configure(input, fill_border_size, fill_border_mode, PixelValue(static_cast(0.f))); } diff --git a/tests/validation/CL/L2NormalizeLayer.cpp b/tests/validation/CL/L2NormalizeLayer.cpp index 13a33a864c..fdbfa3ed4d 100644 --- a/tests/validation/CL/L2NormalizeLayer.cpp +++ b/tests/validation/CL/L2NormalizeLayer.cpp @@ -62,7 +62,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( TensorInfo(TensorShape(128U, 64U), 2, DataType::F32), // Number of Input channels != 1 TensorInfo(TensorShape(128U, 64U), 1, DataType::S16), // DataType != F32 TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Axis >= num_max_dimensions - TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Axis > 3 + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Axis > 2 TensorInfo(TensorShape(128U, 64U), 1, DataType::F32) }), framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(128U, 64U), 1, DataType::F16), @@ -73,7 +73,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), TensorInfo(TensorShape(128U, 64U), 1, DataType::F32) })), - framework::dataset::make("Axis", { 0U, 0U, 0U, 0U, static_cast(TensorShape::num_max_dimensions), 4U, 0U })), + framework::dataset::make("Axis", { 0U, 0U, 0U, 0U, static_cast(TensorShape::num_max_dimensions), 3U, 0U })), framework::dataset::make("Expected", { false, false, false, false, false, false, true })), input_info, output_info, axis, expected) { diff --git a/tests/validation/NEON/L2NormalizeLayer.cpp b/tests/validation/NEON/L2NormalizeLayer.cpp index 0a1ddba77c..3164a65417 100644 --- a/tests/validation/NEON/L2NormalizeLayer.cpp +++ 
b/tests/validation/NEON/L2NormalizeLayer.cpp @@ -44,6 +44,9 @@ namespace { /** Tolerance for float operations */ RelativeTolerance tolerance_f32(0.00001f); +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +RelativeTolerance tolerance_f16(0.2f); +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC } // namespace TEST_SUITE(NEON) @@ -57,7 +60,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( TensorInfo(TensorShape(128U, 64U), 2, DataType::F32), // Number of Input channels != 1 TensorInfo(TensorShape(128U, 64U), 1, DataType::S16), // DataType != F32 TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Axis >= num_max_dimensions - TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Axis > 0 + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Axis > 2 TensorInfo(TensorShape(128U, 64U), 1, DataType::F32) }), framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(128U, 64U), 1, DataType::F16), @@ -68,7 +71,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), TensorInfo(TensorShape(128U, 64U), 1, DataType::F32) })), - framework::dataset::make("Axis", { 0U, 0U, 0U, 0U, static_cast(TensorShape::num_max_dimensions), 1U, 0U })), + framework::dataset::make("Axis", { 0U, 0U, 0U, 0U, static_cast(TensorShape::num_max_dimensions), 3U, 0U })), framework::dataset::make("Expected", { false, false, false, false, false, false, true })), input_info, output_info, axis, expected) { @@ -85,8 +88,8 @@ using NEL2NormalizeLayerFixture = L2NormalizeLayerValidationFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NCHW })), - framework::dataset::make("Axis", { 0 })), + combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { 
DataLayout::NCHW, DataLayout::NHWC })), + framework::dataset::make("Axis", { 0, 1, 2 })), framework::dataset::make("Epsilon", { 1e-12 }))) { // Validate output @@ -94,17 +97,39 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEL2NormalizeLayerFixture, framework::Da } FIXTURE_DATA_TEST_CASE(RunLarge, NEL2NormalizeLayerFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NCHW })), - framework::dataset::make("Axis", { 0 })), + combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + framework::dataset::make("Axis", { 0, 1, 2 })), framework::dataset::make("Epsilon", { 1e-12 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f32); } -TEST_SUITE_END() +TEST_SUITE_END() // FP32 -TEST_SUITE_END() -TEST_SUITE_END() +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, NEL2NormalizeLayerFixture, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + framework::dataset::make("Axis", { 0, 1, 2 })), + framework::dataset::make("Epsilon", { 1e-12 }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NEL2NormalizeLayerFixture, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + framework::dataset::make("Axis", { 0, 1, 2 })), + framework::dataset::make("Epsilon", { 1e-12 }))) +{ + // Validate output + 
validate(Accessor(_target), _reference, tolerance_f16); +} +TEST_SUITE_END() // FP16 +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + +TEST_SUITE_END() // L2NormalizeLayer +TEST_SUITE_END() // NEON } // namespace validation } // namespace test } // namespace arm_compute -- cgit v1.2.1