From 0a8334cb78dae66fdc31257a96ba15f7c41bde50 Mon Sep 17 00:00:00 2001
From: Michalis Spyrou <michalis.spyrou@arm.com>
Date: Wed, 14 Jun 2017 18:00:05 +0100
Subject: COMPMID-400 Add support for 16 bit fixed point arithmetic.

Change-Id: Iebfaef1b219d80d6362b7fd4b1357612b31e43cb
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/77749
Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
---
 tests/validation/NEON/Fixedpoint/Exp_QS16.cpp      | 124 +++++++++++++++++++++
 tests/validation/NEON/Fixedpoint/Exp_QS8.cpp       |   4 +-
 tests/validation/NEON/Fixedpoint/Invsqrt_QS16.cpp  | 122 ++++++++++++++++++++
 tests/validation/NEON/Fixedpoint/Invsqrt_QS8.cpp   |   9 +-
 tests/validation/NEON/Fixedpoint/Log_QS16.cpp      | 123 ++++++++++++++++++++
 tests/validation/NEON/Fixedpoint/Log_QS8.cpp       |   4 +-
 .../validation/NEON/Fixedpoint/Reciprocal_QS16.cpp | 123 ++++++++++++++++++++
 .../validation/NEON/Fixedpoint/Reciprocal_QS8.cpp  |   4 +-
 8 files changed, 502 insertions(+), 11 deletions(-)
 create mode 100644 tests/validation/NEON/Fixedpoint/Exp_QS16.cpp
 create mode 100644 tests/validation/NEON/Fixedpoint/Invsqrt_QS16.cpp
 create mode 100644 tests/validation/NEON/Fixedpoint/Log_QS16.cpp
 create mode 100644 tests/validation/NEON/Fixedpoint/Reciprocal_QS16.cpp

(limited to 'tests/validation/NEON/Fixedpoint')
diff --git a/tests/validation/NEON/Fixedpoint/Exp_QS16.cpp b/tests/validation/NEON/Fixedpoint/Exp_QS16.cpp
new file mode 100644
index 0000000000..e6d7d860cb
--- /dev/null
+++ b/tests/validation/NEON/Fixedpoint/Exp_QS16.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Globals.h"
+#include "NEON/Helper.h"
+#include "NEON/NEAccessor.h"
+#include "TensorLibrary.h"
+#include "TypePrinter.h"
+#include "Utils.h"
+#include "validation/Datasets.h"
+#include "validation/ReferenceCPP.h"
+#include "validation/Validation.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/NEFixedPoint.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include "boost_wrapper.h"
+
+#include <random>
+#include <string>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::neon;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+const float tolerance = 1.0f; /**< Tolerance value for comparing reference's output against implementation's output */
+
+/** Compute Neon exponential function for signed 16 bit fixed point.
+ *
+ * @param[in] shape                Shape of the input and output tensors.
+ * @param[in] fixed_point_position Fixed point position used for both tensors and forwarded to vqexpq_qs16().
+ * @return Computed output tensor.
+ */
+Tensor compute_exp_qs16(const TensorShape &shape, int fixed_point_position)
+{
+    // Create source and destination QS16 tensors sharing the same shape and fixed point position
+    Tensor src = create_tensor(shape, DataType::QS16, 1, fixed_point_position);
+    Tensor dst = create_tensor(shape, DataType::QS16, 1, fixed_point_position);
+
+    constexpr unsigned int num_elems_processed_per_iteration = 8; // One qint16x8_t is loaded and stored per iteration
+    Window                 window                            = calculate_max_window(*src.info(), Steps(num_elems_processed_per_iteration));
+    AccessWindowHorizontal input_access(src.info(), 0, num_elems_processed_per_iteration);
+    AccessWindowHorizontal output_access(dst.info(), 0, num_elems_processed_per_iteration);
+
+    update_window_and_padding(window, input_access, output_access);
+    output_access.set_valid_region(window, src.info()->valid_region());
+
+    // Allocate tensors (done after the window/padding update above)
+    src.allocator()->allocate();
+    dst.allocator()->allocate();
+
+    BOOST_TEST(!src.info()->is_resizable());
+    BOOST_TEST(!dst.info()->is_resizable());
+
+    // Fill tensors. Keep the raw values within [-(1 << (fixed_point_position - 1)),
+    // 1 << (fixed_point_position - 1)] (both bounds inclusive) so the result won't overflow.
+    std::uniform_int_distribution<> distribution(-(1 << (fixed_point_position - 1)), (1 << (fixed_point_position - 1)));
+    library->fill(NEAccessor(src), distribution, 0);
+
+    Iterator input(&src, window);
+    Iterator output(&dst, window);
+
+    execute_window_loop(window, [&](const Coordinates & id)
+    {
+        qint16x8_t in = vld1q_qs16(reinterpret_cast<const qint16_t *>(input.ptr()));
+        // Use saturated exp (vqexpq_qs16) and store the result at the current output position
+        vst1q_qs16(reinterpret_cast<qint16_t *>(output.ptr()), vqexpq_qs16(in, fixed_point_position));
+    },
+    input, output);
+
+    return dst;
+}
+} // namespace
+
+#ifndef DOXYGEN_SKIP_THIS
+BOOST_AUTO_TEST_SUITE(NEON)
+BOOST_AUTO_TEST_SUITE(FixedPoint)
+BOOST_AUTO_TEST_SUITE(QS16)
+BOOST_AUTO_TEST_SUITE(Exp)
+// Every shape from Small1DShape() is combined with fixed point positions 1..14 (the xrange end is exclusive).
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
+BOOST_DATA_TEST_CASE(RunSmall, Small1DShape() * boost::unit_test::data::xrange(1, 15), shape, fixed_point_position)
+{
+    // Compute function under test (NEON vqexpq_qs16 via compute_exp_qs16)
+    Tensor dst = compute_exp_qs16(shape, fixed_point_position);
+
+    // Compute reference for the same shape, data type and fixed point position
+    RawTensor ref_dst = Reference::compute_reference_fixed_point_operation(shape, DataType::QS16, DataType::QS16, FixedPointOp::EXP, fixed_point_position);
+
+    // Validate output against the reference using the file-level tolerance
+    validate(NEAccessor(dst), ref_dst, tolerance, 0);
+}
+
+BOOST_AUTO_TEST_SUITE_END() // Exp
+BOOST_AUTO_TEST_SUITE_END() // QS16
+BOOST_AUTO_TEST_SUITE_END() // FixedPoint
+BOOST_AUTO_TEST_SUITE_END() // NEON
+#endif // DOXYGEN_SKIP_THIS
diff --git a/tests/validation/NEON/Fixedpoint/Exp_QS8.cpp b/tests/validation/NEON/Fixedpoint/Exp_QS8.cpp
index 086314fdd3..f8fc0c2ea3 100644
--- a/tests/validation/NEON/Fixedpoint/Exp_QS8.cpp
+++ b/tests/validation/NEON/Fixedpoint/Exp_QS8.cpp
@@ -78,9 +78,9 @@ Tensor compute_exp_qs8(const TensorShape &shape, int fixed_point_position)
     BOOST_TEST(!src.info()->is_resizable());
     BOOST_TEST(!dst.info()->is_resizable());
 
-    // Fill tensors. Keep the range between (1, (1 << (fixed_point_position - 1))) so the result won't
+    // Fill tensors. Keep raw values within [-(1 << (fixed_point_position - 1)), 1 << (fixed_point_position - 1)] so the result won't
     // overflow. E.g. e^7 = 1096, which cannot be represented in QS8
-    std::uniform_int_distribution<> distribution(1, (1 << (fixed_point_position - 1)));
+    std::uniform_int_distribution<> distribution(-(1 << (fixed_point_position - 1)), (1 << (fixed_point_position - 1)));
     library->fill(NEAccessor(src), distribution, 0);
 
     Iterator input(&src, window);
diff --git a/tests/validation/NEON/Fixedpoint/Invsqrt_QS16.cpp b/tests/validation/NEON/Fixedpoint/Invsqrt_QS16.cpp
new file mode 100644
index 0000000000..9211ccfe50
--- /dev/null
+++ b/tests/validation/NEON/Fixedpoint/Invsqrt_QS16.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Globals.h"
+#include "NEON/Helper.h"
+#include "NEON/NEAccessor.h"
+#include "TensorLibrary.h"
+#include "TypePrinter.h"
+#include "Utils.h"
+#include "validation/Datasets.h"
+#include "validation/ReferenceCPP.h"
+#include "validation/Validation.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/NEFixedPoint.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include "boost_wrapper.h"
+
+#include <random>
+#include <string>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::neon;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+const float tolerance = 5.0f; /**< Tolerance value for comparing reference's output against implementation's output */
+
+/** Compute Neon inverse square root function for signed 16 bit fixed point.
+ *
+ * @param[in] shape                Shape of the input and output tensors.
+ * @param[in] fixed_point_position Fixed point position used for both tensors and forwarded to vqinvsqrtq_qs16().
+ * @return Computed output tensor.
+ */
+Tensor compute_invsqrt_qs16(const TensorShape &shape, int fixed_point_position)
+{
+    // Create source and destination QS16 tensors sharing the same shape and fixed point position
+    Tensor src = create_tensor(shape, DataType::QS16, 1, fixed_point_position);
+    Tensor dst = create_tensor(shape, DataType::QS16, 1, fixed_point_position);
+
+    constexpr unsigned int num_elems_processed_per_iteration = 8; // One qint16x8_t is loaded and stored per iteration
+    Window                 window                            = calculate_max_window(*src.info(), Steps(num_elems_processed_per_iteration));
+    AccessWindowHorizontal input_access(src.info(), 0, num_elems_processed_per_iteration);
+    AccessWindowHorizontal output_access(dst.info(), 0, num_elems_processed_per_iteration);
+
+    update_window_and_padding(window, input_access, output_access);
+    output_access.set_valid_region(window, src.info()->valid_region());
+
+    // Allocate tensors (done after the window/padding update above)
+    src.allocator()->allocate();
+    dst.allocator()->allocate();
+
+    BOOST_TEST(!src.info()->is_resizable());
+    BOOST_TEST(!dst.info()->is_resizable());
+
+    // Fill tensors. Keep the raw values within [1, 0x7FFF] (both bounds inclusive), i.e. strictly positive.
+    std::uniform_int_distribution<> distribution(1, 0x7FFF);
+    library->fill(NEAccessor(src), distribution, 0);
+
+    Iterator input(&src, window);
+    Iterator output(&dst, window);
+
+    execute_window_loop(window, [&](const Coordinates & id)
+    {
+        qint16x8_t in = vld1q_qs16(reinterpret_cast<const qint16_t *>(input.ptr()));
+        vst1q_qs16(reinterpret_cast<qint16_t *>(output.ptr()), vqinvsqrtq_qs16(in, fixed_point_position));
+    },
+    input, output);
+
+    return dst;
+}
+} // namespace
+
+#ifndef DOXYGEN_SKIP_THIS
+BOOST_AUTO_TEST_SUITE(NEON)
+BOOST_AUTO_TEST_SUITE(FixedPoint)
+BOOST_AUTO_TEST_SUITE(QS16)
+BOOST_AUTO_TEST_SUITE(Invsqrt)
+// Every shape from Small1DShape() is combined with fixed point positions 1..13 (the xrange end is exclusive).
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
+BOOST_DATA_TEST_CASE(RunSmall, Small1DShape() * boost::unit_test::data::xrange(1, 14), shape, fixed_point_position)
+{
+    // Compute function under test (NEON vqinvsqrtq_qs16 via compute_invsqrt_qs16)
+    Tensor dst = compute_invsqrt_qs16(shape, fixed_point_position);
+
+    // Compute reference for the same shape, data type and fixed point position
+    RawTensor ref_dst = Reference::compute_reference_fixed_point_operation(shape, DataType::QS16, DataType::QS16, FixedPointOp::INV_SQRT, fixed_point_position);
+
+    // Validate output against the reference using the file-level tolerance
+    validate(NEAccessor(dst), ref_dst, tolerance, 0);
+}
+
+BOOST_AUTO_TEST_SUITE_END() // Invsqrt
+BOOST_AUTO_TEST_SUITE_END() // QS16
+BOOST_AUTO_TEST_SUITE_END() // FixedPoint
+BOOST_AUTO_TEST_SUITE_END() // NEON
+#endif // DOXYGEN_SKIP_THIS
diff --git a/tests/validation/NEON/Fixedpoint/Invsqrt_QS8.cpp b/tests/validation/NEON/Fixedpoint/Invsqrt_QS8.cpp
index 3308f7d855..ab63cbe76f 100644
--- a/tests/validation/NEON/Fixedpoint/Invsqrt_QS8.cpp
+++ b/tests/validation/NEON/Fixedpoint/Invsqrt_QS8.cpp
@@ -49,7 +49,7 @@ using namespace arm_compute::test::validation;
 
 namespace
 {
-const float tolerance = 3; /**< Tolerance value for comparing reference's output against implementation's output */
+const float tolerance = 4.0f; /**< Tolerance value for comparing reference's output against implementation's output */
 
 /** Compute Neon inverse square root function for signed 8bit fixed point.
  *
@@ -78,9 +78,8 @@ Tensor compute_invsqrt_qs8(const TensorShape &shape, int fixed_point_position)
     BOOST_TEST(!src.info()->is_resizable());
     BOOST_TEST(!dst.info()->is_resizable());
 
-    // Fill tensors. Keep the range between (32, 127) so the result won't
-    // overflow. E.g. for Q2.5 invsqrt(0.001) = 31.6, which cannot be represented.
-    std::uniform_int_distribution<> distribution(32, 127);
+    // Fill tensors. Keep the range between [1, 127].
+    std::uniform_int_distribution<> distribution(1, 127);
     library->fill(NEAccessor(src), distribution, 0);
 
     Iterator input(&src, window);
@@ -89,7 +88,7 @@ Tensor compute_invsqrt_qs8(const TensorShape &shape, int fixed_point_position)
     execute_window_loop(window, [&](const Coordinates & id)
     {
         qint8x16_t in = vld1q_s8(reinterpret_cast<const qint8_t *>(input.ptr()));
-        vst1q_s8(reinterpret_cast<qint8_t *>(output.ptr()), vinvsqrtq_qs8(in, fixed_point_position));
+        vst1q_s8(reinterpret_cast<qint8_t *>(output.ptr()), vqinvsqrtq_qs8(in, fixed_point_position));
     },
     input, output);
 
diff --git a/tests/validation/NEON/Fixedpoint/Log_QS16.cpp b/tests/validation/NEON/Fixedpoint/Log_QS16.cpp
new file mode 100644
index 0000000000..c23d12725b
--- /dev/null
+++ b/tests/validation/NEON/Fixedpoint/Log_QS16.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Globals.h"
+#include "NEON/Helper.h"
+#include "NEON/NEAccessor.h"
+#include "TensorLibrary.h"
+#include "TypePrinter.h"
+#include "Utils.h"
+#include "validation/Datasets.h"
+#include "validation/ReferenceCPP.h"
+#include "validation/Validation.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/NEFixedPoint.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include "boost_wrapper.h"
+
+#include <random>
+#include <string>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::neon;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+const float tolerance = 7.0f; /**< Tolerance value for comparing reference's output against implementation's output */
+
+/** Compute Neon logarithm function for signed 16 bit fixed point.
+ *
+ * @param[in] shape                Shape of the input and output tensors.
+ * @param[in] fixed_point_position Fixed point position used for both tensors and forwarded to vlogq_qs16().
+ * @return Computed output tensor.
+ */
+Tensor compute_log_qs16(const TensorShape &shape, int fixed_point_position)
+{
+    // Create source and destination QS16 tensors sharing the same shape and fixed point position
+    Tensor src = create_tensor(shape, DataType::QS16, 1, fixed_point_position);
+    Tensor dst = create_tensor(shape, DataType::QS16, 1, fixed_point_position);
+
+    constexpr unsigned int num_elems_processed_per_iteration = 8; // One qint16x8_t is loaded and stored per iteration
+    Window                 window                            = calculate_max_window(*src.info(), Steps(num_elems_processed_per_iteration));
+    AccessWindowHorizontal input_access(src.info(), 0, num_elems_processed_per_iteration);
+    AccessWindowHorizontal output_access(dst.info(), 0, num_elems_processed_per_iteration);
+
+    update_window_and_padding(window, input_access, output_access);
+    output_access.set_valid_region(window, src.info()->valid_region());
+
+    // Allocate tensors (done after the window/padding update above)
+    src.allocator()->allocate();
+    dst.allocator()->allocate();
+
+    BOOST_TEST(!src.info()->is_resizable());
+    BOOST_TEST(!dst.info()->is_resizable());
+
+    // Fill tensors. Keep the raw values within [1 << (fixed_point_position - 1), 0x3FFF] (both bounds
+    // inclusive) so the result won't overflow.
+    std::uniform_int_distribution<> distribution((1 << (fixed_point_position - 1)), 0x3FFF);
+    library->fill(NEAccessor(src), distribution, 0);
+
+    Iterator input(&src, window);
+    Iterator output(&dst, window);
+
+    execute_window_loop(window, [&](const Coordinates & id)
+    {
+        qint16x8_t in = vld1q_qs16(reinterpret_cast<const qint16_t *>(input.ptr()));
+        vst1q_qs16(reinterpret_cast<qint16_t *>(output.ptr()), vlogq_qs16(in, fixed_point_position));
+    },
+    input, output);
+
+    return dst;
+}
+} // namespace
+
+#ifndef DOXYGEN_SKIP_THIS
+BOOST_AUTO_TEST_SUITE(NEON)
+BOOST_AUTO_TEST_SUITE(FixedPoint)
+BOOST_AUTO_TEST_SUITE(QS16)
+BOOST_AUTO_TEST_SUITE(Log)
+// Every shape from Small1DShape() is combined with fixed point positions 4..13 (the xrange end is exclusive).
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
+BOOST_DATA_TEST_CASE(RunSmall, Small1DShape() * boost::unit_test::data::xrange(4, 14), shape, fixed_point_position)
+{
+    // Compute function under test (NEON vlogq_qs16 via compute_log_qs16)
+    Tensor dst = compute_log_qs16(shape, fixed_point_position);
+
+    // Compute reference for the same shape, data type and fixed point position
+    RawTensor ref_dst = Reference::compute_reference_fixed_point_operation(shape, DataType::QS16, DataType::QS16, FixedPointOp::LOG, fixed_point_position);
+
+    // Validate output against the reference using the file-level tolerance
+    validate(NEAccessor(dst), ref_dst, tolerance, 0);
+}
+
+BOOST_AUTO_TEST_SUITE_END() // Log
+BOOST_AUTO_TEST_SUITE_END() // QS16
+BOOST_AUTO_TEST_SUITE_END() // FixedPoint
+BOOST_AUTO_TEST_SUITE_END() // NEON
+#endif // DOXYGEN_SKIP_THIS
diff --git a/tests/validation/NEON/Fixedpoint/Log_QS8.cpp b/tests/validation/NEON/Fixedpoint/Log_QS8.cpp
index 7b734c12b1..6789ec7264 100644
--- a/tests/validation/NEON/Fixedpoint/Log_QS8.cpp
+++ b/tests/validation/NEON/Fixedpoint/Log_QS8.cpp
@@ -78,9 +78,9 @@ Tensor compute_log_qs8(const TensorShape &shape, int fixed_point_position)
     BOOST_TEST(!src.info()->is_resizable());
     BOOST_TEST(!dst.info()->is_resizable());
 
-    // Fill tensors. Keep the range between ((1 << (fixed_point_position - 1), 63) so the result won't
+    // Fill tensors. Keep the range between [(1 << (fixed_point_position - 1)), 0x3F] so the result won't
     // overflow. E.g. for Q2.5 ln(0.001) = -6.9, which cannot be represented.
-    std::uniform_int_distribution<> distribution((1 << (fixed_point_position - 1)), 63);
+    std::uniform_int_distribution<> distribution((1 << (fixed_point_position - 1)), 0x3F);
     library->fill(NEAccessor(src), distribution, 0);
 
     Iterator input(&src, window);
diff --git a/tests/validation/NEON/Fixedpoint/Reciprocal_QS16.cpp b/tests/validation/NEON/Fixedpoint/Reciprocal_QS16.cpp
new file mode 100644
index 0000000000..c66cf0e6e6
--- /dev/null
+++ b/tests/validation/NEON/Fixedpoint/Reciprocal_QS16.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Globals.h"
+#include "NEON/Helper.h"
+#include "NEON/NEAccessor.h"
+#include "TensorLibrary.h"
+#include "TypePrinter.h"
+#include "Utils.h"
+#include "validation/Datasets.h"
+#include "validation/ReferenceCPP.h"
+#include "validation/Validation.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/NEON/NEFixedPoint.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include "boost_wrapper.h"
+
+#include <random>
+#include <string>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::neon;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+const float tolerance = 9.0f; /**< Tolerance value for comparing reference's output against implementation's output. */
+
+/** Compute Neon reciprocal function for signed 16 bit fixed point.
+ *
+ * @param[in] shape                Shape of the input and output tensors.
+ * @param[in] fixed_point_position Fixed point position used for both tensors and forwarded to vqrecipq_qs16().
+ * @return Computed output tensor.
+ */
+Tensor compute_reciprocal_qs16(const TensorShape &shape, int fixed_point_position)
+{
+    // Create source and destination QS16 tensors sharing the same shape and fixed point position
+    Tensor src = create_tensor(shape, DataType::QS16, 1, fixed_point_position);
+    Tensor dst = create_tensor(shape, DataType::QS16, 1, fixed_point_position);
+
+    constexpr unsigned int num_elems_processed_per_iteration = 8; // One qint16x8_t is loaded and stored per iteration
+    Window                 window                            = calculate_max_window(*src.info(), Steps(num_elems_processed_per_iteration));
+    AccessWindowHorizontal input_access(src.info(), 0, num_elems_processed_per_iteration);
+    AccessWindowHorizontal output_access(dst.info(), 0, num_elems_processed_per_iteration);
+
+    update_window_and_padding(window, input_access, output_access);
+    output_access.set_valid_region(window, src.info()->valid_region());
+
+    // Allocate tensors (done after the window/padding update above)
+    src.allocator()->allocate();
+    dst.allocator()->allocate();
+
+    BOOST_TEST(!src.info()->is_resizable());
+    BOOST_TEST(!dst.info()->is_resizable());
+
+    // Fill tensors. Keep the raw values within [15, 0x7FFF] (both bounds inclusive) so the result won't
+    // overflow.
+    std::uniform_int_distribution<> distribution(15, 0x7FFF);
+    library->fill(NEAccessor(src), distribution, 0);
+
+    Iterator input(&src, window);
+    Iterator output(&dst, window);
+
+    execute_window_loop(window, [&](const Coordinates & id)
+    {
+        qint16x8_t in = vld1q_qs16(reinterpret_cast<const qint16_t *>(input.ptr()));
+        vst1q_qs16(reinterpret_cast<qint16_t *>(output.ptr()), vqrecipq_qs16(in, fixed_point_position));
+    },
+    input, output);
+
+    return dst;
+}
+} // namespace
+
+#ifndef DOXYGEN_SKIP_THIS
+BOOST_AUTO_TEST_SUITE(NEON)
+BOOST_AUTO_TEST_SUITE(FixedPoint)
+BOOST_AUTO_TEST_SUITE(QS16)
+BOOST_AUTO_TEST_SUITE(Reciprocal)
+// Every shape from Small1DShape() is combined with fixed point positions 1..13 (the xrange end is exclusive).
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
+BOOST_DATA_TEST_CASE(RunSmall, Small1DShape() * boost::unit_test::data::xrange(1, 14), shape, fixed_point_position)
+{
+    // Compute function under test (NEON vqrecipq_qs16 via compute_reciprocal_qs16)
+    Tensor dst = compute_reciprocal_qs16(shape, fixed_point_position);
+
+    // Compute reference for the same shape, data type and fixed point position
+    RawTensor ref_dst = Reference::compute_reference_fixed_point_operation(shape, DataType::QS16, DataType::QS16, FixedPointOp::RECIPROCAL, fixed_point_position);
+
+    // Validate output against the reference using the file-level tolerance
+    validate(NEAccessor(dst), ref_dst, tolerance, 0);
+}
+
+BOOST_AUTO_TEST_SUITE_END() // Reciprocal
+BOOST_AUTO_TEST_SUITE_END() // QS16
+BOOST_AUTO_TEST_SUITE_END() // FixedPoint
+BOOST_AUTO_TEST_SUITE_END() // NEON
+#endif // DOXYGEN_SKIP_THIS
diff --git a/tests/validation/NEON/Fixedpoint/Reciprocal_QS8.cpp b/tests/validation/NEON/Fixedpoint/Reciprocal_QS8.cpp
index 4c1c782a18..f1f130a9cb 100644
--- a/tests/validation/NEON/Fixedpoint/Reciprocal_QS8.cpp
+++ b/tests/validation/NEON/Fixedpoint/Reciprocal_QS8.cpp
@@ -78,9 +78,9 @@ Tensor compute_reciprocal_qs8(const TensorShape &shape, int fixed_point_position
     BOOST_TEST(!src.info()->is_resizable());
     BOOST_TEST(!dst.info()->is_resizable());
 
-    // Fill tensors. Keep the range between (15, 100) so the result won't
+    // Fill tensors. Keep the range between [15, 0x7F] so the result won't
     // overflow. E.g. for Q2.5 reciprocal(0.001) = 1000, which cannot be represented.
-    std::uniform_int_distribution<> distribution(15, 100);
+    std::uniform_int_distribution<> distribution(15, 0x7F);
     library->fill(NEAccessor(src), distribution, 0);
 
     Iterator input(&src, window);
-- 
cgit v1.2.1