aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRamy Elgammal <ramy.elgammal@arm.com>2023-03-31 16:16:15 +0100
committerRamy Elgammal <ramy.elgammal@arm.com>2023-04-11 09:27:02 +0000
commit8b7f42aa0e76a65a4ffa46ee875df6a6220695ae (patch)
tree3bc006b26a659e5b404dbda48ecb259626d26fef
parentdcab9ca1b7914eb8cadadfaa5fb468e469dca5d9 (diff)
downloadComputeLibrary-8b7f42aa0e76a65a4ffa46ee875df6a6220695ae.tar.gz
Enable quantized data types for CpuElementwiseUnary on Armv7a
- Adding fallback functions neon_qasymm8_signed_elementwise_unary() and neon_qasymm8_elementwise_unary() - They would be called in case target is not aarch64 Resolves: COMPMID-5994 Change-Id: Id0db1e7cb0fe92f1eaef0b3a9ed2bea01b3f2a15 Signed-off-by: Ramy Elgammal <ramy.elgammal@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9416 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--Android.bp2
-rw-r--r--filelist.json10
-rw-r--r--src/cpu/kernels/CpuElementwiseUnaryKernel.cpp33
-rw-r--r--src/cpu/kernels/elementwise_unary/generic/neon/impl.cpp165
-rw-r--r--src/cpu/kernels/elementwise_unary/generic/neon/qasymm8.cpp42
-rw-r--r--src/cpu/kernels/elementwise_unary/generic/neon/qasymm8_signed.cpp42
-rw-r--r--src/cpu/kernels/elementwise_unary/list.h2
-rw-r--r--tests/validation/NEON/ElementwiseAbsoluteValue.cpp7
-rw-r--r--tests/validation/NEON/ElementwiseExpLayer.cpp8
-rw-r--r--tests/validation/NEON/ElementwiseLog.cpp9
-rw-r--r--tests/validation/NEON/ElementwiseNegation.cpp7
-rw-r--r--tests/validation/NEON/ElementwiseRound.cpp2
-rw-r--r--tests/validation/NEON/ElementwiseRsqrtLayer.cpp7
-rw-r--r--tests/validation/NEON/ElementwiseSin.cpp1
14 files changed, 320 insertions, 17 deletions
diff --git a/Android.bp b/Android.bp
index f315def2e6..a08bab6aac 100644
--- a/Android.bp
+++ b/Android.bp
@@ -487,6 +487,8 @@ cc_library_static {
"src/cpu/kernels/elementwise_unary/generic/neon/impl.cpp",
"src/cpu/kernels/elementwise_unary/generic/neon/integer.cpp",
"src/cpu/kernels/elementwise_unary/generic/neon/q8.cpp",
+ "src/cpu/kernels/elementwise_unary/generic/neon/qasymm8.cpp",
+ "src/cpu/kernels/elementwise_unary/generic/neon/qasymm8_signed.cpp",
"src/cpu/kernels/floor/neon/fp16.cpp",
"src/cpu/kernels/floor/neon/fp32.cpp",
"src/cpu/kernels/fuse_batch_normalization/generic/fp16.cpp",
diff --git a/filelist.json b/filelist.json
index c8e1ce0b9b..01659f55cf 100644
--- a/filelist.json
+++ b/filelist.json
@@ -1427,8 +1427,14 @@
"integer": ["src/cpu/kernels/elementwise_unary/generic/neon/integer.cpp"],
"fp32": ["src/cpu/kernels/elementwise_unary/generic/neon/fp32.cpp"],
"fp16": ["src/cpu/kernels/elementwise_unary/generic/neon/fp16.cpp"],
- "qasymm8": ["src/cpu/kernels/elementwise_unary/generic/neon/q8.cpp"],
- "qasymm8_signed": ["src/cpu/kernels/elementwise_unary/generic/neon/q8.cpp"]
+ "qasymm8": [
+ "src/cpu/kernels/elementwise_unary/generic/neon/q8.cpp",
+ "src/cpu/kernels/elementwise_unary/generic/neon/qasymm8.cpp"
+ ],
+ "qasymm8_signed": [
+ "src/cpu/kernels/elementwise_unary/generic/neon/q8.cpp",
+ "src/cpu/kernels/elementwise_unary/generic/neon/qasymm8_signed.cpp"
+ ]
},
"sve": {
"common": ["src/cpu/kernels/elementwise_unary/generic/sve/impl.cpp" ],
diff --git a/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp b/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp
index 0adf28af63..4b61ee3a1e 100644
--- a/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp
+++ b/src/cpu/kernels/CpuElementwiseUnaryKernel.cpp
@@ -51,18 +51,18 @@ std::unique_ptr<uint8_t[]> q8_prepare_lut(ElementWiseUnary op, const ITensorInfo
ARM_COMPUTE_ERROR_ON(!is_data_type_quantized(src->data_type()));
ARM_COMPUTE_ERROR_ON(src->element_size() != 1);
- auto lut = std::unique_ptr<uint8_t[]>(new uint8_t[256]);
+ auto lut = std::unique_ptr<uint8_t[]>(new uint8_t[256]);
const auto is_signed = src->data_type() == DataType::QASYMM8_SIGNED;
- const auto src_qi = src->quantization_info().uniform();
- const auto dst_qi = dst->quantization_info().uniform();
+ const auto src_qi = src->quantization_info().uniform();
+ const auto dst_qi = dst->quantization_info().uniform();
const auto dst_min_fp = (((is_signed) ? -128 : 0) - dst_qi.offset) * dst_qi.scale;
const auto dst_max_fp = (((is_signed) ? 127 : 255) - dst_qi.offset) * dst_qi.scale;
for(int i = 0; i < 256; ++i)
{
- const auto in = (is_signed) ? dequantize_qasymm8_signed(static_cast<int8_t>(i), src_qi) : dequantize_qasymm8(i, src_qi);
- float result = 0;
+ const auto in = (is_signed) ? dequantize_qasymm8_signed(static_cast<int8_t>(i), src_qi) : dequantize_qasymm8(i, src_qi);
+ float result = 0;
switch(op)
{
@@ -101,7 +101,7 @@ std::unique_ptr<uint8_t[]> q8_prepare_lut(ElementWiseUnary op, const ITensorInfo
result = utility::clamp(result, dst_min_fp, dst_max_fp);
const auto out = (is_signed) ? static_cast<uint8_t>(quantize_qasymm8_signed(result, dst_qi)) : quantize_qasymm8(result, dst_qi);
- lut[i] = out;
+ lut[i] = out;
}
return lut;
@@ -174,7 +174,7 @@ static const std::vector<CpuElementwiseUnaryKernel::ElementwiseUnaryKernel> avai
},
REGISTER_QASYMM8_SVE(sve_q8_elementwise_unary),
&q8_prepare_lut,
- },
+ },
{
"neon_q8_elementwise_unary",
[](const DataTypeISASelectorData & data)
@@ -184,6 +184,25 @@ static const std::vector<CpuElementwiseUnaryKernel::ElementwiseUnaryKernel> avai
REGISTER_QASYMM8_NEON(neon_q8_elementwise_unary),
&q8_prepare_lut,
},
+#else // __aarch64__
+ {
+ "neon_qasymm8_signed_elementwise_unary",
+ [](const DataTypeISASelectorData & data)
+ {
+ return data.dt == DataType::QASYMM8_SIGNED;
+ },
+ REGISTER_QASYMM8_NEON(neon_qasymm8_signed_elementwise_unary),
+ nullptr,
+ },
+ {
+ "neon_qasymm8_elementwise_unary",
+ [](const DataTypeISASelectorData & data)
+ {
+ return data.dt == DataType::QASYMM8;
+ },
+ REGISTER_QASYMM8_NEON(neon_qasymm8_elementwise_unary),
+ nullptr,
+ },
#endif // __aarch64__
};
diff --git a/src/cpu/kernels/elementwise_unary/generic/neon/impl.cpp b/src/cpu/kernels/elementwise_unary/generic/neon/impl.cpp
index 30caa4ebeb..2b23e46f40 100644
--- a/src/cpu/kernels/elementwise_unary/generic/neon/impl.cpp
+++ b/src/cpu/kernels/elementwise_unary/generic/neon/impl.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2022 Arm Limited.
+ * Copyright (c) 2018-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,6 +22,8 @@
* SOFTWARE.
*/
#include "src/cpu/kernels/elementwise_unary/generic/neon/impl.h"
+#include "arm_compute/core/Helpers.h"
+#include "src/core/NEON/NEAsymm.h"
namespace arm_compute
{
@@ -111,5 +113,166 @@ template void elementwise_op<__fp16>(const ITensor *in, ITensor *out, const Wind
template void elementwise_op<float>(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op);
template void elementwise_op<int32_t>(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op);
+template <>
+void elementwise_op<int8_t>(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op)
+{
+ const int window_step_x = 16;
+ const auto window_start_x = static_cast<int>(window.x().start());
+ const auto window_end_x = static_cast<int>(window.x().end());
+ const UniformQuantizationInfo qi_in = in->info()->quantization_info().uniform();
+ const UniformQuantizationInfo qi_out = out->info()->quantization_info().uniform();
+ const auto min_clamped_value = vdupq_n_f32((-128 - qi_out.offset) * qi_out.scale);
+ const auto max_clamped_value = vdupq_n_f32((127 - qi_out.offset) * qi_out.scale);
+ Window win = window;
+ win.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ Iterator input(in, win);
+ Iterator output(out, win);
+
+ execute_window_loop(win, [&](const Coordinates &)
+ {
+ int8x16_t vout;
+ auto output_ptr = reinterpret_cast<int8_t *>(output.ptr());
+ const auto input_ptr = reinterpret_cast<const int8_t *>(input.ptr());
+ const auto vconst_0_f32 = vdupq_n_f32(0);
+ auto clamped_value = (op == ElementWiseUnary::LOG) ? min_clamped_value : max_clamped_value;
+
+ int x = window_start_x;
+ for(; x <= window_end_x - window_step_x; x += window_step_x)
+ {
+ const auto vin = wrapper::vloadq(input_ptr + x);
+
+ // De-quantize
+ const auto vin_deq = vdequantize(vin, qi_in);
+
+ // Perform activation
+ float32x4x4_t vtmp_deq =
+ {
+ {
+ elementwise_op_imp<float>(op, vin_deq.val[0]),
+ elementwise_op_imp<float>(op, vin_deq.val[1]),
+ elementwise_op_imp<float>(op, vin_deq.val[2]),
+ elementwise_op_imp<float>(op, vin_deq.val[3]),
+ }
+ };
+
+ if((op == ElementWiseUnary::LOG) || (op == ElementWiseUnary::RSQRT))
+ {
+ vtmp_deq.val[0] = vbslq_f32(vcleq_f32(vin_deq.val[0], vconst_0_f32), clamped_value, vtmp_deq.val[0]);
+ vtmp_deq.val[1] = vbslq_f32(vcleq_f32(vin_deq.val[1], vconst_0_f32), clamped_value, vtmp_deq.val[1]);
+ vtmp_deq.val[2] = vbslq_f32(vcleq_f32(vin_deq.val[2], vconst_0_f32), clamped_value, vtmp_deq.val[2]);
+ vtmp_deq.val[3] = vbslq_f32(vcleq_f32(vin_deq.val[3], vconst_0_f32), clamped_value, vtmp_deq.val[3]);
+ }
+
+ // Re-quantize to new output space
+ vout = vquantize_signed(vtmp_deq, qi_out);
+ wrapper::vstore(output_ptr + x, vout);
+ }
+ for(; x < window_end_x; ++x)
+ {
+ qasymm8_signed_t in = *(reinterpret_cast<const qasymm8_signed_t *>(input_ptr + x));
+ qasymm8_signed_t tmp = 0;
+ float tmp_f = dequantize_qasymm8_signed(in, qi_in);
+ if(tmp_f <= 0.0)
+ {
+ if(op == ElementWiseUnary::LOG)
+ {
+ tmp_f = (-128 - qi_out.offset) * qi_out.scale;
+ }
+ else if(op == ElementWiseUnary::RSQRT)
+ {
+ tmp_f = (127 - qi_out.offset) * qi_out.scale;
+ }
+ }
+ else
+ {
+ tmp_f = elementwise_op_scalar_imp<float>(op, tmp_f);
+ }
+ tmp = quantize_qasymm8_signed(tmp_f, qi_out, RoundingPolicy::TO_ZERO); // Set rounding policy TO_ZERO to be compatible with vquantize_signed() used above that follow same policy for armv7a.
+ // For aarch64 LUT is used and rounding to nearest is used
+ *(output_ptr + x) = tmp;
+ }
+ },
+ input, output);
+}
+template <>
+void elementwise_op<uint8_t>(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op)
+{
+ const int window_step_x = 16;
+ const auto window_start_x = static_cast<int>(window.x().start());
+ const auto window_end_x = static_cast<int>(window.x().end());
+ const UniformQuantizationInfo qi_in = in->info()->quantization_info().uniform();
+ const UniformQuantizationInfo qi_out = out->info()->quantization_info().uniform();
+ const auto vconst_0_f32 = vdupq_n_f32(0);
+ const auto min_clamped_value = vdupq_n_f32((0 - qi_out.offset) * qi_out.scale);
+ const auto max_clamped_value = vdupq_n_f32((255 - qi_out.offset) * qi_out.scale);
+ Window win = window;
+ win.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ Iterator input(in, win);
+ Iterator output(out, win);
+
+ execute_window_loop(win, [&](const Coordinates &)
+ {
+ uint8x16_t vout;
+ auto clamped_value = (op == ElementWiseUnary::LOG) ? min_clamped_value : max_clamped_value;
+ auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr());
+ const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
+ int x = window_start_x;
+ for(; x <= window_end_x - window_step_x; x += window_step_x)
+ {
+ const auto vin = wrapper::vloadq(input_ptr + x);
+
+ // De-quantize
+ const auto vin_deq = vdequantize(vin, qi_in);
+
+ // Perform activation
+ float32x4x4_t vtmp_deq =
+ {
+ {
+ elementwise_op_imp<float>(op, vin_deq.val[0]),
+ elementwise_op_imp<float>(op, vin_deq.val[1]),
+ elementwise_op_imp<float>(op, vin_deq.val[2]),
+ elementwise_op_imp<float>(op, vin_deq.val[3]),
+ }
+ };
+ if((op == ElementWiseUnary::LOG) || (op == ElementWiseUnary::RSQRT))
+ {
+ vtmp_deq.val[0] = vbslq_f32(vcleq_f32(vin_deq.val[0], vconst_0_f32), clamped_value, vtmp_deq.val[0]);
+ vtmp_deq.val[1] = vbslq_f32(vcleq_f32(vin_deq.val[1], vconst_0_f32), clamped_value, vtmp_deq.val[1]);
+ vtmp_deq.val[2] = vbslq_f32(vcleq_f32(vin_deq.val[2], vconst_0_f32), clamped_value, vtmp_deq.val[2]);
+ vtmp_deq.val[3] = vbslq_f32(vcleq_f32(vin_deq.val[3], vconst_0_f32), clamped_value, vtmp_deq.val[3]);
+ }
+
+ // Re-quantize to new output space
+ vout = vquantize(vtmp_deq, qi_out);
+ wrapper::vstore(output_ptr + x, vout);
+ }
+ for(; x < window_end_x; ++x)
+ {
+ qasymm8_t in = *(reinterpret_cast<const qasymm8_t *>(input_ptr + x));
+ qasymm8_t tmp = 0;
+ float tmp_f = dequantize_qasymm8(in, qi_in);
+ if(tmp_f <= 0.0)
+ {
+ if(op == ElementWiseUnary::LOG)
+ {
+ tmp_f = (0 - qi_out.offset) * qi_out.scale;
+ }
+ else if(op == ElementWiseUnary::RSQRT)
+ {
+ tmp_f = (255 - qi_out.offset) * qi_out.scale;
+ }
+ }
+ else
+ {
+ tmp_f = elementwise_op_scalar_imp<float>(op, tmp_f);
+ }
+ tmp = quantize_qasymm8(tmp_f, qi_out, RoundingPolicy::TO_ZERO);
+ *(output_ptr + x) = tmp;
+ }
+ },
+ input, output);
+}
} // namespace cpu
} // namespace arm_compute
diff --git a/src/cpu/kernels/elementwise_unary/generic/neon/qasymm8.cpp b/src/cpu/kernels/elementwise_unary/generic/neon/qasymm8.cpp
new file mode 100644
index 0000000000..d987f7747b
--- /dev/null
+++ b/src/cpu/kernels/elementwise_unary/generic/neon/qasymm8.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/Window.h"
+#include "src/cpu/kernels/elementwise_unary/generic/neon/impl.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+#ifndef __aarch64__
+// Fallback function to be used for armv7a, for aarch64 LUT is used
+void neon_qasymm8_elementwise_unary(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op, const uint8_t *lut)
+{
+ ARM_COMPUTE_UNUSED(lut);
+ return elementwise_op<uint8_t>(in, out, window, op);
+}
+#endif // #ifndef __aarch64__
+
+} // namespace cpu
+} // namespace arm_compute
diff --git a/src/cpu/kernels/elementwise_unary/generic/neon/qasymm8_signed.cpp b/src/cpu/kernels/elementwise_unary/generic/neon/qasymm8_signed.cpp
new file mode 100644
index 0000000000..e00970a1e0
--- /dev/null
+++ b/src/cpu/kernels/elementwise_unary/generic/neon/qasymm8_signed.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/Window.h"
+#include "src/cpu/kernels/elementwise_unary/generic/neon/impl.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+#ifndef __aarch64__
+// Fallback function to be used for armv7a, for aarch64 LUT is used
+void neon_qasymm8_signed_elementwise_unary(const ITensor *in, ITensor *out, const Window &window, ElementWiseUnary op, const uint8_t *lut)
+{
+ ARM_COMPUTE_UNUSED(lut);
+ return elementwise_op<int8_t>(in, out, window, op);
+}
+#endif // #ifndef __aarch64__
+
+} // namespace cpu
+} // namespace arm_compute
diff --git a/src/cpu/kernels/elementwise_unary/list.h b/src/cpu/kernels/elementwise_unary/list.h
index 04c3bb6bcb..c1cfbb8a3a 100644
--- a/src/cpu/kernels/elementwise_unary/list.h
+++ b/src/cpu/kernels/elementwise_unary/list.h
@@ -42,6 +42,8 @@ DECLARE_ELEMETWISE_UNARY_KERNEL(neon_fp32_elementwise_unary);
DECLARE_ELEMETWISE_UNARY_KERNEL(neon_fp16_elementwise_unary);
DECLARE_ELEMETWISE_UNARY_KERNEL(neon_s32_elementwise_unary);
DECLARE_ELEMETWISE_UNARY_KERNEL(neon_q8_elementwise_unary);
+DECLARE_ELEMETWISE_UNARY_KERNEL(neon_qasymm8_signed_elementwise_unary);
+DECLARE_ELEMETWISE_UNARY_KERNEL(neon_qasymm8_elementwise_unary);
#undef DECLARE_ELEMETWISE_UNARY_KERNEL
diff --git a/tests/validation/NEON/ElementwiseAbsoluteValue.cpp b/tests/validation/NEON/ElementwiseAbsoluteValue.cpp
index 7f6a6a5bb2..0667ac73f9 100644
--- a/tests/validation/NEON/ElementwiseAbsoluteValue.cpp
+++ b/tests/validation/NEON/ElementwiseAbsoluteValue.cpp
@@ -46,8 +46,13 @@ RelativeTolerance<float> tolerance_fp32(0.000001f);
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
RelativeTolerance<float> tolerance_fp16(0.01f);
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#if defined(__aarch64__)
constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0);
constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(0);
+#else // #if !defined(__aarch64__)
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); // There is difference of 1, because quantizing in reference uses round policy "TO_NEAREST_UP", where the armv7a neon kernel uses "TO_ZERO"
+constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1);
+#endif // #if !defined(__aarch64__)
} // namespace
TEST_SUITE(NEON)
@@ -136,8 +141,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEAbsLayerQuantizedFixture<int8_t>, framework::
validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
}
TEST_SUITE_END() // QASYMM8_SIGNED
-
TEST_SUITE_END() // Quantized
+
TEST_SUITE_END() // AbsLayer
TEST_SUITE_END() // Neon
} // namespace validation
diff --git a/tests/validation/NEON/ElementwiseExpLayer.cpp b/tests/validation/NEON/ElementwiseExpLayer.cpp
index e8940c5385..31cd78626f 100644
--- a/tests/validation/NEON/ElementwiseExpLayer.cpp
+++ b/tests/validation/NEON/ElementwiseExpLayer.cpp
@@ -46,8 +46,15 @@ RelativeTolerance<float> tolerance_fp32(0.000001f);
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
RelativeTolerance<float> tolerance_fp16(0.01f);
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#if defined(__aarch64__)
constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0);
constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(0);
+#else // #if !defined(__aarch64__)
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); // There is difference of 1, because quantizing in reference uses round policy "TO_NEAREST_UP", where the armv7a neon kernel uses "TO_ZERO"
+constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1);
+#endif // #if !defined(__aarch64__)
+
} // namespace
TEST_SUITE(NEON)
TEST_SUITE(ExpLayer)
@@ -111,7 +118,6 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEExpLayerQuantizedFixture<int8_t>, framework::
validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
}
TEST_SUITE_END() // QASYMM8_SIGNED
-
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // ExpLayer
diff --git a/tests/validation/NEON/ElementwiseLog.cpp b/tests/validation/NEON/ElementwiseLog.cpp
index 49a88ced1c..320ece2e73 100644
--- a/tests/validation/NEON/ElementwiseLog.cpp
+++ b/tests/validation/NEON/ElementwiseLog.cpp
@@ -46,8 +46,15 @@ RelativeTolerance<float> tolerance_fp32(0.000001f);
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
RelativeTolerance<float> tolerance_fp16(0.01f);
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+#if defined(__aarch64__)
constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0);
constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(0);
+#else // #if !defined(__aarch64__)
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); // There is difference of 1, because quantizing in reference uses round policy "TO_NEAREST_UP", where the armv7a neon kernel uses "TO_ZERO"
+constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1);
+#endif // #if !defined(__aarch64__)
+
} // namespace
TEST_SUITE(NEON)
TEST_SUITE(LogLayer)
@@ -118,8 +125,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NELogLayerQuantizedFixture<int8_t>, framework::
validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
}
TEST_SUITE_END() // QASYMM8_SIGNED
-
TEST_SUITE_END() // Quantized
+
TEST_SUITE_END() // LogLayer
TEST_SUITE_END() // Neon
} // namespace validation
diff --git a/tests/validation/NEON/ElementwiseNegation.cpp b/tests/validation/NEON/ElementwiseNegation.cpp
index 038058c70c..5b8ae8fc64 100644
--- a/tests/validation/NEON/ElementwiseNegation.cpp
+++ b/tests/validation/NEON/ElementwiseNegation.cpp
@@ -46,8 +46,13 @@ RelativeTolerance<float> tolerance_fp32(0.000001f);
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
RelativeTolerance<float> tolerance_fp16(0.01f);
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#if defined(__aarch64__)
constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0);
constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(0);
+#else // #if !defined(__aarch64__)
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); // There is difference of 1, because quantizing in reference uses round policy "TO_NEAREST_UP", where the armv7a neon kernel uses "TO_ZERO"
+constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1);
+#endif // #if !defined(__aarch64__)
} // namespace
TEST_SUITE(NEON)
TEST_SUITE(NegLayer)
@@ -142,8 +147,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NENegLayerQuantizedFixture<int8_t>, framework::
validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
}
TEST_SUITE_END() // QASYMM8_SIGNED
-
TEST_SUITE_END() // Quantized
+
TEST_SUITE_END() // NegLayer
TEST_SUITE_END() // Neon
} // namespace validation
diff --git a/tests/validation/NEON/ElementwiseRound.cpp b/tests/validation/NEON/ElementwiseRound.cpp
index a6ff47c830..620618cb0b 100644
--- a/tests/validation/NEON/ElementwiseRound.cpp
+++ b/tests/validation/NEON/ElementwiseRound.cpp
@@ -114,8 +114,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NERoundLayerQuantizedFixture<int8_t>, framework
validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
}
TEST_SUITE_END() // QASYMM8_SIGNED
-
TEST_SUITE_END() // Quantized
+
TEST_SUITE_END() // RoundLayer
TEST_SUITE_END() // Neon
} // namespace validation
diff --git a/tests/validation/NEON/ElementwiseRsqrtLayer.cpp b/tests/validation/NEON/ElementwiseRsqrtLayer.cpp
index 1d291ac6dc..80788c893f 100644
--- a/tests/validation/NEON/ElementwiseRsqrtLayer.cpp
+++ b/tests/validation/NEON/ElementwiseRsqrtLayer.cpp
@@ -46,8 +46,13 @@ RelativeTolerance<float> tolerance_fp32(0.000001f);
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
RelativeTolerance<float> tolerance_fp16(0.01f);
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#if defined(__aarch64__)
constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0);
constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(0);
+#else // #if !defined(__aarch64__)
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); // There is difference of 1, because quantizing in reference uses round policy "TO_NEAREST_UP", where the armv7a neon kernel uses "TO_ZERO"
+constexpr AbsoluteTolerance<int8_t> tolerance_qasymm8_signed(1);
+#endif // #if !defined(__aarch64__)
} // namespace
TEST_SUITE(NEON)
TEST_SUITE(RsqrtLayer)
@@ -131,8 +136,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NERsqrtLayerQuantizedFixture<int8_t>, framework
validate(Accessor(_target), _reference, tolerance_qasymm8_signed);
}
TEST_SUITE_END() // QASYMM8_SIGNED
-
TEST_SUITE_END() // Quantized
+
TEST_SUITE_END() // RsqrtLayer
TEST_SUITE_END() // Neon
} // namespace validation
diff --git a/tests/validation/NEON/ElementwiseSin.cpp b/tests/validation/NEON/ElementwiseSin.cpp
index 76f4c50b46..9c2d7ae268 100644
--- a/tests/validation/NEON/ElementwiseSin.cpp
+++ b/tests/validation/NEON/ElementwiseSin.cpp
@@ -120,7 +120,6 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NESinLayerQuantizedFixture<int8_t>, framework::
TEST_SUITE_END() // QASYMM8_SIGNED
TEST_SUITE_END() // Quantized
-
TEST_SUITE_END() // SinLayer
TEST_SUITE_END() // Neon
} // namespace validation