From eb727f4f7afaa0a5ac5c630277086d912b128e55 Mon Sep 17 00:00:00 2001 From: SiCong Li Date: Tue, 9 Jun 2020 18:37:19 +0100 Subject: COMPMID-3523: Fix NEDepthConvertLayerKernel f16 casting * Force F16->QASYMM8 AND F16->QASYMM8_SIGNED saturation * Fix S32->F16 casting Signed-off-by: SiCong Li Change-Id: Ic4be3865794947c577897cd9ad8554be4ebfe9bc Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3324 Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp') diff --git a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp index 79dc2cb585..cbb746cbf9 100644 --- a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp @@ -849,7 +849,7 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info const float16_t scale_s = 1 << _shift; const float16x8_t scale = vdupq_n_f16(scale_s); - /* Up-conversion F16 -> QASYMM8_SIGNED */ + /* Down-conversion F16 -> QASYMM8_SIGNED (Always saturating) */ execute_window_loop(win, [&](const Coordinates &) { const auto input_ptr = reinterpret_cast<const float16_t *>(input.ptr()); @@ -872,7 +872,7 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info // Compute left-over elements for(; x < window_end_x; ++x) { - *(output_ptr + x) = static_cast<int8_t>(*(input_ptr + x) * scale_s); + *(output_ptr + x) = utils::cast::saturate_cast<int8_t>(*(input_ptr + x) * scale_s); } }, input, output); @@ -884,7 +884,7 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info const float16_t scale_s = 1 << _shift; const float16x8_t scale = vdupq_n_f16(scale_s); - /* Up-conversion F16 -> U8 */ + /* Down-conversion F16 -> QASYMM8/U8 (Always saturating) */ execute_window_loop(win, 
[&](const Coordinates &) { const auto input_ptr = reinterpret_cast<const float16_t *>(input.ptr()); @@ -907,7 +907,7 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info // Compute left-over elements for(; x < window_end_x; ++x) { - *(output_ptr + x) = static_cast<uint8_t>(*(input_ptr + x) * scale_s); + *(output_ptr + x) = utils::cast::saturate_cast<uint8_t>(*(input_ptr + x) * scale_s); } }, @@ -1215,7 +1215,7 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info // Compute left-over elements for(; x < window_end_x; ++x) { - *(output_ptr + x) = static_cast(*(input_ptr + x) * scale_s); + *(output_ptr + x) = static_cast(*(input_ptr + x) * scale_s); } }, input, output); -- cgit v1.2.1