diff options
author | SiCong Li <sicong.li@arm.com> | 2020-06-09 18:37:19 +0100 |
---|---|---|
committer | SiCong Li <sicong.li@arm.com> | 2020-06-11 09:12:49 +0000 |
commit | eb727f4f7afaa0a5ac5c630277086d912b128e55 (patch) | |
tree | 77940b359b692e04ec6586d724820e275423bb3d /src/core | |
parent | a32e2aef81cfcba9f5ae1770ceeb4a8d26fdc1f4 (diff) | |
download | ComputeLibrary-eb727f4f7afaa0a5ac5c630277086d912b128e55.tar.gz |
COMPMID-3523: Fix NEDepthConvertLayerKernel f16 casting
* Force F16->QASYMM8 AND F16->QASYMM8_SIGNED saturation
* Fix S32->F16 casting
Signed-off-by: SiCong Li <sicong.li@arm.com>
Change-Id: Ic4be3865794947c577897cd9ad8554be4ebfe9bc
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3324
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp | 10 |
1 files changed, 5 insertions, 5 deletions
diff --git a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
index 79dc2cb585..cbb746cbf9 100644
--- a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
@@ -849,7 +849,7 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info
     const float16_t scale_s = 1 << _shift;
     const float16x8_t scale = vdupq_n_f16(scale_s);
 
-    /* Up-conversion F16 -> QASYMM8_SIGNED */
+    /* Down-conversion F16 -> QASYMM8_SIGNED (Always saturating) */
     execute_window_loop(win, [&](const Coordinates &)
     {
         const auto input_ptr = reinterpret_cast<const float16_t *>(input.ptr());
@@ -872,7 +872,7 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info
         // Compute left-over elements
         for(; x < window_end_x; ++x)
         {
-            *(output_ptr + x) = static_cast<int8_t>(*(input_ptr + x) * scale_s);
+            *(output_ptr + x) = utils::cast::saturate_cast<int8_t>(*(input_ptr + x) * scale_s);
         }
     },
     input, output);
@@ -884,7 +884,7 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info
     const float16_t scale_s = 1 << _shift;
     const float16x8_t scale = vdupq_n_f16(scale_s);
 
-    /* Up-conversion F16 -> U8 */
+    /* Down-conversion F16 -> QASYMM8/U8 (Always saturating) */
     execute_window_loop(win, [&](const Coordinates &)
     {
         const auto input_ptr = reinterpret_cast<const float16_t *>(input.ptr());
@@ -907,7 +907,7 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info
         // Compute left-over elements
         for(; x < window_end_x; ++x)
         {
-            *(output_ptr + x) = static_cast<uint8_t>(*(input_ptr + x) * scale_s);
+            *(output_ptr + x) = utils::cast::saturate_cast<uint8_t>(*(input_ptr + x) * scale_s);
         }
     },
@@ -1215,7 +1215,7 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info
         // Compute left-over elements
         for(; x < window_end_x; ++x)
         {
-            *(output_ptr + x) = static_cast<int8_t>(*(input_ptr + x) * scale_s);
+            *(output_ptr + x) = static_cast<float16_t>(*(input_ptr + x) * scale_s);
         }
     },
     input, output);