about | summary | refs | log | tree | commit | diff
path: root/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
diff options
context:
space:
mode:
author: SiCong Li <sicong.li@arm.com> 2020-06-09 18:37:19 +0100
committer: SiCong Li <sicong.li@arm.com> 2020-06-11 09:12:49 +0000
commit: eb727f4f7afaa0a5ac5c630277086d912b128e55 (patch)
tree: 77940b359b692e04ec6586d724820e275423bb3d /src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
parent: a32e2aef81cfcba9f5ae1770ceeb4a8d26fdc1f4 (diff)
download: ComputeLibrary-eb727f4f7afaa0a5ac5c630277086d912b128e55.tar.gz
COMPMID-3523: Fix NEDepthConvertLayerKernel f16 casting
* Force F16->QASYMM8 AND F16->QASYMM8_SIGNED saturation
* Fix S32->F16 casting

Signed-off-by: SiCong Li <sicong.li@arm.com>
Change-Id: Ic4be3865794947c577897cd9ad8554be4ebfe9bc
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3324
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp')
-rw-r--r-- src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp 10
1 files changed, 5 insertions, 5 deletions
diff --git a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
index 79dc2cb585..cbb746cbf9 100644
--- a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
@@ -849,7 +849,7 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info
const float16_t scale_s = 1 << _shift;
const float16x8_t scale = vdupq_n_f16(scale_s);
- /* Up-conversion F16 -> QASYMM8_SIGNED */
+ /* Down-conversion F16 -> QASYMM8_SIGNED (Always saturating) */
execute_window_loop(win, [&](const Coordinates &)
{
const auto input_ptr = reinterpret_cast<const float16_t *>(input.ptr());
@@ -872,7 +872,7 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info
// Compute left-over elements
for(; x < window_end_x; ++x)
{
- *(output_ptr + x) = static_cast<int8_t>(*(input_ptr + x) * scale_s);
+ *(output_ptr + x) = utils::cast::saturate_cast<int8_t>(*(input_ptr + x) * scale_s);
}
},
input, output);
@@ -884,7 +884,7 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info
const float16_t scale_s = 1 << _shift;
const float16x8_t scale = vdupq_n_f16(scale_s);
- /* Up-conversion F16 -> U8 */
+ /* Down-conversion F16 -> QASYMM8/U8 (Always saturating) */
execute_window_loop(win, [&](const Coordinates &)
{
const auto input_ptr = reinterpret_cast<const float16_t *>(input.ptr());
@@ -907,7 +907,7 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info
// Compute left-over elements
for(; x < window_end_x; ++x)
{
- *(output_ptr + x) = static_cast<uint8_t>(*(input_ptr + x) * scale_s);
+ *(output_ptr + x) = utils::cast::saturate_cast<uint8_t>(*(input_ptr + x) * scale_s);
}
},
@@ -1215,7 +1215,7 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info
// Compute left-over elements
for(; x < window_end_x; ++x)
{
- *(output_ptr + x) = static_cast<int8_t>(*(input_ptr + x) * scale_s);
+ *(output_ptr + x) = static_cast<float16_t>(*(input_ptr + x) * scale_s);
}
},
input, output);