aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/NEDepthConvertKernel.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/kernels/NEDepthConvertKernel.cpp')
-rw-r--r--src/core/NEON/kernels/NEDepthConvertKernel.cpp83
1 files changed, 75 insertions, 8 deletions
diff --git a/src/core/NEON/kernels/NEDepthConvertKernel.cpp b/src/core/NEON/kernels/NEDepthConvertKernel.cpp
index 56612a7703..3c1a94df74 100644
--- a/src/core/NEON/kernels/NEDepthConvertKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthConvertKernel.cpp
@@ -46,27 +46,35 @@ NEDepthConvertKernel::NEDepthConvertKernel()
void NEDepthConvertKernel::configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QS8, DataType::S16, DataType::U16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QS8, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QS8, DataType::S16, DataType::U16, DataType::QS16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QS8, DataType::S16, DataType::U16, DataType::QS16, DataType::U32, DataType::S32, DataType::F32);
ARM_COMPUTE_ERROR_ON(shift >= 8);
ARM_COMPUTE_ERROR_ON(input == output);
ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == output->info()->data_type(), "Input and output data_types must be different");
- ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS8 && (output->info()->data_type() != DataType::F32),
- "Only data_types supported [in] QS8 -> [out] F32");
-
ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U8 && (output->info()->data_type() != DataType::S16 && output->info()->data_type() != DataType::U16
&& output->info()->data_type() != DataType::S32),
"Only data_types supported [in] U8 -> [out] U16, S16, S32");
+ ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS8 && output->info()->data_type() != DataType::F32,
+ "Only data_types supported [in] QS8 -> [out] F32");
+
ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U32),
"Only data_types supported [in] U16 -> [out] U8, U32");
ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::S16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::S32),
"Only data_types supported [in] S16 -> [out] U8, S32");
- ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::F32 && (output->info()->data_type() != DataType::QS8),
- "Only data_types supported [in] F32 -> [out] QS8");
+ ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS16 && output->info()->data_type() != DataType::F32,
+ "Only data_types supported [in] QS16 -> [out] F32");
+
+ ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::F32 && (output->info()->data_type() != DataType::QS8 && output->info()->data_type() != DataType::QS16),
+ "Only data_types supported [in] F32 -> [out] QS8, QS16");
+
+ // Auto initialize output shape if not initialized (We can only auto-configure the shape, datatype must be given)
+ set_shape_if_empty(*output->info(), input->info()->tensor_shape());
+
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
_policy = policy;
_shift = shift;
@@ -346,6 +354,38 @@ void NEDepthConvertKernel::run(const Window &window)
}
break;
}
+ case DataType::QS16:
+ {
+ const int fixed_point_position = _input->info()->fixed_point_position();
+
+ switch(_output->info()->data_type())
+ {
+ case DataType::F32:
+ {
+ /* Up-conversion QS16 -> F32 */
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ const int16x8x2_t texels =
+ {
+ {
+ vld1q_s16(reinterpret_cast<qint16_t *>(input.ptr())),
+ vld1q_s16(reinterpret_cast<qint16_t *>(input.ptr()) + 8)
+ }
+ };
+
+ vst1q_f32(reinterpret_cast<float *>(output.ptr()), vcvt_f32_qs16(vget_low_s16(texels.val[0]), fixed_point_position));
+ vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 4, vcvt_f32_qs16(vget_high_s16(texels.val[0]), fixed_point_position));
+ vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 8, vcvt_f32_qs16(vget_low_s16(texels.val[1]), fixed_point_position));
+ vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 12, vcvt_f32_qs16(vget_high_s16(texels.val[1]), fixed_point_position));
+ },
+ input, output);
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Output data type not supported");
+ }
+ break;
+ }
case DataType::F32:
{
switch(_output->info()->data_type())
@@ -366,13 +406,40 @@ void NEDepthConvertKernel::run(const Window &window)
}
};
- const qint8x16_t texels_s8 = vcvtq_qs8_f32(texels_f32, fixed_point_position);
+ const qint8x16_t texels_s8 = vqcvtq_qs8_f32(texels_f32, fixed_point_position);
vst1q_s8(reinterpret_cast<int8_t *>(output.ptr()), texels_s8);
},
input, output);
break;
}
+ case DataType::QS16:
+ {
+ const int fixed_point_position = _output->info()->fixed_point_position();
+ /* Down-conversion F32 -> QS16 */
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ const float32x4x2_t texels_f32_1 =
+ {
+ {
+ vld1q_f32(reinterpret_cast<const float *>(input.ptr())),
+ vld1q_f32(reinterpret_cast<const float *>(input.ptr()) + 4),
+ }
+ };
+ const float32x4x2_t texels_f32_2 =
+ {
+ {
+ vld1q_f32(reinterpret_cast<const float *>(input.ptr()) + 8),
+ vld1q_f32(reinterpret_cast<const float *>(input.ptr()) + 12)
+ }
+ };
+
+ vst1q_s16(reinterpret_cast<qint16_t *>(output.ptr()), vqcvtq_qs16_f32(texels_f32_1, fixed_point_position));
+ vst1q_s16(reinterpret_cast<qint16_t *>(output.ptr()) + 8, vqcvtq_qs16_f32(texels_f32_2, fixed_point_position));
+ },
+ input, output);
+ break;
+ }
default:
ARM_COMPUTE_ERROR("Output data type not supported");
}