diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-12-10 13:33:18 +0000 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-12-11 18:04:21 +0000 |
commit | 338435607fc5291ff991f38aa15d4df5097d1a2d (patch) | |
tree | a0c89e9d5fd78e994594b27978b0c8b285d6da4b /src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp | |
parent | 453f9d9e9be824aa0e4f80abc9a051d8038b0e56 (diff) | |
download | ComputeLibrary-338435607fc5291ff991f38aa15d4df5097d1a2d.tar.gz |
COMPMID-2754: Add support for QASYMM8_SIGNED in NE kernels/functions.
Kernels/Functions extended support:
- NEBatchToSpaceLayerKernel/NEBatchToSpaceLayer
- NEChannelShuffleLayerKernel/NEChannelShuffleLayer
- NECol2ImKernel/NECol2Im
- NEConvertFullyConnectedWeightsKernel/NEConvertFullyConnectedWeights
- NECopyKernel/NECopy
- NEConvolutionLayerReshapeWeights
- NEDepthToSpaceLayerKernel/NEDepthToSpaceLayer
- NEFlattenLayerKernel/NEFlattenLayer
- NEFillBorderKernel
- NEFullyConnectedLayerReshapeWeights
- NEGatherKernel/NEGather
- NEGEMMInterleave4x4Kernel
- NEGEMMTranspose1xWKernel
- NEIm2ColKernel/NEIm2Col
- NEMemsetKernel
- NEPadLayerKernel/NEPadLayer
- NEPermuteKernel/NEPermute
- NEReverseKernel/NEReverse
- NEReorgLayerKernel/NEReorgLayer
- NEReshapeLayerKernel/NEReshapeLayer
- NESplit
- NESlice
- NEStridedSliceKernel/NEStridedSlice
- NESpaceToBatchLayerKernel/NESpaceToBatchLayer
- NESpaceToDepthLayerKernel/NESpaceToDepthLayer
- NEStackLayerKernel/NEStackLayer
- NETileKernel/NETile
- NETransposeKernel/NETranspose
- NEWidthConcatenateLayerKernel/NEWidthConcatenateLayer
- NEHeightConcatenateLayerKernel/NEHeightConcatenateLayer
- NEDepthConcatenateLayerKernel/NEDepthConcatenateLayer
- NEBatchConcatenateLayerKernel/NEBatchConcatenateLayer
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: Ia070332ad4c4dbced2541dc46f7f2f3a86833b65
Reviewed-on: https://review.mlplatform.org/c/2442
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp')
-rw-r--r-- | src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp | 15 |
1 files changed, 14 insertions, 1 deletions
diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp index 4377006f28..56ab11415c 100644 --- a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp @@ -66,6 +66,16 @@ void depth_concat(const ITensor *in, ITensor *out, int depth_offset, const Windo }, input, output); } + else if(dt == DataType::QASYMM8_SIGNED && input_qinfo != output_qinfo) + { + execute_window_loop(window, [&](const Coordinates &) + { + const auto in_ptr = reinterpret_cast<const int8_t *>(input_ptr + input.offset()); + const auto out_ptr = reinterpret_cast<int8_t *>(output_ptr + output.offset()); + vst1q_s8(out_ptr, vquantize_signed(vdequantize(vld1q_s8(in_ptr), input_qinfo), output_qinfo)); + }, + input, output); + } else { execute_window_loop(window, [&](const Coordinates &) @@ -102,7 +112,7 @@ Status validate_arguments(const ITensorInfo *input, unsigned int depth_offset, c { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimX) != output->dimension(Window::DimX)); @@ -134,6 +144,9 @@ void NEDepthConcatenateLayerKernel::configure(const ITensor *input, unsigned int case DataType::QASYMM8: _func = &depth_concat<uint8_t>; break; + case DataType::QASYMM8_SIGNED: + _func = &depth_concat<int8_t>; + break; case DataType::F16: _func = &depth_concat<uint16_t>; break; |