From 338435607fc5291ff991f38aa15d4df5097d1a2d Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Tue, 10 Dec 2019 13:33:18 +0000
Subject: COMPMID-2754: Add support for QASYMM8_SIGNED in NE kernels/functions.

Kernels/Functions extended support:
- NEBatchToSpaceLayerKernel/NEBatchToSpaceLayer
- NEChannelShuffleLayerKernel/NEChannelShuffleLayer
- NECol2ImKernel/NECol2Im
- NEConvertFullyConnectedWeightsKernel/NEConvertFullyConnectedWeights
- NECopyKernel/NECopy
- NEConvolutionLayerReshapeWeights
- NEDepthToSpaceLayerKernel/NEDepthToSpaceLayer
- NEFlattenLayerKernel/NEFlattenLayer
- NEFillBorderKernel
- NEFullyConnectedLayerReshapeWeights
- NEGatherKernel/NEGather
- NEGEMMInterleave4x4Kernel
- NEGEMMTranspose1xWKernel
- NEIm2ColKernel/NEIm2Col
- NEMemsetKernel
- NEPadLayerKernel/NEPadLayer
- NEPermuteKernel/NEPermute
- NEReverseKernel/NEReverse
- NEReorgLayerKernel/NEReorgLayer
- NEReshapeLayerKernel/NEReshapeLayer
- NESplit
- NESlice
- NEStridedSliceKernel/NEStridedSlice
- NESpaceToBatchLayerKernel/NESpaceToBatchLayer
- NESpaceToDepthLayerKernel/NESpaceToDepthLayer
- NEStackLayerKernel/NEStackLayer
- NETileKernel/NETile
- NETransposeKernel/NETranspose
- NEWidthConcatenateLayerKernel/NEWidthConcatenateLayer
- NEHeightConcatenateLayerKernel/NEHeightConcatenateLayer
- NEDepthConcatenateLayerKernel/NEDepthConcatenateLayer
- NEBatchConcatenateLayerKernel/NEBatchConcatenateLayer

Signed-off-by: Georgios Pinitas
Change-Id: Ia070332ad4c4dbced2541dc46f7f2f3a86833b65
Reviewed-on: https://review.mlplatform.org/c/2442
Tested-by: Arm Jenkins
Reviewed-by: Michele Di Giorgio
Comments-Addressed: Arm Jenkins
---
 .../NEON/kernels/NEBatchConcatenateLayerKernel.h   |  5 +-
 .../core/NEON/kernels/NEBatchToSpaceLayerKernel.h  |  8 +--
 .../NEON/kernels/NEChannelShuffleLayerKernel.h     |  4 +-
 arm_compute/core/NEON/kernels/NECol2ImKernel.h     |  4 +-
 .../kernels/NEConvertFullyConnectedWeightsKernel.h |  5 +-
 arm_compute/core/NEON/kernels/NECopyKernel.h       |  4 +-
 .../NEON/kernels/NEDepthConcatenateLayerKernel.h   |  5 +-
 .../core/NEON/kernels/NEDepthToSpaceLayerKernel.h  |  5 +-
 arm_compute/core/NEON/kernels/NEFillBorderKernel.h |  3 +-
 .../core/NEON/kernels/NEFlattenLayerKernel.h       |  4 +-
 .../core/NEON/kernels/NEGEMMTranspose1xWKernel.h   |  5 +-
 arm_compute/core/NEON/kernels/NEGatherKernel.h     |  5 +-
 .../NEON/kernels/NEHeightConcatenateLayerKernel.h  |  5 +-
 arm_compute/core/NEON/kernels/NEIm2ColKernel.h     |  4 +-
 arm_compute/core/NEON/kernels/NEMemsetKernel.h     |  2 +-
 arm_compute/core/NEON/kernels/NEPadLayerKernel.h   |  4 +-
 arm_compute/core/NEON/kernels/NEPermuteKernel.h    |  5 +-
 arm_compute/core/NEON/kernels/NEReorgLayerKernel.h |  4 +-
 .../core/NEON/kernels/NEReshapeLayerKernel.h       |  5 +-
 arm_compute/core/NEON/kernels/NEReverseKernel.h    |  4 +-
 arm_compute/core/NEON/kernels/NESelectKernel.h     |  7 +--
 .../core/NEON/kernels/NESpaceToBatchLayerKernel.h  |  9 ++--
 .../core/NEON/kernels/NESpaceToDepthLayerKernel.h  |  4 +-
 arm_compute/core/NEON/kernels/NEStackLayerKernel.h |  4 +-
 .../core/NEON/kernels/NEStridedSliceKernel.h       |  6 +--
 arm_compute/core/NEON/kernels/NETileKernel.h       |  4 +-
 arm_compute/core/NEON/kernels/NETransposeKernel.h  |  6 +--
 .../NEON/kernels/NEWidthConcatenateLayerKernel.h   |  5 +-
 arm_compute/core/PixelValue.h                      |  5 +-
 .../runtime/NEON/functions/NEBatchToSpaceLayer.h   |  8 +--
 .../runtime/NEON/functions/NEChannelShuffleLayer.h |  4 +-
 arm_compute/runtime/NEON/functions/NECol2Im.h      |  4 +-
 .../runtime/NEON/functions/NEConcatenateLayer.h    |  4 +-
 .../functions/NEConvertFullyConnectedWeights.h     |  5 +-
 arm_compute/runtime/NEON/functions/NECopy.h        |  4 +-
 .../runtime/NEON/functions/NEDepthToSpaceLayer.h   |  5 +-
 arm_compute/runtime/NEON/functions/NEFill.h        |  2 +-
 arm_compute/runtime/NEON/functions/NEFillBorder.h  |  5 +-
 .../runtime/NEON/functions/NEFlattenLayer.h        |  4 +-
 .../runtime/NEON/functions/NEFullyConnectedLayer.h |  4 +-
 .../runtime/NEON/functions/NEGEMMTranspose1xW.h    |  5 +-
 arm_compute/runtime/NEON/functions/NEGather.h      |  6 +--
 arm_compute/runtime/NEON/functions/NEIm2Col.h      |  5 +-
 arm_compute/runtime/NEON/functions/NEPadLayer.h    |  8 +--
 arm_compute/runtime/NEON/functions/NEPermute.h     |  5 +-
 arm_compute/runtime/NEON/functions/NEReorgLayer.h  |  4 +-
 .../runtime/NEON/functions/NEReshapeLayer.h        |  5 +-
 arm_compute/runtime/NEON/functions/NEReverse.h     |  4 +-
 arm_compute/runtime/NEON/functions/NESelect.h      |  4 +-
 arm_compute/runtime/NEON/functions/NESlice.h       |  4 +-
 .../runtime/NEON/functions/NESpaceToBatchLayer.h   |  8 +--
 .../runtime/NEON/functions/NESpaceToDepthLayer.h   |  4 +-
 arm_compute/runtime/NEON/functions/NESplit.h       |  4 +-
 arm_compute/runtime/NEON/functions/NEStackLayer.h  |  4 +-
 .../runtime/NEON/functions/NEStridedSlice.h        |  4 +-
 arm_compute/runtime/NEON/functions/NETile.h        |  4 +-
 arm_compute/runtime/NEON/functions/NETranspose.h   |  4 +-
 .../NEON/kernels/NEBatchConcatenateLayerKernel.cpp | 16 ++++--
 .../NEON/kernels/NEBatchToSpaceLayerKernel.cpp     |  1 +
 .../NEON/kernels/NEChannelShuffleLayerKernel.cpp   |  7 +--
 src/core/NEON/kernels/NECol2ImKernel.cpp           |  1 +
 .../NEConvertFullyConnectedWeightsKernel.cpp       | 14 +++---
 src/core/NEON/kernels/NECopyKernel.cpp             |  1 +
 .../NEON/kernels/NEDepthConcatenateLayerKernel.cpp | 15 +++++-
 .../NEON/kernels/NEDepthToSpaceLayerKernel.cpp     |  1 +
 src/core/NEON/kernels/NEFillBorderKernel.cpp       |  6 +--
 src/core/NEON/kernels/NEFlattenLayerKernel.cpp     |  6 +--
 .../NEON/kernels/NEGEMMInterleave4x4Kernel.cpp     |  1 +
 src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp |  5 +-
 src/core/NEON/kernels/NEGatherKernel.cpp           | 58 +++++++++++-----------
 .../kernels/NEHeightConcatenateLayerKernel.cpp     | 19 ++++---
 src/core/NEON/kernels/NEPadLayerKernel.cpp         |  2 +
 src/core/NEON/kernels/NEPermuteKernel.cpp          |  1 +
 src/core/NEON/kernels/NEReorgLayerKernel.cpp       |  6 +--
 src/core/NEON/kernels/NEReshapeLayerKernel.cpp     |  4 +-
 src/core/NEON/kernels/NEReverseKernel.cpp          |  6 +--
 .../NEON/kernels/NESpaceToBatchLayerKernel.cpp     |  2 +
 .../NEON/kernels/NESpaceToDepthLayerKernel.cpp     |  1 +
 src/core/NEON/kernels/NEStackLayerKernel.cpp       |  4 +-
 src/core/NEON/kernels/NEStridedSliceKernel.cpp     |  6 +--
 src/core/NEON/kernels/NETileKernel.cpp             |  3 +-
 src/core/NEON/kernels/NETransposeKernel.cpp        |  5 +-
 .../NEON/kernels/NEWidthConcatenateLayerKernel.cpp | 19 ++++---
 src/runtime/NEON/functions/NECol2Im.cpp            |  7 +--
 src/runtime/NEON/functions/NECopy.cpp              |  7 +--
 src/runtime/NEON/functions/NEFillBorder.cpp        |  7 +--
 src/runtime/NEON/functions/NEFlattenLayer.cpp      |  9 ++--
 src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp  |  7 +--
 src/runtime/NEON/functions/NEGather.cpp            |  1 -
 src/runtime/NEON/functions/NEIm2Col.cpp            |  7 +--
 src/runtime/NEON/functions/NEPermute.cpp           |  7 +--
 src/runtime/NEON/functions/NEReshapeLayer.cpp      |  7 +--
 src/runtime/NEON/functions/NESelect.cpp            |  4 +-
 src/runtime/NEON/functions/NETranspose.cpp         |  9 ++--
 tests/validation/NEON/BatchConcatenateLayer.cpp    | 10 ++--
 tests/validation/NEON/DepthConcatenateLayer.cpp    | 10 ++--
 tests/validation/NEON/HeightConcatenateLayer.cpp   | 13 +++--
 tests/validation/NEON/WidthConcatenateLayer.cpp    | 10 ++--
 tests/validation/reference/ConcatenateLayer.cpp    | 23 +++++++-
 99 files changed, 347 insertions(+), 272 deletions(-)

diff --git
a/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h index 69223eea6a..e3a7847025 100644 --- a/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h @@ -29,6 +29,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Interface for the batch concatenate kernel. @@ -55,7 +56,7 @@ public: ~NEBatchConcatenateLayerKernel() = default; /** Initialise the kernel's inputs and output * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Input tensor. Data types supported: All. * @param[in] batch_offset The offset on axis # 3. * @param[in,out] output Output tensor. Data types supported: Same as @p input. * @@ -66,7 +67,7 @@ public: void configure(const ITensor *input, unsigned int batch_offset, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEBatchConcatenateLayerKernel * - * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Input tensor info. Data types supported: All. * @param[in] batch_offset The offset on axis # 3. * @param[in] output Output tensor info. Data types supported: Same as @p input. * diff --git a/arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h index cd1fa7e443..e9bbf4eb03 100644 --- a/arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h @@ -52,14 +52,14 @@ public: ~NEBatchToSpaceLayerKernel() = default; /** Initialise the kernel's inputs and output. * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 * @param[out] output Tensor output. Data types supported: same as @p input */ void configure(const ITensor *input, const ITensor *block_shape, ITensor *output); /** Initialise the kernel's inputs and output (Static block shape). * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape_x Block shape x value. * @param[in] block_shape_y Block shape y value. * @param[out] output Tensor output. Data types supported: same as @p input @@ -67,7 +67,7 @@ public: void configure(const ITensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEBatchToSpaceLayerKernel * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 * @param[in] output Tensor output. 
Data types supported: same as @p input * @@ -76,7 +76,7 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref NEBatchToSpaceLayerKernel (Static block shape). * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape_x Block shape x value. * @param[in] block_shape_y Block shape y value. * @param[in] output Tensor output. Data types supported: same as @p input diff --git a/arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h b/arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h index b6ec60030f..71659c4fcb 100644 --- a/arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h @@ -53,14 +53,14 @@ public: ~NEChannelShuffleLayerKernel() = default; /** Configure function's inputs and outputs. * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[out] output Output tensor. Data type supported: Same as @p input * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. */ void configure(const ITensor *input, ITensor *output, unsigned int num_groups); /** Static function to check if given info will lead to a valid configuration of @ref NEChannelShuffleLayerKernel * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[out] output Output tensor. Data type supported: Same as @p input * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. * diff --git a/arm_compute/core/NEON/kernels/NECol2ImKernel.h b/arm_compute/core/NEON/kernels/NECol2ImKernel.h index 7c19f08b3b..9aa1062622 100644 --- a/arm_compute/core/NEON/kernels/NECol2ImKernel.h +++ b/arm_compute/core/NEON/kernels/NECol2ImKernel.h @@ -72,7 +72,7 @@ public: /** Set the input and output of the kernel. * - * @param[in] input The input tensor to convert. Data types supported: Any + * @param[in] input The input tensor to convert. Data types supported: All * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], * while the rest represent batch of outputs. Data types supported: Same as @p input * @param[in] convolved_dims Output convolved dimensions. @@ -80,7 +80,7 @@ public: void configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims); /** Static function to check if given info will lead to a valid configuration of @ref NECol2ImKernel * - * @param[in] input The input tensor to convert. Data types supported: Any + * @param[in] input The input tensor to convert. Data types supported: All * @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], * while the rest represent batch of outputs. Data types supported: Same as @p input * @param[in] convolved_dims Output convolved dimensions. 
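
Not part of the patch itself: a minimal usage sketch of how one of the data-movement functions touched here can now be run on a QASYMM8_SIGNED tensor. Shapes and quantization parameters are illustrative only; NECopy is used because its configure(ITensor *, ITensor *) signature appears unchanged in this diff.

```cpp
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NECopy.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Illustrative shape and quantization; any QASYMM8_SIGNED tensor should be accepted.
    const TensorShape      shape(8U, 4U);
    const QuantizationInfo qinfo(0.25f, -10);

    Tensor src;
    Tensor dst;
    src.allocator()->init(TensorInfo(shape, 1, DataType::QASYMM8_SIGNED, qinfo));
    dst.allocator()->init(TensorInfo(shape, 1, DataType::QASYMM8_SIGNED, qinfo));

    // Configure before allocating backing memory, as with other NEON functions.
    NECopy copy;
    copy.configure(&src, &dst);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    copy.run();
    return 0;
}
```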
diff --git a/arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h b/arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h index c54339da72..d45191949a 100644 --- a/arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h +++ b/arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h @@ -28,6 +28,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Interface to convert the 2D Fully Connected weights from NCHW to NHWC or vice versa. @@ -59,7 +60,7 @@ public: ~NEConvertFullyConnectedWeightsKernel() = default; /** Set the input and output tensor. * - * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input. * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). * @param[in] data_layout The data layout the weights have been trained in. @@ -67,7 +68,7 @@ public: void configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout); /** Static function to check if given info will lead to a valid configuration of @ref NEConvertFullyConnectedWeightsKernel * - * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All. * @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input. * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). * @param[in] data_layout The data layout the weights have been trained in. diff --git a/arm_compute/core/NEON/kernels/NECopyKernel.h b/arm_compute/core/NEON/kernels/NECopyKernel.h index db3f6a8ae8..d2dbbaef98 100644 --- a/arm_compute/core/NEON/kernels/NECopyKernel.h +++ b/arm_compute/core/NEON/kernels/NECopyKernel.h @@ -51,14 +51,14 @@ public: NECopyKernel &operator=(NECopyKernel &&) = default; /** Initialize the kernel's input, output. * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data types supported: All * @param[out] output Destination tensor. Data types supported: same as @p input. * @param[in] padding (Optional) Padding to be applied to the input tensor */ void configure(const ITensor *input, ITensor *output, const PaddingList &padding = PaddingList()); /** Static function to check if given info will lead to a valid configuration of @ref NECopyKernel * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data types supported: All * @param[in] output Destination tensor. Data types supported: same as @p input. 
* @param[in] padding (Optional) Padding to be applied to the input tensor * diff --git a/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h index 4fcaa24707..ddbd0983e4 100644 --- a/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h @@ -29,6 +29,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Interface for the depth concatenate kernel. @@ -55,7 +56,7 @@ public: ~NEDepthConcatenateLayerKernel() = default; /** Initialise the kernel's inputs and output * - * @param[in] input Input tensor. Data types supported: QASYMM8/F16/F32. + * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] depth_offset The offset on the Z axis. * @param[in,out] output Output tensor. Data types supported: Same as @p input. * @@ -66,7 +67,7 @@ public: void configure(const ITensor *input, unsigned int depth_offset, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConcatenateLayerKernel * - * @param[in] input Input tensor info. Data types supported: QASYMM8/F16/F32. + * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] depth_offset The offset on the Z axis. * @param[in] output Output tensor info. Data types supported: Same as @p input. * diff --git a/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h index f27a56c779..3129ea9fb9 100644 --- a/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h @@ -28,6 +28,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Interface for the depth to space kernel */ @@ -52,14 +53,14 @@ public: ~NEDepthToSpaceLayerKernel() = default; /** Initialise the kernel's inputs and output. * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All * @param[out] output Tensor output. Data types supported: same as @p input * @param[in] block_shape Block shape x value. */ void configure(const ITensor *input, ITensor *output, int32_t block_shape); /** Static function to check if given info will lead to a valid configuration of @ref NEDepthToSpaceLayerKernel. * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All * @param[in] output Tensor output info. Data types supported: same as @p input * @param[in] block_shape Block shape value. * diff --git a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h index b345ec4cc9..0c852e8232 100644 --- a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h +++ b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h @@ -30,6 +30,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Interface for the kernel to fill borders */ @@ -57,7 +58,7 @@ public: * * @note This kernel fills the borders within the XY-planes. * - * @param[in,out] tensor Tensor to process. Data types supported: U8/S8/QASYMM8/S16/S32/F32. + * @param[in,out] tensor Tensor to process. Data types supported: All. 
* @param[in] border_size Size of the border to fill in elements. * @param[in] border_mode Border mode to use for the convolution. * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. diff --git a/arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h b/arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h index 8a7597a929..ba2f99857f 100644 --- a/arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h @@ -54,7 +54,7 @@ public: /** Set the input and output of the kernel. * * @param[in] input First input tensor to flatten with at least 3 dimensions. - * The dimensions above the third will be interpreted as batches. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * The dimensions above the third will be interpreted as batches. Data types supported: All * @param[out] output Output tensor with shape [w*h*d, input_batches] where: * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input */ @@ -62,7 +62,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEFlattenLayerKernel * * @param[in] input First input tensor to flatten with at least 3 dimensions. - * The dimensions above the third will be interpreted as batches. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * The dimensions above the third will be interpreted as batches. Data types supported: All * @param[out] output Output tensor with shape [w*h*d, input_batches] where: * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input * diff --git a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h index 0e2cf0e82d..967a1b73dc 100644 --- a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h +++ b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h @@ -28,6 +28,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** NEON kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor) @@ -74,13 +75,13 @@ public: } /** Initialise the kernel's input and output. * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/QSYMM8_PER_CHANNEL/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[out] output Output tensor. Data type supported: same as @p input. */ void configure(const ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMTranspose1xWKernel * - * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/QSYMM8_PER_CHANNEL/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor info. Data types supported: All * @param[in] output Output tensor info. Data type supported: same as @p input. * * @return a status diff --git a/arm_compute/core/NEON/kernels/NEGatherKernel.h b/arm_compute/core/NEON/kernels/NEGatherKernel.h index e619ee3574..bfef40b53b 100644 --- a/arm_compute/core/NEON/kernels/NEGatherKernel.h +++ b/arm_compute/core/NEON/kernels/NEGatherKernel.h @@ -30,6 +30,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Kernel to perform other operation on NEON */ @@ -59,7 +60,7 @@ public: } /** Initialise the kernel's inputs and outputs * - * @param[in] input Source tensor. Supported tensor rank: up to 4. 
Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following type: U32/S32. Each value Must be in range [0, input.shape[@p axis]) * @param[out] output Destination tensor. Data type supported: Same as @p input * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0 @@ -67,7 +68,7 @@ public: void configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis = 0); /** Static function to check if given info will lead to a valid configuration of @ref NEGatherKernel * - * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: All * @param[in] indices Indices tensor info. Supported tensor rank: up to 1. Must be one of the following type: U32/S32. Each value Must be in range [0, input.shape[@p axis]) * @param[in] output Destination tensor info. Data type supported: Same as @p input * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0 diff --git a/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h index 91370bca41..be81f2e963 100644 --- a/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h @@ -30,6 +30,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Interface for the height concatenate kernel. @@ -56,7 +57,7 @@ public: ~NEHeightConcatenateLayerKernel() = default; /** Initialise the kernel's inputs and output * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[in] height_offset The starting offset on the Y axis for the output tensor. * @param[in,out] output Output tensor. Data types supported: Same as @p input. * @@ -64,7 +65,7 @@ public: void configure(const ITensor *input, unsigned int height_offset, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEHeightConcatenateLayerKernel * - * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor info. Data types supported: All * @param[in] height_offset The starting offset on the Y axis for the output tensor. * @param[in] output Output tensor info. Data types supported: Same as @p input. * diff --git a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h index c9d22da254..8ee9660b95 100644 --- a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h +++ b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h @@ -77,7 +77,7 @@ public: /** Set the input and output of the kernel. * * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32 + * while every optional dimension from 4 and above represent a batch of inputs. 
Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 * Note: QASYMM8 works only for has_bias = false * @param[out] output The output tensor. Data types supported: Same as @p input * @param[in] kernel_dims The kernel dimensions (width and height). @@ -91,7 +91,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEIm2ColKernel * * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32 + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 * Note: QASYMM8 works only for has_bias = false * @param[in] output The output tensor. Data types supported: Same as @p input * @param[in] kernel_dims The kernel dimensions (width and height). diff --git a/arm_compute/core/NEON/kernels/NEMemsetKernel.h b/arm_compute/core/NEON/kernels/NEMemsetKernel.h index 1ff302d6ed..b4bcd11b82 100644 --- a/arm_compute/core/NEON/kernels/NEMemsetKernel.h +++ b/arm_compute/core/NEON/kernels/NEMemsetKernel.h @@ -55,7 +55,7 @@ public: NEMemsetKernel &operator=(NEMemsetKernel &&) = default; /** Initialise the kernel's tensor and filling value * - * @param[in,out] tensor Input tensor to fill. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in,out] tensor Input tensor to fill. Supported data types: All * @param[in] constant_value The value used to fill the planes of the tensor */ void configure(ITensor *tensor, const PixelValue &constant_value); diff --git a/arm_compute/core/NEON/kernels/NEPadLayerKernel.h b/arm_compute/core/NEON/kernels/NEPadLayerKernel.h index 8d58fa9c65..4cbefbd1e3 100644 --- a/arm_compute/core/NEON/kernels/NEPadLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEPadLayerKernel.h @@ -56,7 +56,7 @@ public: /** Initialize the function * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data types supported: All. * @param[out] output Output tensor. Data type supported: same as @p input * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] * specifies the front and the end padding in the i-th dimension. @@ -67,7 +67,7 @@ public: void configure(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT); /** Static function to check if given info will lead to a valid configuration of @ref NEPadLayer. * - * @param[in] input Source tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor info. Data types supported: All. * @param[in] output Output tensor info. Data type supported: same as @p input * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] * specifies the front and the end padding in the i-th dimension. diff --git a/arm_compute/core/NEON/kernels/NEPermuteKernel.h b/arm_compute/core/NEON/kernels/NEPermuteKernel.h index 794259c262..89dc4e6fc7 100644 --- a/arm_compute/core/NEON/kernels/NEPermuteKernel.h +++ b/arm_compute/core/NEON/kernels/NEPermuteKernel.h @@ -28,6 +28,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** NEON kernel to perform tensor permutation. 
@@ -58,7 +59,7 @@ public: * * @note Arbitrary permutation vectors are supported with rank not greater than 4 * - * @param[in] input The input tensor to permute. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input The input tensor to permute. Data types supported: All * @param[out] output The output tensor. Data types supported: Same as @p input * @param[in] perm Permutation vector */ @@ -67,7 +68,7 @@ public: * * @note Arbitrary permutation vectors are supported with rank not greater than 4 * - * @param[in] input The input tensor to permute. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input The input tensor to permute. Data types supported: All * @param[in] output The output tensor. Data types supported: Same as @p input * @param[in] perm Permutation vector * diff --git a/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h b/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h index 195f0c2336..9277ddbe47 100644 --- a/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h @@ -53,7 +53,7 @@ public: ~NEReorgLayerKernel() = default; /** Set the input and output of the kernel * - * @param[in] input Source tensor. Data type supported: U8/S8/U16/S16/QASYMM8/U32/S32/F16/F32 + * @param[in] input Source tensor. Data type supported: All * @param[out] output Destination tensor. Data type supported: Same as @p input * @param[in] stride Stride to be used during data re-organization. * It defines the spatial distance between 2 consecutive pixels in the x and y direction @@ -62,7 +62,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayerKernel * - * @param[in] input Source tensor info. Data type supported: U8/S8/U16/S16/QASYMM8/U32/S32/F16/F32 + * @param[in] input Source tensor info. Data type supported: All * @param[in] output Destination tensor info. Data type supported: Same as @p input * @param[in] stride Stride to be used during data re-organization * It defines the spatial distance between 2 consecutive pixels in the x and y direction diff --git a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h b/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h index 2871bc2fb2..fccf2685a8 100644 --- a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h @@ -28,6 +28,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Interface for the kernel to perform tensor reshaping */ @@ -40,14 +41,14 @@ public: } /** Set the input and output of the kernel * - * @param[in] input Source tensor. Data type supported: U8/S8/U16/S16/QASYMM8/U32/S32/F16/F32 + * @param[in] input Source tensor. Data type supported: All * @param[out] output Destination tensor. Data type supported: Same as @p input */ void configure(const ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayerKernel * - * @param[in] input Source tensor info. Data type supported: U8/S8/U16/S16/QASYMM8/U32/S32/F16/F32 + * @param[in] input Source tensor info. Data type supported: All * @param[in] output Destination tensor info. 
Data type supported: Same as @p input * * @return a status diff --git a/arm_compute/core/NEON/kernels/NEReverseKernel.h b/arm_compute/core/NEON/kernels/NEReverseKernel.h index a8a1fdaac8..516653b70d 100644 --- a/arm_compute/core/NEON/kernels/NEReverseKernel.h +++ b/arm_compute/core/NEON/kernels/NEReverseKernel.h @@ -52,7 +52,7 @@ public: ~NEReverseKernel() = default; /** Initialise the kernel's inputs and output * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[out] output Output tensor. Data type supported: Same as @p input * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32 */ @@ -60,7 +60,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEReverseKernel * - * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor info. Data types supported: All * @param[in] output Output tensor info. Data type supported: Same as @p input * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32 * diff --git a/arm_compute/core/NEON/kernels/NESelectKernel.h b/arm_compute/core/NEON/kernels/NESelectKernel.h index 1711246b27..51c8543ddc 100644 --- a/arm_compute/core/NEON/kernels/NESelectKernel.h +++ b/arm_compute/core/NEON/kernels/NESelectKernel.h @@ -29,6 +29,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Interface for the select kernel @@ -60,7 +61,7 @@ public: /** Common signature for all the specialised elementwise functions * * @param[in] c Condition input tensor. Data types supported: U8. - * @param[in] x First input tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. + * @param[in] x First input tensor. Data types supported: All. * @param[out] y Second input tensor. Data types supported: Same as @p x * @param[in] output Output tensor. Data types supported: Same as @p x */ @@ -69,7 +70,7 @@ public: /** Validate the argument passed to the kernel * * @param[in] c Condition input tensor. Data types supported: U8. - * @param[in] x First input tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. + * @param[in] x First input tensor. Data types supported: All. * @param[in] y Second input tensor. Data types supported: Same as @p x * @param[in] output Output tensor. Data types supported: Same as @p x. * @@ -84,7 +85,7 @@ private: /** Common signature for all the specialised select functions * * @param[in] c Condition input tensor. Data types supported: U8. - * @param[in] x First input tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. + * @param[in] x First input tensor. Data types supported: All. * @param[in] y Second input tensor. Data types supported: Same as @p x * @param[in] output Output tensor. Data types supported: Same as @p x. */ diff --git a/arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h b/arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h index 79c321f919..ec88f03df4 100644 --- a/arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h @@ -29,6 +29,7 @@ namespace arm_compute { +// Forward declaration class ITensor; /** Interface for the space to batch kernel */ @@ -53,7 +54,7 @@ public: ~NESpaceToBatchLayerKernel() = default; /** Initialise the kernel's inputs and output. * - * @param[in] input Tensor input. 
Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 * @param[out] output Tensor output. Data types supported: same as @p input @@ -61,7 +62,7 @@ public: void configure(const ITensor *input, const ITensor *block_shape, const ITensor *paddings, ITensor *output); /** Initialise the kernel's input and output. (Static block shape and paddings) * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape_x Block shape x value. * @param[in] block_shape_y Block shape y value. * @param[in] padding_left The left padding of the output tensor. @@ -71,7 +72,7 @@ public: void configure(const ITensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayerKernel * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 * @param[in] output Tensor output. Data types supported: same as @p input @@ -81,7 +82,7 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayerKernel (Static block shape and paddings) * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape_x Block shape x value. * @param[in] block_shape_y Block shape y value. * @param[in] padding_left The left padding of the output tensor. diff --git a/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h b/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h index fe783a7159..87f8c67635 100644 --- a/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h @@ -53,14 +53,14 @@ public: ~NESpaceToDepthLayerKernel() = default; /** Initialise the kernel's inputs and output. * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[out] output Tensor output. Data types supported: same as @p input * @param[in] block_shape Block shape value */ void configure(const ITensor *input, ITensor *output, int32_t block_shape); /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToDepthLayerKernel * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input info. Supported tensor rank: 4. 
Data types supported: All. * @param[in] output Tensor output info. Data types supported: same as @p input * @param[in] block_shape Block shape value * diff --git a/arm_compute/core/NEON/kernels/NEStackLayerKernel.h b/arm_compute/core/NEON/kernels/NEStackLayerKernel.h index 04ad5d10dc..c4dc53eac6 100644 --- a/arm_compute/core/NEON/kernels/NEStackLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEStackLayerKernel.h @@ -56,7 +56,7 @@ public: * * @note Supported input tensor rank: up to 4 * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. * @param[in] idx_input Index of the input tensor in the list of tensors to stack. * All tensors in the list must have the same shape @@ -69,7 +69,7 @@ public: * * @note Supported input tensor rank: up to 4 * - * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor info. Data types supported: All * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. * @param[in] idx_input Index of the input tensor in the list of tensors to stack * All tensors in the list must have the same shape diff --git a/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h b/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h index fc624f047b..6709619a62 100644 --- a/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h +++ b/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h @@ -58,7 +58,7 @@ public: * * @note Supported tensor rank: up to 4 * - * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/QASYMM16/QSYMM16/U32/S32/F16/F32 + * @param[in] input Source tensor. Data type supported: All * @param[out] output Destination tensor. Data type supported: Same as @p input * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). @@ -72,11 +72,11 @@ public: const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); - /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel + /** Static function to check if given info will lead to a valid configuration of @ref NEStridedSliceKernel * * @note Supported tensor rank: up to 4 * - * @param[in] input Source tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/QASYMM16/QSYMM16/U32/S32/F16/F32 + * @param[in] input Source tensor info. Data type supported: All * @param[in] output Destination tensor info. Data type supported: Same as @p input * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). diff --git a/arm_compute/core/NEON/kernels/NETileKernel.h b/arm_compute/core/NEON/kernels/NETileKernel.h index 93afc80651..a64470ffd0 100644 --- a/arm_compute/core/NEON/kernels/NETileKernel.h +++ b/arm_compute/core/NEON/kernels/NETileKernel.h @@ -50,14 +50,14 @@ public: } /** Set the source, destination of the kernel * - * @param[in] input Source tensor. 
Data type supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data type supported: All. * @param[out] output Destination tensor. Same as @p input * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. */ void configure(const ITensor *input, ITensor *output, const Multiples &multiples); /** Static function to check if given info will lead to a valid configuration of @ref NETileKernel * - * @param[in] input Source tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor info. Data type supported: All. * @param[in] output Destination tensor info. Same as @p input * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. * diff --git a/arm_compute/core/NEON/kernels/NETransposeKernel.h b/arm_compute/core/NEON/kernels/NETransposeKernel.h index d73813c846..a14dece0d6 100644 --- a/arm_compute/core/NEON/kernels/NETransposeKernel.h +++ b/arm_compute/core/NEON/kernels/NETransposeKernel.h @@ -57,13 +57,13 @@ public: /** Initialise the kernel's input and output. * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[out] output Output tensor. Data type supported: Same as @p input */ void configure(const ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NETransposeKernel * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[in] output Output tensor. Data type supported: Same as @p input * * @return a status @@ -76,7 +76,7 @@ public: private: /** Common signature for all the transpose functions * - * @param[in] input An input tensor. Data types supported: U8/S8/U16/S16/F16/U32/S32/F32 + * @param[in] input An input tensor. Data types supported: All * @param[out] output The output tensor. Data type supported: same as @p input * @param[in] window Region on which to execute the kernel. */ diff --git a/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h index ccd70a4ebd..f22f18f09f 100644 --- a/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h @@ -30,6 +30,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Interface for the width concatenate kernel. @@ -56,7 +57,7 @@ public: ~NEWidthConcatenateLayerKernel() = default; /** Initialise the kernel's inputs and output * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[in] width_offset The offset on the X axis. * @param[in,out] output Output tensor. Data types supported: Same as @p input. * @@ -64,7 +65,7 @@ public: void configure(const ITensor *input, unsigned int width_offset, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEWidthConcatenateLayerKernel * - * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor info. Data types supported: All * @param[in] width_offset The offset on the X axis. * @param[in] output Output tensor info. 
Data types supported: Same as @p input. * diff --git a/arm_compute/core/PixelValue.h b/arm_compute/core/PixelValue.h index f39c8670fd..8c2ab92ad9 100644 --- a/arm_compute/core/PixelValue.h +++ b/arm_compute/core/PixelValue.h @@ -59,6 +59,9 @@ public: case DataType::QASYMM8: value.u8 = quantize_qasymm8(static_cast(v), qinfo); break; + case DataType::QASYMM8_SIGNED: + value.u8 = quantize_qasymm8_signed(static_cast(v), qinfo); + break; case DataType::QSYMM8: value.s8 = quantize_qsymm8(static_cast(v), qinfo); break; @@ -311,5 +314,5 @@ public: return val; } }; -} +} // namespace arm_compute #endif /* ARM_COMPUTE_PIXELVALUE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h index f0a33a71f2..2a62530246 100644 --- a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h +++ b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h @@ -40,14 +40,14 @@ class NEBatchToSpaceLayer : public INESimpleFunctionNoBorder public: /** Set the input and output tensors. * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 * @param[out] output Tensor output. Data types supported: same as @p input */ void configure(const ITensor *input, const ITensor *block_shape, ITensor *output); /** Set the input and output tensors. (Static block shape). * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape_x Block shape x value. * @param[in] block_shape_y Block shape y value. * @param[out] output Tensor output. Data types supported: same as @p input @@ -55,7 +55,7 @@ public: void configure(const ITensor *input, int32_t block_shape_x, int32_t block_shape_y, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayer * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape block shape tensor info with shape [M]. Data types supported: S32 * @param[out] output Tensor output info. Data types supported: same as @p input * @@ -64,7 +64,7 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayer (Static block shape). * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape_x Block shape x value. * @param[in] block_shape_y Block shape y value. * @param[out] output Tensor output info. 
Data types supported: same as @p input diff --git a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h index ecc1822f9d..716518a8da 100644 --- a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h +++ b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h @@ -42,14 +42,14 @@ class NEChannelShuffleLayer : public INESimpleFunctionNoBorder public: /** Initialize the function * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[out] output Output tensor. Data type supported: Same as @p input * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. */ void configure(const ITensor *input, ITensor *output, unsigned int num_groups); /** Static function to check if given info will lead to a valid configuration of @ref NEChannelShuffleLayer * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[out] output Output tensor. Data type supported: Same as @p input * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. * diff --git a/arm_compute/runtime/NEON/functions/NECol2Im.h b/arm_compute/runtime/NEON/functions/NECol2Im.h index 8987c82714..5da0b91766 100644 --- a/arm_compute/runtime/NEON/functions/NECol2Im.h +++ b/arm_compute/runtime/NEON/functions/NECol2Im.h @@ -39,7 +39,7 @@ class NECol2Im : public INESimpleFunctionNoBorder public: /** Configure the col2im NEON kernel * - * @param[in] input The input tensor to convert. Data types supported: Any + * @param[in] input The input tensor to convert. Data types supported: All * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], * while the rest represent batch of outputs. Data types supported: Same as @p input * @param[in] convolved_dims Output convolved dimensions. @@ -47,7 +47,7 @@ public: void configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims); /** Static function to check if given info will lead to a valid configuration of @ref NECol2Im * - * @param[in] input The input tensor to convert. Data types supported: Any + * @param[in] input The input tensor to convert. Data types supported: All * @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], * while the rest represent batch of outputs. Data types supported: Same as @p input * @param[in] convolved_dims Output convolved dimensions. diff --git a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h index 3591cfd9da..8207589680 100644 --- a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h @@ -57,7 +57,7 @@ public: * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel. * - * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32. 
+ * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[out] output Output tensor. Data types supported: Same as @p input. * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. */ @@ -68,7 +68,7 @@ public: * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel. * - * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/F16/F32. + * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] output Output tensor info. Data types supported: Same as @p input. * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3. * diff --git a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h index 37b6b6c4dd..42f787090e 100644 --- a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h +++ b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h @@ -32,6 +32,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Basic function to run @ref NEConvertFullyConnectedWeightsKernel. */ @@ -42,7 +43,7 @@ public: NEConvertFullyConnectedWeights(); /** Initialize the function. * - * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input. * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). * @param[in] data_layout The data layout the weights have been trained in. @@ -50,7 +51,7 @@ public: void configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout); /** Static function to check if given info will lead to a valid configuration of @ref NEConvertFullyConnectedWeights * - * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All. * @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input. * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). * @param[in] data_layout The data layout the weights have been trained in. diff --git a/arm_compute/runtime/NEON/functions/NECopy.h b/arm_compute/runtime/NEON/functions/NECopy.h index 7ad89e27d8..b03f408ad8 100644 --- a/arm_compute/runtime/NEON/functions/NECopy.h +++ b/arm_compute/runtime/NEON/functions/NECopy.h @@ -37,14 +37,14 @@ class NECopy : public INESimpleFunctionNoBorder public: /** Initialise the function's source and destination. * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data types supported: All * @param[out] output Output tensor. 
Data types supported: Same as @p input. * */ void configure(ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NECopy * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data types supported: All * @param[in] output Output tensor. Data types supported: Same as @p input. * * @return a status diff --git a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h index 56c237420b..3c21d1a33a 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h @@ -32,6 +32,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Basic function to run @ref NEDepthToSpaceLayerKernel. */ @@ -40,14 +41,14 @@ class NEDepthToSpaceLayer : public INESimpleFunctionNoBorder public: /** Set the input and output tensors. * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All * @param[out] output Tensor output. Data types supported: same as @p input * @param[in] block_shape Block shape value. */ void configure(const ITensor *input, ITensor *output, int32_t block_shape); /** Static function to check if given info will lead to a valid configuration of @ref NEDepthToSpaceLayer. * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All * @param[in] output Tensor output info. Data types supported: same as @p input * @param[in] block_shape Block shape x value. * diff --git a/arm_compute/runtime/NEON/functions/NEFill.h b/arm_compute/runtime/NEON/functions/NEFill.h index e5badcb7b6..f8a15078c3 100644 --- a/arm_compute/runtime/NEON/functions/NEFill.h +++ b/arm_compute/runtime/NEON/functions/NEFill.h @@ -39,7 +39,7 @@ class NEFill : public INESimpleFunctionNoBorder public: /** Initialize the function * - * @param[in,out] tensor Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in,out] tensor Source tensor. Data types supported: All * @param[in] constant_value Constant value to use to fill tensor. */ void configure(ITensor *tensor, PixelValue constant_value); diff --git a/arm_compute/runtime/NEON/functions/NEFillBorder.h b/arm_compute/runtime/NEON/functions/NEFillBorder.h index b8266d9d67..0ae04cbf00 100644 --- a/arm_compute/runtime/NEON/functions/NEFillBorder.h +++ b/arm_compute/runtime/NEON/functions/NEFillBorder.h @@ -31,6 +31,7 @@ namespace arm_compute { +// Forward declaration class ITensor; /** Basic function to run @ref NEFillBorderKernel */ @@ -41,7 +42,7 @@ public: * * @note This function fills the borders within the XY-planes. * - * @param[in, out] input Source tensor. Data type supported: U8/S16/S32/F32 + * @param[in, out] input Source tensor. Data type supported: All * @param[in] border_width Width of the tensor border in pixels. * @param[in] border_mode Strategy to use for borders. * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. 
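With NECopy (and the neighbouring layout functions) now documented as accepting all data types, a QASYMM8_SIGNED tensor passes straight through them. A rough usage sketch follows; the shape, quantization parameters and function name are invented for illustration and are not part of the patch:

#include "arm_compute/runtime/NEON/functions/NECopy.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void example_copy_qasymm8_signed() // hypothetical example, not library code
{
    Tensor src, dst;
    const TensorInfo info(TensorShape(16U, 16U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.25f, -10));
    src.allocator()->init(info);
    dst.allocator()->init(info);

    NECopy copy;
    copy.configure(&src, &dst); // documented for all data types, including QASYMM8_SIGNED

    src.allocator()->allocate();
    dst.allocator()->allocate();
    copy.run(); // byte-wise copy; the quantization info travels with the tensor metadata
}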
@@ -54,5 +55,5 @@ public: private: NEFillBorderKernel _border_handler; /**< Kernel to handle image borders */ }; -} +} // namespace arm_compute #endif /*ARM_COMPUTE_NEFILLBORDER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h index db73f52f4a..7b4801cd1c 100644 --- a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h @@ -37,7 +37,7 @@ class NEFlattenLayer : public INESimpleFunctionNoBorder public: /** Initialise the kernel's input and output. * - * @param[in] input First input tensor to flatten with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data types supported: F16/F32 + * @param[in] input First input tensor to flatten with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data types supported: All * @param[out] output Output tensor with shape [w*h*d, input_batches] where: * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input */ @@ -46,7 +46,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEFlattenLayer * * @param[in] input First input tensor to flatten with at least 3 dimensions. - * The dimensions above the third will be interpreted as batches. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * The dimensions above the third will be interpreted as batches. Data types supported: All * @param[out] output Output tensor with shape [w*h*d, input_batches] where: * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input * diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h index 5eabdcdb6b..8150737ebe 100644 --- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h @@ -49,13 +49,13 @@ class NEFullyConnectedLayerReshapeWeights : public INESimpleFunctionNoBorder public: /** Set the input and output tensors. * - * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/F16/F32. + * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[out] output Destination tensor. Data type supported: Same as @p input. */ void configure(const ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEFullyConnectedLayerReshapeWeights * - * @param[in] input Weights tensor info. The weights must be 2 dimensional. Data types supported: QASYMM8/F16/F32. + * @param[in] input Weights tensor info. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. * @param[in] output Destination tensor info. Data type supported: Same as @p input. * * @return a status diff --git a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h index 8b66eb23b9..6a38490ed4 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h @@ -28,6 +28,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Basic function to execute NEGEMMTranspose1xWKernel. 
This function calls the following NEON kernels: @@ -40,13 +41,13 @@ class NEGEMMTranspose1xW : public INESimpleFunctionNoBorder public: /** Initialise the kernel's inputs, output * - * @param[in] input First input tensor. Data type supported: U8/S8/QASYMM8/QSYMM8_PER_CHANNEL/U16/S16/F16/U32/S32/F32 + * @param[in] input First input tensor. Data type supported: All * @param[out] output Output tensor. Data type supported: same as @p input */ void configure(const ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMTranspose1xW * - * @param[in] input First input tensor. Data type supported: U8/S8/QASYMM8/QSYMM8_PER_CHANNEL/U16/S16/F16/U32/S32/F32 + * @param[in] input First input tensor. Data type supported: All * @param[in] output Output tensor. Data type supported: same as @p input * * @return a status diff --git a/arm_compute/runtime/NEON/functions/NEGather.h b/arm_compute/runtime/NEON/functions/NEGather.h index 6b6072cd53..7ed45c0f15 100644 --- a/arm_compute/runtime/NEON/functions/NEGather.h +++ b/arm_compute/runtime/NEON/functions/NEGather.h @@ -30,6 +30,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Basic function to run @ref NEGatherKernel */ @@ -38,7 +39,7 @@ class NEGather : public INESimpleFunctionNoBorder public: /** Initialise the kernel's inputs and outputs * - * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following type: U32/S32. Each value Must be in range [0, input.shape[@p axis]) * @param[out] output Destination tensor. Data type supported: Same as @p input * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0 @@ -47,7 +48,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEGatherKernel * - * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: All * @param[in] indices Indices tensor info. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value Must be in range [0, input.shape[@p axis]) * @param[in] output Destination tensor info. Data type supported: Same as @p input * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Defaults to 0 @@ -56,7 +57,6 @@ public: */ static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis); }; - } // namespace arm_compute #endif /* ARM_COMPUTE_NEGATHER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEIm2Col.h b/arm_compute/runtime/NEON/functions/NEIm2Col.h index 475e1d6160..ac1b2674a5 100644 --- a/arm_compute/runtime/NEON/functions/NEIm2Col.h +++ b/arm_compute/runtime/NEON/functions/NEIm2Col.h @@ -32,6 +32,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Basic function to run @ref NEIm2ColKernel */ @@ -43,7 +44,7 @@ public: /** Configure the im2col NEON kernel * * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. 
Data types supported: QASYMM8/F16/F32 + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 * Note: QASYMM8 works only for has_bias = false * @param[out] output The output tensor. Data types supported: Same as @p input * @param[in] kernel_dims The kernel dimensions (width and height). @@ -57,7 +58,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEIm2Col * * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32 + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 * Note: QASYMM8 works only for has_bias = false * @param[in] output The output tensor. Data types supported: Same as @p input * @param[in] kernel_dims The kernel dimensions (width and height). diff --git a/arm_compute/runtime/NEON/functions/NEPadLayer.h b/arm_compute/runtime/NEON/functions/NEPadLayer.h index a17739385c..d3074e70bc 100644 --- a/arm_compute/runtime/NEON/functions/NEPadLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPadLayer.h @@ -53,7 +53,7 @@ public: NEPadLayer(); /** Initialize the function * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data types supported: All. * @param[out] output Output tensor. Data type supported: same as @p input * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] * specifies the front and the end padding in the i-th dimension. @@ -64,7 +64,7 @@ public: void configure(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT); /** Static function to check if given info will lead to a valid configuration of @ref NEPadLayer. * - * @param[in] input Source tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor info. Data types supported: All. * @param[in] output Output tensor info. Data type supported: same as @p input * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] * specifies the front and the end padding in the i-th dimension. @@ -82,7 +82,7 @@ public: private: /** Configure kernels for when constant padding is used. * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data types supported: All. * @param[out] output Output tensor. Data type supported: same as @p input * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] * specifies the front and the end padding in the i-th dimension. @@ -91,7 +91,7 @@ private: void configure_constant_mode(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value); /** Configure functions for when reflect or symmetric padding is used. * - * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data types supported: All. * @param[out] output Output tensor. 
Data type supported: same as @p input */ void configure_reflect_symmetric_mode(ITensor *input, ITensor *output); diff --git a/arm_compute/runtime/NEON/functions/NEPermute.h b/arm_compute/runtime/NEON/functions/NEPermute.h index 72e864283e..4651b30e8e 100644 --- a/arm_compute/runtime/NEON/functions/NEPermute.h +++ b/arm_compute/runtime/NEON/functions/NEPermute.h @@ -30,6 +30,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Basic function to run @ref NEPermuteKernel */ @@ -40,7 +41,7 @@ public: * * @note Arbitrary permutation vectors are supported with rank not greater than 4 * - * @param[in] input The input tensor to permute. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input The input tensor to permute. Data types supported: All * @param[out] output The output tensor. Data types supported: Same as @p input * @param[in] perm Permutation vector */ @@ -49,7 +50,7 @@ public: * * @note Arbitrary permutation vectors are supported with rank not greater than 4 * - * @param[in] input The input tensor to permute. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input The input tensor to permute. Data types supported: All * @param[in] output The output tensor. Data types supported: Same as @p input * @param[in] perm Permutation vector * diff --git a/arm_compute/runtime/NEON/functions/NEReorgLayer.h b/arm_compute/runtime/NEON/functions/NEReorgLayer.h index 5f28821a6c..8ef7f8a1b2 100644 --- a/arm_compute/runtime/NEON/functions/NEReorgLayer.h +++ b/arm_compute/runtime/NEON/functions/NEReorgLayer.h @@ -38,7 +38,7 @@ class NEReorgLayer : public INESimpleFunctionNoBorder public: /** Initialise the kernel's inputs and outputs * - * @param[in] input First tensor input. Data type supported: U8/S8/QASYMM8//U16/S16/U32/S32/F16/F32 + * @param[in] input First tensor input. Data type supported: All * @param[out] output Output tensor. Data type supported: Same as @p input * @param[in] stride Stride to be used during data re-organization * It defines the spatial distance between 2 consecutive pixels in the x and y direction @@ -47,7 +47,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEReorgLayer * - * @param[in] input First tensor info. Data type supported: U8/S8/QASYMM8//U16/S16/U32/S32/F16/F32 + * @param[in] input First tensor info. Data type supported: All * @param[in] output Output tensor info. Data type supported: Same as @p input * @param[in] stride Stride to be used during data re-organization * It defines the spatial distance between 2 consecutive pixels in the x and y direction diff --git a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h index c57387a0cd..d6643842d9 100644 --- a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h +++ b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h @@ -29,6 +29,7 @@ namespace arm_compute { +// Forward declarations class ITensor; /** Basic function to run @ref NEReshapeLayerKernel */ @@ -37,14 +38,14 @@ class NEReshapeLayer : public INESimpleFunctionNoBorder public: /** Initialise the kernel's inputs and outputs * - * @param[in] input First tensor input. Data type supported: U8/S8/QASYMM8//U16/S16/U32/S32/F16/F32 + * @param[in] input First tensor input. Data type supported: All * @param[out] output Output tensor. 
Data type supported: Same as @p input */ void configure(const ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayer * - * @param[in] input First tensor info. Data type supported: U8/S8/QASYMM8//U16/S16/U32/S32/F16/F32 + * @param[in] input First tensor info. Data type supported: All * @param[in] output Output tensor info. Data type supported: Same as @p input * * @return a status diff --git a/arm_compute/runtime/NEON/functions/NEReverse.h b/arm_compute/runtime/NEON/functions/NEReverse.h index 6414cbd4d1..ab5a5d0869 100644 --- a/arm_compute/runtime/NEON/functions/NEReverse.h +++ b/arm_compute/runtime/NEON/functions/NEReverse.h @@ -38,14 +38,14 @@ class NEReverse : public INESimpleFunctionNoBorder public: /** Initialize the function * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[out] output Output tensor. Data type supported: Same as @p input * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32 */ void configure(const ITensor *input, ITensor *output, const ITensor *axis); /** Static function to check if given info will lead to a valid configuration of @ref NEReverseKernel * - * @param[in] input Input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor info. Data types supported: All * @param[in] output Output tensor info. Data type supported: Same as @p input * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32 * diff --git a/arm_compute/runtime/NEON/functions/NESelect.h b/arm_compute/runtime/NEON/functions/NESelect.h index 049267a692..6ac328080d 100644 --- a/arm_compute/runtime/NEON/functions/NESelect.h +++ b/arm_compute/runtime/NEON/functions/NESelect.h @@ -39,7 +39,7 @@ public: /** Initialise the kernel's inputs and output. * * @param[in] c Condition input tensor. Data types supported: U8. - * @param[in] x First input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] x First input tensor. Data types supported: All. * @param[in] y Second input tensor. Data types supported: Same as @p x * @param[out] output Output tensor. Data types supported: Same as @p x. */ @@ -47,7 +47,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NESelect * * @param[in] c Condition input tensor. Data types supported: U8. - * @param[in] x First input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] x First input tensor. Data types supported: All. * @param[in] y Second input tensor. Data types supported: Same as @p x * @param[in] output Output tensor. Data types supported: Same as @p x. * diff --git a/arm_compute/runtime/NEON/functions/NESlice.h b/arm_compute/runtime/NEON/functions/NESlice.h index 179c64a420..834ec27a33 100644 --- a/arm_compute/runtime/NEON/functions/NESlice.h +++ b/arm_compute/runtime/NEON/functions/NESlice.h @@ -42,7 +42,7 @@ public: * @note End coordinates can be negative, which represents the number of elements before the end of that dimension. * @note End indices are not inclusive unless negative. * - * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[in] input Source tensor. Data type supported: All * @param[out] output Destination tensor. 
Data type supported: Same as @p input * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). @@ -56,7 +56,7 @@ public: * @note End coordinates can be negative, which represents the number of elements before the end of that dimension. * @note End indices are not inclusive unless negative. * - * @param[in] input Source tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[in] input Source tensor info. Data type supported: All * @param[in] output Destination tensor info. Data type supported: Same as @p input * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). diff --git a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h index c433f183a0..75fa50c1b0 100644 --- a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h +++ b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h @@ -56,7 +56,7 @@ public: virtual ~NESpaceToBatchLayer() = default; /** Set the input and output tensors. * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 * @param[out] output Tensor output. Data types supported: same as @p input @@ -64,7 +64,7 @@ public: void configure(const ITensor *input, const ITensor *block_shape, const ITensor *paddings, ITensor *output); /** Set the input and output tensors. (Static block shape and paddings) * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape_x Block shape x value. * @param[in] block_shape_y Block shape y value. * @param[in] padding_left The left padding of the output tensor. @@ -74,7 +74,7 @@ public: void configure(const ITensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayer * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape block shape tensor info with shape [M]. Data types supported: S32 * @param[in] paddings paddings tensor info with shape [2, M]. Data types supported: S32 * @param[in] output Tensor output info. Data types supported: same as @p input @@ -84,7 +84,7 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output); /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayer (Static block shape and paddings) * - * @param[in] input Tensor input info. Supported tensor rank: 4. 
Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. * @param[in] block_shape_x Block shape x value. * @param[in] block_shape_y Block shape y value. * @param[in] padding_left The left padding of the output tensor. diff --git a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h index ac93b48c0c..6a7a9c83a1 100644 --- a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h +++ b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h @@ -55,14 +55,14 @@ public: virtual ~NESpaceToDepthLayer() = default; /** Set the input and output tensors. * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. * @param[out] output Tensor output. Data types supported: same as @p input * @param[in] block_shape Block shape value */ void configure(const ITensor *input, ITensor *output, int32_t block_shape); /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToDepthLayer (Static block shape and paddings) * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. * @param[in] output Tensor output info. Data types supported: same as @p input * @param[in] block_shape Block shape value * diff --git a/arm_compute/runtime/NEON/functions/NESplit.h b/arm_compute/runtime/NEON/functions/NESplit.h index 26ff4059f1..e4d62048e6 100644 --- a/arm_compute/runtime/NEON/functions/NESplit.h +++ b/arm_compute/runtime/NEON/functions/NESplit.h @@ -45,7 +45,7 @@ public: NESplit(); /** Initialise the kernel's input and outputs. * - * @param[in] input The input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] input The input tensor. Data types supported: All * @param[out] outputs A vector containing the output tensors. Data types supported: Same as @p input. * The output tensors should match the input tensor dimensions for all shape dimensions apart * from the split dimension. @@ -54,7 +54,7 @@ public: void configure(const ITensor *input, const std::vector &outputs, unsigned int axis); /** Static function to check if given info will lead to a valid configuration of @ref NESplit * - * @param[in] input The input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. + * @param[in] input The input tensor info. Data types supported: All * @param[in] outputs A vector containing the output tensors' info. Data types supported: Same as @p input. * The output tensors should match the input tensor dimensions for all shape dimensions apart * from the split dimension diff --git a/arm_compute/runtime/NEON/functions/NEStackLayer.h b/arm_compute/runtime/NEON/functions/NEStackLayer.h index e83995619d..9288035060 100644 --- a/arm_compute/runtime/NEON/functions/NEStackLayer.h +++ b/arm_compute/runtime/NEON/functions/NEStackLayer.h @@ -50,7 +50,7 @@ public: * * @note Supported input tensor rank: up to 4 * - * @param[in] input The vectors containing all the tensors with the same shape to stack. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input The vectors containing all the tensors with the same shape to stack. 
Data types supported: All * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. * Negative values wrap around * @param[out] output Output tensor. Data types supported: Same as @p input. @@ -60,7 +60,7 @@ public: * * @note Supported input tensor rank: up to 4 * - * @param[in] input The vectors containing all the tensors info with the same shape to stack. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input The vectors containing all the tensors info with the same shape to stack. Data types supported: All * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. * Negative values wrap around * @param[in] output Output tensor info. Data types supported: Same as @p input. diff --git a/arm_compute/runtime/NEON/functions/NEStridedSlice.h b/arm_compute/runtime/NEON/functions/NEStridedSlice.h index d8c3918385..6d5e6392f5 100644 --- a/arm_compute/runtime/NEON/functions/NEStridedSlice.h +++ b/arm_compute/runtime/NEON/functions/NEStridedSlice.h @@ -39,7 +39,7 @@ public: * * @note Supported tensor rank: up to 4 * - * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[in] input Source tensor. Data type supported: All * @param[out] output Destination tensor. Data type supported: Same as @p input * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). @@ -57,7 +57,7 @@ public: * * @note Supported tensor rank: up to 4 * - * @param[in] input Source tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[in] input Source tensor info. Data type supported: All * @param[in] output Destination tensor info. Data type supported: Same as @p input * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). diff --git a/arm_compute/runtime/NEON/functions/NETile.h b/arm_compute/runtime/NEON/functions/NETile.h index c7a08961a4..14d4f221f1 100644 --- a/arm_compute/runtime/NEON/functions/NETile.h +++ b/arm_compute/runtime/NEON/functions/NETile.h @@ -38,14 +38,14 @@ class NETile : public INESimpleFunctionNoBorder public: /** Set the source, destination of the kernel * - * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor. Data type supported: All. * @param[out] output Destination tensor. Same as @p input * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. */ void configure(const ITensor *input, ITensor *output, const Multiples &multiples); /** Static function to check if given info will lead to a valid configuration of @ref NETile * - * @param[in] input Source tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32. + * @param[in] input Source tensor info. Data type supported: All. * @param[in] output Destination tensor info. Same as @p input * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. 
* diff --git a/arm_compute/runtime/NEON/functions/NETranspose.h b/arm_compute/runtime/NEON/functions/NETranspose.h index 6148a67203..03c90e5b28 100644 --- a/arm_compute/runtime/NEON/functions/NETranspose.h +++ b/arm_compute/runtime/NEON/functions/NETranspose.h @@ -41,13 +41,13 @@ class NETranspose : public INESimpleFunctionNoBorder public: /** Initialise the kernel's inputs and output * - * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input Input tensor. Data types supported: All * @param[out] output Output tensor. Data type supported: Same as @p input */ void configure(const ITensor *input, ITensor *output); /** Static function to check if given info will lead to a valid configuration of @ref NETranspose * - * @param[in] input The input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[in] input The input tensor. Data types supported: All * @param[in] output The output tensor. Data types supported: Same as @p input * * @return a status diff --git a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp index cfa98fb19a..3e250f5d2e 100644 --- a/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp @@ -66,6 +66,16 @@ void batch_concat(const ITensor *in, ITensor *out, int batch_offset, const Windo }, input, output); } + else if(dt == DataType::QASYMM8_SIGNED && input_qinfo != output_qinfo) + { + execute_window_loop(window, [&](const Coordinates &) + { + const auto in_ptr = reinterpret_cast<const int8_t *>(input_ptr + input.offset()); + const auto out_ptr = reinterpret_cast<int8_t *>(output_ptr + output.offset()); + vst1q_s8(out_ptr, vquantize_signed(vdequantize(vld1q_s8(in_ptr), input_qinfo), output_qinfo)); + }, + input, output); + } else { execute_window_loop(window, [&](const Coordinates &) @@ -102,10 +112,7 @@ Status validate_arguments(const ITensorInfo *input, unsigned int batch_offset, c { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
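The new QASYMM8_SIGNED branch above is only taken when the input and output quantization parameters disagree: each lane is dequantized with the input parameters and requantized with the output ones. A scalar sketch of what vquantize_signed(vdequantize(...)) computes per element follows (helper name invented; the NEON path vectorizes this and its rounding may differ in the last bit):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Per-element illustration (not library code) of the requantization in the signed branch:
// value -> float via the input (scale, offset), then back to int8 via the output pair.
int8_t requantize_qasymm8_signed(int8_t in, float in_scale, int32_t in_offset,
                                 float out_scale, int32_t out_offset)
{
    const float   dequantized = (static_cast<int32_t>(in) - in_offset) * in_scale;
    const int32_t requantized = static_cast<int32_t>(std::lround(dequantized / out_scale)) + out_offset;
    return static_cast<int8_t>(std::min(127, std::max(-128, requantized))); // saturate to the int8 range
}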
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, - DataType::U16, DataType::S16, - DataType::U32, DataType::S32, - DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimX) != output->dimension(Window::DimX)); @@ -138,6 +145,7 @@ void NEBatchConcatenateLayerKernel::configure(const ITensor *input, unsigned int case DataType::S8: case DataType::U8: case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: _func = &batch_concat; break; case DataType::S16: diff --git a/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp b/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp index 6211abcad0..d167cbb05d 100644 --- a/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp +++ b/src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp @@ -43,6 +43,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *block_inf ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, block_info, output); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(block_info, 1, DataType::S32); ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); // Validate output if initialized if(output->total_size() != 0) diff --git a/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp b/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp index b2b0dbd789..45f6fd99fe 100644 --- a/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp +++ b/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp @@ -39,12 +39,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups) { // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, - 1, - DataType::U8, DataType::S8, DataType::QASYMM8, - DataType::U16, DataType::S16, - DataType::U32, DataType::S32, - DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(input, DataLayout::NCHW, DataLayout::NHWC); const unsigned int channels = input->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL)); diff --git a/src/core/NEON/kernels/NECol2ImKernel.cpp b/src/core/NEON/kernels/NECol2ImKernel.cpp index cea8782354..4f1b1d8ca2 100644 --- a/src/core/NEON/kernels/NECol2ImKernel.cpp +++ b/src/core/NEON/kernels/NECol2ImKernel.cpp @@ -43,6 +43,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims) { //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); // Validate configured output if(output->total_size() != 0) diff --git a/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp b/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp index b6d166d30e..ab6636018f 100644 --- a/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp +++ b/src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,8 +26,8 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Types.h" -using namespace arm_compute; - +namespace arm_compute +{ NEConvertFullyConnectedWeightsKernel::NEConvertFullyConnectedWeightsKernel() : _input(nullptr), _output(nullptr), _factor1(0), _factor2(0) { @@ -66,12 +66,9 @@ void NEConvertFullyConnectedWeightsKernel::configure(const ITensor *input, ITens Status NEConvertFullyConnectedWeightsKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout) { + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, - DataType::U8, DataType::S8, DataType::QASYMM8, - DataType::U16, DataType::S16, - DataType::U32, DataType::S32, - DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() != 2); ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(1) != original_input_shape.total_size_lower(3)); ARM_COMPUTE_RETURN_ERROR_ON(data_layout == DataLayout::UNKNOWN); @@ -124,3 +121,4 @@ void NEConvertFullyConnectedWeightsKernel::run(const Window &window, const Threa break; } } +} // namespace arm_compute diff --git a/src/core/NEON/kernels/NECopyKernel.cpp b/src/core/NEON/kernels/NECopyKernel.cpp index 83f3dded4f..6bf49549e2 100644 --- a/src/core/NEON/kernels/NECopyKernel.cpp +++ b/src/core/NEON/kernels/NECopyKernel.cpp @@ -38,6 +38,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding = PaddingList()) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON(padding.size() > 4); // Validate output if initialized diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp index 4377006f28..56ab11415c 100644 --- a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp @@ -66,6 +66,16 @@ void depth_concat(const ITensor *in, ITensor *out, int depth_offset, const Windo }, input, output); } + else if(dt == DataType::QASYMM8_SIGNED && input_qinfo != output_qinfo) + { + execute_window_loop(window, [&](const Coordinates &) + { + const auto in_ptr = reinterpret_cast(input_ptr + input.offset()); + const auto out_ptr = reinterpret_cast(output_ptr + output.offset()); + vst1q_s8(out_ptr, vquantize_signed(vdequantize(vld1q_s8(in_ptr), input_qinfo), output_qinfo)); + }, + input, output); + } else { execute_window_loop(window, [&](const Coordinates &) @@ -102,7 +112,7 @@ Status validate_arguments(const ITensorInfo *input, unsigned int depth_offset, c { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. 
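The depth-concatenation kernel gains the same signed branch, so NEConcatenateLayer can now stitch QASYMM8_SIGNED tensors whose quantization parameters differ, requantizing each input to the output's parameters on the fly. A rough usage sketch, assuming the configure overload that takes a vector of input tensors and a concatenation axis (shapes and quantization values are invented):

#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
#include "arm_compute/runtime/Tensor.h"
#include <vector>

using namespace arm_compute;

void example_concat_qasymm8_signed() // hypothetical example, not library code
{
    Tensor a, b, out;
    a.allocator()->init(TensorInfo(TensorShape(8U, 8U, 4U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.5f, 0)));
    b.allocator()->init(TensorInfo(TensorShape(8U, 8U, 6U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.25f, -5)));
    out.allocator()->init(TensorInfo(TensorShape(8U, 8U, 10U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.25f, -5)));

    std::vector<ITensor *> inputs = { &a, &b };
    NEConcatenateLayer concat;
    concat.configure(inputs, &out, 2U); // axis 2: concatenate along the depth dimension

    a.allocator()->allocate();
    b.allocator()->allocate();
    out.allocator()->allocate();
    concat.run(); // b is requantized to out's quantization info while copying
}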
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimX) != output->dimension(Window::DimX)); @@ -134,6 +144,9 @@ void NEDepthConcatenateLayerKernel::configure(const ITensor *input, unsigned int case DataType::QASYMM8: _func = &depth_concat; break; + case DataType::QASYMM8_SIGNED: + _func = &depth_concat; + break; case DataType::F16: _func = &depth_concat; break; diff --git a/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp b/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp index df631c3c03..b51d0d66c5 100644 --- a/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp @@ -41,6 +41,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); ARM_COMPUTE_RETURN_ERROR_ON(block_shape < 2); diff --git a/src/core/NEON/kernels/NEFillBorderKernel.cpp b/src/core/NEON/kernels/NEFillBorderKernel.cpp index 13db1659ce..75d46c61d8 100644 --- a/src/core/NEON/kernels/NEFillBorderKernel.cpp +++ b/src/core/NEON/kernels/NEFillBorderKernel.cpp @@ -97,11 +97,9 @@ NEFillBorderKernel::NEFillBorderKernel() void NEFillBorderKernel::configure(ITensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value) { + ARM_COMPUTE_ERROR_ON_NULLPTR(tensor); //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(tensor, 1, DataType::U8, DataType::QASYMM8, - DataType::U16, DataType::S16, - DataType::U32, DataType::S32, - DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON(tensor->info()->data_type() == DataType::UNKNOWN); _tensor = tensor; _border_size = border_size; diff --git a/src/core/NEON/kernels/NEFlattenLayerKernel.cpp b/src/core/NEON/kernels/NEFlattenLayerKernel.cpp index 4840a95540..a48601f7b0 100644 --- a/src/core/NEON/kernels/NEFlattenLayerKernel.cpp +++ b/src/core/NEON/kernels/NEFlattenLayerKernel.cpp @@ -42,10 +42,8 @@ namespace { Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, - DataType::U16, DataType::S16, - DataType::U32, DataType::S32, - DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. 
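Several of the kernels above switch from an explicit whitelist of data types to the minimal check that the type is simply known, since they only move bytes around. A sketch of that relaxed validation idiom as it would sit in a kernel's anonymous namespace (the helper name is made up; the macros are the ones used throughout the patch):

// Layout-only kernels no longer enumerate supported types; any concrete type,
// including QASYMM8_SIGNED, passes as long as the input and output types match.
Status validate_arguments_sketch(const ITensorInfo *input, const ITensorInfo *output)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
    ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
    return Status{};
}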
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); diff --git a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp index a9c04824ae..8f73bdb3a3 100644 --- a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp +++ b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp @@ -44,6 +44,7 @@ namespace { Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) { + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); diff --git a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp index ea3d32e628..88104f7297 100644 --- a/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp @@ -54,10 +54,9 @@ TensorShape get_output_shape(const ITensorInfo *input) Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) { + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QSYMM8_PER_CHANNEL, DataType::U8, DataType::S8, - DataType::U16, DataType::S16, DataType::U32, DataType::S32, - DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); if(output->total_size() != 0) diff --git a/src/core/NEON/kernels/NEGatherKernel.cpp b/src/core/NEON/kernels/NEGatherKernel.cpp index 1e027b7292..0a7a8dfc21 100644 --- a/src/core/NEON/kernels/NEGatherKernel.cpp +++ b/src/core/NEON/kernels/NEGatherKernel.cpp @@ -52,6 +52,32 @@ void validate_indices(const ITensor *indices) } } +Status validate_arguments(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, indices, output); + ARM_COMPUTE_RETURN_ERROR_ON(indices->num_dimensions() > 1); + ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); + + if(axis < 0) + { + axis += input->num_dimensions(); + } + + ARM_COMPUTE_RETURN_ERROR_ON(0 > axis || axis >= static_cast(input->num_dimensions())); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); + + if(output->total_size() != 0) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output); + TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape(input->tensor_shape(), indices->tensor_shape(), axis); + ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size()); + } + + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(indices, 1, DataType::U32, DataType::S32); + + return Status{}; +} } // namespace NEGatherKernel::NEGatherKernel() @@ -107,9 +133,7 @@ void NEGatherKernel::gather_n_axis(const Window &window, const ThreadInfo &info) void NEGatherKernel::configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, indices); - ARM_COMPUTE_ERROR_ON(indices->info()->num_dimensions() != 1); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(indices, 
1, DataType::U32, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, DataType::U32, DataType::S32, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), indices->info(), output->info(), axis)); _input = input; _indices = indices; @@ -154,7 +178,7 @@ void NEGatherKernel::configure(const ITensor *input, const ITensor *indices, ITe } // Output auto initialization if not yet initialized TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape(input->info()->tensor_shape(), indices->info()->tensor_shape(), _axis); - auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type()); + auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape)); // Create window Window win = calculate_max_window(*output->info(), Steps()); @@ -165,31 +189,7 @@ void NEGatherKernel::configure(const ITensor *input, const ITensor *indices, ITe Status NEGatherKernel::validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, indices, output); - ARM_COMPUTE_RETURN_ERROR_ON(indices->num_dimensions() > 1); - ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); - - if(axis < 0) - { - axis += input->num_dimensions(); - } - - ARM_COMPUTE_RETURN_ERROR_ON(0 > axis || axis >= static_cast(input->num_dimensions())); - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, - DataType::U16, DataType::S16, - DataType::U32, DataType::S32, DataType::F16, DataType::F32); - - if(output->total_size() != 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output); - TensorShape output_shape = arm_compute::misc::shape_calculator::compute_gather_shape(input->tensor_shape(), indices->tensor_shape(), axis); - ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size()); - } - - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(indices, 1, DataType::U32, DataType::S32); - + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, indices, output, axis)); return Status{}; } diff --git a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp index 18be1d5bb7..4f7ae8c0b1 100644 --- a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp @@ -36,8 +36,8 @@ #include -using namespace arm_compute; - +namespace arm_compute +{ namespace { std::pair validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) @@ -58,10 +58,7 @@ Status validate_arguments(const ITensorInfo *input, unsigned int height_offset, { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. 
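The NEGatherKernel change above also swaps the output auto-initialization to clone the input's TensorInfo rather than rebuilding it from shape and data type alone; the clone carries the quantization info across, which matters for QASYMM8/QASYMM8_SIGNED outputs. A minimal sketch of the effect (shapes and values invented, function name hypothetical):

#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"

using namespace arm_compute;

void example_auto_init_preserves_qinfo()
{
    TensorInfo src(TensorShape(32U, 4U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.1f, 3));
    TensorInfo dst; // empty: eligible for auto-initialization

    // Cloning the source info keeps scale/offset; rebuilding from shape + data type alone would drop it.
    auto_init_if_empty(dst, src.clone()->set_tensor_shape(TensorShape(8U, 4U)));
    // dst is now QASYMM8_SIGNED, shape (8, 4), with QuantizationInfo(0.1f, 3) carried over.
}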
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, - DataType::U8, DataType::S8, DataType::QASYMM8, - DataType::U16, DataType::S16, DataType::F16, - DataType::U32, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimX) != output->dimension(Window::DimX)); ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimY) + height_offset > output->dimension(Window::DimY)); @@ -128,6 +125,15 @@ void NEHeightConcatenateLayerKernel::run(const Window &window, const ThreadInfo }, input, output); } + else if(dt == DataType::QASYMM8_SIGNED && input_qinfo != output_qinfo) + { + execute_window_loop(window, [&](const Coordinates &) + { + vst1q_s8(reinterpret_cast(output_ptr + output.offset()), + vquantize_signed(vdequantize(vld1q_s8(reinterpret_cast(input.ptr())), input_qinfo), output_qinfo)); + }, + input, output); + } else { execute_window_loop(window, [&](const Coordinates &) @@ -140,3 +146,4 @@ void NEHeightConcatenateLayerKernel::run(const Window &window, const ThreadInfo input, output); } } +} // namespace arm_compute diff --git a/src/core/NEON/kernels/NEPadLayerKernel.cpp b/src/core/NEON/kernels/NEPadLayerKernel.cpp index 88a1c2ec83..07229af358 100644 --- a/src/core/NEON/kernels/NEPadLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPadLayerKernel.cpp @@ -38,6 +38,8 @@ namespace { Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &paddings, const PaddingMode mode) { + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_MSG(mode != PaddingMode::CONSTANT, "Only constant padding mode is supported"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(paddings.size() > 4, "Padding list bigger than 4 dimensions"); if(output->total_size() != 0) diff --git a/src/core/NEON/kernels/NEPermuteKernel.cpp b/src/core/NEON/kernels/NEPermuteKernel.cpp index 897b764b45..2c0db769f5 100644 --- a/src/core/NEON/kernels/NEPermuteKernel.cpp +++ b/src/core/NEON/kernels/NEPermuteKernel.cpp @@ -91,6 +91,7 @@ inline bool is_permutation_supported(const PermutationVector &v) Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm) { + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_MSG(!is_permutation_supported(perm), "PermutationVector not supported."); const TensorShape output_shape = misc::shape_calculator::compute_permutation_output_shape(*input, perm); diff --git a/src/core/NEON/kernels/NEReorgLayerKernel.cpp b/src/core/NEON/kernels/NEReorgLayerKernel.cpp index ece5aa431c..f8a8732c64 100644 --- a/src/core/NEON/kernels/NEReorgLayerKernel.cpp +++ b/src/core/NEON/kernels/NEReorgLayerKernel.cpp @@ -41,11 +41,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, int32_t stride) { //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. 
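NEPermuteKernel above keeps its whitelist of permutation vectors but drops the data-type whitelist, so quantized signed tensors can be re-laid-out as well. A rough usage sketch, assuming the NCHW-to-NHWC permutation vector (2, 0, 1) used elsewhere in the library (shape, quantization values and function name are invented):

#include "arm_compute/runtime/NEON/functions/NEPermute.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void example_permute_qasymm8_signed() // hypothetical example, not library code
{
    const QuantizationInfo qinfo(0.05f, 10);
    Tensor src, dst;
    src.allocator()->init(TensorInfo(TensorShape(32U, 24U, 3U), 1, DataType::QASYMM8_SIGNED, qinfo)); // W, H, C
    dst.allocator()->init(TensorInfo(TensorShape(3U, 32U, 24U), 1, DataType::QASYMM8_SIGNED, qinfo)); // C, W, H

    NEPermute permute;
    permute.configure(&src, &dst, PermutationVector(2U, 0U, 1U)); // NCHW -> NHWC style reordering

    src.allocator()->allocate();
    dst.allocator()->allocate();
    permute.run();
}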
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, - DataType::U8, DataType::S8, DataType::QASYMM8, - DataType::U16, DataType::S16, - DataType::U32, DataType::S32, - DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN); const size_t idx_width = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH); diff --git a/src/core/NEON/kernels/NEReshapeLayerKernel.cpp b/src/core/NEON/kernels/NEReshapeLayerKernel.cpp index 649fba30d4..53fcfd724d 100644 --- a/src/core/NEON/kernels/NEReshapeLayerKernel.cpp +++ b/src/core/NEON/kernels/NEReshapeLayerKernel.cpp @@ -44,8 +44,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, - DataType::U32, DataType::S32, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape().total_size() != output->tensor_shape().total_size()); @@ -107,6 +106,7 @@ void NEReshapeLayerKernel::run(const Window &window, const ThreadInfo &info) case DataType::U8: case DataType::S8: case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: reshape_tensor(window, _input, _output); break; case DataType::U16: diff --git a/src/core/NEON/kernels/NEReverseKernel.cpp b/src/core/NEON/kernels/NEReverseKernel.cpp index 99328deecd..2f584164dc 100644 --- a/src/core/NEON/kernels/NEReverseKernel.cpp +++ b/src/core/NEON/kernels/NEReverseKernel.cpp @@ -49,10 +49,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output, axis); ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, - DataType::U16, DataType::S16, - DataType::U32, DataType::S32, - DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(axis, 1, DataType::U32); ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis->num_dimensions() > 1, "Axis must be a 1D tensor"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis->dimension(0) > 4, "Only up to 4 dimensions can be reversed"); @@ -200,6 +197,7 @@ void NEReverseKernel::run(const Window &window, const ThreadInfo &info) run_reverse(window, _input, _axis, _output); break; case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: case DataType::U8: case DataType::S8: run_reverse(window, _input, _axis, _output); diff --git a/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp b/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp index f51c3940b7..ba3377f13b 100644 --- a/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp +++ b/src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp @@ -41,6 +41,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *block_info, const ITensorInfo *padddings, const ITensorInfo *output) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, block_info, padddings, output); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() 
== DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(block_info, 1, DataType::S32); ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); ARM_COMPUTE_RETURN_ERROR_ON(block_info->num_dimensions() > 1); @@ -62,6 +63,7 @@ Status validate_arguments_static(const ITensorInfo *input, const int block_shape const ITensorInfo *output) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON(block_shape_x < 1 || block_shape_y < 1); ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); diff --git a/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp b/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp index 4803365013..b2ce63e549 100644 --- a/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp +++ b/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp @@ -41,6 +41,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); ARM_COMPUTE_RETURN_ERROR_ON(block_shape < 1); diff --git a/src/core/NEON/kernels/NEStackLayerKernel.cpp b/src/core/NEON/kernels/NEStackLayerKernel.cpp index 3447d59bcc..5deca9e595 100644 --- a/src/core/NEON/kernels/NEStackLayerKernel.cpp +++ b/src/core/NEON/kernels/NEStackLayerKernel.cpp @@ -45,9 +45,7 @@ Status validate_arguments(const ITensorInfo *input, unsigned int axis, unsigned { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::U8, DataType::S8, - DataType::U16, DataType::S16, DataType::U32, DataType::S32, - DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON(idx_input >= num_tensors); ARM_COMPUTE_RETURN_ERROR_ON(axis > input->num_dimensions()); ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4); diff --git a/src/core/NEON/kernels/NEStridedSliceKernel.cpp b/src/core/NEON/kernels/NEStridedSliceKernel.cpp index 2de49c6864..15f786a521 100644 --- a/src/core/NEON/kernels/NEStridedSliceKernel.cpp +++ b/src/core/NEON/kernels/NEStridedSliceKernel.cpp @@ -43,11 +43,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, - DataType::U8, DataType::S8, DataType::QASYMM8, - DataType::U16, DataType::S16, DataType::QASYMM16, DataType::QSYMM16, - DataType::U32, DataType::S32, - DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape().num_dimensions() > 4); ARM_COMPUTE_RETURN_ERROR_ON(starts.num_dimensions() > input->num_dimensions()); diff --git a/src/core/NEON/kernels/NETileKernel.cpp b/src/core/NEON/kernels/NETileKernel.cpp index dbeacfad94..98f66e8391 100644 --- a/src/core/NEON/kernels/NETileKernel.cpp +++ b/src/core/NEON/kernels/NETileKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. 
  *
  * SPDX-License-Identifier: MIT
  *
@@ -38,6 +38,7 @@ namespace
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Multiples &multiples)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+    ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
     ARM_COMPUTE_RETURN_ERROR_ON(multiples.size() > 4);
     ARM_COMPUTE_RETURN_ERROR_ON(multiples.empty());
     ARM_COMPUTE_RETURN_ERROR_ON(std::any_of(multiples.begin(), multiples.end(), [](uint32_t e)
diff --git a/src/core/NEON/kernels/NETransposeKernel.cpp b/src/core/NEON/kernels/NETransposeKernel.cpp
index a0a8b82494..6a8e6ffeb5 100644
--- a/src/core/NEON/kernels/NETransposeKernel.cpp
+++ b/src/core/NEON/kernels/NETransposeKernel.cpp
@@ -75,10 +75,9 @@ unsigned int num_elems_processed(size_t element_size)
 
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
 {
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
     //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, DataType::U32, DataType::S32,
-                                                         DataType::F16,
-                                                         DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
 
     if(output->total_size() != 0)
     {
diff --git a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp
index cafa20a1bd..e164a38708 100644
--- a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp
@@ -36,8 +36,8 @@
 
 #include
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 namespace
 {
 std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, unsigned int width_offset, ITensorInfo *output)
@@ -58,10 +58,7 @@ Status validate_arguments(const ITensorInfo *input, unsigned int width_offset, c
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
     // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1,
-                                                         DataType::U8, DataType::S8, DataType::QASYMM8,
-                                                         DataType::U16, DataType::S16, DataType::F16,
-                                                         DataType::U32, DataType::S32, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
     ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) + width_offset > output->dimension(0));
@@ -128,6 +125,15 @@ void NEWidthConcatenateLayerKernel::run(const Window &window, const ThreadInfo &
         },
         input, output);
     }
+    else if(dt == DataType::QASYMM8_SIGNED && input_qinfo != output_qinfo)
+    {
+        execute_window_loop(window, [&](const Coordinates &)
+        {
+            vst1q_s8(reinterpret_cast<int8_t *>(output_ptr + output.offset()),
+                     vquantize_signed(vdequantize(vld1q_s8(reinterpret_cast<const int8_t *>(input.ptr())), input_qinfo), output_qinfo));
+        },
+        input, output);
+    }
     else
     {
         execute_window_loop(window, [&](const Coordinates &)
@@ -140,3 +146,4 @@ void NEWidthConcatenateLayerKernel::run(const Window &window, const ThreadInfo &
         input, output);
     }
 }
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NECol2Im.cpp b/src/runtime/NEON/functions/NECol2Im.cpp
index 78c6bc0475..a1113bf5bd 100644
--- a/src/runtime/NEON/functions/NECol2Im.cpp
+++ b/src/runtime/NEON/functions/NECol2Im.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,8 +26,8 @@
 #include "arm_compute/core/NEON/kernels/NECol2ImKernel.h"
 #include "support/ToolchainSupport.h"
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 void NECol2Im::configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims)
 {
     auto k = arm_compute::support::cpp14::make_unique<NECol2ImKernel>();
@@ -39,3 +39,4 @@ Status NECol2Im::validate(const ITensorInfo *input, const ITensorInfo *output, c
 {
     return NECol2ImKernel::validate(input, output, convolved_dims);
 }
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NECopy.cpp b/src/runtime/NEON/functions/NECopy.cpp
index efa8b893aa..5f46023ecc 100644
--- a/src/runtime/NEON/functions/NECopy.cpp
+++ b/src/runtime/NEON/functions/NECopy.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,8 +28,8 @@
 
 #include
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 void NECopy::configure(ITensor *input, ITensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<NECopyKernel>();
@@ -41,3 +41,4 @@ Status NECopy::validate(const arm_compute::ITensorInfo *input, const arm_compute
 {
     return NECopyKernel::validate(input, output);
 }
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NEFillBorder.cpp b/src/runtime/NEON/functions/NEFillBorder.cpp
index 44e49520dc..6b7a0faa85 100644
--- a/src/runtime/NEON/functions/NEFillBorder.cpp
+++ b/src/runtime/NEON/functions/NEFillBorder.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,8 +26,8 @@
 #include "arm_compute/core/Window.h"
 #include "arm_compute/runtime/NEON/NEScheduler.h"
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 void NEFillBorder::configure(ITensor *input, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value)
 {
     _border_handler.configure(input, BorderSize(border_width), border_mode, constant_border_value);
@@ -37,3 +37,4 @@ void NEFillBorder::run()
 {
     NEScheduler::get().schedule(&_border_handler, Window::DimZ);
 }
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NEFlattenLayer.cpp b/src/runtime/NEON/functions/NEFlattenLayer.cpp
index 57bef2b933..fb175a1dca 100644
--- a/src/runtime/NEON/functions/NEFlattenLayer.cpp
+++ b/src/runtime/NEON/functions/NEFlattenLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,8 +27,8 @@
 #include "arm_compute/core/Size2D.h"
 #include "support/ToolchainSupport.h"
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 void NEFlattenLayer::configure(const ITensor *input, ITensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<NEFlattenLayerKernel>();
@@ -39,4 +39,5 @@ void NEFlattenLayer::configure(const ITensor *input, ITensor *output)
 Status NEFlattenLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
 {
     return NEFlattenLayerKernel::validate(input, output);
-}
\ No newline at end of file
+}
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp b/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp
index 802b94650e..cb93712da0 100644
--- a/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp
+++ b/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,8 +30,8 @@
 #include "arm_compute/core/Validate.h"
 #include "support/ToolchainSupport.h"
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 void NEGEMMTranspose1xW::configure(const ITensor *input, ITensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<NEGEMMTranspose1xWKernel>();
@@ -42,3 +42,4 @@ Status NEGEMMTranspose1xW::validate(const ITensorInfo *input, const ITensorInfo
 {
     return NEGEMMTranspose1xWKernel::validate(input, output);
 }
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEGather.cpp b/src/runtime/NEON/functions/NEGather.cpp
index 078bd5ab26..428ef72d11 100644
--- a/src/runtime/NEON/functions/NEGather.cpp
+++ b/src/runtime/NEON/functions/NEGather.cpp
@@ -41,5 +41,4 @@ Status NEGather::validate(const ITensorInfo *input, const ITensorInfo *indices,
 {
     return NEGatherKernel::validate(input, indices, output, axis);
 }
-
 } // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEIm2Col.cpp b/src/runtime/NEON/functions/NEIm2Col.cpp
index 9102fca7f6..2a6972918e 100644
--- a/src/runtime/NEON/functions/NEIm2Col.cpp
+++ b/src/runtime/NEON/functions/NEIm2Col.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,8 +27,8 @@
 #include "arm_compute/runtime/NEON/NEScheduler.h"
 #include "support/ToolchainSupport.h"
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 NEIm2Col::NEIm2Col()
     : _kernel(), _y_dim(1)
 {
@@ -51,3 +51,4 @@ void NEIm2Col::run()
 {
     NEScheduler::get().schedule(&_kernel, _y_dim);
 }
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NEPermute.cpp b/src/runtime/NEON/functions/NEPermute.cpp
index 92abd03e2a..e6c41bfb03 100644
--- a/src/runtime/NEON/functions/NEPermute.cpp
+++ b/src/runtime/NEON/functions/NEPermute.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,8 +26,8 @@
 #include "arm_compute/core/NEON/kernels/NEPermuteKernel.h"
 #include "support/ToolchainSupport.h"
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 void NEPermute::configure(const ITensor *input, ITensor *output, const PermutationVector &perm)
 {
     auto k = arm_compute::support::cpp14::make_unique<NEPermuteKernel>();
@@ -39,3 +39,4 @@ Status NEPermute::validate(const ITensorInfo *input, const ITensorInfo *output,
 {
     return NEPermuteKernel::validate(input, output, perm);
 }
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEReshapeLayer.cpp b/src/runtime/NEON/functions/NEReshapeLayer.cpp
index 4600f36660..e2cd45ae6e 100644
--- a/src/runtime/NEON/functions/NEReshapeLayer.cpp
+++ b/src/runtime/NEON/functions/NEReshapeLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,8 +29,8 @@
 
 #include
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 void NEReshapeLayer::configure(const ITensor *input, ITensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<NEReshapeLayerKernel>();
@@ -45,3 +45,4 @@ Status NEReshapeLayer::validate(const ITensorInfo *input, const ITensorInfo *out
 
     return Status{};
 }
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NESelect.cpp b/src/runtime/NEON/functions/NESelect.cpp
index 509bbaa24e..c6089c8ec0 100644
--- a/src/runtime/NEON/functions/NESelect.cpp
+++ b/src/runtime/NEON/functions/NESelect.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,8 +26,6 @@
 #include "arm_compute/core/NEON/kernels/NESelectKernel.h"
 #include "arm_compute/core/Types.h"
 
-using namespace arm_compute;
-
 namespace arm_compute
 {
 void NESelect::configure(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output)
diff --git a/src/runtime/NEON/functions/NETranspose.cpp b/src/runtime/NEON/functions/NETranspose.cpp
index b5b28e8e18..fd0d73aa7c 100644
--- a/src/runtime/NEON/functions/NETranspose.cpp
+++ b/src/runtime/NEON/functions/NETranspose.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,8 +28,8 @@
 
 #include
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 void NETranspose::configure(const ITensor *input, ITensor *output)
 {
     auto k = arm_compute::support::cpp14::make_unique<NETransposeKernel>();
@@ -40,4 +40,5 @@ void NETranspose::configure(const ITensor *input, ITensor *output)
 Status NETranspose::validate(const ITensorInfo *input, const ITensorInfo *output)
 {
     return NETransposeKernel::validate(input, output);
-}
\ No newline at end of file
+}
+} // namespace arm_compute
\ No newline at end of file
diff --git a/tests/validation/NEON/BatchConcatenateLayer.cpp b/tests/validation/NEON/BatchConcatenateLayer.cpp
index f95663dbd3..2c0ebc3d54 100644
--- a/tests/validation/NEON/BatchConcatenateLayer.cpp
+++ b/tests/validation/NEON/BatchConcatenateLayer.cpp
@@ -136,10 +136,12 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchConcatenateLayerFixture, framew
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEBatchConcatenateLayerFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(),
-                       framework::dataset::make("DataType",
-                                                DataType::QASYMM8)),
-                       framework::dataset::make("Axis", 3)))
+TEST_SUITE_END()
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEBatchConcatenateLayerFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small3DShapes(), datasets::Tiny4DShapes()),
+                       framework::dataset::make("DataType",
+                                                DataType::QASYMM8_SIGNED)),
+                       framework::dataset::make("Axis", 3)))
 {
     // Validate output
     validate(Accessor(_target), _reference);
diff --git a/tests/validation/NEON/DepthConcatenateLayer.cpp b/tests/validation/NEON/DepthConcatenateLayer.cpp
index 844c3987db..eea7c4d82f 100644
--- a/tests/validation/NEON/DepthConcatenateLayer.cpp
+++ b/tests/validation/NEON/DepthConcatenateLayer.cpp
@@ -133,10 +133,12 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture, framew
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(),
-                       framework::dataset::make("DataType",
-                                                DataType::QASYMM8)),
-                       framework::dataset::make("Axis", 2)))
+TEST_SUITE_END()
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small3DShapes(), datasets::Tiny4DShapes()),
+                       framework::dataset::make("DataType",
+                                                DataType::QASYMM8_SIGNED)),
+                       framework::dataset::make("Axis", 2)))
 {
     // Validate output
     validate(Accessor(_target), _reference);
diff --git a/tests/validation/NEON/HeightConcatenateLayer.cpp b/tests/validation/NEON/HeightConcatenateLayer.cpp
index 075dfa3b24..bfb0a21ab9 100644
--- a/tests/validation/NEON/HeightConcatenateLayer.cpp
+++ b/tests/validation/NEON/HeightConcatenateLayer.cpp
@@ -118,15 +118,18 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEHeightConcatenateLayerFixture, frame
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEHeightConcatenateLayerFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(),
-                       framework::dataset::make("DataType",
-                                                DataType::QASYMM8)),
-                       framework::dataset::make("Axis", 1)))
+TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEHeightConcatenateLayerFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()),
+                       framework::dataset::make("DataType",
+                                                DataType::QASYMM8_SIGNED)),
+                       framework::dataset::make("Axis", 1)))
 {
     // Validate output
     validate(Accessor(_target), _reference);
 }
-TEST_SUITE_END() // QASYMM8
+TEST_SUITE_END() // QASYMM8_SIGNED
 TEST_SUITE_END() // Quantized
 TEST_SUITE_END()
diff --git a/tests/validation/NEON/WidthConcatenateLayer.cpp b/tests/validation/NEON/WidthConcatenateLayer.cpp
index ba0ff1bd81..3edf136cab 100644
--- a/tests/validation/NEON/WidthConcatenateLayer.cpp
+++ b/tests/validation/NEON/WidthConcatenateLayer.cpp
@@ -117,9 +117,13 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEWidthConcatenateLayerFixture, framew
     // Validate output
     validate(Accessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NEWidthConcatenateLayerFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::ConcatenateLayerShapes(), framework::dataset::make("DataType",
-                                                                                                                   DataType::QASYMM8)),
-                       framework::dataset::make("Axis", 0)))
+TEST_SUITE_END()
+
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEWidthConcatenateLayerFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(concat(datasets::Small2DShapes(), datasets::Tiny4DShapes()),
+                       framework::dataset::make("DataType",
+                                                DataType::QASYMM8_SIGNED)),
+                       framework::dataset::make("Axis", 0)))
 {
     // Validate output
diff --git a/tests/validation/reference/ConcatenateLayer.cpp b/tests/validation/reference/ConcatenateLayer.cpp
index aa74ca2474..266dae1c27 100644
--- a/tests/validation/reference/ConcatenateLayer.cpp
+++ b/tests/validation/reference/ConcatenateLayer.cpp
@@ -70,16 +70,27 @@ SimpleTensor widthconcatenate_layer(const std::vector> &srcs,
             for(int r = 0; r < height; ++r)
             {
                 const int offset = u * height * depth + d * height + r;
-                if(src.data_type() == DataType::QASYMM8 && src.quantization_info() != dst.quantization_info())
+                if(is_data_type_quantized(src.data_type()) && src.quantization_info() != dst.quantization_info())
                 {
                     const UniformQuantizationInfo iq_info = src.quantization_info().uniform();
                     const UniformQuantizationInfo oq_info = dst.quantization_info().uniform();
-                    std::transform(src_ptr, src_ptr + width, dst_ptr + width_offset + offset * width_out, [&](T t)
+                    if(src.data_type() == DataType::QASYMM8)
                     {
-                        const float dequantized_input = dequantize_qasymm8(t, iq_info);
-                        return quantize_qasymm8(dequantized_input, oq_info);
-                    });
+                        std::transform(src_ptr, src_ptr + width, dst_ptr + width_offset + offset * width_out, [&](T t)
+                        {
+                            const float dequantized_input = dequantize_qasymm8(t, iq_info);
+                            return quantize_qasymm8(dequantized_input, oq_info);
+                        });
+                    }
+                    else
+                    {
+                        std::transform(src_ptr, src_ptr + width, dst_ptr + width_offset + offset * width_out, [&](T t)
+                        {
+                            const float dequantized_input = dequantize_qasymm8_signed(t, iq_info);
+                            return quantize_qasymm8_signed(dequantized_input, oq_info);
+                        });
+                    }
                     src_ptr += width;
                 }
                 else
@@ -98,6 +109,7 @@ SimpleTensor widthconcatenate_layer(const std::vector> &srcs,
 template SimpleTensor widthconcatenate_layer(const std::vector> &srcs, SimpleTensor &dst);
 template SimpleTensor widthconcatenate_layer(const std::vector> &srcs, SimpleTensor &dst);
 template SimpleTensor widthconcatenate_layer(const std::vector> &srcs, SimpleTensor &dst);
+template SimpleTensor<int8_t> widthconcatenate_layer(const std::vector<SimpleTensor<int8_t>> &srcs, SimpleTensor<int8_t> &dst);
 } // namespace
 
 template <typename T>
@@ -148,6 +160,7 @@ SimpleTensor concatenate_layer(std::vector> &srcs, SimpleTens
 template SimpleTensor concatenate_layer(std::vector> &srcs, SimpleTensor &dst, unsigned int axis);
 template SimpleTensor concatenate_layer(std::vector> &srcs, SimpleTensor &dst, unsigned int axis);
 template SimpleTensor concatenate_layer(std::vector> &srcs, SimpleTensor &dst, unsigned int axis);
+template SimpleTensor<int8_t> concatenate_layer(std::vector<SimpleTensor<int8_t>> &srcs, SimpleTensor<int8_t> &dst, unsigned int axis);
 } // namespace reference
 } // namespace validation
 } // namespace test
-- 
cgit v1.2.1
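
Reviewer note (not part of the patch): the new QASYMM8_SIGNED branch in NEWidthConcatenateLayerKernel::run(), like the matching reference change in ConcatenateLayer.cpp, only requantises when the input and output tensors carry different quantization info; otherwise the bytes are copied as-is. A minimal scalar sketch of that per-element round trip follows. The struct and function names below are illustrative only and are not Arm Compute Library APIs; the kernel performs the same computation sixteen lanes at a time via vld1q_s8/vdequantize/vquantize_signed/vst1q_s8.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Illustrative affine quantisation parameters: real_value = scale * (quantized - offset).
struct UniformQParams
{
    float   scale;
    int32_t offset;
};

// Dequantise with the source parameters, requantise with the destination parameters,
// rounding to nearest and saturating to the signed 8-bit range [-128, 127].
inline int8_t requantize_qasymm8_signed(int8_t in, UniformQParams src, UniformQParams dst)
{
    const float real      = src.scale * (static_cast<float>(in) - static_cast<float>(src.offset));
    const int   quantized = static_cast<int>(std::lround(real / dst.scale)) + dst.offset;
    return static_cast<int8_t>(std::min(127, std::max(-128, quantized)));
}

When source and destination share the same scale and offset the result equals the input, which is why both the kernel and the reference guard this path with a quantization-info comparison and otherwise fall back to a plain copy.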
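
As a quick way to see what the relaxed validate() checks enable, the sketch below (also not part of the patch; shapes and quantisation parameters are made up) asks NEReshapeLayer whether a QASYMM8_SIGNED reshape is supported. Before this change the kernel's data-type whitelist did not include QASYMM8_SIGNED, so the same call would have returned an error.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"

#include <iostream>

using namespace arm_compute;

int main()
{
    // Hypothetical 8x4x2 signed-quantised input reshaped to a flat 64-element tensor.
    const QuantizationInfo qinfo(0.5f, -10);
    const TensorInfo       src(TensorShape(8U, 4U, 2U), 1, DataType::QASYMM8_SIGNED, qinfo);
    const TensorInfo       dst(TensorShape(64U), 1, DataType::QASYMM8_SIGNED, qinfo);

    // validate() now only rejects UNKNOWN data types (plus mismatched types/total sizes).
    const Status status = NEReshapeLayer::validate(&src, &dst);
    std::cout << (status.error_code() == ErrorCode::OK ? "QASYMM8_SIGNED reshape supported" : status.error_description()) << std::endl;
    return 0;
}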