author    Michele Di Giorgio <michele.digiorgio@arm.com>  2020-07-06 11:27:21 +0100
committer Michele Di Giorgio <michele.digiorgio@arm.com>  2020-07-20 12:39:29 +0000
commit    f6f7876e9ee8b58a8a6b335b032d554412fa3983 (patch)
tree      669c86bfc60ec99965151022a4112c53116b06c0
parent    954051f449788f22eb605b126f71af923950ca29 (diff)
download  ComputeLibrary-f6f7876e9ee8b58a8a6b335b032d554412fa3983.tar.gz
COMPMID-3532: Align data type support between doxygen and implementation - CL
Also removes some unused code.

Change-Id: I85687c40999c3cdf9e6fccfcd020b0901a9515fe
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3581
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
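The changes below bring each kernel's doxygen "Data types supported" lists in line with what its validate() actually accepts. As a rough, self-contained model of the pattern being aligned (simplified stand-in types, not the library's real ARM_COMPUTE_* validation macros):

    #include <algorithm>
    #include <initializer_list>

    // Simplified stand-ins for the library's types (illustration only).
    enum class DataType { U8, S16, QASYMM8, QASYMM8_SIGNED, F16, F32 };
    struct Status { bool ok; };

    // The doxygen list and the set enforced at configure time must agree;
    // this patch brings the comments in line with checks of this shape.
    Status validate_data_type(DataType dt, std::initializer_list<DataType> supported)
    {
        const bool found = std::find(supported.begin(), supported.end(), dt) != supported.end();
        return Status{ found };
    }

    // e.g. the softmax kernel below now documents QASYMM8/QASYMM8_SIGNED/F16/F32:
    //   validate_data_type(dt, {DataType::QASYMM8, DataType::QASYMM8_SIGNED,
    //                           DataType::F16, DataType::F32});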
-rw-r--r--  Android.bp  1
-rw-r--r--  arm_compute/core/CL/CLHelpers.h  8
-rw-r--r--  arm_compute/core/CL/CLKernels.h  1
-rw-r--r--  arm_compute/core/CL/kernels/CLCannyEdgeKernel.h  6
-rw-r--r--  arm_compute/core/CL/kernels/CLChannelCombineKernel.h  6
-rw-r--r--  arm_compute/core/CL/kernels/CLConvolutionKernel.h  6
-rw-r--r--  arm_compute/core/CL/kernels/CLCopyKernel.h  6
-rw-r--r--  arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h  6
-rw-r--r--  arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h  12
-rw-r--r--  arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h  16
-rw-r--r--  arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h  78
-rw-r--r--  arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h  12
-rw-r--r--  arm_compute/core/CL/kernels/CLPadLayerKernel.h  5
-rw-r--r--  arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h  70
-rw-r--r--  arm_compute/core/CL/kernels/CLReorgLayerKernel.h  4
-rw-r--r--  arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h  93
-rw-r--r--  arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h  6
-rw-r--r--  arm_compute/runtime/CL/functions/CLComparison.h  6
-rw-r--r--  arm_compute/runtime/CL/functions/CLCopy.h  6
-rw-r--r--  arm_compute/runtime/CL/functions/CLElementwiseOperations.h  178
-rw-r--r--  arm_compute/runtime/CL/functions/CLFill.h  4
-rw-r--r--  arm_compute/runtime/CL/functions/CLFillBorder.h  4
-rw-r--r--  arm_compute/runtime/CL/functions/CLGEMM.h  4
-rw-r--r--  arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h  46
-rw-r--r--  arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h  8
-rw-r--r--  arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h  71
-rw-r--r--  arm_compute/runtime/CL/functions/CLReductionOperation.h  11
-rw-r--r--  arm_compute/runtime/CL/functions/CLSoftmaxLayer.h  38
-rw-r--r--  arm_compute/runtime/CL/functions/CLUnstack.h  12
-rw-r--r--  arm_compute/runtime/CL/functions/CLUpsampleLayer.h  8
-rw-r--r--  docs/00_introduction.dox  4
-rw-r--r--  src/core/CL/CLHelpers.cpp  5
-rw-r--r--  src/core/CL/CLKernelLibrary.cpp  1
-rw-r--r--  src/core/CL/cl_kernels/activation_layer_quant.cl  6
-rw-r--r--  src/core/CL/cl_kernels/channel_shuffle.cl  48
-rw-r--r--  src/core/CL/cl_kernels/comparisons.cl  6
-rw-r--r--  src/core/CL/cl_kernels/concatenate.cl  6
-rw-r--r--  src/core/CL/cl_kernels/copy_tensor.cl  6
-rw-r--r--  src/core/CL/cl_kernels/depthwise_convolution.cl  4
-rw-r--r--  src/core/CL/cl_kernels/depthwise_convolution_quantized.cl  24
-rw-r--r--  src/core/CL/cl_kernels/fill_border.cl  6
-rw-r--r--  src/core/CL/cl_kernels/gemm.cl  44
-rw-r--r--  src/core/CL/cl_kernels/gemmlowp.cl  6
-rw-r--r--  src/core/CL/cl_kernels/memset.cl  4
-rw-r--r--  src/core/CL/cl_kernels/nonmax.cl  6
-rw-r--r--  src/core/CL/cl_kernels/pad_layer.cl  4
-rw-r--r--  src/core/CL/cl_kernels/reduction_operation.cl  16
-rw-r--r--  src/core/CL/cl_kernels/reorg_layer.cl  6
-rw-r--r--  src/core/CL/cl_kernels/reverse.cl  46
-rw-r--r--  src/core/CL/cl_kernels/select.cl  20
-rw-r--r--  src/core/CL/cl_kernels/softmax_layer_quantized.cl  4
-rw-r--r--  src/core/CL/cl_kernels/stack_layer.cl  4
-rw-r--r--  src/core/CL/cl_kernels/transpose.cl  4
-rw-r--r--  src/core/CL/cl_kernels/upsample_layer.cl  10
-rw-r--r--  src/core/CL/kernels/CLActivationLayerKernel.cpp  5
-rw-r--r--  src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp  12
-rw-r--r--  src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp  12
-rw-r--r--  src/core/CL/kernels/CLFillBorderKernel.cpp  12
-rw-r--r--  src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp  142
-rw-r--r--  src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp  2
-rw-r--r--  src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp  15
-rw-r--r--  src/core/CL/kernels/CLPadLayerKernel.cpp  1
-rw-r--r--  src/core/CL/kernels/CLStackLayerKernel.cpp  5
-rw-r--r--  src/core/CL/kernels/CLUpsampleLayerKernel.cpp  19
-rw-r--r--  src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp  5
-rw-r--r--  src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp  6
-rw-r--r--  src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp  7
-rw-r--r--  src/runtime/CL/functions/CLLSTMLayerQuantized.cpp  3
-rw-r--r--  src/runtime/CL/functions/CLRNNLayer.cpp  12
-rw-r--r--  src/runtime/CL/functions/CLReductionOperation.cpp  62
70 files changed, 515 insertions, 827 deletions
diff --git a/Android.bp b/Android.bp
index 25b221964d..45d96e7e9b 100644
--- a/Android.bp
+++ b/Android.bp
@@ -135,7 +135,6 @@ cc_library_static {
"src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp",
"src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp",
"src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp",
- "src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp",
"src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp",
"src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp",
"src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp",
diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h
index f3d64155a6..cf18e16e34 100644
--- a/arm_compute/core/CL/CLHelpers.h
+++ b/arm_compute/core/CL/CLHelpers.h
@@ -97,14 +97,6 @@ std::string get_cl_dot8_acc_type_from_data_type(const DataType &dt);
*/
std::string get_data_size_from_data_type(const DataType &dt);
-/** Translates fixed point tensor data type to the underlying OpenCL type.
- *
- * @param[in] dt @ref DataType to be translated to OpenCL type.
- *
- * @return The string specifying the underlying OpenCL type to be used.
- */
-std::string get_underlying_cl_type_from_data_type(const DataType &dt);
-
/** Helper function to get the GPU target from CL device
*
* @param[in] device A CL device
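The deleted helper translated a DataType into its underlying OpenCL C type string. A hypothetical, simplified sketch of that kind of mapping (the library's surviving get_cl_type_from_data_type covers the full set of types):

    #include <string>

    enum class DataType { U8, S16, F16, F32 };

    // Simplified model of the translation the removed helper performed.
    std::string cl_type_for(DataType dt)
    {
        switch (dt)
        {
            case DataType::U8:  return "uchar";
            case DataType::S16: return "short";
            case DataType::F16: return "half";
            case DataType::F32: return "float";
        }
        return "";
    }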
diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h
index ce3b325546..253df597e0 100644
--- a/arm_compute/core/CL/CLKernels.h
+++ b/arm_compute/core/CL/CLKernels.h
@@ -84,7 +84,6 @@
#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
diff --git a/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h b/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h
index 5aec25259f..c4d0297aef 100644
--- a/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h
+++ b/arm_compute/core/CL/kernels/CLCannyEdgeKernel.h
@@ -26,8 +26,6 @@
#include "arm_compute/core/CL/ICLKernel.h"
-#include <cstdint>
-
namespace arm_compute
{
class ICLTensor;
@@ -136,7 +134,7 @@ public:
CLEdgeTraceKernel &operator=(const CLEdgeTraceKernel &) = delete;
/** Initialise the kernel's source, destination and border mode.
*
- * @param[in] input Source tensor. Data types supported: U8.
+ * @param[in] input Source tensor. Data types supported: U16/U32.
* @param[out] output Destination tensor. Data types supported: U8.
* @param[in] upper_thr Upper threshold used for the hysteresis
* @param[in] lower_thr Lower threshold used for the hysteresis
@@ -154,7 +152,7 @@ public:
/** Initialise the kernel's source, destination and border mode.
*
* @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
+ * @param[in] input Source tensor. Data types supported: U16/U32.
* @param[out] output Destination tensor. Data types supported: U8.
* @param[in] upper_thr Upper threshold used for the hysteresis
* @param[in] lower_thr Lower threshold used for the hysteresis
diff --git a/arm_compute/core/CL/kernels/CLChannelCombineKernel.h b/arm_compute/core/CL/kernels/CLChannelCombineKernel.h
index 3ba426cbfa..f9c33df7c1 100644
--- a/arm_compute/core/CL/kernels/CLChannelCombineKernel.h
+++ b/arm_compute/core/CL/kernels/CLChannelCombineKernel.h
@@ -57,7 +57,7 @@ public:
* @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
* @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
* @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format.
- * @param[out] output The single planar output tensor.
+ * @param[out] output The single planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422.
*/
void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output);
/** Configure function's inputs and outputs.
@@ -75,7 +75,7 @@ public:
* @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
* @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
* @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
- * @param[out] output The multi planar output tensor.
+ * @param[out] output The multi planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422.
*/
void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output);
/** Configure function's inputs and outputs.
@@ -84,7 +84,7 @@ public:
* @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
* @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
* @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
- * @param[out] output The multi planar output tensor.
+ * @param[out] output The multi planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422.
*/
void configure(const CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output);
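The formats now documented above are the interleaved outputs built from the U8 planes. For the RGB888 case, a plain reference of what the combination computes (illustration only, not the CL kernel):

    #include <cstdint>
    #include <vector>

    // RGB888 reference: three W*H U8 planes are interleaved into one
    // W*H*3 buffer laid out as R0 G0 B0 R1 G1 B1 ...
    std::vector<uint8_t> combine_rgb888(const std::vector<uint8_t> &r,
                                        const std::vector<uint8_t> &g,
                                        const std::vector<uint8_t> &b)
    {
        std::vector<uint8_t> out(r.size() * 3); // planes assumed equal-sized
        for (size_t i = 0; i < r.size(); ++i)
        {
            out[3 * i + 0] = r[i];
            out[3 * i + 1] = g[i];
            out[3 * i + 2] = b[i];
        }
        return out;
    }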
diff --git a/arm_compute/core/CL/kernels/CLConvolutionKernel.h b/arm_compute/core/CL/kernels/CLConvolutionKernel.h
index 4bf7c754b0..0f500fb63a 100644
--- a/arm_compute/core/CL/kernels/CLConvolutionKernel.h
+++ b/arm_compute/core/CL/kernels/CLConvolutionKernel.h
@@ -108,7 +108,7 @@ public:
*
* @param[in] compile_context The compile context to be used.
* @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: S16.
+ * @param[out] output Destination tensor, Data types supported: U16/S16/S32.
* @param[in] conv Convolution matrix to apply to the input tensor.
* @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
*/
@@ -135,7 +135,7 @@ class CLSeparableConvolutionVertKernel : public ICLSimple2DKernel
public:
/** Initialise the kernel's input, output and border mode.
*
- * @param[in] input Source tensor. Data types supported: S16.
+ * @param[in] input Source tensor. Data types supported: U16/S16/S32.
* @param[out] output Destination tensor, Data types supported: U8, S16.
* @param[in] conv Convolution matrix to apply to the input tensor.
* @param[in] scale Scale of the convolution matrix.
@@ -146,7 +146,7 @@ public:
/** Initialise the kernel's input, output and border mode.
*
* @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: S16.
+ * @param[in] input Source tensor. Data types supported: U16/S16/S32.
* @param[out] output Destination tensor, Data types supported: U8, S16.
* @param[in] conv Convolution matrix to apply to the input tensor.
* @param[in] scale Scale of the convolution matrix.
diff --git a/arm_compute/core/CL/kernels/CLCopyKernel.h b/arm_compute/core/CL/kernels/CLCopyKernel.h
index 1f0b5a40e4..11a6d54e90 100644
--- a/arm_compute/core/CL/kernels/CLCopyKernel.h
+++ b/arm_compute/core/CL/kernels/CLCopyKernel.h
@@ -47,7 +47,7 @@ public:
CLCopyKernel &operator=(CLCopyKernel &&) = default;
/** Initialize the kernel's input, output.
*
- * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] input Source tensor. Data types supported: All.
* @param[out] output Destination tensor. Data types supported: same as @p input.
* @param[in] padding (Optional) Padding to be applied to the input tensor
* @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
@@ -56,7 +56,7 @@ public:
/** Initialize the kernel's input, output.
*
* @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] input Source tensor. Data types supported: All.
* @param[out] output Destination tensor. Data types supported: same as @p input.
* @param[in] padding (Optional) Padding to be applied to the input tensor
* @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
@@ -64,7 +64,7 @@ public:
void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding = PaddingList(), Window *output_window = nullptr);
/** Static function to check if given info will lead to a valid configuration of @ref CLCopyKernel
*
- * @param[in] input Source tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] input Source tensor info. Data types supported: All.
* @param[in] output Destination tensor info. Data types supported: same as @p input.
* @param[in] padding (Optional) Padding to be applied to the input tensor
* @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h
index d933a7eea5..4ca6c0bf4a 100644
--- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h
@@ -40,7 +40,7 @@ public:
/** Default move assignment operator. */
/** Initialize the function's source, destination, conv and border_size.
*
- * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED.
+ * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3].
* Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
* @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
@@ -61,7 +61,7 @@ public:
/** Initialize the function's source, destination, conv and border_size.
*
* @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED.
+ * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3].
* Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
* @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
@@ -81,7 +81,7 @@ public:
const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
/** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel
*
- * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED.
+ * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] weights Weights tensor info. A 3D tensor with dimensions [IFM, 3, 3].
* Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
* @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
diff --git a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h b/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h
index 699ff5d7a9..1995aed7b6 100644
--- a/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h
+++ b/arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h
@@ -113,7 +113,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel
*
* @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32.
+ * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
* @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
* @param[in] output Output tensor. Data types supported: Same as @p input1.
* @param[in] policy Policy to use to handle overflow.
@@ -124,7 +124,7 @@ public:
*
* @param[in] compile_context The compile context to be used.
* @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32.
+ * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
* @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
* @param[in] output Output tensor. Data types supported: Same as @p input1.
* @param[in] policy Policy to use to handle overflow.
@@ -136,7 +136,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel
*
* @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input info. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32.
+ * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
* @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
* @param[in] output Output tensor info. Data types supported: Same as @p input1.
* @param[in] policy Policy to use to handle overflow.
@@ -170,7 +170,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel
*
* @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32.
+ * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
* @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
* @param[in] output Output tensor. Data types supported: Same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
@@ -180,7 +180,7 @@ public:
*
* @param[in] compile_context The compile context to be used.
* @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32.
+ * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
* @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
* @param[in] output Output tensor. Data types supported: Same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
@@ -191,7 +191,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel
*
* @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input info. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32.
+ * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
* @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
* @param[in] output Output tensor info. Data types supported: Same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h
index bb7461ca44..6066e2a815 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h
@@ -48,7 +48,7 @@ public:
/** Initialise the kernel's input and output.
*
- * @param[in] input Input tensor. Data type supported: S8
+ * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8.
* @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32
* @param[in] info Kernel metadata:
* - k Number of matrix columns/rows depending on the type of reduction.
@@ -60,7 +60,7 @@ public:
/** Initialise the kernel's input and output.
*
* @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data type supported: S8
+ * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8.
* @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32
* @param[in] info Kernel metadata:
* - k Number of matrix columns/rows depending on the type of reduction.
@@ -85,7 +85,7 @@ class CLGEMMLowpMatrixAReductionKernel : public ICLGEMMLowpReductionKernel
public:
/** Initialise the kernel's input and output.
*
- * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8.
* @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
* @param[in] info Kernel metadata:
* - k Number of matrix columns/rows depending on the type of reduction.
@@ -97,7 +97,7 @@ public:
/** Initialise the kernel's input and output.
*
* @param[in] compile_context The compile context to be used.
- * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8.
* @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
* @param[in] info Kernel metadata:
* - k Number of matrix columns/rows depending on the type of reduction.
@@ -108,7 +108,7 @@ public:
void configure(const CLCompileContext &compile_context, const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override;
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixAReductionKernel
*
- * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8.
* @param[in] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
* @param[in] info Kernel metadata:
* - k Number of matrix columns/rows depending on the type of reduction.
@@ -134,7 +134,7 @@ class CLGEMMLowpMatrixBReductionKernel : public ICLGEMMLowpReductionKernel
public:
/** Initialise the kernel's input and output.
*
- * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] mtx_b Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
* @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
* @param[in] info Kernel metadata:
* - k Number of matrix columns/rows depending on the type of reduction.
@@ -146,7 +146,7 @@ public:
/** Initialise the kernel's input and output.
*
* @param[in] compile_context The compile context to be used.
- * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] mtx_b Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
* @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
* @param[in] info Kernel metadata:
* - k Number of matrix columns/rows depending on the type of reduction.
@@ -157,7 +157,7 @@ public:
void configure(const CLCompileContext &compile_context, const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override;
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixBReductionKernel
*
- * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] mtx_b Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
* @param[in] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
* @param[in] info Kernel metadata:
* - k Number of matrix columns/rows depending on the type of reduction.
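Both reduction kernels emit an S32 vector of per-row or per-column sums of the quantized matrix; these sums feed the gemmlowp offset contribution. A plain reference for the matrix-A row reduction in the QASYMM8 case (sketch):

    #include <cstdint>
    #include <vector>

    // One S32 sum per row of an M x K QASYMM8 matrix stored row-major;
    // the matrix-B kernel is the same idea per column.
    std::vector<int32_t> row_sums(const std::vector<uint8_t> &mtx_a, int m, int k)
    {
        std::vector<int32_t> sums(m, 0);
        for (int r = 0; r < m; ++r)
            for (int c = 0; c < k; ++c)
                sums[r] += mtx_a[r * k + c];
        return sums;
    }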
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h
deleted file mode 100644
index 3b4a0be0e8..0000000000
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H
-#define ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-/** Interface to add a bias to each row of the input tensor
- *
- */
-class CLGEMMMatrixAccumulateBiasesKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLGEMMMatrixAccumulateBiasesKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMMatrixAccumulateBiasesKernel(const CLGEMMMatrixAccumulateBiasesKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMMatrixAccumulateBiasesKernel &operator=(const CLGEMMMatrixAccumulateBiasesKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMMatrixAccumulateBiasesKernel(CLGEMMMatrixAccumulateBiasesKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMMatrixAccumulateBiasesKernel &operator=(CLGEMMMatrixAccumulateBiasesKernel &&) = default;
- /** Set the accumulate buffer and the biases of the kernel.
- *
- * @param[in, out] accum The accumulate tensor to convert. Data types supported: F16/F32
- * @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p input
- */
- void configure(ICLTensor *accum, const ICLTensor *biases);
- /** Set the accumulate buffer and the biases of the kernel.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in, out] accum The accumulate tensor to convert. Data types supported: F16/F32
- * @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p input
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *accum, const ICLTensor *biases);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixAccumulateBiasesKernel
- *
- * @param[in] accum The accumulate tensor to convert. Data types supported: F16/F32
- * @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p input
- * @param[in] gpu_target GPU target
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *accum, const ITensorInfo *biases, GPUTarget gpu_target);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- ICLTensor *_accum;
- const ICLTensor *_biases;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMMATRIXACCUMULATEBIASESKERNEL_H */
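Per its doc comment, the deleted kernel added a shared 1D bias to every row of the F16/F32 accumulator, i.e. roughly:

    #include <vector>

    // Reference semantics of the removed kernel: a 1D bias vector is added
    // in place to every row of a rows x cols accumulator (row-major).
    void accumulate_biases(std::vector<float> &accum, const std::vector<float> &biases,
                           int rows, int cols)
    {
        for (int r = 0; r < rows; ++r)
            for (int c = 0; c < cols; ++c)
                accum[r * cols + c] += biases[c];
    }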
diff --git a/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h b/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h
index 5f1d82ded1..a741b1745a 100644
--- a/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h
+++ b/arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h
@@ -51,9 +51,9 @@ public:
*
* @note At least one of output1 or output2 must be set.
*
- * @param[in] gx The input gradient X tensor. Data types supported: S16.
- * @param[in] gy The input gradient Y tensor. Data types supported: S16.
- * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16.
+ * @param[in] gx The input gradient X tensor. Data types supported: S16/S32.
+ * @param[in] gy The input gradient Y tensor. Data types supported: S16/S32.
+ * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16/S32.
* @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8.
* @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM.
* @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED.
@@ -65,9 +65,9 @@ public:
* @note At least one of output1 or output2 must be set.
*
* @param[in] compile_context The compile context to be used.
- * @param[in] gx The input gradient X tensor. Data types supported: S16.
- * @param[in] gy The input gradient Y tensor. Data types supported: S16.
- * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16.
+ * @param[in] gx The input gradient X tensor. Data types supported: S16/S32.
+ * @param[in] gy The input gradient Y tensor. Data types supported: S16/S32.
+ * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16/S32.
* @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8.
* @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM.
* @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED.
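With the input types widened to S16/S32, the per-pixel computation stays as documented: magnitude from the chosen norm, phase from the gradient angle. A reference for the L2NORM/SIGNED case (sketch; the exact U8 angle quantisation is an implementation detail):

    #include <cmath>
    #include <cstdint>

    // One gradient pair: L2NORM magnitude and SIGNED phase mapped to U8.
    void magnitude_phase(int32_t gx, int32_t gy, int32_t &magnitude, uint8_t &phase)
    {
        const double kPi = 3.14159265358979323846;
        magnitude = static_cast<int32_t>(std::sqrt(double(gx) * gx + double(gy) * gy));
        double angle = std::atan2(double(gy), double(gx)); // [-pi, pi]
        if (angle < 0.0)
            angle += 2.0 * kPi;                            // [0, 2*pi)
        phase = static_cast<uint8_t>(angle * 255.0 / (2.0 * kPi));
    }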
diff --git a/arm_compute/core/CL/kernels/CLPadLayerKernel.h b/arm_compute/core/CL/kernels/CLPadLayerKernel.h
index 3b78bb9834..5bf5841803 100644
--- a/arm_compute/core/CL/kernels/CLPadLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLPadLayerKernel.h
@@ -25,7 +25,6 @@
#define ARM_COMPUTE_CLPADLAYERKERNEL_H
#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/CL/ICLTensor.h"
namespace arm_compute
{
@@ -49,7 +48,7 @@ public:
~CLPadLayerKernel() = default;
/** Set the input and output tensor.
*
- * @param[in] input Source tensor. Data types supported: U8, S8, QASYMM8, QASYMM8_SIGNED, U16, S16, U32, S32, F16, F32.
+ * @param[in] input Source tensor. Data types supported: All.
* @param[out] output Output tensor. Data type supported: same as @p input
* @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i]
* specifies the front and the end padding in the i-th dimension.
@@ -73,7 +72,7 @@ public:
PaddingMode mode = PaddingMode::CONSTANT);
/** Static function to check if given info will lead to a valid configuration of @ref CLPadLayerKernel
*
- * @param[in] input Source tensor info. Data types supported: U8, S8, QASYMM8, QASYMM8_SIGNED, U16, S16, U32, S32, F16, F32.
+ * @param[in] input Source tensor info. Data types supported: All.
* @param[in] output Output tensor info. Data type supported: same as @p input
* @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i]
* specifies the front and the end padding in the i-th dimension.
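The padding list is a (front, end) pair per dimension, so the output shape follows directly from the doc comment (sketch):

    #include <cstddef>
    #include <utility>
    #include <vector>

    // Each dimension grows by its front and end padding.
    std::vector<size_t> padded_shape(const std::vector<size_t> &in_shape,
                                     const std::vector<std::pair<size_t, size_t>> &padding)
    {
        std::vector<size_t> out = in_shape;
        for (size_t i = 0; i < padding.size() && i < out.size(); ++i)
            out[i] += padding[i].first + padding[i].second;
        return out;
    }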
diff --git a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
index bb64060a12..bb98eb83bf 100644
--- a/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
+++ b/arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h
@@ -48,17 +48,23 @@ public:
CLPixelWiseMultiplicationKernel &operator=(CLPixelWiseMultiplicationKernel &&) = default;
/** Initialise the kernel's input, output and border mode.
*
+ * Valid configurations (Input1,Input2) -> Output :
+ *
+ * - (U8,U8) -> U8
+ * - (U8,U8) -> S16
+ * - (U8,S16) -> S16
+ * - (S16,U8) -> S16
+ * - (S16,S16) -> S16
+ * - (F16,F16) -> F16
+ * - (F32,F32) -> F32
+ * - (QASYMM8,QASYMM8) -> QASYMM8
+ * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+ * - (QSYMM16,QSYMM16) -> QSYMM16
+ * - (QSYMM16,QSYMM16) -> S32
+ *
* @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
- * @param[in] input2 An input tensor. Data types supported: same as @p input1.
- * @param[out] output The output tensor, Data types supported:
- * - U8, only if both input are U8
- * - QASYMM8, only if both inputs are QASYMM8
- * - QASYMM8_SIGNED, only if both inputs are QASYMM8_SIGNED
- * - S16
- * - QSYMM16, only if both inputs are QSYMM16
- * - S32, only if both inputs are QSYMM16
- * - F16
- * - F32
+ * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+ * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* @param[in] scale Scale to apply after multiplication.
* Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
* @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
@@ -69,10 +75,24 @@ public:
ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Initialise the kernel's input, output and border mode.
*
+ * Valid configurations (Input1,Input2) -> Output :
+ *
+ * - (U8,U8) -> U8
+ * - (U8,U8) -> S16
+ * - (U8,S16) -> S16
+ * - (S16,U8) -> S16
+ * - (S16,S16) -> S16
+ * - (F16,F16) -> F16
+ * - (F32,F32) -> F32
+ * - (QASYMM8,QASYMM8) -> QASYMM8
+ * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+ * - (QSYMM16,QSYMM16) -> QSYMM16
+ * - (QSYMM16,QSYMM16) -> S32
+ *
* @param[in] compile_context The compile context to be used.
* @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
- * @param[in] input2 An input tensor. Data types supported: same as @p input1.
- * @param[out] output The output tensor, Data types supported: same as @p input1. Note: U8 requires both inputs to be U8.
+ * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+ * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* @param[in] scale Scale to apply after multiplication.
* Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
* @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
@@ -83,17 +103,23 @@ public:
ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplicationKernel
*
+ * Valid configurations (Input1,Input2) -> Output :
+ *
+ * - (U8,U8) -> U8
+ * - (U8,U8) -> S16
+ * - (U8,S16) -> S16
+ * - (S16,U8) -> S16
+ * - (S16,S16) -> S16
+ * - (F16,F16) -> F16
+ * - (F32,F32) -> F32
+ * - (QASYMM8,QASYMM8) -> QASYMM8
+ * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+ * - (QSYMM16,QSYMM16) -> QSYMM16
+ * - (QSYMM16,QSYMM16) -> S32
+ *
* @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
- * @param[in] input2 An input tensor info. Data types supported: same as @p input1.
- * @param[in] output The output tensor info, Data types supported:
- * - U8, only if both input are U8
- * - QASYMM8, only if both inputs are QASYMM8
- * - QASYMM8_SIGNED, only if both inputs are QASYMM8_SIGNED
- * - S16
- * - QSYMM16, only if both inputs are QSYMM16
- * - S32, only if both inputs are QSYMM16
- * - F16
- * - F32
+ * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+ * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* @param[in] scale Scale to apply after multiplication.
* Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
* @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
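The scale constraint above (1/255 or 1/2^n with 0 <= n <= 15) and the element-wise behaviour for the float configurations can be sketched as follows (illustrative check; the kernel's actual validation may differ in detail):

    // Documented scale constraint for pixel-wise multiplication.
    bool is_valid_scale(float scale)
    {
        if (scale == 1.0f / 255.0f)
            return true;
        for (int n = 0; n <= 15; ++n)
            if (scale == 1.0f / static_cast<float>(1 << n))
                return true;
        return false;
    }

    // Element-wise result for the (F32,F32) -> F32 configuration.
    float pixelwise_mul(float in1, float in2, float scale)
    {
        return in1 * in2 * scale;
    }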
diff --git a/arm_compute/core/CL/kernels/CLReorgLayerKernel.h b/arm_compute/core/CL/kernels/CLReorgLayerKernel.h
index 279d891497..e3edc9f724 100644
--- a/arm_compute/core/CL/kernels/CLReorgLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLReorgLayerKernel.h
@@ -47,7 +47,7 @@ public:
CLReorgLayerKernel &operator=(CLReorgLayerKernel &&) = default;
/** Initialize the kernel's input, output.
*
- * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32.
+ * @param[in] input Source tensor. Data types supported: All.
* @param[out] output Destination tensor with tensor shape:
* [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has
* the same number of input elements. Data types supported: same as @p input.
@@ -58,7 +58,7 @@ public:
/** Initialize the kernel's input, output.
*
* @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32.
+ * @param[in] input Source tensor. Data types supported: All.
* @param[out] output Destination tensor with tensor shape:
* [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has
* the same number of input elements. Data types supported: same as @p input.
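The output shape in the doc comment preserves the element count. As a sketch:

    #include <array>
    #include <cstddef>

    // Spatial dims shrink by the stride, channels grow by stride * stride.
    std::array<size_t, 4> reorg_output_shape(const std::array<size_t, 4> &in, // {w, h, c, n}
                                             size_t stride)
    {
        return {in[0] / stride, in[1] / stride, in[2] * stride * stride, in[3]};
    }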
diff --git a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
index 5297af2c1c..f8c1019d53 100644
--- a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
@@ -27,93 +27,10 @@
#include "arm_compute/core/CL/ICLSimple3DKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
-#include <tuple>
-
namespace arm_compute
{
class ICLTensor;
-/** Interface for the identifying the max value of 1D Logits */
-class CLLogits1DMaxKernel : public ICLSimple3DKernel
-{
-public:
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32
- * @param[out] output Destination tensor. Data types supported: same as @p input
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32
- * @param[out] output Destination tensor. Data types supported: same as @p input
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DMaxKernel
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32
- * @param[in] output Destination tensor. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
-
-/** Interface for shifting, exponentiating and summing the logits */
-class CLLogits1DShiftExpSumKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLLogits1DShiftExpSumKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLLogits1DShiftExpSumKernel(const CLLogits1DShiftExpSumKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLLogits1DShiftExpSumKernel &operator=(const CLLogits1DShiftExpSumKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLLogits1DShiftExpSumKernel(CLLogits1DShiftExpSumKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLLogits1DShiftExpSumKernel &operator=(CLLogits1DShiftExpSumKernel &&) = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32
- * @param[in] max Max values tensor. Data types supported: same as @p input
- * @param[out] output Destination tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
- * @param[out] sum Sum of 1D logits tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
- * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.0
- */
- void configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta = 1.0f);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32
- * @param[in] max Max values tensor. Data types supported: same as @p input
- * @param[out] output Destination tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
- * @param[out] sum Sum of 1D logits tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
- * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.0
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta = 1.0f);
- /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DShiftExpSumKernel
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32
- * @param[in] max Max values tensor. Data types supported: same as @p input
- * @param[in] output Destination tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
- * @param[in] sum Sum of 1D logits tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_max;
- ICLTensor *_output;
- ICLTensor *_sum;
-};
-
/** Interface for max, shifting, exponentiating and summing the logits */
class CLLogits1DMaxShiftExpSumKernel : public ICLKernel
{
@@ -134,7 +51,7 @@ public:
CLLogits1DMaxShiftExpSumKernel &operator=(CLLogits1DMaxShiftExpSumKernel &&) = default;
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: F16/F32
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
* @param[in,out] max Max values tensor. Data types supported: same as @p input
* @param[out] output Destination tensor. Data types supported: same as @p input
* @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input
@@ -144,7 +61,7 @@ public:
/** Set the input and output tensors.
*
* @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: F16/F32
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
* @param[in,out] max Max values tensor. Data types supported: same as @p input
* @param[out] output Destination tensor. Data types supported: same as @p input
* @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input
@@ -153,7 +70,7 @@ public:
void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DMaxShiftExpSumKernel
*
- * @param[in] input Source tensor. Data types supported: F16/F32
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
* @param[in] max Max values tensor. Data types supported: same as @p input
* @param[in] output Destination tensor. Data types supported: same as @p input
* @param[in] sum Sum of 1D logits tensor. Data types supported: same as @p input
@@ -205,7 +122,7 @@ public:
*
* @param[in] input Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported.
* @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
- * @param[out] output Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input
+ * @param[out] output Destination tensor. Data types supported: QASYMM8/QASYMM8_SIGNED for S32 @p input, or same as @p input
* @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
*/
void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info);
@@ -214,7 +131,7 @@ public:
* @param[in] compile_context The compile context to be used.
* @param[in] input Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported.
* @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
- * @param[out] output Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input
+ * @param[out] output Destination tensor. Data types supported: QASYMM8/QASYMM8_SIGNED for S32 @p input, or same as @p input
* @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
*/
void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info);
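After this cleanup only the fused max/shift/exp/sum kernel and the normalisation kernel remain. Their combined float-path behaviour is the usual numerically stable softmax (reference sketch; beta is the optional exponent scale):

    #include <algorithm>
    #include <cmath>
    #include <vector>

    std::vector<float> softmax(const std::vector<float> &x, float beta = 1.0f)
    {
        const float max_val = *std::max_element(x.begin(), x.end());
        std::vector<float> y(x.size());
        float sum = 0.0f;
        for (size_t i = 0; i < x.size(); ++i)
        {
            y[i] = std::exp((x[i] - max_val) * beta); // shift by max for stability
            sum += y[i];
        }
        for (float &v : y)
            v /= sum; // normalisation step
        return y;
    }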
diff --git a/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h b/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h
index d8e1ae113d..b523b97233 100644
--- a/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h
@@ -49,7 +49,7 @@ public:
/** Initialise the kernel's input and output.
*
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] input Source tensor. Data types supported: All.
* @param[out] output Destination tensor. Data types supported: same as @p input.
* @param[in] info Contains stride information described in @ref Size2D.
* @param[in] upsampling_policy Defines the policy to fill the intermediate pixels.
@@ -58,7 +58,7 @@ public:
/** Initialise the kernel's input and output.
*
* @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] input Source tensor. Data types supported: All.
* @param[out] output Destination tensor. Data types supported: same as @p input.
* @param[in] info Contains stride information described in @ref Size2D.
* @param[in] upsampling_policy Defines the policy to fill the intermediate pixels.
@@ -66,7 +66,7 @@ public:
void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy);
/** Static function to check if given info will lead to a valid configuration of @ref CLUpsampleLayerKernel
*
- * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] input Source tensor info. Data types supported: All.
* @param[in] output Destination tensor info. Data types supported: same as @p input.
* @param[in] info Contains stride information described in @ref Size2D.
* @param[in] upsampling_policy Defines the policy to fill the intermediate pixels.
diff --git a/arm_compute/runtime/CL/functions/CLComparison.h b/arm_compute/runtime/CL/functions/CLComparison.h
index ac6a41ef2e..c6d61e45f2 100644
--- a/arm_compute/runtime/CL/functions/CLComparison.h
+++ b/arm_compute/runtime/CL/functions/CLComparison.h
@@ -79,7 +79,7 @@ public:
public:
/** Initialise the kernel's inputs and outputs.
*
- * @param[in] input1 Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+ * @param[in] input1 Source tensor. Data types supported: All.
* The input1 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
* @param[in] input2 Source tensor. Data types supported: Same as @p input1.
* The input2 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
@@ -92,7 +92,7 @@ public:
/** Initialise the kernel's inputs and outputs.
*
* @param[in] compile_context The compile context to be used.
- * @param[in] input1 Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+ * @param[in] input1 Source tensor. Data types supported: All.
* The input1 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
* @param[in] input2 Source tensor. Data types supported: Same as @p input1.
* The input2 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
@@ -101,7 +101,7 @@ public:
void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLComparison
*
- * @param[in] input1 Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+ * @param[in] input1 Source tensor. Data types supported: All.
* @param[in] input2 Source tensor. Data types supported: Same as @p input1.
* @param[in] output Destination tensor. Data types supported: U8.
*
diff --git a/arm_compute/runtime/CL/functions/CLCopy.h b/arm_compute/runtime/CL/functions/CLCopy.h
index ee5f31bc4c..c20d75eea8 100644
--- a/arm_compute/runtime/CL/functions/CLCopy.h
+++ b/arm_compute/runtime/CL/functions/CLCopy.h
@@ -38,7 +38,7 @@ class CLCopy : public ICLSimpleFunction
public:
/** Initialise the function's source and destination.
*
- * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] input Source tensor. Data types supported: All.
* @param[out] output Output tensor. Data types supported: Same as @p input.
*
*/
@@ -46,14 +46,14 @@ public:
/** Initialise the function's source and destination.
*
* @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] input Source tensor. Data types supported: All.
* @param[out] output Output tensor. Data types supported: Same as @p input.
*
*/
void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLCopy
*
- * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] input Source tensor. Data types supported: All.
* @param[in] output Output tensor. Data types supported: Same as @p input.
*
* @return a status
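With CLCopy now documented for all data types, a minimal usage sketch (assumes an OpenCL-capable device and this version of the API):

    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLCopy.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init(); // set up CL context and queue

        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));

        CLCopy copy;
        copy.configure(&src, &dst); // validate() can be called first on the TensorInfos

        src.allocator()->allocate();
        dst.allocator()->allocate();

        copy.run();
        CLScheduler::get().sync();
        return 0;
    }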
diff --git a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
index 8cf1ca83c5..9cd3c150cc 100644
--- a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
+++ b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
@@ -33,7 +33,7 @@ class ICLTensor;
/** Basic function to run @ref CLSaturatedArithmeticOperationKernel for addition
*
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
+ * @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
* @note The function performs an arithmetic addition between two tensors.
*/
class CLArithmeticAddition : public ICLSimpleFunction
@@ -41,32 +41,74 @@ class CLArithmeticAddition : public ICLSimpleFunction
public:
/** Initialise the kernel's inputs, output and conversion policy.
*
- * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
+ * Valid configurations (Input1,Input2) -> Output :
+ *
+ * - (U8,U8) -> U8
+ * - (U8,U8) -> S16
+ * - (S16,U8) -> S16
+ * - (U8,S16) -> S16
+ * - (S16,S16) -> S16
+ * - (S32,S32) -> S32
+ * - (F16,F16) -> F16
+ * - (F32,F32) -> F32
+ * - (QASYMM8,QASYMM8) -> QASYMM8
+ * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+ * - (QSYMM16,QSYMM16) -> QSYMM16
+ *
+ * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QSYMM16 (only if @p input1 is QSYMM16), S16/F16/F32.
+ * @param[in, out] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), QSYMM16 (only if both inputs is QSYMM16), S16/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
* @param[in] policy Policy to use to handle overflow.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
*/
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid configurations (Input1,Input2) -> Output :
+ *
+ * - (U8,U8) -> U8
+ * - (U8,U8) -> S16
+ * - (S16,U8) -> S16
+ * - (U8,S16) -> S16
+ * - (S16,S16) -> S16
+ * - (S32,S32) -> S32
+ * - (F16,F16) -> F16
+ * - (F32,F32) -> F32
+ * - (QASYMM8,QASYMM8) -> QASYMM8
+ * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+ * - (QSYMM16,QSYMM16) -> QSYMM16
+ *
* @param[in] compile_context The compile context to be used.
- * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
+ * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QSYMM16 (only if @p input1 is QSYMM16), S16/F16/F32.
+ * @param[in, out] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), QSYMM16 (only if both inputs is QSYMM16), S16/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
* @param[in] policy Policy to use to handle overflow.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
*/
void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel for addition
*
- * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QSYMM16 (only if @p input1 is QSYMM16), S16/F16/F32.
- * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), QSYMM16 (only if both inputs is QSYMM16), S16/F16/F32.
+ * Valid configurations (Input1,Input2) -> Output :
+ *
+ * - (U8,U8) -> U8
+ * - (U8,U8) -> S16
+ * - (S16,U8) -> S16
+ * - (U8,S16) -> S16
+ * - (S16,S16) -> S16
+ * - (S32,S32) -> S32
+ * - (F16,F16) -> F16
+ * - (F32,F32) -> F32
+ * - (QASYMM8,QASYMM8) -> QASYMM8
+ * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+ * - (QSYMM16,QSYMM16) -> QSYMM16
+ *
+ * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
+ * @param[in] output Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
* @param[in] policy Policy to use to handle overflow.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
*
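
For context, a minimal sketch of how the validate()/configure() pair documented above is typically driven, using the (QASYMM8,QASYMM8) -> QASYMM8 configuration from the list; the shapes and quantization values are illustrative assumptions, and CLArithmeticSubtraction below follows the same pattern:

    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"

    using namespace arm_compute;

    void add_example()
    {
        CLScheduler::get().default_init(); // one-time CL context/queue setup

        const TensorShape shape(16U, 16U);
        CLTensor in1, in2, out;
        in1.allocator()->init(TensorInfo(shape, 1, DataType::QASYMM8, QuantizationInfo(0.25f, 10)));
        in2.allocator()->init(TensorInfo(shape, 1, DataType::QASYMM8, QuantizationInfo(0.25f, 10)));
        out.allocator()->init(TensorInfo(shape, 1, DataType::QASYMM8, QuantizationInfo(0.25f, 10)));

        // Check the configuration up front, then configure and run.
        ARM_COMPUTE_ERROR_THROW_ON(CLArithmeticAddition::validate(in1.info(), in2.info(), out.info(), ConvertPolicy::SATURATE));

        CLArithmeticAddition add;
        add.configure(&in1, &in2, &out, ConvertPolicy::SATURATE);

        in1.allocator()->allocate();
        in2.allocator()->allocate();
        out.allocator()->allocate();
        add.run();                 // enqueues the kernel
        CLScheduler::get().sync(); // wait for completion
    }
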
@@ -77,7 +119,7 @@ public:
/** Basic function to run @ref CLSaturatedArithmeticOperationKernel for subtraction
*
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/U32/F16/F32.
+ * @note The tensor data type for the inputs must be U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
* @note The function performs an arithmetic subtraction between two tensors.
*/
class CLArithmeticSubtraction : public ICLSimpleFunction
@@ -85,32 +127,74 @@ class CLArithmeticSubtraction : public ICLSimpleFunction
public:
/** Initialise the kernel's inputs, output and conversion policy.
*
- * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32.
+ * Valid configurations (Input1,Input2) -> Output :
+ *
+ * - (U8,U8) -> U8
+ * - (U8,U8) -> S16
+ * - (S16,U8) -> S16
+ * - (U8,S16) -> S16
+ * - (S16,S16) -> S16
+ * - (S32,S32) -> S32
+ * - (F16,F16) -> F16
+ * - (F32,F32) -> F32
+ * - (QASYMM8,QASYMM8) -> QASYMM8
+ * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+ * - (QSYMM16,QSYMM16) -> QSYMM16
+ *
+ * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32.
+ * @param[in, out] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
* @param[in] policy Policy to use to handle overflow.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
*/
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid configurations (Input1,Input2) -> Output :
+ *
+ * - (U8,U8) -> U8
+ * - (U8,U8) -> S16
+ * - (S16,U8) -> S16
+ * - (U8,S16) -> S16
+ * - (S16,S16) -> S16
+ * - (S32,S32) -> S32
+ * - (F16,F16) -> F16
+ * - (F32,F32) -> F32
+ * - (QASYMM8,QASYMM8) -> QASYMM8
+ * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+ * - (QSYMM16,QSYMM16) -> QSYMM16
+ *
* @param[in] compile_context The compile context to be used.
- * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32.
+ * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32.
+ * @param[in, out] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32.
+ * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
* @param[in] policy Policy to use to handle overflow.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
*/
void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel for subtraction
*
- * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32.
- * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 ( only if both inputs are QASYMM8), S16/F16/F32.
+ * Valid configurations (Input1,Input2) -> Output :
+ *
+ * - (U8,U8) -> U8
+ * - (U8,U8) -> S16
+ * - (S16,U8) -> S16
+ * - (U8,S16) -> S16
+ * - (S16,S16) -> S16
+ * - (S32,S32) -> S32
+ * - (F16,F16) -> F16
+ * - (F32,F32) -> F32
+ * - (QASYMM8,QASYMM8) -> QASYMM8
+ * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+ * - (QSYMM16,QSYMM16) -> QSYMM16
+ *
+ * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
+ * @param[in] output Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/F16/F32.
* @param[in] policy Policy to use to handle overflow.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
*
@@ -170,30 +254,30 @@ class CLElementwiseMax : public ICLSimpleFunction
public:
/** Initialise the kernel's inputs, output and conversion policy.
*
- * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
+ * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
+ * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
+ * @param[out] output Output tensor. Data types supported: same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
*/
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Initialise the kernel's inputs, output and conversion policy.
*
* @param[in] compile_context The compile context to be used.
- * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
+ * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
+ * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
+ * @param[out] output Output tensor. Data types supported: same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
*/
void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for max
*
- * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
- * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 ( only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
+ * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
*
* @return a status
@@ -211,30 +295,30 @@ class CLElementwiseMin : public ICLSimpleFunction
public:
/** Initialise the kernel's inputs, output and conversion policy.
*
- * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
+ * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
+ * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
+ * @param[out] output Output tensor. Data types supported: same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
*/
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Initialise the kernel's inputs, output and conversion policy.
*
* @param[in] compile_context The compile context to be used.
- * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
+ * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
+ * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
+ * @param[out] output Output tensor. Data types supported: same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
*/
void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for min
*
- * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
- * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 ( only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
+ * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/S32/U32/F16/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
*
* @return a status
@@ -252,30 +336,30 @@ class CLElementwiseSquaredDiff : public ICLSimpleFunction
public:
/** Initialise the kernel's inputs, output and conversion policy.
*
- * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32.
+ * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
+ * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
+ * @param[out] output Output tensor. Data types supported: same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
*/
void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Initialise the kernel's inputs, output and conversion policy.
*
* @param[in] compile_context The compile context to be used.
- * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32.
+ * @param[in, out] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
+ * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
+ * @param[out] output Output tensor. Data types supported: same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
*/
void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for squared difference
*
- * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32.
- * @param[in] input2 Second tensor input info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
- * @param[in] output Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 ( only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
+ * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: same as @p input1.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
*
* @return a status
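
The element-wise max/min/squared-difference functions share the interface above, minus the ConvertPolicy argument. A hedged sketch (shapes assumed, CL context already initialised) showing CLElementwiseMax with input2 broadcast along dimension 0 as the notes describe:

    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"

    using namespace arm_compute;

    void max_example()
    {
        CLTensor in1, in2, out;
        in1.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));
        in2.allocator()->init(TensorInfo(TensorShape(1U, 4U), 1, DataType::F32)); // broadcast on dim 0
        out.allocator()->init(TensorInfo(TensorShape(8U, 4U), 1, DataType::F32));

        CLElementwiseMax max_op;
        max_op.configure(&in1, &in2, &out); // act_info defaults to no fused activation

        in1.allocator()->allocate();
        in2.allocator()->allocate();
        out.allocator()->allocate();
        max_op.run();
    }
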
diff --git a/arm_compute/runtime/CL/functions/CLFill.h b/arm_compute/runtime/CL/functions/CLFill.h
index 99cae077fe..b79b234158 100644
--- a/arm_compute/runtime/CL/functions/CLFill.h
+++ b/arm_compute/runtime/CL/functions/CLFill.h
@@ -38,14 +38,14 @@ class CLFill : public ICLSimpleFunction
public:
/** Initialize the function
*
- * @param[in,out] tensor Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in,out] tensor Source tensor. Data types supported: All.
* @param[in] constant_value Constant value to use to fill tensor.
*/
void configure(ICLTensor *tensor, PixelValue constant_value);
/** Initialize the function
*
* @param[in] compile_context The compile context to be used.
- * @param[in,out] tensor Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in,out] tensor Source tensor. Data types supported: All.
* @param[in] constant_value Constant value to use to fill tensor.
*/
void configure(const CLCompileContext &compile_context, ICLTensor *tensor, PixelValue constant_value);
diff --git a/arm_compute/runtime/CL/functions/CLFillBorder.h b/arm_compute/runtime/CL/functions/CLFillBorder.h
index e552c353db..18bc20e654 100644
--- a/arm_compute/runtime/CL/functions/CLFillBorder.h
+++ b/arm_compute/runtime/CL/functions/CLFillBorder.h
@@ -38,7 +38,7 @@ class CLFillBorder : public ICLSimpleFunction
public:
/** Initialize the function
*
- * @param[in,out] tensor Source tensor. Data types supported: U8/S16
+ * @param[in,out] tensor Source tensor. Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32.
* @param[in] border_width The border width
* @param[in] border_mode Strategy to use for borders.
* @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
@@ -47,7 +47,7 @@ public:
/** Initialize the function
*
* @param[in] compile_context The compile context to be used.
- * @param[in,out] tensor Source tensor. Data types supported: U8/S16
+ * @param[in,out] tensor Source tensor. Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32.
* @param[in] border_width The border width
* @param[in] border_mode Strategy to use for borders.
* @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
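
Since CLFill now accepts any data type and CLFillBorder a much wider set, a combined usage sketch may help; the border width and fill values here are assumptions:

    #include "arm_compute/runtime/CL/functions/CLFill.h"
    #include "arm_compute/runtime/CL/functions/CLFillBorder.h"

    using namespace arm_compute;

    void fill_example(ICLTensor *tensor)
    {
        // Set every element of the tensor to a constant.
        CLFill fill;
        fill.configure(tensor, PixelValue(0.0f));
        fill.run();

        // Then pad a 2-pixel border with a constant value.
        CLFillBorder border;
        border.configure(tensor, 2U, BorderMode::CONSTANT, PixelValue(0.0f));
        border.run();
    }
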
diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h
index 018d88d366..8e4d3906d1 100644
--- a/arm_compute/runtime/CL/functions/CLGEMM.h
+++ b/arm_compute/runtime/CL/functions/CLGEMM.h
@@ -74,7 +74,7 @@ public:
/** Configures the @ref CLGEMMReshapeRHSMatrixKernel kernel
*
- * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] input Input tensor. Data types supported: All
* @param[in] info RHS matrix information to be used for reshaping.
*/
void configure(const ICLTensor *input, GEMMRHSMatrixInfo info)
@@ -85,7 +85,7 @@ public:
/** Configures the @ref CLGEMMReshapeRHSMatrixKernel kernel
*
* @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] input Input tensor. Data types supported: All
* @param[in] info RHS matrix information to be used for reshaping.
*/
void configure(const CLCompileContext &compile_context, const ICLTensor *input, GEMMRHSMatrixInfo info)
diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
index be9283b6ec..277b27f690 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
@@ -98,8 +98,8 @@ class CLConvolutionLayerReshapeWeightsTransform : public ITransformWeights
public:
/** Configures the @ref CLConvolutionLayerReshapeWeights function
*
- * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] biases Biases tensor. Data type supported: Same as @p input.
+ * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/F16/F32.
+ * @param[in] biases Biases tensor. Data type supported: same as @p input, or S32 if @p input is quantized.
* @param[in] num_groups Number of groups when performing a grouped convolution.
*/
void configure(const ICLTensor *input, const ICLTensor *biases, unsigned int num_groups)
@@ -109,8 +109,8 @@ public:
/** Configures the @ref CLConvolutionLayerReshapeWeights function
*
* @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] biases Biases tensor. Data type supported: Same as @p input.
+ * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/F16/F32.
+ * @param[in] biases Biases tensor. Data type supported: same as @p input, or S32 if @p input is quantized.
* @param[in] num_groups Number of groups when performing a grouped convolution.
*/
void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *biases, unsigned int num_groups)
@@ -183,11 +183,11 @@ public:
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/F16/F32.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+ * Data type supported: Same as @p input, or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8, or QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8_SIGNED.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
+ * Data type supported: Should match @p input data type, except for input of quantized type where biases should be of S32 type.
* @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
@@ -204,11 +204,11 @@ public:
* @param[in] compile_context The compile context to be used.
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/F16/F32.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+ * Data type supported: Same as @p input, or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8, or QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8_SIGNED.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
+ * Data type supported: Should match @p input data type, except for input of quantized type where biases should be of S32 type.
* @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
@@ -225,11 +225,11 @@ public:
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/F16/F32.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+ * Data type supported: Same as @p input, or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8, or QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8_SIGNED.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
+ * Data type supported: Should match @p input data type, except for input of quantized type where biases should be of S32 type.
* @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
@@ -252,12 +252,12 @@ private:
/** Configures the appropriate matrix multiply routine
*
* @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+ * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] weights Weights tensor. Data type supported: Same as @p input, or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8, or
+ * QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8_SIGNED.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
- * @param[in, out] output Output tensor. Data types supported: Same as @p input,
- * except for input of QASYMM8 type where output should be of S32 type.
+ * Data type supported: Should match @p input data type, except for input of quantized type where biases should be of S32 type.
+ * @param[in, out] output Output tensor. Data types supported: same as @p input.
* @param[in] gemmlowp_output_stage GEMMLowp output stage info
* @param[in] gemm_3d_depth Depth of GEMM 3D
* @param[in] act_info Activation to apply after the matrix multiplication
@@ -267,12 +267,12 @@ private:
int gemm_3d_depth, const ActivationLayerInfo &act_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer matrix multiply routines
*
- * @param[in] input Input tensor info. Data types supported: QASYMM8/F16/F32.
- * @param[in] weights Weights tensor info. Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+ * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] weights Weights tensor info. Data type supported: Same as @p input, or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8, or
+ * QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8_SIGNED.
* @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
- * @param[in] output Output tensor info. Data types supported: Same as @p input,
- * except for input of QASYMM8 type where output should be of S32 type.
+ * Data type supported: Should match @p input data type, except for input of quantized type where biases should be of S32 type.
+ * @param[in] output Output tensor info. Data types supported: same as @p input.
* @param[in] gemmlowp_output_stage GEMMLowp output stage info
* @param[in] gemm_3d_depth Depth of GEMM 3D
* @param[in] skip_im2col Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout.
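
A sketch of the quantized configuration these comments now describe — QASYMM8 input, QSYMM8_PER_CHANNEL weights with one scale per output channel, and S32 biases — checked through validate() without allocating anything; all shapes and quantization parameters are illustrative:

    #include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"

    #include <vector>

    using namespace arm_compute;

    Status check_quantized_conv()
    {
        // [width, height, IFM] input and [kx, ky, IFM, OFM] weights (3x3, 3 -> 8 channels).
        const TensorInfo src(TensorShape(28U, 28U, 3U), 1, DataType::QASYMM8, QuantizationInfo(0.02f, 128));
        const TensorInfo wei(TensorShape(3U, 3U, 3U, 8U), 1, DataType::QSYMM8_PER_CHANNEL,
                             QuantizationInfo(std::vector<float>(8, 0.01f))); // one scale per OFM
        const TensorInfo bia(TensorShape(8U), 1, DataType::S32); // S32 biases for quantized input
        const TensorInfo dst(TensorShape(26U, 26U, 8U), 1, DataType::QASYMM8, QuantizationInfo(0.05f, 100));

        return CLGEMMConvolutionLayer::validate(&src, &wei, &bia, &dst, PadStrideInfo(1, 1, 0, 0));
    }
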
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
index 0e822bc5a2..57b1e30df5 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
@@ -59,8 +59,8 @@ public:
* @note GEMMLowp: low precision GEMM kernel. [A * B + C]
* This kernel performs the following computations:
*
- * -# Convert a values from QASYMM8 to int32 and add a_offset to each of them.
- * -# Convert b values from QASYMM8 to int32 and add b_offset to each of them.
+ * -# Convert a values from 8-bit quantized to int32 and add a_offset to each of them.
+ * -# Convert b values from 8-bit quantized to int32 and add b_offset to each of them.
* -# Compute the matrix product of the resulting a * b in int32.
 * -# Quantize to 8-bit integer if gemm_info.gemmlowp_output_stage != NONE
*
@@ -77,8 +77,8 @@ public:
* @note GEMMLowp: low precision GEMM kernel. [A * B + C]
* This kernel performs the following computations:
*
- * -# Convert a values from QASYMM8 to int32 and add a_offset to each of them.
- * -# Convert b values from QASYMM8 to int32 and add b_offset to each of them.
+ * -# Convert a values from 8-bit quantized to int32 and add a_offset to each of them.
+ * -# Convert b values from 8-bit quantized to int32 and add b_offset to each of them.
* -# Compute the matrix product of the resulting a * b in int32.
 * -# Quantize to 8-bit integer if gemm_info.gemmlowp_output_stage != NONE
*
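
The four steps listed above reduce to the following scalar model for one output element (a plain C++ illustration, not the CL kernel itself; the step-4 requantization is omitted since it only runs when gemmlowp_output_stage != NONE):

    #include <cstdint>

    // Computes one int32 accumulator of GEMMLowp for 8-bit quantized inputs.
    int32_t gemmlowp_element(const int8_t *a_row, const int8_t *b_col, int k,
                             int32_t a_offset, int32_t b_offset)
    {
        int32_t acc = 0;
        for(int i = 0; i < k; ++i)
        {
            const int32_t a_val = static_cast<int32_t>(a_row[i]) + a_offset; // step 1
            const int32_t b_val = static_cast<int32_t>(b_col[i]) + b_offset; // step 2
            acc += a_val * b_val;                                            // step 3: int32 product
        }
        return acc;
    }
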
diff --git a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
index a647281e29..b87daba1f8 100644
--- a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
+++ b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
@@ -38,19 +38,25 @@ class CLPixelWiseMultiplication : public ICLSimpleFunction
public:
 /** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid configurations (Input1,Input2) -> Output :
+ *
+ * - (U8,U8) -> U8
+ * - (U8,U8) -> S16
+ * - (U8,S16) -> S16
+ * - (S16,U8) -> S16
+ * - (S16,S16) -> S16
+ * - (F16,F16) -> F16
+ * - (F32,F32) -> F32
+ * - (QASYMM8,QASYMM8) -> QASYMM8
+ * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+ * - (QSYMM16,QSYMM16) -> QSYMM16
+ * - (QSYMM16,QSYMM16) -> S32
+ *
* @param[in, out] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 An input tensor. Data types supported: same as @p input1.
+ * @param[in, out] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output The output tensor. Data types supported:
- * - U8, only if both input are U8
- * - QASYMM8, only if both inputs are QASYMM8
- * - QASYMM8_SIGNED, only if both inputs are QASYMM8_SIGNED
- * - S16
- * - QSYMM16, only if both inputs are QSYMM16
- * - S32, only if both inputs are QSYMM16
- * - F16
- * - F32
+ * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* @param[in] scale Scale to apply after multiplication.
* Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
* @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
@@ -61,12 +67,26 @@ public:
ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
 /** Initialise the kernel's inputs, output and conversion policy.
*
+ * Valid configurations (Input1,Input2) -> Output :
+ *
+ * - (U8,U8) -> U8
+ * - (U8,U8) -> S16
+ * - (U8,S16) -> S16
+ * - (S16,U8) -> S16
+ * - (S16,S16) -> S16
+ * - (F16,F16) -> F16
+ * - (F32,F32) -> F32
+ * - (QASYMM8,QASYMM8) -> QASYMM8
+ * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+ * - (QSYMM16,QSYMM16) -> QSYMM16
+ * - (QSYMM16,QSYMM16) -> S32
+ *
* @param[in] compile_context The compile context to be used.
* @param[in, out] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 An input tensor. Data types supported: same as @p input1.
+ * @param[in, out] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output The output tensor, Data types supported: same as @p input1. Note: U8 requires both inputs to be U8.
+ * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* @param[in] scale Scale to apply after multiplication.
* Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
* @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
@@ -77,17 +97,24 @@ public:
ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplication
*
+ * Valid configurations (Input1,Input2) -> Output :
+ *
+ * - (U8,U8) -> U8
+ * - (U8,U8) -> S16
+ * - (U8,S16) -> S16
+ * - (S16,U8) -> S16
+ * - (S16,S16) -> S16
+ * - (F16,F16) -> F16
+ * - (F32,F32) -> F32
+ * - (QASYMM8,QASYMM8) -> QASYMM8
+ * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+ * - (QSYMM16,QSYMM16) -> QSYMM16
+ * - (QSYMM16,QSYMM16) -> S32
+ *
* @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
- * @param[in] input2 An input tensor info. Data types supported: same as @p input1.
- * @param[in] output The output tensor info. Data types supported:
- * - U8, only if both input are U8
- * - QASYMM8, only if both inputs are QASYMM8
- * - QASYMM8_SIGNED, only if both inputs are QASYMM8_SIGNED
- * - S16
- * - QSYMM16, only if both inputs are QSYMM16
- * - S32, only if both inputs are QSYMM16
- * - F16
- * - F32
+ * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+ * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* @param[in] scale Scale to apply after multiplication.
* Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
* @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
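
A minimal sketch of the (F32,F32) -> F32 configuration from the table above; note that scale = 1 satisfies the 1/2^n constraint (n = 0), and the shapes are assumptions:

    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h"

    using namespace arm_compute;

    void mul_example()
    {
        CLTensor in1, in2, out;
        const TensorInfo info(TensorShape(32U), 1, DataType::F32);
        in1.allocator()->init(info);
        in2.allocator()->init(info);
        out.allocator()->init(info);

        CLPixelWiseMultiplication mul;
        mul.configure(&in1, &in2, &out, 1.0f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);

        in1.allocator()->allocate();
        in2.allocator()->allocate();
        out.allocator()->allocate();
        mul.run();
    }
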
diff --git a/arm_compute/runtime/CL/functions/CLReductionOperation.h b/arm_compute/runtime/CL/functions/CLReductionOperation.h
index 013ba647b1..5d050d71d6 100644
--- a/arm_compute/runtime/CL/functions/CLReductionOperation.h
+++ b/arm_compute/runtime/CL/functions/CLReductionOperation.h
@@ -26,19 +26,17 @@
#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h"
-#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
-#include <cstdint>
#include <memory>
-#include <vector>
namespace arm_compute
{
+// Forward declarations
class ICLTensor;
/** Perform reduction operation.
@@ -54,7 +52,7 @@ public:
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32/S32.
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
* @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1, 2, 3
* @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX
@@ -64,7 +62,7 @@ public:
/** Set the input and output tensors.
*
* @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32/S32.
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
* @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1, 2, 3
* @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX
@@ -74,7 +72,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperation.
*
- * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32/S32.
* @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input.
* @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1, 2, 3
* @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX
@@ -95,7 +93,6 @@ private:
std::vector<CLReductionOperationKernel> _reduction_kernels_vector;
std::vector<CLFillBorderKernel> _border_handlers_vector;
CLReshapeLayer _reshape;
- ReductionOperation _op;
unsigned int _num_of_stages;
unsigned int _reduction_axis;
bool _is_serial;
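
A short usage sketch for the reduction interface above, summing along axis 0 with kept dimensions (shape and operation are assumptions):

    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLReductionOperation.h"

    using namespace arm_compute;

    void reduce_example()
    {
        CLTensor in, out;
        in.allocator()->init(TensorInfo(TensorShape(64U, 8U), 1, DataType::F32));
        out.allocator()->init(TensorInfo(TensorShape(1U, 8U), 1, DataType::F32)); // axis 0 collapsed to 1

        CLReductionOperation red;
        red.configure(&in, &out, 0U, ReductionOperation::SUM, /* keep_dims */ true);

        in.allocator()->allocate();
        out.allocator()->allocate();
        red.run();
    }
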
diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
index 93ad24e893..ec57bacf07 100644
--- a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
@@ -47,8 +47,6 @@ class ICLTensor;
* @f[ out = (x - max(x) * beta) - log(\sum{e^{x - max(x) * beta}}) @f]
*
* This function runs the following kernels:
- * -# @ref CLLogits1DMaxKernel
- * -# @ref CLLogits1DShiftExpSumKernel
* -# @ref CLLogits1DNormKernel
 * And if the reduce_end_axis is not 0, the function will use one of the following kernels to reshape the input and
* perform softmax on the reshaped input:
@@ -63,36 +61,36 @@ public:
CLSoftmaxLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 for Softmax and F16/F32 for Log Softmax
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax
* @param[out] output Destination tensor. Data types supported: same as @p input
* @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f
 * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive) to reduce. Defaults to 0.
- * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
- * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
- * Must be in range [0, input_num_dimensions).
+ * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
+ * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
+ * Must be in range [0, input_num_dimensions).
*/
void configure(const ICLTensor *input, ICLTensor *output, float beta = 1.0f, size_t reduce_end_axis = 0);
/** Set the input and output tensors.
*
* @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 for Softmax and F16/F32 for Log Softmax
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax
* @param[out] output Destination tensor. Data types supported: same as @p input
* @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f
 * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive) to reduce. Defaults to 0.
- * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
- * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
- * Must be in range [0, input_num_dimensions).
+ * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
+ * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
+ * Must be in range [0, input_num_dimensions).
*/
void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, float beta = 1.0f, size_t reduce_end_axis = 0);
/** Static function to check if given info will lead to a valid configuration of @ref CLSoftmaxLayer
*
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 for Softmax and F16/F32 for Log Softmax
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax
* @param[in] output Destination tensor. Data types supported: same as @p input
* @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f
 * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive) to reduce. Defaults to 0.
- * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
- * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
- * Must be in range [0, input_num_dimensions).
+ * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
+ * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
+ * Must be in range [0, input_num_dimensions).
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta = 1.0f, size_t reduce_end_axis = 0);
@@ -111,9 +109,9 @@ private:
* @param[in] input Original source tensor.
* @param[in] output Original destination tensor.
 * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive) to reduce. Defaults to 0.
- * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
- * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
- * Must be in range [0, input_num_dimensions).
+ * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
+ * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
+ * Must be in range [0, input_num_dimensions).
*/
void configure_reshape_input_kernel(const ICLTensor *input, const ICLTensor *output, size_t reduce_end_axis);
/** Utility method to configure the kernels needed to flatten the input
@@ -127,9 +125,9 @@ private:
* @param[in] input Original source tensor.
* @param[in] output Original destination tensor.
 * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive) to reduce. Defaults to 0.
- * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
- * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
- * Must be in range [0, input_num_dimensions).
+ * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image,
+ * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image.
+ * Must be in range [0, input_num_dimensions).
*/
void configure_reshape_input_kernel(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *output, size_t reduce_end_axis);
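
To make the reduce_end_axis semantics above concrete, a sketch using the [2x3x4x5] example from the parameter documentation (F32 chosen for simplicity):

    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h"

    using namespace arm_compute;

    void softmax_example()
    {
        // reduce_end_axis = 1 squashes axes 0 and 1, so the softmax runs over
        // each of the 4*5 = 20 planes of 2*3 = 6 elements.
        CLTensor in, out;
        in.allocator()->init(TensorInfo(TensorShape(2U, 3U, 4U, 5U), 1, DataType::F32));
        out.allocator()->init(TensorInfo(TensorShape(2U, 3U, 4U, 5U), 1, DataType::F32));

        CLSoftmaxLayer softmax;
        softmax.configure(&in, &out, /* beta */ 1.0f, /* reduce_end_axis */ 1);

        in.allocator()->allocate();
        out.allocator()->allocate();
        softmax.run();
    }
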
diff --git a/arm_compute/runtime/CL/functions/CLUnstack.h b/arm_compute/runtime/CL/functions/CLUnstack.h
index bdef44fb0d..5d4d5710ab 100644
--- a/arm_compute/runtime/CL/functions/CLUnstack.h
+++ b/arm_compute/runtime/CL/functions/CLUnstack.h
@@ -48,8 +48,8 @@ public:
CLUnstack();
/** Set the input, output and unstacking axis.
*
- * @param[in] input A tensor to be unstacked. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
- * @param[in,out] output_vector A vector of tensors. Data types supported: Same as @p input.
+ * @param[in] input A tensor to be unstacked. Data type supported: All.
+ * @param[in,out] output_vector A vector of tensors. Data types supported: same as @p input.
* Note: The number of elements of the vector will be used as the number of slices to be taken from the axis.
* @param[in] axis The axis to unstack along. Valid values are [-R,R) where R is the input's rank. Negative values wrap around.
*
@@ -58,8 +58,8 @@ public:
/** Set the input, output and unstacking axis.
*
* @param[in] compile_context The compile context to be used.
- * @param[in] input A tensor to be unstacked. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
- * @param[in,out] output_vector A vector of tensors. Data types supported: Same as @p input.
+ * @param[in] input A tensor to be unstacked. Data type supported: All.
+ * @param[in,out] output_vector A vector of tensors. Data types supported: same as @p input.
* Note: The number of elements of the vector will be used as the number of slices to be taken from the axis.
* @param[in] axis The axis to unstack along. Valid values are [-R,R) where R is the input's rank. Negative values wrap around.
*
@@ -67,8 +67,8 @@ public:
void configure(const CLCompileContext &compile_context, const ICLTensor *input, const std::vector<ICLTensor *> &output_vector, int axis);
/** Static function to check if given info will lead to a valid configuration of @ref CLUnstack
*
- * @param[in] input Input tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
- * @param[in] output_vector Vector of output tensors' info. Data types supported: Same as @p input.
+ * @param[in] input Input tensor info. Data type supported: All.
+ * @param[in] output_vector Vector of output tensors' info. Data types supported: same as @p input.
* @param[in] axis The axis to unstack along. Valid values are [-R,R) where R is the input's rank. Negative values wrap around.
*
* @return a status
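
Since the number of output tensors determines the number of slices, here is a sketch of unstacking a [4x3] tensor along axis 1 into three [4] slices (shape and axis are assumptions):

    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLUnstack.h"

    using namespace arm_compute;

    void unstack_example()
    {
        CLTensor in, s0, s1, s2;
        in.allocator()->init(TensorInfo(TensorShape(4U, 3U), 1, DataType::F32));
        for(CLTensor *s : { &s0, &s1, &s2 })
        {
            s->allocator()->init(TensorInfo(TensorShape(4U), 1, DataType::F32));
        }

        CLUnstack unstack;
        unstack.configure(&in, { &s0, &s1, &s2 }, 1); // three outputs -> three slices on axis 1
    }
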
diff --git a/arm_compute/runtime/CL/functions/CLUpsampleLayer.h b/arm_compute/runtime/CL/functions/CLUpsampleLayer.h
index dcaa13471f..07b4c8aecb 100644
--- a/arm_compute/runtime/CL/functions/CLUpsampleLayer.h
+++ b/arm_compute/runtime/CL/functions/CLUpsampleLayer.h
@@ -53,7 +53,7 @@ public:
/** Initialize the function's source, destination, interpolation type and border_mode.
*
- * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] input Source tensor. Data type supported: All.
* @param[out] output Destination tensor. Data types supported: same as @p input.
* @param[in] info Contains stride information described in @ref Size2D.
* @param[in] upsampling_policy Defines the policy to fill the intermediate pixels.
@@ -63,16 +63,16 @@ public:
/** Initialize the function's source, destination, interpolation type and border_mode.
*
* @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] input Source tensor. Data type supported: All.
* @param[out] output Destination tensor. Data types supported: same as @p input.
* @param[in] info Contains stride information described in @ref Size2D.
* @param[in] upsampling_policy Defines the policy to fill the intermediate pixels.
*/
void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output,
const Size2D &info, const InterpolationPolicy upsampling_policy);
- /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample
+ /** Static function to check if given info will lead to a valid configuration of @ref CLUpsampleLayerKernel
*
- * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] input Source tensor info. Data types supported: All.
* @param[in] output Destination tensor info. Data types supported: same as @p input.
* @param[in] info Contains stride information described in @ref Size2D.
* @param[in] upsampling_policy Defines the policy to fill the intermediate pixels.
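
Finally, a sketch of the upsample interface with a 2x2 stride, doubling width and height (shapes are assumptions; NEAREST_NEIGHBOR is the policy used here):

    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLUpsampleLayer.h"

    using namespace arm_compute;

    void upsample_example()
    {
        CLTensor in, out;
        in.allocator()->init(TensorInfo(TensorShape(8U, 8U, 3U), 1, DataType::F32));
        out.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U), 1, DataType::F32)); // 2x in x/y

        CLUpsampleLayer upsample;
        upsample.configure(&in, &out, Size2D(2U, 2U), InterpolationPolicy::NEAREST_NEIGHBOR);

        in.allocator()->allocate();
        out.allocator()->allocate();
        upsample.run();
    }
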
diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox
index df71c90f3f..8aa70817a8 100644
--- a/docs/00_introduction.dox
+++ b/docs/00_introduction.dox
@@ -1049,7 +1049,7 @@ v17.03 Sources preview
- New OpenCL kernels / functions:
- @ref CLGradientKernel, @ref CLEdgeNonMaxSuppressionKernel, @ref CLEdgeTraceKernel / @ref CLCannyEdge
- GEMM refactoring + FP16 support: CLGEMMInterleave4x4Kernel, CLGEMMTranspose1xWKernel, @ref CLGEMMMatrixMultiplyKernel, CLGEMMMatrixAdditionKernel / @ref CLGEMM
- - @ref CLGEMMMatrixAccumulateBiasesKernel / @ref CLFullyConnectedLayer
+ - CLGEMMMatrixAccumulateBiasesKernel / @ref CLFullyConnectedLayer
- @ref CLTransposeKernel / @ref CLTranspose
- @ref CLLKTrackerInitKernel, @ref CLLKTrackerStage0Kernel, @ref CLLKTrackerStage1Kernel, @ref CLLKTrackerFinalizeKernel / @ref CLOpticalFlow
- @ref CLNormalizationLayerKernel / @ref CLNormalizationLayer
@@ -1061,7 +1061,7 @@ v17.03 Sources preview
v17.02.1 Sources preview
- New OpenCL kernels / functions:
- - @ref CLLogits1DMaxKernel, @ref CLLogits1DShiftExpSumKernel, @ref CLLogits1DNormKernel / @ref CLSoftmaxLayer
+ - CLLogits1DMaxKernel, CLLogits1DShiftExpSumKernel, @ref CLLogits1DNormKernel / @ref CLSoftmaxLayer
- @ref CLPoolingLayerKernel / @ref CLPoolingLayer
- @ref CLIm2ColKernel, @ref CLCol2ImKernel, CLConvolutionLayerWeightsReshapeKernel / @ref CLConvolutionLayer
- @ref CLRemapKernel / @ref CLRemap
diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp
index d18a319f96..895bb72827 100644
--- a/src/core/CL/CLHelpers.cpp
+++ b/src/core/CL/CLHelpers.cpp
@@ -218,11 +218,6 @@ std::string get_data_size_from_data_type(const DataType &dt)
}
}
-std::string get_underlying_cl_type_from_data_type(const DataType &dt)
-{
- return get_cl_type_from_data_type(dt);
-}
-
GPUTarget get_target_from_device(const cl::Device &device)
{
// Query device name size
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 72af73bc94..73874b69b2 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -203,7 +203,6 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "gather", "gather.cl" },
{ "gaussian1x5_sub_x", "gaussian_pyramid.cl" },
{ "gaussian5x1_sub_y", "gaussian_pyramid.cl" },
- { "gemm_accumulate_biases", "gemm.cl" },
{ "gemm_ma_f16", "gemm.cl" },
{ "gemm_ma_f32", "gemm.cl" },
{ "gemm_mv", "gemv.cl" },
diff --git a/src/core/CL/cl_kernels/activation_layer_quant.cl b/src/core/CL/cl_kernels/activation_layer_quant.cl
index e304f7bb4c..0481319428 100644
--- a/src/core/CL/cl_kernels/activation_layer_quant.cl
+++ b/src/core/CL/cl_kernels/activation_layer_quant.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -41,7 +41,7 @@
* @note Quantization offsets of the input/output tensors are passed in only if asymmetric with -DO1_VAL= and -DO2_VAL= respectively.
* @note Quantized value of constant zero should be given as a preprocessor argument using -DCONST_0=value. e.g. -DCONST_0=128.
*
- * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8/QSYMM16
+ * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8/QASYMM8_SIGNED/QSYMM16
* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
@@ -112,7 +112,7 @@ __kernel void activation_layer_quant_f32(
* @note Quantization offsets of the input/output tensors are passed in with -DO1_VAL= and -DO2_VAL= respectively.
* @note Quantized value of constant zero should be given as a preprocessor argument using -DCONST_0=value. e.g. -DCONST_0=128.
*
- * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8/QSYMM16
+ * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8/QASYMM8_SIGNED/QSYMM16
* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
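The quantized activation kernel is configured entirely through build options like those named in its @notes. A hedged host-side sketch of such an options string (the concrete values are illustrative, assuming a QASYMM8_SIGNED tensor):

    #include <string>

    const std::string build_opts =
        "-DDATA_TYPE=char "   // QASYMM8_SIGNED is stored as signed 8-bit
        "-DCONST_0=0 "        // quantized value of zero
        "-DO1_VAL=10 "        // input offset (asymmetric quantization only)
        "-DO2_VAL=5";         // output offset (asymmetric quantization only)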
diff --git a/src/core/CL/cl_kernels/channel_shuffle.cl b/src/core/CL/cl_kernels/channel_shuffle.cl
index 2bf603d5b9..9a87eb4af3 100644
--- a/src/core/CL/cl_kernels/channel_shuffle.cl
+++ b/src/core/CL/cl_kernels/channel_shuffle.cl
@@ -1,26 +1,26 @@
/*
-* Copyright (c) 2018 Arm Limited.
-*
-* SPDX-License-Identifier: MIT
-*
-* Permission is hereby granted, free of charge, to any person obtaining a copy
-* of this software and associated documentation files (the "Software"), to
-* deal in the Software without restriction, including without limitation the
-* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-* sell copies of the Software, and to permit persons to whom the Software is
-* furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in all
-* copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*/
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
#include "helpers.h"
#if defined(DATA_TYPE) && defined(VEC_SIZE) && defined(NUM_GROUPS) && defined(K) && defined(SRC_DIM_Z)
@@ -47,7 +47,7 @@
* @note The number of channels in each group must be given as a preprocessor argument using -DK=num. e.g. -DK=1
* K is equal to num_channels / num_groups.
*
- * @param[in] src_ptr Pointer to the source matrix. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] src_ptr Pointer to the source matrix. Supported data types: All
* @param[in] src_stride_x Stride of the first source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the first source tensor in Y dimension (in bytes)
@@ -110,7 +110,7 @@ __kernel void channel_shuffle_nchw(TENSOR4D_DECLARATION(src),
* @note The number of channels in each group must be given as a preprocessor argument using -DK=num. e.g. -DK=1
* K is equal to num_channels / num_groups.
*
- * @param[in] src_ptr Pointer to the source matrix. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] src_ptr Pointer to the source matrix. Supported data types: All
* @param[in] src_stride_x Stride of the first source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the first source tensor in Y dimension (in bytes)
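As the channel-shuffle @notes state, K is derived on the host as num_channels / num_groups and handed to the kernel as a define. A minimal sketch (the tensor dimensions are illustrative):

    #include <string>

    const unsigned int num_channels = 64;
    const unsigned int num_groups   = 4;
    const unsigned int K            = num_channels / num_groups; // channels per group = 16
    const std::string  build_opts   = "-DNUM_GROUPS=" + std::to_string(num_groups)
                                      + " -DK=" + std::to_string(K);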
diff --git a/src/core/CL/cl_kernels/comparisons.cl b/src/core/CL/cl_kernels/comparisons.cl
index 7d5d493f59..408846144d 100644
--- a/src/core/CL/cl_kernels/comparisons.cl
+++ b/src/core/CL/cl_kernels/comparisons.cl
@@ -51,7 +51,7 @@
* @param[in] in1_stride_z Stride of the source tensor in Z dimension (in bytes)
* @param[in] in1_step_z in1_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] in2_ptr Pointer to the source tensor. Supported data types: U8/S16/F16/F32
+ * @param[in] in2_ptr Pointer to the source tensor. Supported data types: same as @p in1_ptr
* @param[in] in2_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] in2_step_x in2_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] in2_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -59,7 +59,7 @@
* @param[in] in2_stride_z Stride of the source tensor in Z dimension (in bytes)
* @param[in] in2_step_z in2_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[out] out_ptr Pointer to the destination tensor. Supported data types: U8 (only if both inputs are U8), S16/F16/F32
+ * @param[out] out_ptr Pointer to the destination tensor. Supported data types: U8
* @param[in] out_stride_x Stride of the destination tensor in X dimension (in bytes)
* @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] out_stride_y Stride of the destination tensor in Y dimension (in bytes)
@@ -115,7 +115,7 @@ __kernel void DEFINE_KERNEL(OP_NAME)(
* @param[in] in2_stride_z Stride of the source tensor in Z dimension (in bytes)
* @param[in] in2_step_z in2_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[out] out_ptr Pointer to the destination tensor. Supported data types: same as @p in1_ptr
+ * @param[out] out_ptr Pointer to the destination tensor. Supported data types: U8
* @param[in] out_stride_x Stride of the destination tensor in X dimension (in bytes)
* @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] out_stride_y Stride of the destination tensor in Y dimension (in bytes)
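The documentation change above reflects that the comparison kernels always write a U8 mask, whatever the input type. Element-wise, the behaviour is equivalent to the following reference sketch (not the kernel source):

    #include <cstdint>

    // out is a U8 mask: 255 where the predicate holds, 0 elsewhere.
    void compare_greater_ref(const float *in1, const float *in2, uint8_t *out, int n)
    {
        for(int i = 0; i < n; ++i)
        {
            out[i] = (in1[i] > in2[i]) ? 255 : 0;
        }
    }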
diff --git a/src/core/CL/cl_kernels/concatenate.cl b/src/core/CL/cl_kernels/concatenate.cl
index 8adcc1b4da..4281e675d7 100644
--- a/src/core/CL/cl_kernels/concatenate.cl
+++ b/src/core/CL/cl_kernels/concatenate.cl
@@ -73,7 +73,7 @@ inline VEC_QUANT requantize(VEC_QUANT input, float in_offset, float out_offset,
* @note Tensor depth should be given as a preprocessor argument using -DDEPTH=size. e.g. -DDEPTH=16
* @note First input tensor width should be given as a preprocessor argument using -DINPUT1_WIDTH=width. e.g. -DINPUT1_WIDTH=8
*
- * @param[in] src1_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/F32
+ * @param[in] src1_ptr Pointer to the source tensor. Supported data types: All.
* @param[in] src1_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src1_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src1_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -155,7 +155,7 @@ __kernel void concatenate_width_x2(
* @note Second input tensor width should be given as a preprocessor argument using -DINPUT2_WIDTH=width. e.g. -DINPUT2_WIDTH=8
* @note Third input tensor width should be given as a preprocessor argument using -DINPUT3_WIDTH=width. e.g. -DINPUT3_WIDTH=8
*
- * @param[in] src1_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/F32
+ * @param[in] src1_ptr Pointer to the source tensor. Supported data types: All
* @param[in] src1_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src1_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src1_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -388,7 +388,7 @@ __kernel void concatenate_height(
* @note The data type has to be passed at compile time using -DDATA_TYPE. i.e. -DDATA_TYPE=float
* @note Vector size has to be passed at compile time using -DVEC_SIZE. i.e. -DVEC_SIZE=16
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16, F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: All
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/copy_tensor.cl b/src/core/CL/cl_kernels/copy_tensor.cl
index 319798356a..0592e07511 100644
--- a/src/core/CL/cl_kernels/copy_tensor.cl
+++ b/src/core/CL/cl_kernels/copy_tensor.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -32,7 +32,7 @@
* -# -DDEPTH = The third dimension (depth) of the tensor (it is needed only if d == 3)
* -# -DDATA_TYPE = Input and output datatypes.
*
- * @param[in] in_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] in_ptr Pointer to the source tensor. Supported data types: All
* @param[in] in_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] in_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] in_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -80,7 +80,7 @@ __kernel void copy_pad_tensor(
#if defined(DATA_TYPE)
/** Performs a copy of input tensor to the output tensor.
*
- * @param[in] in_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] in_ptr Pointer to the source tensor. Supported data types: All
* @param[in] in_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] in_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] in_stride_y Stride of the source tensor in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/depthwise_convolution.cl b/src/core/CL/cl_kernels/depthwise_convolution.cl
index 59ae682ad8..e1f6505df7 100644
--- a/src/core/CL/cl_kernels/depthwise_convolution.cl
+++ b/src/core/CL/cl_kernels/depthwise_convolution.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -692,7 +692,7 @@ __kernel void depthwise_convolution_3x3_stridex2_stridey2_bifrost_f32(
* @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=vec_size, e.g., -DVEC_SIZE=4
* @attention Input's height and width should be 3
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: All
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl b/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl
index 6225f17aed..d4bea4b2e8 100644
--- a/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl
+++ b/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl
@@ -144,7 +144,7 @@
/** This function computes the depthwise convolution quantized.
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -152,7 +152,7 @@
* @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
* @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] dst_ptr Pointer to the destination tensor. Supported data types: QASYMM8
+ * @param[in] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr
* @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
* @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
@@ -160,7 +160,7 @@
* @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
* @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
- * @param[in] weights_ptr Pointer to the weights tensor. Supported data types: QASYMM8/QSYMM8_PER_CHANNEL
+ * @param[in] weights_ptr Pointer to the weights tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL
* @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes)
* @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes)
@@ -461,9 +461,7 @@ __kernel void dwc_3x3_native_quantized8_nchw(
#endif /*DILATION_X==1*/
/** This function computes the depthwise convolution quantized using dot product when the data layout is NCHW.
*
- * @note Per-channel quantization is not supported by this kernel.
- *
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -471,7 +469,7 @@ __kernel void dwc_3x3_native_quantized8_nchw(
* @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
* @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] dst_ptr Pointer to the destination tensor. Supported data types: QASYMM8
+ * @param[in] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr
* @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
* @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
@@ -479,7 +477,7 @@ __kernel void dwc_3x3_native_quantized8_nchw(
* @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
* @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
- * @param[in] weights_ptr Pointer to the weights tensor. Supported data types: QASYMM8/QSYMM8_PER_CHANNEL
+ * @param[in] weights_ptr Pointer to the weights tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL
* @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes)
* @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes)
@@ -789,7 +787,7 @@ __kernel void dwc_3x3_native_quantized8_dot8_nchw(
* @note The convolution stride along the width must be passed at compile time using -DCONV_STRIDE_X (e.g. -DCONV_STRIDE_X=1)
* @note The convolution stride along the height must be passed at compile time using -DCONV_STRIDE_Y (e.g. -DCONV_STRIDE_Y=1)
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -809,7 +807,7 @@ __kernel void dwc_3x3_native_quantized8_dot8_nchw(
* @param[in] dst_stride_w Stride of the destination tensor in W dimension (in bytes)
* @param[in] dst_step_w dst_stride_w * number of elements along W processed per workitem(in bytes)
* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
- * @param[in] weights_ptr Pointer to the weights tensor reshaped. Supported data types: QASYMM8/QSYMM8_PER_CHANNEL
+ * @param[in] weights_ptr Pointer to the weights tensor reshaped. Supported data types: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL
* @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes)
* @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes)
@@ -1028,7 +1026,7 @@ __kernel void dwc_3x3_reshaped_quantized8_nhwc(
* @note The convolution pad top must be passed at compile time using -DCONV_PAD_TOP (e.g. -DCONV_PAD_TOP=1)
* @note The convolution pad left must be passed at compile time using -DCONV_PAD_LEFT (e.g. -DCONV_PAD_LEFT=1).
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -1048,7 +1046,7 @@ __kernel void dwc_3x3_reshaped_quantized8_nhwc(
* @param[in] dst_stride_w Stride of the destination tensor in W dimension (in bytes)
* @param[in] dst_step_w dst_stride_w * number of elements along W processed per workitem(in bytes)
* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
- * @param[in] weights_ptr Pointer to the weights tensor. Supported data types: QASYMM8/QSYMM8_PER_CHANNEL
+ * @param[in] weights_ptr Pointer to the weights tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL
* @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes)
* @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes)
@@ -1378,7 +1376,7 @@ __kernel void dwc_3x3_reshaped_quantized8_stride1_nhwc(
* @note If REAL_MULTIPLIER is passed at compile time (i.e. -DREAL_MULTIPLIER=1.355f), the final quantization is performed using a floating point multiplication.
* If not, the quantization will be performed using a fixed point multiplication
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
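The REAL_MULTIPLIER note above distinguishes two requantization paths. A hedged host-side illustration of the floating-point path follows; the alternative fixed-point path instead uses a quantized multiplier plus shift (names are illustrative, not the kernel's code):

    #include <cmath>

    // Floating-point requantization, enabled when -DREAL_MULTIPLIER is defined.
    int requantize_float(int acc, float real_multiplier, int output_offset)
    {
        return static_cast<int>(std::lround(acc * real_multiplier)) + output_offset;
    }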
diff --git a/src/core/CL/cl_kernels/fill_border.cl b/src/core/CL/cl_kernels/fill_border.cl
index 5b3266cc49..5775d899e8 100644
--- a/src/core/CL/cl_kernels/fill_border.cl
+++ b/src/core/CL/cl_kernels/fill_border.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -31,7 +31,7 @@
* @attention The border size for top, bottom, left, right needs to be passed at compile time.
* e.g. -DBORDER_SIZE_TOP=0 -DBORDER_SIZE_BOTTOM=2 -DBORDER_SIZE_LEFT=0 -DBORDER_SIZE_RIGHT=2
*
- * @param[in,out] buf_ptr Pointer to the source image. Supported data types: U8/U16/S16/U32/S32/F16/F32
+ * @param[in,out] buf_ptr Pointer to the source image. Supported data types: All
* @param[in] buf_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] buf_step_x buf_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] buf_stride_y Stride of the source image in Y dimension (in bytes)
@@ -106,7 +106,7 @@ __kernel void fill_image_borders_replicate(
* @attention The border size for top, bottom, left, right needs to be passed at compile time.
* e.g. -DBORDER_SIZE_TOP=0 -DBORDER_SIZE_BOTTOM=2 -DBORDER_SIZE_LEFT=0 -DBORDER_SIZE_RIGHT=2
*
- * @param[out] buf_ptr Pointer to the source image. Supported data types: U8/U16/S16/U32/S32/F16/F32
+ * @param[out] buf_ptr Pointer to the source image. Supported data types: All
* @param[in] buf_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] buf_step_x buf_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] buf_stride_y Stride of the source image in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/gemm.cl b/src/core/CL/cl_kernels/gemm.cl
index e3ce6bf0cd..2360561f8a 100644
--- a/src/core/CL/cl_kernels/gemm.cl
+++ b/src/core/CL/cl_kernels/gemm.cl
@@ -61,7 +61,7 @@
* (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
* @note If the M0xK0 blocks have to be interleaved, the option -DINTERLEAVE must be passed at compile time.
*
- * @param[in] src_ptr Pointer to the source LHS tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] src_ptr Pointer to the source LHS tensor. Supported data types: All
* @param[in] src_stride_x Stride of the source LHS tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source LHS tensor in Y dimension (in bytes)
@@ -261,7 +261,7 @@ __kernel void gemm_reshape_lhs_matrix_nt(TENSOR3D_DECLARATION(src),
* (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
* @note If the M0xK0 blocks have to be interleaved, the option -DINTERLEAVE must be passed at compile time.
*
- * @param[in] src_ptr Pointer to the source LHS tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] src_ptr Pointer to the source LHS tensor. Supported data types: All
* @param[in] src_stride_x Stride of the source LHS tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source LHS tensor in Y dimension (in bytes)
@@ -412,7 +412,7 @@ __kernel void gemm_reshape_lhs_matrix_t(TENSOR3D_DECLARATION(src),
* K0: 1,2,3,4,8,16
* H0: greater than 0
*
- * @param[in] src_ptr Pointer to the source RHS tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] src_ptr Pointer to the source RHS tensor. Supported data types: All
* @param[in] src_stride_x Stride of the source RHS tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source RHS tensor in Y dimension (in bytes)
@@ -566,7 +566,7 @@ __kernel void gemm_reshape_rhs_matrix_nt(TENSOR3D_DECLARATION(src),
* K0: 2,3,4,8,16
* H0: greater than 0
*
- * @param[in] src_ptr Pointer to the source RHS tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] src_ptr Pointer to the source RHS tensor. Supported data types: All
* @param[in] src_stride_x Stride of the source RHS tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source RHS tensor in Y dimension (in bytes)
@@ -7626,39 +7626,3 @@ __kernel void gemm_lc_vm_f32(IMAGE_DECLARATION(src0),
vstore4(acc, 0, (__global float *)(offset(&dst, 0, 0)));
}
#endif // defined(WIDTH_VECTOR_A)
-
-/** This kernel accumulates each row with the biases vector.
- *
- * @note The data type must be passed at compile time using -DDATA_TYPE e.g. -DDATA_TYPE=short.
- * @note The vector size must be passed at compile time using -DVECTOR_SIZE e.g. -DVECTOR_SIZE=16.
- *
- * @param[in, out] accum_ptr Pointer to the accumulate tensor. Supported data type: U8/S8/U16/S16/F16/U32/S32/F32
- * @param[in] accum_stride_x Stride of the accmulate tensor in X dimension (in bytes)
- * @param[in] accum_step_x accum_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] accum_stride_y Stride of the accumlulate tensor in Y dimension (in bytes)
- * @param[in] accum_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] accum_offset_first_element_in_bytes The offset of the first element in the accumulate tensor
- * @param[in] biases_ptr Pointer to the biases vector. Same as @p accum_ptr
- * @param[in] biases_stride_x Stride of the destination tensor in X dimension (in bytes)
- * @param[in] biases_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the destination tensor
- */
-#if defined(DATA_TYPE) && defined(VECTOR_SIZE)
-__kernel void gemm_accumulate_biases(
- IMAGE_DECLARATION(accum),
- VECTOR_DECLARATION(biases))
-{
- Image accum = CONVERT_TO_IMAGE_STRUCT(accum);
- Vector biases = CONVERT_TO_VECTOR_STRUCT(biases);
-
- // Vector size, e.g. number of vector elements.
- VEC_DATA_TYPE(DATA_TYPE, VECTOR_SIZE)
- accum_value = VLOAD(VECTOR_SIZE)(0, (__global DATA_TYPE *)accum.ptr);
- VEC_DATA_TYPE(DATA_TYPE, VECTOR_SIZE)
- biases_value = VLOAD(VECTOR_SIZE)(0, (__global DATA_TYPE *)biases.ptr);
- accum_value = biases_value + accum_value;
- // Store result in the accumulate buffer
- VSTORE(VECTOR_SIZE)
- (accum_value, 0, (__global DATA_TYPE *)accum.ptr);
-}
-#endif // defined(DATA_TYPE) && defined(VECTOR_SIZE)
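For reference, the removed gemm_accumulate_biases kernel added the bias vector to every row of the accumulator. An equivalent scalar sketch (shapes and names illustrative):

    // Each row of the rows x cols accumulator gets the length-cols bias vector added.
    void accumulate_biases_ref(float *accum, const float *biases, int rows, int cols)
    {
        for(int r = 0; r < rows; ++r)
        {
            for(int c = 0; c < cols; ++c)
            {
                accum[r * cols + c] += biases[c];
            }
        }
    }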
diff --git a/src/core/CL/cl_kernels/gemmlowp.cl b/src/core/CL/cl_kernels/gemmlowp.cl
index 0f4a86c3bc..aac8d5a1e2 100644
--- a/src/core/CL/cl_kernels/gemmlowp.cl
+++ b/src/core/CL/cl_kernels/gemmlowp.cl
@@ -1115,7 +1115,7 @@ __kernel void gemmlowp_mm_native(IMAGE_DECLARATION(lhs),
* @note The data type for the accumulation must be passed at compile time using -DACC_DATA_TYPE (i.e. -DACC_DATA_TYPE=uint)
* @note In case of scaling the scalar value must be passed at compile time using -DSCALAR (e.g. -DSCALAR=3)
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data type: QASYMM8/QASYMM8_SIGNED
+ * @param[in] src_ptr Pointer to the source tensor. Supported data type: QASYMM8/QASYMM8_SIGNED/QSYMM8
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -1180,7 +1180,7 @@ __kernel void gemmlowp_matrix_a_reduction(TENSOR3D_DECLARATION(src),
* @note The data type for the accumulation must be passed at compile time using -DACC_DATA_TYPE (i.e. -DACC_DATA_TYPE=uint)
* @note In case of scaling the scalar value must be passed at compile time using -DSCALAR (e.g. -DSCALAR=3)
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data type: QASYMM8/QASYMM8_SIGNED
+ * @param[in] src_ptr Pointer to the source tensor. Supported data type: QASYMM8/QASYMM8_SIGNED/QSYMM8
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -1253,7 +1253,7 @@ __kernel void gemmlowp_matrix_a_reduction_dot8(TENSOR3D_DECLARATION(src),
* @note The data type for the accumulation must be passed at compile time using -DACC_DATA_TYPE (i.e. -DACC_DATA_TYPE=uint)
* @note In case of scaling the scalar value must be passed at compile time using -DSCALAR (i.e. -DSCALAR=3)
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data type: QASYMM8/QASYMM8_SIGNED
+ * @param[in] src_ptr Pointer to the source tensor. Supported data type: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
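The gemmlowp reduction kernels documented above compute per-row (matrix A) or per-column (matrix B) sums of the quantized inputs, optionally scaled via -DSCALAR. A reference sketch of the row case, with int32_t standing in for ACC_DATA_TYPE (illustrative, not the kernel source):

    #include <cstdint>

    void matrix_a_reduction_ref(const int8_t *a, int32_t *row_sums, int M, int K, int32_t scalar)
    {
        for(int m = 0; m < M; ++m)
        {
            int32_t acc = 0;
            for(int k = 0; k < K; ++k)
            {
                acc += a[m * K + k];
            }
            row_sums[m] = acc * scalar; // scalar corresponds to -DSCALAR when defined
        }
    }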
diff --git a/src/core/CL/cl_kernels/memset.cl b/src/core/CL/cl_kernels/memset.cl
index e8bd1a5eff..bb46a49f84 100644
--- a/src/core/CL/cl_kernels/memset.cl
+++ b/src/core/CL/cl_kernels/memset.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -32,7 +32,7 @@
* -# -DVEC_SIZE = Vector size
* -# -DLAST_ACCESSED_X = The element that is on the X border (threads trying to set this, might need to step back a bit)
*
- * @param[in] tensor_ptr Pointer to the source image. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[in] tensor_ptr Pointer to the source image. Data types supported: All.
* @param[in] tensor_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] tensor_step_x tensor_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] tensor_stride_y Stride of the source image in Y dimension (in bytes)
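The -DLAST_ACCESSED_X note above refers to the usual vector-tail trick: a work-item whose vector write would cross the X border shifts its start back so the full-width store stays in bounds. A hedged sketch of that adjustment (not the kernel source):

    // xi is this work-item's first element along X; the returned start index is
    // clamped so that [start, start + VEC_SIZE) never crosses the border.
    int clamp_vector_start(int xi, int last_accessed_x)
    {
        const int overshoot = xi - last_accessed_x;
        return (overshoot > 0) ? xi - overshoot : xi;
    }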
diff --git a/src/core/CL/cl_kernels/nonmax.cl b/src/core/CL/cl_kernels/nonmax.cl
index e618b0a269..ab13131807 100644
--- a/src/core/CL/cl_kernels/nonmax.cl
+++ b/src/core/CL/cl_kernels/nonmax.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -25,13 +25,13 @@
/** This function performs non-maxima suppression over a 3x3 window on a given image.
*
- * @param[in] src_ptr Pointer to the source image. Supported data types: F32
+ * @param[in] src_ptr Pointer to the source image. Supported data types: U8/F32
* @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
* @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: F32
+ * @param[out] dst_ptr Pointer to the destination image. Supported data types: same as @p src_ptr
* @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
* @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/pad_layer.cl b/src/core/CL/cl_kernels/pad_layer.cl
index ff43b091fc..4e4d2ad9e7 100644
--- a/src/core/CL/cl_kernels/pad_layer.cl
+++ b/src/core/CL/cl_kernels/pad_layer.cl
@@ -51,7 +51,7 @@
* -# -DPAD_W_BEFORE: Pad to add before the first batch of the input tensor (e.g. -DPAD_W_BEFORE=3)
* -# -DSRC_BATCH: Input tensor's batch size (e.g. -DSRC_BATCH=4)
*
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8, S8, QASYMM8, QASYMM8_SIGNED, U16, S16, U32, S32, F16, F32
+ * @param[in] src_ptr Pointer to the source image. Supported data types: All
* @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
@@ -165,7 +165,7 @@ __kernel void pad_layer_constant(TENSOR3D_DECLARATION(src),
* @note If the starting point to read backward from is less than the output's last element accessed in the X dimension, the following compile flags must be passed at compile time to avoid negative offsets:
* -# -DAFTER_PAD_REM: Defines how much to rotate the vector if the backward calculation attempted to read from a negative offset (e.g. -DAFTER_PAD_REM=3)
*
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8, S8, QASYMM8, QASYMM8_SIGNED, U16, S16, U32, S32, F16, F32
+ * @param[in] src_ptr Pointer to the source image. Supported data types: All
* @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/reduction_operation.cl b/src/core/CL/cl_kernels/reduction_operation.cl
index c5fdfd8751..b2e56928d0 100644
--- a/src/core/CL/cl_kernels/reduction_operation.cl
+++ b/src/core/CL/cl_kernels/reduction_operation.cl
@@ -167,11 +167,11 @@ __kernel void reduction_operation_x(
* @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used
* @note In case of MIN and MAX the condition data type must be passed at compile time using -DCOND_DATA_TYPE e.g. -DCOND_DATA_TYPE=short
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: S32/F16/F32 and QASYMM8 for operation MEAN
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: S32/F16/F32 and QASYMM8/QASYMM8_SIGNED for operation MEAN
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p src_ptt
+ * @param[in] output_ptr The local buffer to hold summed values. Supported data types: same as @p src_ptr
* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
@@ -233,13 +233,13 @@ __kernel void reduction_operation_non_parallel_x(
* @note The input data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
* @note The height size must be passed at compile time using -DHEIGHT e.g. -DHEIGHT=128
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
* @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p src_ptt
+ * @param[in] output_ptr The local buffer to hold summed values. Supported data types: same as @p src_ptr
* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
@@ -316,7 +316,7 @@ __kernel void reduction_operation_y(
* @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
* @note The depth size must be passed at compile time using -DDEPTH e.g. -DDEPTH=128
*
- * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
+ * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32
* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -324,7 +324,7 @@ __kernel void reduction_operation_y(
* @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
* @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptt
+ * @param[in] output_ptr The local buffer to hold summed values. Supported data types: same as @p input_ptr
* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
@@ -420,7 +420,7 @@ __kernel void reduction_operation_z(
* @note The batch size must be passed at compile time using -DBATCH e.g. -DBATCH=128
* @note The depth size must be passed at compile time using -DDEPTH e.g. -DDEPTH=128
*
- * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
+ * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32
* @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -430,7 +430,7 @@ __kernel void reduction_operation_z(
* @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes)
* @param[in] input_step_w input_stride_w * number of elements along W processed per workitem(in bytes)
* @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptt
+ * @param[in] output_ptr The local buffer to hold summed values. Supported data types: same as @p input_ptr
* @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
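As the notes above describe, the reduction kernels select a product or a sum at compile time via -DPROD, with the MIN/MAX variants additionally needing -DCOND_DATA_TYPE for their comparisons. The accumulate step reduces to something like this sketch (reference logic only):

    // prod mirrors the -DPROD compile-time switch.
    float reduce_ref(const float *v, int n, bool prod)
    {
        float acc = prod ? 1.0f : 0.0f;
        for(int i = 0; i < n; ++i)
        {
            acc = prod ? acc * v[i] : acc + v[i];
        }
        return acc;
    }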
diff --git a/src/core/CL/cl_kernels/reorg_layer.cl b/src/core/CL/cl_kernels/reorg_layer.cl
index 6a181dcd94..29344de37a 100644
--- a/src/core/CL/cl_kernels/reorg_layer.cl
+++ b/src/core/CL/cl_kernels/reorg_layer.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -39,7 +39,7 @@
* @note The depth of the input tensor must be passed at compile time using -DSRC_DEPTH: e.g. -DSRC_DEPTH=64
* @note The distance between 2 consecutive pixels along the x and y direction must be passed at compile time using -DSTRIDE: e.g. -DSTRIDE=2
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: All
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -79,7 +79,7 @@ __kernel void reorg_layer_nchw(
* @note The depth of the input tensor must be passed at compile time using -DSRC_DEPTH: e.g. -DSRC_DEPTH=64
* @note The distance between 2 consecutive pixels along the x and y direction must be passed at compile time using -DSTRIDE: e.g. -DSTRIDE=2
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: All
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/reverse.cl b/src/core/CL/cl_kernels/reverse.cl
index 0cc6bc0ee2..10ffe84aeb 100644
--- a/src/core/CL/cl_kernels/reverse.cl
+++ b/src/core/CL/cl_kernels/reverse.cl
@@ -1,26 +1,26 @@
/*
-* Copyright (c) 2018 Arm Limited.
-*
-* SPDX-License-Identifier: MIT
-*
-* Permission is hereby granted, free of charge, to any person obtaining a copy
-* of this software and associated documentation files (the "Software"), to
-* deal in the Software without restriction, including without limitation the
-* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-* sell copies of the Software, and to permit persons to whom the Software is
-* furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in all
-* copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*/
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
#include "helpers.h"
#if defined(DATA_TYPE) && defined(NUM_REVERSE_DIMS)
@@ -34,7 +34,7 @@
* @note The data type must be given as a preprocessor argument using -DDATA_TYPE=num. e.g. -DDATA_TYPE=uint
* @note The number of dimensions to reverse must be given as a preprocessor argument using -DNUM_REVERSE_DIMS=num, e.g. -DNUM_REVERSE_DIMS=3
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: All
* @param[in] src_stride_x Stride of the first source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the first source tensor in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/select.cl b/src/core/CL/cl_kernels/select.cl
index cb7988e7b0..52ef81560a 100644
--- a/src/core/CL/cl_kernels/select.cl
+++ b/src/core/CL/cl_kernels/select.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -38,7 +38,7 @@
* @param[in] c_stride_z Stride of the source tensor in Z dimension (in bytes)
* @param[in] c_step_z c_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] c_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] x_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[in] x_ptr Pointer to the source tensor. Supported data types: All
* @param[in] x_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] x_step_x x_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] x_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -46,7 +46,7 @@
* @param[in] x_stride_z Stride of the source tensor in Z dimension (in bytes)
* @param[in] x_step_z x_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] x_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] y_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[in] y_ptr Pointer to the source tensor. Supported data types: same as @p x_ptr
* @param[in] y_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] y_step_x y_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] y_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -54,7 +54,7 @@
* @param[in] y_stride_z Stride of the source tensor in Z dimension (in bytes)
* @param[in] y_step_z y_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] y_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[out] out_ptr Pointer to the destination tensor. Supported data types: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[out] out_ptr Pointer to the destination tensor. Supported data types: same as @p x_ptr
* @param[in] out_stride_x Stride of the destination tensor in X dimension (in bytes)
* @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] out_stride_y Stride of the destination tensor in Y dimension (in bytes)
@@ -98,7 +98,7 @@ __kernel void select_same_rank(
* @param[in] c_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] c_step_x c_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] c_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] x_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[in] x_ptr Pointer to the source tensor. Supported data types: All
* @param[in] x_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] x_step_x x_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] x_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -106,7 +106,7 @@ __kernel void select_same_rank(
* @param[in] x_stride_z Stride of the source tensor in Z dimension (in bytes)
* @param[in] x_step_z x_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] x_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] y_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[in] y_ptr Pointer to the source tensor. Supported data types: same as @p x_ptr
* @param[in] y_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] y_step_x y_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] y_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -114,7 +114,7 @@ __kernel void select_same_rank(
* @param[in] y_stride_z Stride of the source tensor in Z dimension (in bytes)
* @param[in] y_step_z y_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] y_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[out] out_ptr Pointer to the destination tensor. Supported data types: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[out] out_ptr Pointer to the destination tensor. Supported data types: same as @p x_ptr
* @param[in] out_stride_x Stride of the destination tensor in X dimension (in bytes)
* @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] out_stride_y Stride of the destination tensor in Y dimension (in bytes)
@@ -162,7 +162,7 @@ __kernel void select_different_rank_2(
* @param[in] c_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] c_step_x c_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] c_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] x_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[in] x_ptr Pointer to the source tensor. Supported data types: All
* @param[in] x_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] x_step_x x_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] x_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -170,7 +170,7 @@ __kernel void select_different_rank_2(
* @param[in] x_stride_z Stride of the source tensor in Z dimension (in bytes)
* @param[in] x_step_z x_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] x_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[in] y_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[in] y_ptr Pointer to the source tensor. Supported data types: same as @p x_ptr
* @param[in] y_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] y_step_x y_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] y_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -178,7 +178,7 @@ __kernel void select_different_rank_2(
* @param[in] y_stride_z Stride of the source tensor in Z dimension (in bytes)
* @param[in] y_step_z y_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] y_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[out] out_ptr Pointer to the destination tensor. Supported data types: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+ * @param[out] out_ptr Pointer to the destination tensor. Supported data types: same as @p x_ptr
* @param[in] out_stride_x Stride of the destination tensor in X dimension (in bytes)
* @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] out_stride_y Stride of the destination tensor in Y dimension (in bytes)
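The "same as @p x_ptr" wording above follows from the operation itself: select is an element-wise out = c ? x : y, so both value inputs and the output necessarily share one type. A reference sketch (not the kernel source):

    #include <cstdint>

    void select_ref(const uint8_t *c, const float *x, const float *y, float *out, int n)
    {
        for(int i = 0; i < n; ++i)
        {
            out[i] = c[i] ? x[i] : y[i];
        }
    }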
diff --git a/src/core/CL/cl_kernels/softmax_layer_quantized.cl b/src/core/CL/cl_kernels/softmax_layer_quantized.cl
index 96e2b15a9c..f4c5c4b60e 100644
--- a/src/core/CL/cl_kernels/softmax_layer_quantized.cl
+++ b/src/core/CL/cl_kernels/softmax_layer_quantized.cl
@@ -96,7 +96,7 @@ int4 mult_by_quantized_multiplier_parallel(int4 data)
* @note Quantized beta can be optionally passed at compile time using -DINPUT_BETA_MULTIPLIER and -DINPUT_BETA_LEFT_SHIFT (if undefined, assume beta equals 1.0)
* @note -DDIFF_MIN must be passed at compile time. It is the threshold difference between the maximum value of the input data and the value currently being processed; it defines whether the value will be taken into account or not.
*
- * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: QASYMM8
+ * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: QASYMM8/QASYMM8_SIGNED
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
@@ -548,7 +548,7 @@ __kernel void softmax_layer_max_shift_exp_sum_quantized_parallel(
* @param[in] sum_stride_z Stride of the sum values tensor in Z dimension (in bytes)
* @param[in] sum_step_z sum_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] sum_offset_first_element_in_bytes The offset of the first element in the sum values tensor
- * @param[out] dst_ptr Pointer to the destination tensor slice. Supported data types: QASYMM8
+ * @param[out] dst_ptr Pointer to the destination tensor slice. Supported data types: QASYMM8/QASYMM8_SIGNED
* @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
* @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
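The -DDIFF_MIN note above gates which elements contribute to the quantized exponential sum: only values close enough to the row maximum are kept. A hedged scalar illustration of that test (following the note's wording, not the kernel source):

    // Keep an element only if its (non-positive) distance to the row maximum
    // is at least diff_min; diff_min corresponds to -DDIFF_MIN.
    bool contributes(int value, int row_max, int diff_min)
    {
        return (value - row_max) >= diff_min;
    }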
diff --git a/src/core/CL/cl_kernels/stack_layer.cl b/src/core/CL/cl_kernels/stack_layer.cl
index 59dee1d2fb..438e858df2 100644
--- a/src/core/CL/cl_kernels/stack_layer.cl
+++ b/src/core/CL/cl_kernels/stack_layer.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -66,7 +66,7 @@
* @note Dimension 2 of the input tensor must be passed at compile time using -DSRC_DIM2 (e.g. -DSRC_DIM2=112)
* @note Dimension 3 of the output tensor must be passed at compile time using -DDST_DIM3 (e.g. -DDST_DIM3=112)
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: All
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
diff --git a/src/core/CL/cl_kernels/transpose.cl b/src/core/CL/cl_kernels/transpose.cl
index 51860b60a5..785be6c710 100644
--- a/src/core/CL/cl_kernels/transpose.cl
+++ b/src/core/CL/cl_kernels/transpose.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -132,7 +132,7 @@
* -# -DDATA_TYPE_IN_BYTES=2 for transposing U16, S16 or FP16 matrices
* -# -DDATA_TYPE_IN_BYTES=4 for transposing U32, S32 or FP32 matrices
*
- * @param[in] src_ptr Pointer to the source matrix. Supported data types: U8/S8/U16/S16/F16/U32/S32/F32
+ * @param[in] src_ptr Pointer to the source matrix. Supported data types: All
* @param[in] src_stride_x Stride of the source matrix in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source matrix in Y dimension (in bytes)
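
The switch to "Supported data types: All" here (and in stack_layer.cl above) works because these kernels only move bytes; the host picks a CL type purely by element size, as in this hedged sketch of the mapping behind -DDATA_TYPE_IN_BYTES:

    // Hedged sketch: data-movement kernels can service any DataType by using
    // an unsigned CL type of the same element size.
    #include <cstddef>
    #include <string>

    std::string cl_unsigned_type_for(size_t element_size)
    {
        switch(element_size)
        {
            case 1: return "uchar";  // U8, S8, QASYMM8, QASYMM8_SIGNED, ...
            case 2: return "ushort"; // U16, S16, F16, QSYMM16, ...
            case 4: return "uint";   // U32, S32, F32
            default: return "";      // unsupported element size
        }
    }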
diff --git a/src/core/CL/cl_kernels/upsample_layer.cl b/src/core/CL/cl_kernels/upsample_layer.cl
index 0ce242e4df..d0cc0f24b7 100644
--- a/src/core/CL/cl_kernels/upsample_layer.cl
+++ b/src/core/CL/cl_kernels/upsample_layer.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,13 +26,13 @@
 /** This function applies upsampling to an input image. (NCHW)
*
* @attention The following variables must be passed at compile time:
- * -# -DDATA_TYPE = Tensor data type. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * -# -DDATA_TYPE = Tensor data type. Supported data types: All
* -# -DVEC_SIZE_IN = Input vector size
* -# -DVEC_SIZE_OUT = Output vector size
 * -# -DLAST_ACCESSED_X_IN = The input element that is on the X border (threads trying to set this might need to step back a bit)
 * -# -DLAST_ACCESSED_X_OUT = The output element that is on the X border (threads trying to set this might need to step back a bit)
*
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] src_ptr Pointer to the source image. Supported data types: All
* @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
@@ -81,13 +81,13 @@ __kernel void upsample_layer_nchw(
 /** This function applies upsampling to an input image. (NHWC)
*
* @attention The following variables must be passed at compile time:
- * -# -DDATA_TYPE = Tensor data type. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * -# -DDATA_TYPE = Tensor data type. Supported data types: All
* -# -DVEC_SIZE_IN = Input vector size
* -# -DVEC_SIZE_OUT = Output vector size
 * -# -DLAST_ACCESSED_X_IN = The input element that is on the X border (threads trying to set this might need to step back a bit)
 * -# -DLAST_ACCESSED_X_OUT = The output element that is on the X border (threads trying to set this might need to step back a bit)
*
- * @param[in] src_ptr Pointer to the source image. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+ * @param[in] src_ptr Pointer to the source image. Supported data types: All
* @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp
index 9e49869e15..66751f7dd3 100644
--- a/src/core/CL/kernels/CLActivationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp
@@ -27,8 +27,6 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
@@ -37,7 +35,6 @@
#include "arm_compute/core/utils/misc/Cast.h"
#include "support/StringSupport.h"
-#include <cmath>
#include <set>
namespace arm_compute
@@ -47,7 +44,7 @@ namespace
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM16, DataType::F16, DataType::F32);
static std::set<ActivationLayerInfo::ActivationFunction> quantized_supported_activations =
{
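
The hunk above removes plain U8 from the accepted activation inputs. A hedged usage sketch of the static validate() entry point this check backs (TensorInfo values are illustrative):

    // Hedged sketch: probe kernel support before configuring.
    #include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    bool activation_supported(const TensorInfo &src, const TensorInfo &dst, const ActivationLayerInfo &act)
    {
        // U8 inputs now fail here; QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32 pass.
        return CLActivationLayerKernel::validate(&src, &dst, act).error_code() == ErrorCode::OK;
    }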
diff --git a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp
index f0e869500e..9c2b68acc0 100644
--- a/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp
@@ -27,20 +27,15 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Window.h"
#include "support/StringSupport.h"
-#include <map>
-
-using namespace arm_compute;
-
+namespace arm_compute
+{
namespace
{
std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output)
@@ -102,7 +97,7 @@ void CLBatchConcatenateLayerKernel::configure(const CLCompileContext &compile_co
// Add build options
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_underlying_cl_type_from_data_type(input->info()->data_type()));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info())
{
@@ -172,3 +167,4 @@ void CLBatchConcatenateLayerKernel::run(const Window &window, cl::CommandQueue &
}
while(window.slide_window_slice_3D(slice));
}
+} // namespace arm_compute
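
Besides wrapping the file in the arm_compute namespace, the hunk above moves the kernel to the public get_cl_type_from_data_type() helper. A hedged sketch of the resulting build-option pattern:

    // Hedged sketch: map the tensor's DataType to a CL type name when
    // assembling kernel build options.
    #include "arm_compute/core/CL/CLHelpers.h"
    #include "arm_compute/core/CL/CLKernelLibrary.h"
    #include "support/StringSupport.h"

    arm_compute::CLBuildOptions concat_build_options(arm_compute::DataType dt, unsigned int vec_size)
    {
        using namespace arm_compute;
        CLBuildOptions build_opts;
        build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(dt)); // e.g. "char" for QASYMM8_SIGNED
        build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size));
        return build_opts;
    }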
diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
index 5306880035..10aa615c6f 100644
--- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
@@ -27,20 +27,15 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Window.h"
#include "support/StringSupport.h"
-#include <map>
-
-using namespace arm_compute;
-
+namespace arm_compute
+{
namespace
{
std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output)
@@ -100,7 +95,7 @@ void CLDepthConcatenateLayerKernel::configure(const CLCompileContext &compile_co
// Add build options
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_underlying_cl_type_from_data_type(input->info()->data_type()));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info())
{
@@ -156,3 +151,4 @@ void CLDepthConcatenateLayerKernel::run(const Window &window, cl::CommandQueue &
}
while(window.slide_window_slice_3D(slice));
}
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLFillBorderKernel.cpp b/src/core/CL/kernels/CLFillBorderKernel.cpp
index cd032bba77..67dac3280e 100644
--- a/src/core/CL/kernels/CLFillBorderKernel.cpp
+++ b/src/core/CL/kernels/CLFillBorderKernel.cpp
@@ -35,13 +35,8 @@
#include "arm_compute/core/Window.h"
#include "support/StringSupport.h"
-#include <cstdint>
-#include <set>
-#include <sstream>
-#include <string>
-
-using namespace arm_compute;
-
+namespace arm_compute
+{
CLFillBorderKernel::CLFillBorderKernel()
: ICLKernel(), _tensor(nullptr)
{
@@ -85,7 +80,7 @@ void CLFillBorderKernel::configure(const CLCompileContext &compile_context, ICLT
// Define build options
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_underlying_cl_type_from_data_type(dt));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(dt));
build_opts.add_option("-DBORDER_SIZE_TOP=" + support::cpp11::to_string(border_size.top));
build_opts.add_option("-DBORDER_SIZE_BOTTOM=" + support::cpp11::to_string(border_size.bottom));
build_opts.add_option("-DBORDER_SIZE_LEFT=" + support::cpp11::to_string(border_size.left));
@@ -191,3 +186,4 @@ void CLFillBorderKernel::run(const Window &window, cl::CommandQueue &queue)
}
while(collapsed.slide_window_slice_3D(slice));
}
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp
deleted file mode 100644
index 30dee7fe21..0000000000
--- a/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
-
-#include "arm_compute/core/AccessWindowStatic.h"
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLValidate.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
-#include "support/StringSupport.h"
-
-using namespace arm_compute;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *accum, const ITensorInfo *biases)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(accum);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum);
- ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() != 1);
-
- return Status{};
-}
-
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *accum, ITensorInfo *biases, GPUTarget gpu_target,
- unsigned int &num_elems_processed_per_iteration)
-{
- // Select the vector size to use (8 for Bifrost; 16 for Midgard).
- bool is_gpu_bifrost = gpu_target_is_in(gpu_target,
- GPUTarget::G71, GPUTarget::G72, GPUTarget::G76,
- GPUTarget::G51, GPUTarget::G51BIG, GPUTarget::G51LIT,
- GPUTarget::G52, GPUTarget::G52LIT);
- num_elems_processed_per_iteration = is_gpu_bifrost ? 8 : 16;
-
- // Configure kernel window
- Window win = calculate_max_window(*accum, Steps(num_elems_processed_per_iteration));
-
- AccessWindowStatic biases_access(biases, 0, 0, ceil_to_multiple(biases->dimension(0), num_elems_processed_per_iteration), biases->dimension(1));
- AccessWindowHorizontal accum_access(accum, 0, num_elems_processed_per_iteration);
-
- bool window_changed = update_window_and_padding(win, biases_access, accum_access);
-
- Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
- return std::make_pair(err, win);
-}
-} // namespace
-
-CLGEMMMatrixAccumulateBiasesKernel::CLGEMMMatrixAccumulateBiasesKernel()
- : _accum(nullptr), _biases(nullptr)
-{
-}
-
-void CLGEMMMatrixAccumulateBiasesKernel::configure(ICLTensor *accum, const ICLTensor *biases)
-{
- configure(CLKernelLibrary::get().get_compile_context(), accum, biases);
-}
-
-void CLGEMMMatrixAccumulateBiasesKernel::configure(const CLCompileContext &compile_context, ICLTensor *accum, const ICLTensor *biases)
-{
- // Perform validate step
- ARM_COMPUTE_ERROR_ON_NULLPTR(accum, biases);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(accum->info(), biases->info()));
-
- _biases = biases;
- _accum = accum;
-
- // Get the target gpu
- GPUTarget gpu_target = get_target();
- unsigned int vector_size = 0;
-
- // Configure kernel window
- auto win_config = validate_and_configure_window(accum->info(), biases->info(), gpu_target, vector_size);
- ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure_internal(win_config.second);
-
- // Add build options
- CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(accum->info()->data_type()));
- build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(vector_size));
-
- // Create kernel
- _kernel = create_kernel(compile_context, "gemm_accumulate_biases", build_opts.options());
-}
-
-Status CLGEMMMatrixAccumulateBiasesKernel::validate(const ITensorInfo *accum, const ITensorInfo *biases, GPUTarget gpu_target)
-{
- unsigned int num_elems_processed_per_iteration = 0;
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(accum, biases));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(accum->clone().get(), biases->clone().get(), gpu_target, num_elems_processed_per_iteration).first);
-
- return Status{};
-}
-
-void CLGEMMMatrixAccumulateBiasesKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
-
- Window accum_slice = window.first_slice_window_2D();
-
- Window biases_slice(accum_slice);
- biases_slice.set(Window::DimY, Window::Dimension(0, 1, 1));
-
- // Run kernel
- do
- {
- // Set arguments
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _accum, accum_slice);
- add_1D_tensor_argument(idx, _biases, biases_slice);
-
- enqueue(queue, *this, accum_slice, lws_hint());
- }
- while(window.slide_window_slice_2D(accum_slice));
-}
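
For reference, a hedged standalone sketch of the GPU-target heuristic the deleted kernel used to pick its vector size:

    // Hedged sketch of the removed heuristic: 8 elements per iteration on
    // Bifrost GPUs, 16 on Midgard and others.
    #include "arm_compute/core/GPUTarget.h"

    unsigned int accumulate_biases_vector_size(arm_compute::GPUTarget gpu_target)
    {
        using namespace arm_compute;
        const bool is_bifrost = gpu_target_is_in(gpu_target,
                                                 GPUTarget::G71, GPUTarget::G72, GPUTarget::G76,
                                                 GPUTarget::G51, GPUTarget::G51BIG, GPUTarget::G51LIT,
                                                 GPUTarget::G52, GPUTarget::G52LIT);
        return is_bifrost ? 8 : 16;
    }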
diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
index 02ed1a9df5..f52384593b 100644
--- a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
@@ -44,7 +44,7 @@ constexpr unsigned int num_rows_read_per_iteration = 4;
Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input0);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1);
ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(input0->data_type()) && (output->data_type() != DataType::S32));
ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(2) != input1->dimension(1));
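
The hunk above extends the quantized path to QASYMM8_SIGNED while keeping the rule that asymmetric-quantized inputs must accumulate into S32. A hedged sketch of picking a conforming output type:

    // Hedged sketch: quantized GEMV inputs accumulate into S32; float inputs
    // keep their own type.
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Utils.h"

    using namespace arm_compute;

    TensorInfo gemv_output_info(const TensorInfo &input, const TensorShape &out_shape)
    {
        const DataType out_dt = is_data_type_quantized_asymmetric(input.data_type()) ? DataType::S32 : input.data_type();
        return TensorInfo(out_shape, 1, out_dt);
    }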
diff --git a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp
index 9d8c42a50d..0da0d4ca1f 100644
--- a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp
@@ -25,22 +25,16 @@
#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Window.h"
-#include <set>
-#include <sstream>
-#include <string>
-
-using namespace arm_compute;
-
+namespace arm_compute
+{
CLLocallyConnectedMatrixMultiplyKernel::CLLocallyConnectedMatrixMultiplyKernel()
: _input0(nullptr), _input1(nullptr), _output(nullptr)
{
@@ -52,9 +46,7 @@ Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1,
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input0, input1, output);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input0);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1, output);
ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(0) != input1->dimension(1));
@@ -152,3 +144,4 @@ void CLLocallyConnectedMatrixMultiplyKernel::run(const Window &window, cl::Comma
}
while(window.slide_window_slice_2D(slice));
}
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLPadLayerKernel.cpp b/src/core/CL/kernels/CLPadLayerKernel.cpp
index eccb6c8b63..c05df61edf 100644
--- a/src/core/CL/kernels/CLPadLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPadLayerKernel.cpp
@@ -24,6 +24,7 @@
#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "support/StringSupport.h"
diff --git a/src/core/CL/kernels/CLStackLayerKernel.cpp b/src/core/CL/kernels/CLStackLayerKernel.cpp
index 718b7a299c..c283c440a3 100644
--- a/src/core/CL/kernels/CLStackLayerKernel.cpp
+++ b/src/core/CL/kernels/CLStackLayerKernel.cpp
@@ -27,10 +27,7 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Window.h"
@@ -98,7 +95,7 @@ void CLStackLayerKernel::configure(const CLCompileContext &compile_context, cons
// Add build options
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_underlying_cl_type_from_data_type(input->info()->data_type()));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
build_opts.add_option("-DAXIS=" + support::cpp11::to_string(axis));
build_opts.add_option("-DSRC_DIM2=" + support::cpp11::to_string(input->info()->dimension(2)));
build_opts.add_option("-DDST_DIM3=" + support::cpp11::to_string(output->info()->dimension(3)));
diff --git a/src/core/CL/kernels/CLUpsampleLayerKernel.cpp b/src/core/CL/kernels/CLUpsampleLayerKernel.cpp
index 27d4508e1d..101055001c 100644
--- a/src/core/CL/kernels/CLUpsampleLayerKernel.cpp
+++ b/src/core/CL/kernels/CLUpsampleLayerKernel.cpp
@@ -52,14 +52,19 @@ Status CLUpsampleLayerKernel::validate(const ITensorInfo *input, const ITensorIn
const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_width) != info.x() * input->dimension(idx_width));
- ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_height) != info.y() * input->dimension(idx_height));
+ ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
+
ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.x() != 2 || info.y() != 2, "Only stride 2 is supported");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(upsampling_policy != InterpolationPolicy::NEAREST_NEIGHBOR, "Only nearest neighbor policy supported");
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
+
+ if(output->total_size() != 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_width) != info.x() * input->dimension(idx_width));
+ ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_height) != info.y() * input->dimension(idx_height));
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
+ }
return Status{};
}
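
The reworked validate() above only checks the output once it has been configured (total_size() != 0), which lets callers validate before shape inference. A hedged usage sketch with an illustrative input shape:

    // Hedged sketch: an empty TensorInfo output defers the output checks.
    #include "arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    Status upsample_validate_sketch()
    {
        const TensorInfo src(TensorShape(8U, 8U, 3U), 1, DataType::F32);
        TensorInfo       dst{}; // total_size() == 0: output checks are skipped
        return CLUpsampleLayerKernel::validate(&src, &dst, Size2D(2, 2), InterpolationPolicy::NEAREST_NEIGHBOR);
    }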
@@ -126,7 +131,7 @@ void CLUpsampleLayerKernel::configure(const CLCompileContext &compile_context, c
// Create kernel
CLBuildOptions build_opts;
- build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
+ build_opts.add_option(("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->info()->element_size())));
build_opts.add_option_if(multi_access_x, "-DVEC_SIZE_IN=" + support::cpp11::to_string(_num_elems_processed_per_iteration_input_x));
build_opts.add_option_if(multi_access_x, "-DVEC_SIZE_OUT=" + support::cpp11::to_string(num_elems_processed_per_iteration_x));
build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X_IN=" + support::cpp11::to_string(std::max<int>(_input->info()->dimension(0) - _num_elems_processed_per_iteration_input_x, 0)));
diff --git a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp
index 91db5b17c2..c87290d04c 100644
--- a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp
+++ b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp
@@ -28,11 +28,8 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
-#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/helpers/tensor_info.h"
@@ -110,7 +107,7 @@ void CLWidthConcatenate2TensorsKernel::configure(const CLCompileContext &compile
// Add build options
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_underlying_cl_type_from_data_type(input1->info()->data_type()));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input1->info()->data_type()));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input1->info()->dimension(2)));
build_opts.add_option("-DINPUT1_WIDTH=" + support::cpp11::to_string(input1->info()->dimension(0)));
diff --git a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp
index d8afd1db2d..84744a68aa 100644
--- a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp
+++ b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp
@@ -28,11 +28,7 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/IAccessWindow.h"
-#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/helpers/tensor_info.h"
@@ -131,7 +127,7 @@ void CLWidthConcatenate4TensorsKernel::configure(const CLCompileContext &compile
// Add build options
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_underlying_cl_type_from_data_type(input1->info()->data_type()));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input1->info()->data_type()));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input1->info()->dimension(2)));
build_opts.add_option("-DINPUT1_WIDTH=" + support::cpp11::to_string(input1->info()->dimension(0)));
diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
index db996ebe43..20cb962b7e 100644
--- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
@@ -27,10 +27,7 @@
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Window.h"
@@ -38,8 +35,6 @@
#include "support/StringSupport.h"
-#include <map>
-
namespace arm_compute
{
namespace
@@ -106,7 +101,7 @@ void CLWidthConcatenateLayerKernel::configure(const CLCompileContext &compile_co
// Add build options
CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_underlying_cl_type_from_data_type(input->info()->data_type()));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
build_opts.add_option("-DWIDTH_OFFSET=" + support::cpp11::to_string(_width_offset));
build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input->info()->dimension(2)));
diff --git a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
index a36e7dba22..e30b1dbb86 100644
--- a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
+++ b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
@@ -28,9 +28,7 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
-#include <cmath>
#include <memory>
-#include <tuple>
namespace arm_compute
{
@@ -275,6 +273,7 @@ Status CLLSTMLayerQuantized::validate(const ITensorInfo *input,
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_input_weights,
recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights, input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in,
output_state_in, cell_state_out, output_state_out);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(input, DataType::QASYMM8);
const int input_size = input->dimension(0);
const int batch_size = input->dimension(1);
diff --git a/src/runtime/CL/functions/CLRNNLayer.cpp b/src/runtime/CL/functions/CLRNNLayer.cpp
index 579c917d50..666e7b0786 100644
--- a/src/runtime/CL/functions/CLRNNLayer.cpp
+++ b/src/runtime/CL/functions/CLRNNLayer.cpp
@@ -29,9 +29,8 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include <utility>
-
-using namespace arm_compute;
+namespace arm_compute
+{
using namespace arm_compute::misc::shape_calculator;
CLRNNLayer::CLRNNLayer(std::shared_ptr<IMemoryManager> memory_manager)
@@ -43,9 +42,13 @@ CLRNNLayer::CLRNNLayer(std::shared_ptr<IMemoryManager> memory_manager)
Status CLRNNLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *recurrent_weights, const ITensorInfo *bias, const ITensorInfo *hidden_state,
const ITensorInfo *output, const ActivationLayerInfo &info)
{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(input, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, recurrent_weights, bias, hidden_state, output);
+
const int idx_width = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
const int idx_height = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state, output);
+
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_width) != weights->dimension(idx_width));
ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_height) != recurrent_weights->dimension(idx_width));
ARM_COMPUTE_RETURN_ERROR_ON(recurrent_weights->dimension(idx_width) != recurrent_weights->dimension(1));
@@ -132,3 +135,4 @@ void CLRNNLayer::prepare()
_is_prepared = true;
}
}
+} // namespace arm_compute
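
The reordering above runs the nullptr and data-type checks before any dereference of the inputs; a hedged sketch of that validate-early idiom:

    // Hedged sketch: check pointers and types before querying layout-dependent
    // properties of the tensors.
    #include "arm_compute/core/Helpers.h"
    #include "arm_compute/core/Validate.h"

    using namespace arm_compute;

    Status validate_early_sketch(const ITensorInfo *input, const ITensorInfo *weights)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights);
        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(input, DataType::F16, DataType::F32);
        // Only now is it safe to dereference input for its layout.
        const int idx_width = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
        ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_width) != weights->dimension(idx_width));
        return Status{};
    }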
diff --git a/src/runtime/CL/functions/CLReductionOperation.cpp b/src/runtime/CL/functions/CLReductionOperation.cpp
index 4166c3230b..54e91fb8d8 100644
--- a/src/runtime/CL/functions/CLReductionOperation.cpp
+++ b/src/runtime/CL/functions/CLReductionOperation.cpp
@@ -24,22 +24,19 @@
#include "arm_compute/runtime/CL/functions/CLReductionOperation.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h"
-#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/Utils.h"
#include "support/MemorySupport.h"
namespace arm_compute
{
CLReductionOperation::CLReductionOperation(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _results_vector(), _reduction_kernels_vector(), _border_handlers_vector(), _reshape(), _op(), _num_of_stages(), _reduction_axis(), _is_serial(),
+ : _memory_group(std::move(memory_manager)), _results_vector(), _reduction_kernels_vector(), _border_handlers_vector(), _reshape(), _num_of_stages(), _reduction_axis(), _is_serial(),
_is_reshape_required(false)
{
}
@@ -197,7 +194,6 @@ void CLReductionOperation::configure(ICLTensor *input, ICLTensor *output, unsign
void CLReductionOperation::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, bool keep_dims)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- _op = op;
_num_of_stages = calculate_number_of_stages_only_x_axis(input->info()->dimension(0), axis);
_reduction_axis = axis;
_is_serial = needs_serialized_reduction(op, input->info()->data_type(), axis);
@@ -259,65 +255,13 @@ void CLReductionOperation::configure(const CLCompileContext &compile_context, IC
first_kernel_op = ReductionOperation::MIN;
intermediate_kernel_op = ReductionOperation::MIN;
last_kernel_op = ReductionOperation::MIN;
- switch(input->info()->data_type())
- {
- case DataType::F32:
- {
- pixelValue = PixelValue(std::numeric_limits<float>::max());
- break;
- }
- case DataType::F16:
- {
- pixelValue = PixelValue(static_cast<half>(65504.0f));
- break;
- }
- case DataType::QASYMM8:
- {
- pixelValue = std::get<1>(get_min_max(input->info()->data_type()));
- break;
- }
- case DataType::QASYMM8_SIGNED:
- {
- pixelValue = PixelValue(127, input->info()->data_type(), input->info()->quantization_info());
- break;
- }
- default:
- {
- ARM_COMPUTE_ERROR("Unsupported DataType");
- }
- }
+ pixelValue = std::get<1>(get_min_max(input->info()->data_type()));
break;
case ReductionOperation::MAX:
first_kernel_op = ReductionOperation::MAX;
intermediate_kernel_op = ReductionOperation::MAX;
last_kernel_op = ReductionOperation::MAX;
- switch(input->info()->data_type())
- {
- case DataType::F32:
- {
- pixelValue = PixelValue(-std::numeric_limits<float>::max());
- break;
- }
- case DataType::F16:
- {
- pixelValue = PixelValue(static_cast<half>(-65504.0f));
- break;
- }
- case DataType::QASYMM8:
- {
- pixelValue = std::get<0>(get_min_max(input->info()->data_type()));
- break;
- }
- case DataType::QASYMM8_SIGNED:
- {
- pixelValue = PixelValue(-128, input->info()->data_type(), input->info()->quantization_info());
- break;
- }
- default:
- {
- ARM_COMPUTE_ERROR("Unsupported DataType");
- }
- }
+ pixelValue = std::get<0>(get_min_max(input->info()->data_type()));
break;
default:
ARM_COMPUTE_ERROR("Not supported");
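
The per-type switches deleted above collapse into get_min_max(), which already yields the right identity element for each supported data type; a hedged sketch of the resulting selection:

    // Hedged sketch: MIN reductions start from the type's maximum value,
    // MAX reductions from its minimum.
    #include "arm_compute/core/PixelValue.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/core/Utils.h"
    #include <tuple>

    using namespace arm_compute;

    PixelValue reduction_identity(ReductionOperation op, DataType dt)
    {
        return (op == ReductionOperation::MIN) ? std::get<1>(get_min_max(dt))
                                               : std::get<0>(get_min_max(dt));
    }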