Diffstat (limited to 'arm_compute/runtime')
-rw-r--r--  arm_compute/runtime/CL/functions/CLConvolutionLayer.h           |  9
-rw-r--r--  arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h       | 22
-rw-r--r--  arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h | 52
3 files changed, 52 insertions(+), 31 deletions(-)
diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
index 04ce1cf635..8dfb6c86c0 100644
--- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
@@ -78,7 +78,8 @@ public:
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
* Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
* @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
@@ -98,7 +99,8 @@ public:
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported:Same as @p input.
* @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
@@ -120,7 +122,8 @@ public:
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
* @param[in] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
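The hunks above widen the documented weights type for CLConvolutionLayer to QASYMM8/QSYMM8_PER_CHANNEL when the input is QASYMM8. As a hedged illustration of that contract (not part of this patch), the sketch below configures the layer with a QASYMM8 input and per-channel weights; every shape, scale, and offset is invented for the example:

```cpp
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"

#include <vector>

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    // QASYMM8 input of shape [width, height, IFM]; values are illustrative.
    CLTensor input;
    input.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::QASYMM8,
                                       QuantizationInfo(0.5f, 10)));

    // QSYMM8_PER_CHANNEL weights [kernel_x, kernel_y, IFM, OFM]: one scale per OFM.
    const std::vector<float> per_channel_scales{ 0.25f, 0.5f, 0.75f, 1.0f };
    CLTensor weights;
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U, 4U), 1,
                                         DataType::QSYMM8_PER_CHANNEL,
                                         QuantizationInfo(per_channel_scales)));

    // S32 biases, as the updated docs require for a QASYMM8 input.
    CLTensor biases;
    biases.allocator()->init(TensorInfo(TensorShape(4U), 1, DataType::S32));

    CLTensor output;
    output.allocator()->init(TensorInfo(TensorShape(32U, 32U, 4U), 1, DataType::QASYMM8,
                                        QuantizationInfo(1.0f, 0)));

    // 3x3 convolution, stride 1, padding 1.
    CLConvolutionLayer conv;
    conv.configure(&input, &weights, &biases, &output, PadStrideInfo(1, 1, 1, 1));

    for(auto *t : { &input, &weights, &biases, &output })
    {
        t->allocator()->allocate();
    }

    conv.run();
    CLScheduler::get().sync();
    return 0;
}
```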
diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
index 017bf78938..3392f11b06 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
@@ -60,7 +60,7 @@ public:
/** Set the input and output tensors.
*
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: QASYMM8/F16/F32.
+ * Data type supported: QASYMM8/QSYMM8_PER_CHANNEL/F16/F32.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
* @param[out] output Destination tensor. Data types supported: Same as @p weights.
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
@@ -69,7 +69,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLConvolutionLayerReshapeWeights
*
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: QASYMM8/F16/F32.
+ * Data type supported: QASYMM8/QSYMM8_PER_CHANNEL/F16/F32.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
* @param[in] output Destination tensor. Data types supported: Same as @p weights.
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
@@ -168,7 +168,8 @@ public:
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
* Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
* @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
@@ -187,7 +188,8 @@ public:
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
* Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
* @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
@@ -212,7 +214,7 @@ private:
/** Configures the appropriate matrix multiply routine
*
* @param[in] input Input tensor. Data types supported: QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. Data type supported: Same as @p input.
+ * @param[in] weights Weights tensor. Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
* Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
* @param[in, out] output Output tensor. Data types supported: Same as @p input,
@@ -225,12 +227,12 @@ private:
const ActivationLayerInfo &act_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer matrix multiply routines
*
- * @param[in] input Input tensor. Data types supported: QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. Data type supported: Same as @p input.
- * @param[in] output Output tensor. Data types supported: Same as @p input,
- * except for input of QASYMM8 type where output should be of S32 type.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+ * @param[in] input Input tensor info. Data types supported: QASYMM8/F16/F32.
+ * @param[in] weights Weights tensor info. Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+ * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
* Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input,
+ * except for input of QASYMM8 type where output should be of S32 type.
* @param[in] gemmlowp_output_stage GEMMLowp output stage info
* @param[in] gemm_3d_depth Depth of GEMM 3D
* @param[in] skip_im2col Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout.
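Outside the patch itself, the updated validate() documentation above suggests a pre-flight check along the following lines. This is a minimal sketch under the same assumptions as the previous example; per_channel_config_is_valid is a hypothetical helper and all tensor parameters are illustrative:

```cpp
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"

#include <iostream>
#include <vector>

using namespace arm_compute;

// Hypothetical helper: checks a QASYMM8 input + QSYMM8_PER_CHANNEL weights
// configuration statically, without allocating any CL resources.
bool per_channel_config_is_valid()
{
    const std::vector<float> scales{ 0.25f, 0.5f, 0.75f, 1.0f };

    const TensorInfo input(TensorShape(32U, 32U, 16U), 1, DataType::QASYMM8, QuantizationInfo(0.5f, 10));
    const TensorInfo weights(TensorShape(3U, 3U, 16U, 4U), 1, DataType::QSYMM8_PER_CHANNEL, QuantizationInfo(scales));
    const TensorInfo biases(TensorShape(4U), 1, DataType::S32);
    const TensorInfo output(TensorShape(32U, 32U, 4U), 1, DataType::QASYMM8, QuantizationInfo(1.0f, 0));

    const Status status = CLGEMMConvolutionLayer::validate(&input, &weights, &biases, &output,
                                                           PadStrideInfo(1, 1, 1, 1));
    if(status.error_code() != ErrorCode::OK)
    {
        std::cerr << status.error_description() << std::endl;
        return false;
    }
    return true;
}
```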
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
index 6aacbf6abd..b364653a36 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
@@ -24,6 +24,7 @@
#ifndef __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCORE_H__
#define __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCORE_H__
+#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
@@ -49,6 +50,7 @@ class ICLTensor;
* -# @ref CLGEMMLowpMatrixBReductionKernel (if the offset of matrix A is not 0)
* -# @ref CLGEMMLowpOffsetContributionKernel (if gemm_info.gemmlowp_output_stage == NONE)
* -# @ref CLGEMMLowpOffsetContributionOutputStageKernel (if gemm_info.gemmlowp_output_stage != NONE)
+ * -# @ref CLDepthConvertLayerKernel
*
*/
class CLGEMMLowpMatrixMultiplyCore : public IFunction
@@ -84,10 +86,10 @@ public:
void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info = GEMMInfo());
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyCore
*
- * @param[in] a First input tensor (Matrix A). Data type supported: QASYMM8.
- * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a
- * @param[in] c Third input tensor (Matrix C). It can be a nullptr. Data type supported: S32
- * @param[in] output Output tensor. Data type supported: S32 or QASYMM8 if gemm_info.gemmlowp_output_stage != NONE
+ * @param[in] a First input tensor info (Matrix A). Data type supported: QASYMM8.
+ * @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a
+ * @param[in] c Third input tensor info (Matrix C). It can be a nullptr. Data type supported: S32
+ * @param[in] output Output tensor info. Data type supported: S32 or QASYMM8 if gemm_info.gemmlowp_output_stage != NONE
* @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
* if the reshape of matrix B should be executed only for the first run
*
@@ -100,7 +102,10 @@ public:
void prepare() override;
private:
-    MemoryGroup                                   _memory_group;
+    MemoryGroup _memory_group;
+
+    // Kernels used
+    CLDepthConvertLayerKernel                     _weights_to_qasymm8;
     CLGEMMLowpMatrixMultiplyKernel                _mm_midgard_kernel;
     CLGEMMLowpMatrixMultiplyNativeKernel          _mm_native_kernel;
     CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel _mm_reshaped_only_rhs_kernel;
@@ -109,18 +114,29 @@
     CLGEMMLowpMatrixBReductionKernel              _mtx_b_reduction_kernel;
     CLGEMMLowpOffsetContributionKernel            _offset_contribution_kernel;
     CLGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel;
-    CLTensor                                      _vector_sum_col;
-    CLTensor                                      _vector_sum_row;
-    CLTensor                                      _tmp_b;
-    CLTensor                                      _mm_result_s32;
-    const ICLTensor                              *_original_b;
-    int32_t                                       _a_offset;
-    int32_t                                       _b_offset;
-    bool                                          _is_gemm_reshaped;
-    bool                                          _is_midgard;
-    bool                                          _reshape_b_only_on_first_run;
-    bool                                          _is_prepared;
-    bool                                          _fuse_output_stage;
+
+    // Temporary tensors
+    CLTensor _qasymm8_weights;
+    CLTensor _vector_sum_col;
+    CLTensor _vector_sum_row;
+    CLTensor _tmp_b;
+    CLTensor _mm_result_s32;
+    CLTensor _gemm_output_stage_multipliers;
+    CLTensor _gemm_output_stage_shifts;
+
+    // Tensor pointers
+    const ICLTensor *_matrix_a;
+    const ICLTensor *_original_b;
+    const ICLTensor *_output;
+
+    int32_t _a_offset;
+    int32_t _b_offset;
+    bool    _is_gemm_reshaped;
+    bool    _is_midgard;
+    bool    _reshape_b_only_on_first_run;
+    bool    _is_prepared;
+    bool    _fuse_output_stage;
+    bool    _convert_to_qasymm8;
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCORE_H__ */
\ No newline at end of file
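As a final hedged sketch (again, not from the patch): the new _weights_to_qasymm8 kernel and _convert_to_qasymm8 flag suggest that matrix B may now carry QSYMM8_PER_CHANNEL data that the function converts to QASYMM8 internally. Assuming that path, a caller would configure the function in the usual way; CLGEMMLowpMatrixMultiplyCore::validate() remains the authoritative check for any given combination. All shapes and scales below are invented:

```cpp
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"

#include <vector>

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    // Matrix A (M x K) in QASYMM8; shape is [K, M] in tensor-dimension order.
    CLTensor a;
    a.allocator()->init(TensorInfo(TensorShape(64U, 16U), 1, DataType::QASYMM8,
                                   QuantizationInfo(0.5f, 10)));

    // Matrix B (K x N) in QSYMM8_PER_CHANNEL, one scale per output column;
    // assumed to be converted to QASYMM8 internally by the new
    // CLDepthConvertLayerKernel stage listed above.
    const std::vector<float> scales(32U, 0.25f);
    CLTensor b;
    b.allocator()->init(TensorInfo(TensorShape(32U, 64U), 1, DataType::QSYMM8_PER_CHANNEL,
                                   QuantizationInfo(scales)));

    // S32 output, since gemm_info.gemmlowp_output_stage defaults to NONE.
    CLTensor output;
    output.allocator()->init(TensorInfo(TensorShape(32U, 16U), 1, DataType::S32));

    CLGEMMLowpMatrixMultiplyCore gemmlowp;
    gemmlowp.configure(&a, &b, nullptr, &output); // c may be nullptr, per the docs above

    for(auto *t : { &a, &b, &output })
    {
        t->allocator()->allocate();
    }

    gemmlowp.run();
    CLScheduler::get().sync();
    return 0;
}
```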