aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
diff options
context:
space:
mode:
Diffstat (limited to 'arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h')
-rw-r--r--arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h101
1 files changed, 51 insertions, 50 deletions
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
index 6ac3cefb76..1b8e5dcc1d 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 ARM Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,21 +24,19 @@
#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCORE_H
#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCORE_H
-#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "arm_compute/function_info/GEMMInfo.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
class IMemoryManager;
class ICLTensor;
+class ITensorInfo;
/** Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL. */
class CLGEMMLowpMatrixMultiplyCore : public IFunction
@@ -54,13 +52,35 @@ public:
CLGEMMLowpMatrixMultiplyCore &operator=(const CLGEMMLowpMatrixMultiplyCore &) = delete;
/** Default move assignment operator */
CLGEMMLowpMatrixMultiplyCore &operator=(CLGEMMLowpMatrixMultiplyCore &&) = default;
+ /** Default destructor */
+ ~CLGEMMLowpMatrixMultiplyCore();
/** Initialise the kernel's inputs, output
*
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:--------|:--------------|
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QASYMM8 |S32 |S32 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |S32 |
+ * |QASYMM8 |QSYMM8 |S32 |S32 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8 |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |S32 |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |S32 |
+ * |QASYMM8_SIGNED |QSYMM8 |S32 |S32 |
+ *
* @note GEMMLowp: low precision GEMM kernel. [A * B + C]
* This kernel performs the following computations:
*
- * -# Convert a values from QASYMM8 to int32 and add a_offset to each of them.
- * -# Convert b values from QASYMM8 to int32 and add b_offset to each of them.
+ * -# Convert a values from 8-bit quantized to int32 and add a_offset to each of them.
+ * -# Convert b values from 8-bit quantized to int32 and add b_offset to each of them.
* -# Compute the matrix product of the resulting a * b in int32.
* -# Quantize to uint8 if gemm_info.gemmlowp_output_stage != NONE
*
@@ -71,14 +91,18 @@ public:
* @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
* if the reshape of matrix B should be executed only for the first run
*/
- void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info = GEMMInfo());
+ void configure(const ICLTensor *a,
+ const ICLTensor *b,
+ const ICLTensor *c,
+ ICLTensor *output,
+ const GEMMInfo &gemm_info = GEMMInfo());
/** Initialise the kernel's inputs, output
*
* @note GEMMLowp: low precision GEMM kernel. [A * B + C]
* This kernel performs the following computations:
*
- * -# Convert a values from QASYMM8 to int32 and add a_offset to each of them.
- * -# Convert b values from QASYMM8 to int32 and add b_offset to each of them.
+ * -# Convert a values from 8-bit quantized to int32 and add a_offset to each of them.
+ * -# Convert b values from 8-bit quantized to int32 and add b_offset to each of them.
* -# Compute the matrix product of the resulting a * b in int32.
* -# Quantize to uint8 if gemm_info.gemmlowp_output_stage != NONE
*
@@ -90,7 +114,12 @@ public:
* @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
* if the reshape of matrix B should be executed only for the first run
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info = GEMMInfo());
+ void configure(const CLCompileContext &compile_context,
+ const ICLTensor *a,
+ const ICLTensor *b,
+ const ICLTensor *c,
+ ICLTensor *output,
+ const GEMMInfo &gemm_info = GEMMInfo());
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyCore
*
* @param[in] a First input tensor info (Matrix A). Data type supported: QASYMM8.
@@ -102,47 +131,19 @@ public:
*
* @return a status
*/
- static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, const GEMMInfo &gemm_info = GEMMInfo());
+ static Status validate(const ITensorInfo *a,
+ const ITensorInfo *b,
+ const ITensorInfo *c,
+ const ITensorInfo *output,
+ const GEMMInfo &gemm_info = GEMMInfo());
// Inherited methods overridden:
void run() override;
void prepare() override;
private:
- MemoryGroup _memory_group;
-
- // Kernels used
- CLDepthConvertLayerKernel _weights_to_qasymm8;
- CLGEMMLowpMatrixMultiplyNativeKernel _mm_native_kernel;
- CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel _mm_reshaped_only_rhs_kernel;
- CLGEMMReshapeRHSMatrixKernel _mtx_b_reshape_kernel;
- CLGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel;
- CLGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel;
- CLGEMMLowpOffsetContributionKernel _offset_contribution_kernel;
- CLGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel;
-
- // Temporary tensors
- CLTensor _qasymm8_weights;
- CLTensor _vector_sum_col;
- CLTensor _vector_sum_row;
- CLTensor _tmp_b;
- CLTensor _mm_result_s32;
- CLTensor _gemm_output_stage_multipliers;
- CLTensor _gemm_output_stage_shifts;
-
- // Tensor pointers
- const ICLTensor *_matrix_a;
- const ICLTensor *_original_b;
- const ICLTensor *_output;
-
- int32_t _a_offset;
- int32_t _b_offset;
- bool _is_gemm_reshaped;
- bool _reshape_b_only_on_first_run;
- bool _is_prepared;
- bool _run_output_stage;
- bool _convert_to_qasymm8;
- bool _run_offset_contribution;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCORE_H */ \ No newline at end of file
+#endif /*ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCORE_H */