about | summary | refs | log | tree | commit | diff
path: root/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h
diff options
context:
space:
mode:
author Gian Marco Iodice <gianmarco.iodice@arm.com> 2017-10-09 15:05:40 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:35:24 +0000
commit ab18212dd287cc0ec9b7c1a2c72455fe75ebd13d (patch)
tree f802205d85785da671ddd1949ba61b9dc36a3035 /arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h
parent ed194b1fbec6627896c5c12f74460b9142b98f7d (diff)
download ComputeLibrary-ab18212dd287cc0ec9b7c1a2c72455fe75ebd13d.tar.gz
COMPMID-616 - Optimizing GEMMLowp on NEON intrinsics
Change-Id: Ibbeff5d37249b6e8fc34ad496035a1511c9da5a3
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/94072
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Diffstat (limited to 'arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h')
-rw-r--r-- arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h 29
1 files changed, 9 insertions, 20 deletions
diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h
index f526d213cc..670274b8f3 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h
@@ -35,12 +35,9 @@ class ITensor;
* @note @ref NEGEMMLowpMatrixMultiplyKernel low precision matrix product kernel
* This kernel performs the following computation:
*
- * -# Convert a values from uint8 to int32 and add a_offset to each of them.
- * -# Convert b values from uint8 to int32 and add b_offset to each of them.
- * -# Compute the int32 matrix product of the resulting a * b.
- * -# Add output_offset to each entry of the result.
- * -# Multiply each entry of the result and round to the nearest integer
- * -# Clamp the resulting int32 values to the [0..255] range and cast to uint8.
+ * -# Convert a values from uint8 to int32
+ * -# Convert b values from uint8 to int32
+ * -# Compute the int32 matrix product of the resulting a * b and store the result as int32
*
*/
class NEGEMMLowpMatrixMultiplyKernel : public INEKernel
@@ -61,16 +58,12 @@ public:
* The input matrices @p input0 and @p input1 must be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel. These two
* kernels change the layout of the original matrices to be more cache-friendly.
*
- * @param[in] input0 Input tensor containing the interleaved Matrix A. Data type supported: U8
- * @param[in] input1 Input tensor containing the transposed Matrix B. Data type supported: same as @p input0
- * @param[out] output Output tensor to store the result of matrix multiplication, Data type supported: same as @p input0
- * @param[in] a_offset Offset to be added to each element of the matrix A.
- * @param[in] b_offset Offset to be added to each element of the matrix B.
- * @param[in] output_offset Offset to be added to each element of the output matrix
- * @param[in] output_mult_int Value to be multipied to each entry of the result.
- * @param[in] shift Number of bits to shift right the result.
+ * @param[in] input0 Input tensor containing the interleaved Matrix A. Data type supported: U8
+ * @param[in] input1 Input tensor containing the transposed Matrix B. Data type supported: same as @p input0
+ * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32
*/
- void configure(const ITensor *input0, const ITensor *input1, ITensor *output, int32_t a_offset, int32_t b_offset, int32_t output_offset, int32_t output_mult_int, int32_t shift);
+ void configure(const ITensor *input0, const ITensor *input1, ITensor *output);
+
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
@@ -78,11 +71,7 @@ private:
const ITensor *_input0;
const ITensor *_input1;
ITensor *_output;
- int32_t _a_offset;
- int32_t _b_offset;
- int32_t _output_offset;
- int32_t _output_mult_int;
- int32_t _shift;
+ bool _slide_matrix_b;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H__*/
\ No newline at end of file