From 05288a2b871ef99f544771621c3bba409b2f70df Mon Sep 17 00:00:00 2001
From: Gian Marco
Date: Tue, 21 Nov 2017 10:57:50 +0000
Subject: COMPMID-697 - Rework GEMMLowp interface on OpenCL

Reworked the GEMMLowp interface in order to make the integration in
Android NN easier

- Added support for different output stages
- Added validation for both matrix multiplication and output stage
- Added bounded relu support in the output stage
- Added int32_t bias support
- Added optimized path for the vector-by-matrix case

This rework is required for:
- Quantized convolution
- Quantized fully connected

Change-Id: I512283d406099cf8c614dd89d0a97ed411143afc
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110625
Reviewed-by: Georgios Pinitas
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com
---
 arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h            | 4 ++--
 .../core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.h         | 2 +-
 arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h                     | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arm_compute/core/NEON/kernels')

diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h
index 27cb3f2c1c..989260de11 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h
@@ -30,9 +30,9 @@ namespace arm_compute
 {
 class ITensor;
 
-/* NEON kernel used to add the offset contribution after @ref NEGEMMLowpMatrixMultiplyKernel. The computation is performed in-place
+/** NEON kernel used to add the offset contribution after @ref NEGEMMLowpMatrixMultiplyKernel. The computation is performed in-place
  *
- * This kernel takes a final int32 accumulator value (the output of @NEGEMMLowpMatrixMultiplyKernel),
+ * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel),
  * and adds to it the offset contribution of matrix A and matrix B in-place.
  *
  * The final result is:
diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.h
index 7684350c0f..a522069330 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.h
@@ -30,7 +30,7 @@ namespace arm_compute
 {
 class ITensor;
 
-/* NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8
+/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8
  *
  * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8 value.
  * The following computations will be performed by the kernel:
diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h
index 9ca5cdf828..50d8b4070e 100644
--- a/arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h
+++ b/arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h
@@ -47,7 +47,7 @@ public:
 
     /** Initialise the kernel's input and output.
      *
-     * @param[in]  input       Input tensor. Data type supported: S8
+     * @param[in]  input       Input tensor. Data type supported: QASYMM8
     * @param[out] output      Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32
     * @param[in]  k           Number of matrix A columns (or matrix B rows)
     * @param[in]  is_reshaped True if the input tensor has been reshaped
-- 
cgit v1.2.1
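
Below is a minimal usage sketch of the reworked two-stage GEMMLowp flow that this commit describes: a low-precision matrix multiplication producing int32 accumulators, followed by a separate output stage that adds an int32 bias and quantizes down to QASYMM8, with min/max clamping standing in for the bounded relu. It assumes the 17.12-era OpenCL class names and configure() signatures (CLGEMMLowpMatrixMultiplyCore and CLGEMMLowpQuantizeDownInt32ToUint8Scale); header paths, shapes, quantization parameters and the requantization constants are illustrative placeholders, not values taken from the patch. Both functions also gained static validate() entry points in this rework, which can be called with the corresponding ITensorInfo objects before configure().

#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init();

    const int M = 4, N = 64, K = 128; // placeholder GEMM dimensions

    // QASYMM8 inputs; the a/b quantization offsets are carried by the tensors' QuantizationInfo
    CLTensor a, b, acc, bias, dst;
    a.allocator()->init(TensorInfo(TensorShape(K, M), 1, DataType::QASYMM8, QuantizationInfo(0.5f, 2)));
    b.allocator()->init(TensorInfo(TensorShape(N, K), 1, DataType::QASYMM8, QuantizationInfo(0.25f, 3)));
    acc.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::S32));  // int32 accumulators
    bias.allocator()->init(TensorInfo(TensorShape(N), 1, DataType::S32));    // int32 bias, one value per output column
    dst.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::QASYMM8));

    // Stage 1: matrix multiplication (offset contribution included), int32 result
    CLGEMMLowpMatrixMultiplyCore mm;
    mm.configure(&a, &b, &acc);

    // Stage 2: add the bias and quantize down to QASYMM8; the min/max bounds clamp the
    // result and can express a bounded relu (all constants below are placeholders)
    CLGEMMLowpQuantizeDownInt32ToUint8Scale output_stage;
    output_stage.configure(&acc, &bias, &dst,
                           /*result_offset=*/-10, /*result_mult_int=*/2, /*result_shift=*/8,
                           /*min=*/0, /*max=*/255);

    for(auto t : {&a, &b, &acc, &bias, &dst})
    {
        t->allocator()->allocate();
    }

    // ...fill a, b and bias with quantized data...

    mm.run();
    output_stage.run();
    CLScheduler::get().sync();
    return 0;
}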