aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2017-07-04 12:47:17 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-09-17 14:15:39 +0100
commit21efeb4491feab09dc246f4da0023d7ca79b1d32 (patch)
tree2fbacd7676d13a3ec7024e517acf2e462355275a /src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
parent368da83fdd7406d629e8cca64f3eb0af05437419 (diff)
downloadComputeLibrary-21efeb4491feab09dc246f4da0023d7ca79b1d32.tar.gz
COMPMID-417: DepthConvert NEON for QS8/QS16.
Change-Id: Ieb120bccf146045b3a0001ceb3893d4e67fd19df Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79763 Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com> Reviewed-by: Steven Niu <steven.niu@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp')
-rw-r--r--src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp8
1 files changed, 4 insertions, 4 deletions
diff --git a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
index b81be6cee9..8381dd8a73 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
@@ -456,7 +456,7 @@ void vector_matrix_multiply_qs8(const ITensor *input0, const ITensor *input1, IT
// Multiply by the weight of the matrix product (alpha)
if(multiply_alpha)
{
- const qint8x8_t alpha_qs8 = vdup_n_qs8(scvt_qs8_f32(alpha, fixed_point_position));
+ const qint8x8_t alpha_qs8 = vdup_n_qs8(sqcvt_qs8_f32(alpha, fixed_point_position));
acc00_qs8 = vqmul_qs8(acc00_qs8, alpha_qs8, fixed_point_position);
acc01_qs8 = vqmul_qs8(acc01_qs8, alpha_qs8, fixed_point_position);
acc02_qs8 = vqmul_qs8(acc02_qs8, alpha_qs8, fixed_point_position);
@@ -585,7 +585,7 @@ void vector_matrix_multiply_qs16(const ITensor *input0, const ITensor *input1, I
// Multiply by the weight of the matrix product (alpha)
if(multiply_alpha)
{
- const qint16x4_t alpha_qs16 = vdup_n_qs16(scvt_qs16_f32(alpha, fixed_point_position));
+ const qint16x4_t alpha_qs16 = vdup_n_qs16(sqcvt_qs16_f32(alpha, fixed_point_position));
acc00_qs16 = vqmul_qs16(acc00_qs16, alpha_qs16, fixed_point_position);
acc01_qs16 = vqmul_qs16(acc01_qs16, alpha_qs16, fixed_point_position);
acc02_qs16 = vqmul_qs16(acc02_qs16, alpha_qs16, fixed_point_position);
@@ -1058,7 +1058,7 @@ void matrix_matrix_multiply_qs8(const ITensor *input0, const ITensor *input1, IT
const size_t out_stride3 = out_stride1 * 3;
const int num_elems_matrix_b_x = input1->info()->dimension(0);
const int fixed_point_position = input0->info()->fixed_point_position();
- const qint8x8_t alpha_qs8 = vdup_n_qs8(scvt_qs8_f32(alpha, fixed_point_position));
+ const qint8x8_t alpha_qs8 = vdup_n_qs8(sqcvt_qs8_f32(alpha, fixed_point_position));
ARM_COMPUTE_UNUSED(alpha_qs8);
// Set step_x and step_y for matrix A. Scale by a factor of 4 the Y range as the input interleaved matrix A has 4 times less the rows of the output matrix
@@ -1291,7 +1291,7 @@ void matrix_matrix_multiply_qs16(const ITensor *input0, const ITensor *input1, I
const size_t out_stride3 = out_stride1 * 3;
const int num_elems_matrix_b_x = input1->info()->dimension(0);
const int fixed_point_position = input0->info()->fixed_point_position();
- const qint16x4_t alpha_qs16 = vdup_n_qs16(scvt_qs16_f32(alpha, fixed_point_position));
+ const qint16x4_t alpha_qs16 = vdup_n_qs16(sqcvt_qs16_f32(alpha, fixed_point_position));
ARM_COMPUTE_UNUSED(alpha_qs16);
// Set step_x and step_y for matrix A. Scale by a factor of 4 the Y range as the input interleaved matrix A has 4 times less the rows of the output matrix