author    Gian Marco <gianmarco.iodice@arm.com>    2017-11-28 09:10:03 +0000
committer Anthony Barbier <anthony.barbier@arm.com>    2018-11-02 16:41:58 +0000
commit    58c5794b917dae10ff115dd85ec69e2ca41136c1 (patch)
tree      f2cea2d94e6566be720256dc6105056798723699 /src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
parent    754e9526a7caf50876c2db9563dc72f096093b34 (diff)
download  ComputeLibrary-58c5794b917dae10ff115dd85ec69e2ca41136c1.tar.gz
COMPMID-706 - Add GEMMLowp output stage for scaling by a fixed point number
DoD:
- Implement NEON kernel for quantizing down the gemmlowp result. The result should be scaled by a fixed point number
- Implement OpenCL kernel for quantizing down the gemmlowp result. The result should be scaled by a fixed point number
- Add test for validating the result

Required for:
- Integration of GEMMLowp in Android NN
- Convolution quantized
- Fully connected quantized

Change-Id: Ia963d25d695471e963961fb49a5600e78374ac4f
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110981
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
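For context, the output stage described above scales the int32 GEMMLowp accumulator by a fixed point multiplier before clamping to uint8. Below is a minimal scalar sketch of that idea, assuming the usual gemmlowp-style requantization (a Q0.31 multiplier followed by a rounding right shift and an output offset); all names are illustrative, and the real NEON and OpenCL kernels vectorize this and may differ in rounding details.

// Illustrative scalar version of "quantize down int32 -> uint8, scaled by a fixed point number".
#include <algorithm>
#include <cstdint>

inline uint8_t quantize_down_fixedpoint(int32_t acc,        // int32 GEMMLowp accumulator
                                        int32_t multiplier, // fixed point multiplier in Q0.31
                                        int     shift,      // additional rounding right shift
                                        int32_t offset,     // output zero point
                                        int32_t min_u8 = 0,
                                        int32_t max_u8 = 255)
{
    // Rounding multiply by the Q0.31 multiplier: round(acc * multiplier / 2^31).
    int64_t v = (static_cast<int64_t>(acc) * multiplier + (1LL << 30)) >> 31;
    // Rounding right shift by 'shift'.
    if (shift > 0)
    {
        v = (v + (1LL << (shift - 1))) >> shift;
    }
    // Add the output offset and saturate to the uint8 (or bounded ReLU) range.
    v += offset;
    v = std::min<int64_t>(std::max<int64_t>(v, min_u8), max_u8);
    return static_cast<uint8_t>(v);
}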
Diffstat (limited to 'src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp')
-rw-r--r--  src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp  |  12
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
index 670b11fe67..edd6a9fd80 100644
--- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
@@ -78,7 +78,7 @@ std::pair<Error, Window> validate_and_configure_window(ITensorInfo *input, ITens
window_changed = window_changed || update_window_and_padding(win, bias_access);
}
- output_result_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), output->tensor_shape()));
+ output_result_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
Error err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Error{};
return std::make_pair(err, win);
@@ -186,15 +186,15 @@ void NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::run(const Window &window)
}
};
- // Add the offset terms to GEMM's result and multiply by result_mult_int
- scale_input(in_s32, result_offset_s32, _result_mult_int);
-
// Add the bias to GEMM's result
in_s32.val[0] = vaddq_s32(in_s32.val[0], bias_s32.val[0]);
in_s32.val[1] = vaddq_s32(in_s32.val[1], bias_s32.val[1]);
in_s32.val[2] = vaddq_s32(in_s32.val[2], bias_s32.val[2]);
in_s32.val[3] = vaddq_s32(in_s32.val[3], bias_s32.val[3]);
+ // Add the offset terms to GEMM's result and multiply by result_mult_int
+ scale_input(in_s32, result_offset_s32, _result_mult_int);
+
vst1q_u8(out.ptr(), finalize_quantization<is_bounded_relu>(in_s32, result_shift_s32, min_u8, max_u8));
},
in, bias, out);
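With the reorder above, the bias is added to the int32 accumulator before the offset is added and the integer scale is applied, so the bias is also scaled and shifted along with the GEMM result. A scalar sketch of the per-element path this hunk produces, using hypothetical helper and parameter names (the kernel itself processes 16 values at a time with NEON intrinsics and applies the clamp range only when is_bounded_relu is set):

#include <algorithm>
#include <cstdint>

inline uint8_t quantize_down_scale(int32_t acc, int32_t bias, int32_t result_offset,
                                   int32_t result_mult_int, int32_t result_shift,
                                   int32_t min_u8 = 0, int32_t max_u8 = 255)
{
    int32_t v = acc + bias;                             // bias is applied first now
    v         = (v + result_offset) * result_mult_int;  // then offset and integer scale
    v         = v >> result_shift;                      // quantize down
    v         = std::max(min_u8, std::min(max_u8, v));  // saturate to the output range
    return static_cast<uint8_t>(v);
}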
@@ -231,6 +231,10 @@ void NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::configure(const ITensor *inp
{
// Perform validate step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+ // Output auto initialization if not yet initialized
+ auto_init_if_empty(*output->info(), input->info()->clone()->set_data_type(DataType::QASYMM8));
+
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(),
(bias != nullptr) ? bias->info() : nullptr,
output->info(),
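The auto-initialization added in this hunk allows configure() to be called with an output whose tensor info has not been set up yet: in that case the output inherits the input's metadata and is forced to the QASYMM8 data type this kernel produces. A self-contained sketch of that behaviour, using a hypothetical TensorDesc type instead of the arm_compute classes:

#include <vector>

enum class DataTypeSketch { S32, QASYMM8 };

struct TensorDesc
{
    std::vector<size_t> shape;                       // empty shape == not yet initialized
    DataTypeSketch      type = DataTypeSketch::S32;
};

// Mimics the spirit of auto_init_if_empty: only touch the output if it is still empty.
void auto_init_output_sketch(TensorDesc &output, const TensorDesc &input)
{
    if (output.shape.empty())
    {
        output      = input;                   // inherit shape and other metadata
        output.type = DataTypeSketch::QASYMM8; // the kernel writes quantized uint8 values
    }
}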