From 77d3d2445fa590dee505c4171daef9dd6c8124ca Mon Sep 17 00:00:00 2001 From: Sang-Hoon Park Date: Mon, 10 Aug 2020 22:50:17 +0100 Subject: COMPMID-3607: Fix softmax summation logic for QASYMM8_SIGNED For the elements that shouldn't contribute to the sum, zero is used to compute the correct sum. Change-Id: I5360534b5b0f81ee3d3aaaf5a046b99ecd943894 Signed-off-by: Sang-Hoon Park Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3703 Reviewed-by: Pablo Marquez Reviewed-by: TeresaARM Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- src/core/CL/cl_kernels/softmax_layer_quantized.cl | 15 ++++++++------- tests/validation/CL/SoftmaxLayer.cpp | 10 +--------- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/src/core/CL/cl_kernels/softmax_layer_quantized.cl b/src/core/CL/cl_kernels/softmax_layer_quantized.cl index f4c5c4b60e..22b8df8f74 100644 --- a/src/core/CL/cl_kernels/softmax_layer_quantized.cl +++ b/src/core/CL/cl_kernels/softmax_layer_quantized.cl @@ -194,7 +194,7 @@ __kernel void softmax_layer_max_shift_exp_sum_quantized_serial( data_fp = asymm_rescale(data_fp, 0, EXP_ACCUMULATION_INT_BITS); VSTORE(VECTOR_SIZE) (data_diff, 0, (__global int *)offset(&dst, i << LOG_VECTOR_SIZE, 0)); - sum1D = sum1D + select(MIN_VALUE, data_fp, data_diff >= (VEC_INT)(DIFF_MIN)); + sum1D = sum1D + select(0, data_fp, data_diff >= (VEC_INT)(DIFF_MIN)); } #ifdef NON_MULTIPLE_OF_VECTOR_SIZE @@ -208,8 +208,8 @@ __kernel void softmax_layer_max_shift_exp_sum_quantized_serial( VEC_INT widx_ = CONVERT(((VEC_UINT)(width4 << LOG_VECTOR_SIZE) + idx__) < width, VEC_INT); VSTORE(VECTOR_SIZE) (data_diff, 0, (__global int *)offset(&dst, width4 << LOG_VECTOR_SIZE, 0)); - data_fp = select(MIN_VALUE, data_fp, data_diff >= (VEC_INT)(DIFF_MIN)); - sum1D = sum1D + select(MIN_VALUE, data_fp, widx_); + data_fp = select(0, data_fp, data_diff >= (VEC_INT)(DIFF_MIN)); + sum1D = sum1D + select(0, data_fp, widx_); #endif /* NON_MULTIPLE_OF_VECTOR_SIZE */ // Perform sum reduction @@ -417,7 +417,7 @@ __kernel void softmax_layer_max_shift_exp_sum_quantized_parallel( data_fp = ASYMM_EXP_ON_NEGATIVE_VALUES(data_diff_mult, SCALED_DIFF_INT_BITS, 4); data_fp = ASYMM_RESCALE(data_fp, 0, EXP_ACCUMULATION_INT_BITS, 4); vstore4(data_diff, 0, (__global int *)offset(&dst, i * GRID_SIZE * 4, 0)); - sum1D = sum1D + select(MIN_VALUE, data_fp, data_diff >= (int4)(DIFF_MIN)); + sum1D = sum1D + select(0, data_fp, data_diff >= (int4)(DIFF_MIN)); } #ifdef NON_MULTIPLE_OF_GRID_SIZE //TODO: Optimize the calculation (avoid %). @@ -432,7 +432,7 @@ __kernel void softmax_layer_max_shift_exp_sum_quantized_parallel( data_fp = ASYMM_EXP_ON_NEGATIVE_VALUES(data_diff_mult, SCALED_DIFF_INT_BITS, 4); data_fp = ASYMM_RESCALE(data_fp, 0, EXP_ACCUMULATION_INT_BITS, 4); vstore4(data_diff, 0, (__global int *)offset(&dst, i * GRID_SIZE * 4, 0)); - sum1D = sum1D + select(MIN_VALUE, data_fp, data_diff >= (int4)(DIFF_MIN)); + sum1D = sum1D + select(0, data_fp, data_diff >= (int4)(DIFF_MIN)); } #ifdef NON_MULTIPLE_OF_VECTOR_SIZE if(boundary_workitems == 0) @@ -451,9 +451,10 @@ __kernel void softmax_layer_max_shift_exp_sum_quantized_parallel( data_fp = ASYMM_EXP_ON_NEGATIVE_VALUES(data_diff_mult, SCALED_DIFF_INT_BITS, 4); data_fp = ASYMM_RESCALE(data_fp, 0, EXP_ACCUMULATION_INT_BITS, 4); int4 widx = convert_int4(((uint4)(GRID_SIZE * i * 4) + boundary_workitems * 4 + idx4) < width); - data_fp = select(MIN_VALUE, data_fp, widx); vstore4(data_diff, 0, (__global int *)offset(&dst, i * GRID_SIZE * 4 + 4, 0)); - sum1D = sum1D + select(MIN_VALUE, data_fp, data_diff >= (int4)(DIFF_MIN)); + data_fp = select(MIN_VALUE, data_fp, data_diff >= (int4)(DIFF_MIN)); + data_fp = select(0, data_fp, widx); + sum1D = sum1D + data_fp; } #endif /* NON_MULTIPLE_OF_VECTOR_SIZE */ #endif /* NON_MULTIPLE_OF_GRID_SIZE */ diff --git a/tests/validation/CL/SoftmaxLayer.cpp b/tests/validation/CL/SoftmaxLayer.cpp index ce05edc6d4..90c3058c5d 100644 --- a/tests/validation/CL/SoftmaxLayer.cpp +++ b/tests/validation/CL/SoftmaxLayer.cpp @@ -51,14 +51,6 @@ RelativeTolerance tolerance_f32(0.001f); constexpr AbsoluteTolerance tolerance_qasymm8(1); constexpr AbsoluteTolerance tolerance_qasymm8_signed(1); -/* - The following tolerance number is used as a workaround for the mismatches - caused by float computation in reference (and NEON) kernel - and integer computations in OpenCL kernel. - COMPMID-2958 is created to investigate this. -*/ -constexpr float tolerance_number_qasymm8_signed = 0.05f; - /** CNN data types */ const auto CNNDataTypes = framework::dataset::make("DataType", { @@ -276,7 +268,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLSoftmaxLayerQuantizedFixture, framewo framework::dataset::make("Axis", { 0 }))) { // Validate output - validate(CLAccessor(_target), _reference, tolerance_qasymm8_signed, tolerance_number_qasymm8_signed); + validate(CLAccessor(_target), _reference, tolerance_qasymm8_signed); } TEST_SUITE_END() // QASYMM8_SIGNED -- cgit v1.2.1