diff options
author | Giorgio Arena <giorgio.arena@arm.com> | 2018-02-19 15:33:41 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:47:18 +0000 |
commit | 72f39be2f372b9a810cb27320dba5d0722407549 (patch) | |
tree | 3ac73795c23107f7eab88cec553be9b9ed248214 /src/core/CL/cl_kernels/softmax_layer_quantized.cl | |
parent | 7bfb199c1461fd553a78ca1947855c7d25106803 (diff) | |
download | ComputeLibrary-72f39be2f372b9a810cb27320dba5d0722407549.tar.gz |
COMPMID-939 Fix mismatches and finalize CLSoftmaxLayer optimization
Change-Id: I4404f91a270e0ba7bbb7451c4c43a485fd4a3f6c
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/121105
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/softmax_layer_quantized.cl')
-rw-r--r-- | src/core/CL/cl_kernels/softmax_layer_quantized.cl | 7 |
1 files changed, 3 insertions, 4 deletions
```diff
diff --git a/src/core/CL/cl_kernels/softmax_layer_quantized.cl b/src/core/CL/cl_kernels/softmax_layer_quantized.cl
index 7521c8e1ee..c055381fc5 100644
--- a/src/core/CL/cl_kernels/softmax_layer_quantized.cl
+++ b/src/core/CL/cl_kernels/softmax_layer_quantized.cl
@@ -232,7 +232,6 @@ __kernel void softmax_layer_max_shift_exp_sum_quantized_serial(
  * @note Datatype must be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
  * @note Fixed point position must be given as a preprocessor argument using -DFIXED_POINT_POSITION=pos. e.g. DFIXED_POINT_POSITION=4
  * @note In case the input is not a multiple of VECTOR_SIZE (2,4,8,16) -DNON_MULTIPLE_OF_VECTOR_SIZE must be passed.
- * @note Beta can be optionally passed at compile time using -DBETA (by default, it is 1.0).
  *
  * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: QS8/QS16/F16/F32
  * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
@@ -288,7 +287,7 @@ __kernel void softmax_layer_max_shift_exp_sum_quantized_parallel(
     __local uchar max_local;
 
     uchar4 uchar_min = (uchar4)0;
-    uchar4 max_val_vec = (uchar4)uchar_min;
+    uchar4 max_val_vec = uchar_min;
 
     // Number of elements per work-item.
     const uint row = width / GRID_SIZE;
@@ -441,8 +440,8 @@ __kernel void softmax_layer_max_shift_exp_sum_quantized_parallel(
         int4 data_diff_mult = mult_by_quantized_multiplier_parallel(data_diff);
         data_fp = ASYMM_EXP_ON_NEGATIVE_VALUES(data_diff_mult, SCALED_DIFF_INT_BITS, 4);
         data_fp = ASYMM_RESCALE(data_fp, 0, EXP_ACCUMULATION_INT_BITS, 4);
-        uchar4 widx = convert_uchar4(((uint4)(GRID_SIZE * i * 4) + boundary_workitems * 4 + idx4) < width);
-        data = select(uchar_min, data, widx);
+        int4 widx = convert_int4(((uint4)(GRID_SIZE * i * 4) + boundary_workitems * 4 + idx4) < width);
+        data_fp = select(0, data_fp, widx);
         vstore4(data_diff, 0, (__global int *)offset(&dst, i * GRID_SIZE * 4 + 4, 0));
         sum1D = sum1D + select(0, data_fp, data_diff >= (int4)(DIFF_MIN));
     }
```