From 2219dea47ff74cadf2a6ee1ab95e57cd96e60596 Mon Sep 17 00:00:00 2001 From: zhenglin Date: Tue, 30 Jan 2018 18:15:52 +0800 Subject: APPBROWSER-390,397,398: bugfix and fully connected validation issue on specific dataset Change-Id: I227e90445715c3bd394e49930b010c0a5f5ca177 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/118108 Tested-by: Jenkins Reviewed-by: Joel Liang Reviewed-by: Anthony Barbier --- src/core/GLES_COMPUTE/cs_shaders/gemm.cs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/core/GLES_COMPUTE') diff --git a/src/core/GLES_COMPUTE/cs_shaders/gemm.cs b/src/core/GLES_COMPUTE/cs_shaders/gemm.cs index c81bed7066..580acc16a7 100644 --- a/src/core/GLES_COMPUTE/cs_shaders/gemm.cs +++ b/src/core/GLES_COMPUTE/cs_shaders/gemm.cs @@ -509,7 +509,7 @@ void main() vec4 acc3 = vec4(0.0f); #endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 - for(; int(CURRENT_ITEM_OFFSET_IN_BYTES(src0_iter)) < int(end_row_vec_a - uint(2)); + for(; int(CURRENT_ITEM_OFFSET_IN_BYTES(src0_iter)) <= int(end_row_vec_a - uint(4)); TENSOR_ITERATOR_ADVANCE_IN_BYTES(src0_iter, 2 * 2), TENSOR_ITERATOR_ADVANCE_IN_BYTES(src1_iter, uint(2) * src1_attrs.stride_y)) { vec2 a0 = LOAD_UNPACK2_CURRENT_ITEM_HALF(src0_ptr, src0_iter); @@ -549,7 +549,7 @@ void main() vec2 a1 = LOAD_UNPACK2_HALF(src0_ptr, IMAGE_OFFSET(src0_iter, 0, 1)); #endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 #if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 - vec a2 = LOAD_UNPACK2_HALF(src0_ptr, IMAGE_OFFSET(src0_iter, 0, 2)); + vec2 a2 = LOAD_UNPACK2_HALF(src0_ptr, IMAGE_OFFSET(src0_iter, 0, 2)); #endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 #if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 vec2 a3 = LOAD_UNPACK2_HALF(src0_ptr, IMAGE_OFFSET(src0_iter, 0, 3)); @@ -615,7 +615,7 @@ void main() vec4 acc3 = vec4(0.0f); #endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 - for(; int(CURRENT_ITEM_OFFSET_IN_BYTES(src0_iter)) < int(end_row_vec_a - uint(16)); + for(; int(CURRENT_ITEM_OFFSET_IN_BYTES(src0_iter)) <= int(end_row_vec_a - uint(16)); TENSOR_ITERATOR_ADVANCE_IN_BYTES(src0_iter, uint(8) * src0_attrs.stride_x), TENSOR_ITERATOR_ADVANCE_IN_BYTES(src1_iter, uint(8) * src1_attrs.stride_y)) { vec4 a0[2] = LOAD_UNPACK8_CURRENT_ITEM_HALF(src0_ptr, src0_iter); @@ -729,7 +729,7 @@ void main() acc[0] = vec4(0.0f); acc[1] = vec4(0.0f); - for(; int(CURRENT_ITEM_OFFSET_IN_BYTES(src0_iter)) < int(end_row_vec_a - uint(16)); + for(; int(CURRENT_ITEM_OFFSET_IN_BYTES(src0_iter)) <= int(end_row_vec_a - uint(16)); TENSOR_ITERATOR_ADVANCE_IN_BYTES(src0_iter, uint(8) * src0_attrs.stride_x), TENSOR_ITERATOR_ADVANCE_IN_BYTES(src1_iter, uint(8) * src1_attrs.stride_y)) { vec4 a[2] = LOAD_UNPACK8_CURRENT_ITEM_HALF(src0_ptr, src0_iter); @@ -823,7 +823,7 @@ void main(void) VectorIterator biases_iter = CONVERT_TO_VECTOR_ITERATOR(biases_attrs, biases_shift); vec4 u[2] = LOAD_UNPACK8_CURRENT_ITEM_HALF(accum_ptr, accum_iter); - vec4 v[2] = LOAD_UNPACK8_CURRENT_ITEM_HALF(biases_ptr, bias_iter); + vec4 v[2] = LOAD_UNPACK8_CURRENT_ITEM_HALF(biases_ptr, biases_iter); vec4 r[2]; r[0] = u[0] + v[0]; -- cgit v1.2.1