aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorzhenglin <zhenglin.li@arm.com>2018-01-30 18:15:52 +0800
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:45:00 +0000
commit2219dea47ff74cadf2a6ee1ab95e57cd96e60596 (patch)
tree7c74a3353ce3df529c77caf6052b2c2a768e6274
parent11c3b33215225f5baf34c045a68982e0058af74a (diff)
downloadComputeLibrary-2219dea47ff74cadf2a6ee1ab95e57cd96e60596.tar.gz
APPBROWSER-390,397,398: bugfix and fully connected validation issue on specific dataset
Change-Id: I227e90445715c3bd394e49930b010c0a5f5ca177 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/118108 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Joel Liang <joel.liang@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
-rw-r--r--src/core/GLES_COMPUTE/cs_shaders/gemm.cs10
-rw-r--r--tests/datasets/FullyConnectedLayerDataset.h4
-rw-r--r--tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h4
3 files changed, 10 insertions, 8 deletions
diff --git a/src/core/GLES_COMPUTE/cs_shaders/gemm.cs b/src/core/GLES_COMPUTE/cs_shaders/gemm.cs
index c81bed7066..580acc16a7 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/gemm.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/gemm.cs
@@ -509,7 +509,7 @@ void main()
vec4 acc3 = vec4(0.0f);
#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
- for(; int(CURRENT_ITEM_OFFSET_IN_BYTES(src0_iter)) < int(end_row_vec_a - uint(2));
+ for(; int(CURRENT_ITEM_OFFSET_IN_BYTES(src0_iter)) <= int(end_row_vec_a - uint(4));
TENSOR_ITERATOR_ADVANCE_IN_BYTES(src0_iter, 2 * 2), TENSOR_ITERATOR_ADVANCE_IN_BYTES(src1_iter, uint(2) * src1_attrs.stride_y))
{
vec2 a0 = LOAD_UNPACK2_CURRENT_ITEM_HALF(src0_ptr, src0_iter);
@@ -549,7 +549,7 @@ void main()
vec2 a1 = LOAD_UNPACK2_HALF(src0_ptr, IMAGE_OFFSET(src0_iter, 0, 1));
#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
- vec a2 = LOAD_UNPACK2_HALF(src0_ptr, IMAGE_OFFSET(src0_iter, 0, 2));
+ vec2 a2 = LOAD_UNPACK2_HALF(src0_ptr, IMAGE_OFFSET(src0_iter, 0, 2));
#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
vec2 a3 = LOAD_UNPACK2_HALF(src0_ptr, IMAGE_OFFSET(src0_iter, 0, 3));
@@ -615,7 +615,7 @@ void main()
vec4 acc3 = vec4(0.0f);
#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
- for(; int(CURRENT_ITEM_OFFSET_IN_BYTES(src0_iter)) < int(end_row_vec_a - uint(16));
+ for(; int(CURRENT_ITEM_OFFSET_IN_BYTES(src0_iter)) <= int(end_row_vec_a - uint(16));
TENSOR_ITERATOR_ADVANCE_IN_BYTES(src0_iter, uint(8) * src0_attrs.stride_x), TENSOR_ITERATOR_ADVANCE_IN_BYTES(src1_iter, uint(8) * src1_attrs.stride_y))
{
vec4 a0[2] = LOAD_UNPACK8_CURRENT_ITEM_HALF(src0_ptr, src0_iter);
@@ -729,7 +729,7 @@ void main()
acc[0] = vec4(0.0f);
acc[1] = vec4(0.0f);
- for(; int(CURRENT_ITEM_OFFSET_IN_BYTES(src0_iter)) < int(end_row_vec_a - uint(16));
+ for(; int(CURRENT_ITEM_OFFSET_IN_BYTES(src0_iter)) <= int(end_row_vec_a - uint(16));
TENSOR_ITERATOR_ADVANCE_IN_BYTES(src0_iter, uint(8) * src0_attrs.stride_x), TENSOR_ITERATOR_ADVANCE_IN_BYTES(src1_iter, uint(8) * src1_attrs.stride_y))
{
vec4 a[2] = LOAD_UNPACK8_CURRENT_ITEM_HALF(src0_ptr, src0_iter);
@@ -823,7 +823,7 @@ void main(void)
VectorIterator biases_iter = CONVERT_TO_VECTOR_ITERATOR(biases_attrs, biases_shift);
vec4 u[2] = LOAD_UNPACK8_CURRENT_ITEM_HALF(accum_ptr, accum_iter);
- vec4 v[2] = LOAD_UNPACK8_CURRENT_ITEM_HALF(biases_ptr, bias_iter);
+ vec4 v[2] = LOAD_UNPACK8_CURRENT_ITEM_HALF(biases_ptr, biases_iter);
vec4 r[2];
r[0] = u[0] + v[0];
diff --git a/tests/datasets/FullyConnectedLayerDataset.h b/tests/datasets/FullyConnectedLayerDataset.h
index b2008d604b..60183f8746 100644
--- a/tests/datasets/FullyConnectedLayerDataset.h
+++ b/tests/datasets/FullyConnectedLayerDataset.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -120,6 +120,8 @@ public:
SmallFullyConnectedLayerDataset()
{
// Conv -> FC
+ add_config(TensorShape(8U, 1U, 1U), TensorShape(8U, 16U), TensorShape(16U), TensorShape(16U));
+ // Conv -> FC
add_config(TensorShape(1U, 1U, 1U, 3U), TensorShape(1U, 10U), TensorShape(10U), TensorShape(10U, 3U));
// Conv -> FC
add_config(TensorShape(9U, 5U, 7U), TensorShape(315U, 271U), TensorShape(271U), TensorShape(271U));
diff --git a/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h b/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h
index ae5c53a053..4ec4ae647d 100644
--- a/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h
+++ b/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -62,7 +62,7 @@ protected:
float max_bound = 0.f;
std::tie(min_bound, max_bound) = get_normalize_planar_yuv_layer_test_bounds<T>();
std::uniform_real_distribution<> distribution(min_bound, max_bound);
- std::uniform_real_distribution<> distribution_sd(0, max_bound);
+ std::uniform_real_distribution<> distribution_sd(0.1, max_bound);
library->fill(src_tensor, distribution, 0);
library->fill(mean_tensor, distribution, 1);
library->fill(sd_tensor, distribution_sd, 2);