aboutsummaryrefslogtreecommitdiff
path: root/src/core/GLES_COMPUTE/cs_shaders
diff options
context:
space:
mode:
authorFrank Lei <frank.lei@arm.com>2018-02-01 14:47:14 +0800
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:47:40 +0000
commit4406fd6cc4abded564d3791324e1f48bdfd34273 (patch)
tree22fe402fe9ac7ca338df49e9eccd6eb1587ae875 /src/core/GLES_COMPUTE/cs_shaders
parent898d399a0f62c15612a52df4bff5018e783214e4 (diff)
downloadComputeLibrary-4406fd6cc4abded564d3791324e1f48bdfd34273.tar.gz
APPBROWSER-391: Fix GLES COMPUTE alignment issues
APPBROWSER-402: Performance optimization for squeezenet/xray model Change-Id: If31b186b99a6d6087164019fe94d3ac9279e3204 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/119526 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/GLES_COMPUTE/cs_shaders')
-rwxr-xr-xsrc/core/GLES_COMPUTE/cs_shaders/arithmetic_add.cs30
-rw-r--r--src/core/GLES_COMPUTE/cs_shaders/concatenate.cs10
-rw-r--r--src/core/GLES_COMPUTE/cs_shaders/scale.cs30
3 files changed, 35 insertions, 35 deletions
diff --git a/src/core/GLES_COMPUTE/cs_shaders/arithmetic_add.cs b/src/core/GLES_COMPUTE/cs_shaders/arithmetic_add.cs
index 0ff43605ba..2ab6d5eac5 100755
--- a/src/core/GLES_COMPUTE/cs_shaders/arithmetic_add.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/arithmetic_add.cs
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,20 +29,20 @@ layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z =
precision mediump float;
#define ADD(x, y) (x) + (y)
-/** This function add two images.
+/** This function adds two tensors.
*
- * @param[in] src1_ptr Pointer to the first source image. Supported data types: F16
- * @param[in] src1_attrs The attributes of the first source image
- * @param[in] src2_ptr Pointer to the second source image. Supported data types: Same as @p src1_ptr
- * @param[in] src2_attrs The attributes of the second source image
- * @param[out] dst_ptr Pointer to the destination image. Supported data types: Same as @p src1_ptr
- * @param[in] dst_attrs The attributes of the destination image
+ * @param[in] src1_ptr Pointer to the first source tensor. Supported data types: F16
+ * @param[in] src1_attrs The attributes of the first source tensor
+ * @param[in] src2_ptr Pointer to the second source tensor. Supported data types: Same as @p src1_ptr
+ * @param[in] src2_attrs The attributes of the second source tensor
+ * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: Same as @p src1_ptr
+ * @param[in] dst_attrs The attributes of the destination tensor
*/
SHADER_PARAMS_DECLARATION
{
- ImageAttributes src1_attrs;
- ImageAttributes src2_attrs;
- ImageAttributes dst_attrs;
+ Tensor3DAttributes src1_attrs;
+ Tensor3DAttributes src2_attrs;
+ Tensor3DAttributes dst_attrs;
};
TENSOR_DECLARATION(1, src1Buffer, uvec4, src1_ptr, src1_shift, 4, readonly);
@@ -51,9 +51,9 @@ TENSOR_DECLARATION(3, dstBuffer, uvec4, dst_ptr, dst_shift, 4, writeonly);
void main(void)
{
- ImageIterator src1_iter = CONVERT_TO_IMAGE_ITERATOR(src1_attrs, src1_shift);
- ImageIterator src2_iter = CONVERT_TO_IMAGE_ITERATOR(src2_attrs, src2_shift);
- ImageIterator dst_iter = CONVERT_TO_IMAGE_ITERATOR(dst_attrs, dst_shift);
+ Tensor3DIterator src1_iter = CONVERT_TO_TENSOR3D_ITERATOR(src1_attrs, src1_shift);
+ Tensor3DIterator src2_iter = CONVERT_TO_TENSOR3D_ITERATOR(src2_attrs, src2_shift);
+ Tensor3DIterator dst_iter = CONVERT_TO_TENSOR3D_ITERATOR(dst_attrs, dst_shift);
vec4 tmp1[2] = LOAD_UNPACK8_CURRENT_ITEM_HALF(src1_ptr, src1_iter);
vec4 tmp2[2] = LOAD_UNPACK8_CURRENT_ITEM_HALF(src2_ptr, src2_iter);
@@ -62,4 +62,4 @@ void main(void)
addition[1] = ADD(tmp1[1], tmp2[1]);
STORE_PACK8_CURRENT_ITEM_HALF(dst_ptr, dst_iter, addition);
-} \ No newline at end of file
+}
diff --git a/src/core/GLES_COMPUTE/cs_shaders/concatenate.cs b/src/core/GLES_COMPUTE/cs_shaders/concatenate.cs
index 0c8b5bf0bb..69ac50b4d0 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/concatenate.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/concatenate.cs
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -53,8 +53,8 @@ void main(void)
Tensor3DIterator src_iter = CONVERT_TO_TENSOR3D_ITERATOR(src_attrs, src_shift);
Tensor3DIterator dst_iter = CONVERT_TO_TENSOR3D_ITERATOR(dst_attrs, dst_shift);
- float tmp = LOAD(src_ptr, TENSOR3D_OFFSET(src_iter, -OFFSETS_X, -OFFSETS_Y, 0));
- STORE(dst_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(dst_iter, OFFSETS_Z), tmp);
+ float tmp = LOAD(src_ptr, TENSOR3D_OFFSET(src_iter, -OFFSET_X, -OFFSET_Y, 0));
+ STORE_CURRENT_ITEM(dst_ptr, dst_iter, tmp);
}
#elif defined(DATA_TYPE_FP16)
@@ -66,7 +66,7 @@ void main(void)
Tensor3DIterator src_iter = CONVERT_TO_TENSOR3D_ITERATOR(src_attrs, src_shift);
Tensor3DIterator dst_iter = CONVERT_TO_TENSOR3D_ITERATOR(dst_attrs, dst_shift);
- uvec2 tmp = LOAD(src_ptr, TENSOR3D_OFFSET(src_iter, -OFFSETS_X, -OFFSETS_Y, 0));
- STORE(dst_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(dst_iter, OFFSETS_Z), tmp);
+ uvec2 tmp = LOAD(src_ptr, TENSOR3D_OFFSET(src_iter, -OFFSET_X, -OFFSET_Y, 0));
+ STORE_CURRENT_ITEM(dst_ptr, dst_iter, tmp);
}
#endif /*DATA_TYPE_FP16*/
diff --git a/src/core/GLES_COMPUTE/cs_shaders/scale.cs b/src/core/GLES_COMPUTE/cs_shaders/scale.cs
index b2689a257d..b72c3392aa 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/scale.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/scale.cs
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2018 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,23 +29,23 @@ layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z =
// We DO have to use highp for DATA_TYPE_FP16 float here to calculate the coordinates of source tensor. float is highp by default, but we still write it down here to make it more clearly, and mediump is only used for src/dst tensor in shader body.
precision highp float;
-/** Performs an affine transformation on an image interpolating with the NEAREAST NEIGHBOUR method. Input and output are single channel FP16.
+/** Performs an affine transformation on a tensor interpolating with the NEAREST NEIGHBOUR method. Input and output are single channel FP16.
*
* @param[in] src_ptr Pointer to the source tensor. Supported data types: FP16.
* @param[in] src_attrs The attributes of the source tensor
* @param[out] dst_ptr Pointer to the destination tensor. Supported data types: FP16. (Must be the same as the input)
* @param[in] dst_attrs The attributes of the destination tensor
- * @param[in] input_width Input image width
- * @param[in] input_height Input image height
+ * @param[in] input_width Input tensor width
+ * @param[in] input_height Input tensor height
* @param[in] scale The scale factor along x/y dimension
*/
SHADER_PARAMS_DECLARATION
{
- ImageAttributes src_attrs;
- ImageAttributes dst_attrs;
- float input_width;
- float input_height;
- vec2 scale;
+ Tensor3DAttributes src_attrs;
+ Tensor3DAttributes dst_attrs;
+ float input_width;
+ float input_height;
+ vec2 scale;
};
#if defined(DATA_TYPE_FP16)
@@ -75,8 +75,8 @@ vec4[2] clamp_to_border_with_size(vec4[2] coords, float width, float height, flo
void main()
{
- ImageIterator src_iter = CONVERT_TO_IMAGE_ITERATOR_NO_STEP(src_attrs, src_shift);
- ImageIterator dst_iter = CONVERT_TO_IMAGE_ITERATOR(dst_attrs, dst_shift);
+ Tensor3DIterator src_iter = CONVERT_TO_TENSOR3D_ITERATOR_NO_STEP(src_attrs, src_shift);
+ Tensor3DIterator dst_iter = CONVERT_TO_TENSOR3D_ITERATOR(dst_attrs, dst_shift);
vec4[2] tc = clamp_to_border_with_size(transform_nearest(vec2(gl_GlobalInvocationID.x << uint(2), gl_GlobalInvocationID.y), scale), input_width, input_height, float(BORDER_SIZE));
@@ -85,7 +85,7 @@ void main()
for(int i = 0; i < 4; i++)
{
- uint offset_in_bytes = image_offset_in_bytes(src_iter, int(tc[0][i]), int(tc[1][i]));
+ uint offset_in_bytes = tensor3D_offset_in_bytes(src_iter, int(tc[0][i]), int(tc[1][i]), int(gl_GlobalInvocationID.z));
s = LOAD_UNPACK2_HALF(src_ptr, uint(offset_in_bytes >> src_shift));
@@ -107,15 +107,15 @@ TENSOR_DECLARATION(2, dstBuffer, uvec4, dst_ptr, dst_shift, 4, writeonly);
void main()
{
- ImageIterator src_iter = CONVERT_TO_IMAGE_ITERATOR_NO_STEP(src_attrs, src_shift);
- ImageIterator dst_iter = CONVERT_TO_IMAGE_ITERATOR(dst_attrs, dst_shift);
+ Tensor3DIterator src_iter = CONVERT_TO_TENSOR3D_ITERATOR_NO_STEP(src_attrs, src_shift);
+ Tensor3DIterator dst_iter = CONVERT_TO_TENSOR3D_ITERATOR(dst_attrs, dst_shift);
uvec2 tc = uvec2(gl_GlobalInvocationID.x << uint(2), gl_GlobalInvocationID.y >> uint(1));
mediump vec4 s = vec4(0.0f);
mediump vec4[2] d;
- s = LOAD_UNPACK4_HALF(src_ptr, IMAGE_OFFSET(src_iter, int(tc[0]), int(tc[1])));
+ s = LOAD_UNPACK4_HALF(src_ptr, TENSOR3D_OFFSET(src_iter, int(tc[0]), int(tc[1]), int(gl_GlobalInvocationID.z)));
d[0] = vec4(s.x, s.x, s.y, s.y);
d[1] = vec4(s.z, s.z, s.w, s.w);