about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author zhenglin <zhenglin.li@arm.com> 2018-01-05 10:29:21 +0800
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:42:33 +0000
commit 6fc06ccc586c6f27f31fbc0d885dad4785f9f776 (patch)
tree d73b0c4b5ecf8e261ecdb02798a2b553111559f3
parent b91e34c9837756c9ee45917e13fb6a6cb901f795 (diff)
download ComputeLibrary-6fc06ccc586c6f27f31fbc0d885dad4785f9f776.tar.gz
APPBROWSER-374: Rewrite the dropout.cs with the common code
Change-Id: Ic2be14d626856faa4496c588154ef5cfb66d4e2c
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/115282
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Joel Liang <joel.liang@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
-rw-r--r-- src/core/GLES_COMPUTE/cs_shaders/dropout.cs 140
1 file changed, 42 insertions, 98 deletions
diff --git a/src/core/GLES_COMPUTE/cs_shaders/dropout.cs b/src/core/GLES_COMPUTE/cs_shaders/dropout.cs
index 54e08b1306..8dc7f0a294 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/dropout.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/dropout.cs
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,14 +24,11 @@
layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in;
-#include "helpers.h"
+#include "helpers_cs.h"
-layout(std140) uniform shader_params
-{
- TENSOR3D_PARAM_DECLARATION(src);
- TENSOR3D_PARAM_DECLARATION(mask);
- TENSOR3D_PARAM_DECLARATION(dst);
-};
+#if defined(DATA_TYPE_FP16)
+precision mediump float;
+#endif /*DATA_TYPE_FP16*/
uint hash(uint x)
{
@@ -65,48 +62,34 @@ float rand(vec3 v, float seed)
return float_construct(hash(floatBitsToUint(v + seed)));
}
-#ifdef DATA_TYPE_FP32
-
-precision highp float;
-
-BUFFER_DECLARATION(src, 1, float, readonly);
-BUFFER_DECLARATION(mask, 2, float, );
-BUFFER_DECLARATION(dst, 3, float, writeonly);
-
/** Dropout is used to improve over-fit on neural networks.
*
- * @note The data type must be passed at compile time using "#define DATA_TYPE_FP32"
+ * @note The data type must be passed at compile time using "#define DATA_TYPE_NAME". e.g. "#define DATA_TYPE_FP32"
*
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: F32
- * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[out] mask_ptr Pointer to the mask tensor. Supported data types: same as @p src_ptr
- * @param[in] mask_stride_x Stride of the mask tensor in X dimension (in bytes)
- * @param[in] mask_step_x mask_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] mask_stride_y Stride of the mask tensor in Y dimension (in bytes)
- * @param[in] mask_step_y mask_stride_y * number of elements along y processed per workitem(in bytes)
- * @param[in] mask_stride_z Stride of the mask tensor in Z dimension (in bytes)
- * @param[in] mask_step_z mask_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] mask_offset_first_element_in_bytes The offset of the first element in the mask tensor
- * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr
- * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Z processed per workitem(in bytes)
- * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
- * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32
+ * @param[in] src_attrs The attributes of the source tensor
+ * @param[out] mask_ptr Pointer to the mask tensor. Supported data types: same as @p src_ptr
+ * @param[in] mask_attrs The attributes of the mask tensor
+ * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr
+ * @param[in] dst_attrs The attributes of the destination tensor
*/
+SHADER_PARAMS_DECLARATION
+{
+ Tensor3DAttributes src_attrs;
+ Tensor3DAttributes mask_attrs;
+ Tensor3DAttributes dst_attrs;
+};
+
+#ifdef DATA_TYPE_FP32
+TENSOR_DECLARATION(1, srcBuffer, float, src_ptr, src_shift, 2, readonly);
+TENSOR_DECLARATION(2, maskBuffer, float, mask_ptr, mask_shift, 2, );
+TENSOR_DECLARATION(3, dstBuffer, float, dst_ptr, dst_shift, 2, writeonly);
+
void main(void)
{
- Tensor3D src = GC_CONVERT_TO_TENSOR3D_STRUCT(src);
- Tensor3D mask = GC_CONVERT_TO_TENSOR3D_STRUCT(mask);
- Tensor3D dst = GC_CONVERT_TO_TENSOR3D_STRUCT(dst);
+ Tensor3DIterator src_iter = CONVERT_TO_TENSOR3D_ITERATOR(src_attrs, src_shift);
+ Tensor3DIterator mask_iter = CONVERT_TO_TENSOR3D_ITERATOR(mask_attrs, mask_shift);
+ Tensor3DIterator dst_iter = CONVERT_TO_TENSOR3D_ITERATOR(dst_attrs, dst_shift);
float random = 0.f;
float inputv = 0.f;
@@ -116,64 +99,29 @@ void main(void)
#ifdef FORWARD
random = rand(vec3(gl_GlobalInvocationID.xyz), SEED);
maskv = (random > RATIO) ? 1.f : 0.f;
- GC_STORE1_3D_OFFSET(maskv, mask, 0, 0, 0);
+ STORE_CURRENT_ITEM(mask_ptr, mask_iter, maskv);
#else /* FORWARD */
- GC_LOAD1_3D_OFFSET(maskv, mask, 0, 0, 0);
+ maskv = LOAD_CURRENT_ITEM(mask_ptr, mask_iter);
#endif /* FORWARD */
- GC_LOAD1_3D_OFFSET(inputv, src, 0, 0, 0);
+ inputv = LOAD_CURRENT_ITEM(src_ptr, src_iter);
outputv = maskv * inputv * float(SCALE);
- GC_STORE1_3D_OFFSET(outputv, dst, 0, 0, 0);
+ STORE_CURRENT_ITEM(dst_ptr, dst_iter, outputv);
}
#elif defined(DATA_TYPE_FP16)
+TENSOR_DECLARATION(1, srcBuffer, uint, src_ptr, src_shift, 2, readonly);
+TENSOR_DECLARATION(2, maskBuffer, uint, mask_ptr, mask_shift, 2, );
+TENSOR_DECLARATION(3, dstBuffer, uint, dst_ptr, dst_shift, 2, writeonly);
-precision mediump float;
-
-BUFFER_DECLARATION(src, 1, uint, readonly);
-BUFFER_DECLARATION(mask, 2, uint, );
-BUFFER_DECLARATION(dst, 3, uint, writeonly);
-
-/** Dropout is used to improve over-fit on neural networks.
- *
- * @note The data type must be passed at compile time using "#define DATA_TYPE_FP16"
- *
- * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16
- * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
- * @param[out] mask_ptr Pointer to the mask tensor. Supported data types: same as @p src_ptr
- * @param[in] mask_stride_x Stride of the mask tensor in X dimension (in bytes)
- * @param[in] mask_step_x mask_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] mask_stride_y Stride of the mask tensor in Y dimension (in bytes)
- * @param[in] mask_step_y mask_stride_y * number of elements along y processed per workitem(in bytes)
- * @param[in] mask_stride_z Stride of the mask tensor in Z dimension (in bytes)
- * @param[in] mask_step_z mask_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] mask_offset_first_element_in_bytes The offset of the first element in the mask tensor
- * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr
- * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
- * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
- * @param[in] dst_step_y dst_stride_y * number of elements along Z processed per workitem(in bytes)
- * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
- * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
- */
void main(void)
{
- Tensor3D src = GC_CONVERT_TO_TENSOR3D_STRUCT(src);
- Tensor3D mask = GC_CONVERT_TO_TENSOR3D_STRUCT(mask);
- Tensor3D dst = GC_CONVERT_TO_TENSOR3D_STRUCT(dst);
+ Tensor3DIterator src_iter = CONVERT_TO_TENSOR3D_ITERATOR(src_attrs, src_shift);
+ Tensor3DIterator mask_iter = CONVERT_TO_TENSOR3D_ITERATOR(mask_attrs, mask_shift);
+ Tensor3DIterator dst_iter = CONVERT_TO_TENSOR3D_ITERATOR(dst_attrs, dst_shift);
float random1 = 0.f;
float random2 = 0.f;
- uint inputv = uint(0);
- uint outputv = uint(0);
- uint maskv = uint(0);
vec2 input_vec = vec2(0, 0);
vec2 output_vec = vec2(0, 0);
vec2 mask_vec = vec2(0, 0);
@@ -183,20 +131,16 @@ void main(void)
random2 = rand(vec3(float(gl_GlobalInvocationID.x) + 0.5f, gl_GlobalInvocationID.yz), SEED);
mask_vec.x = (random1 > RATIO) ? 1.f : 0.f;
mask_vec.y = (random2 > RATIO) ? 1.f : 0.f;
- maskv = packHalf2x16(mask_vec);
- GC_STORE1_3D_OFFSET(maskv, mask, 0, 0, 0);
+
+ STORE_PACK2_CURRENT_ITEM_HALF(mask_ptr, mask_iter, mask_vec);
#else /* FORWARD */
- GC_LOAD1_3D_OFFSET(maskv, mask, 0, 0, 0);
- mask_vec = unpackHalf2x16(maskv);
+ mask_vec = LOAD_UNPACK2_CURRENT_ITEM_HALF(mask_ptr, mask_iter);
#endif /* FORWARD */
- GC_LOAD1_3D_OFFSET(inputv, src, 0, 0, 0);
-
- input_vec = unpackHalf2x16(inputv);
+ input_vec = LOAD_UNPACK2_CURRENT_ITEM_HALF(src_ptr, src_iter);
output_vec = mask_vec * input_vec * float(SCALE);
- outputv = packHalf2x16(output_vec);
- GC_STORE1_3D_OFFSET(outputv, dst, 0, 0, 0);
+ STORE_PACK2_CURRENT_ITEM_HALF(dst_ptr, dst_iter, output_vec);
}
#else /* DATA_TYPE_FP32 */