From 7068f9900d136312318ff430aef588b14e0c87ad Mon Sep 17 00:00:00 2001
From: Anthony Barbier
Date: Thu, 26 Oct 2017 15:23:08 +0100
Subject: COMPMID-631: Merge branches/gles_compute branch

Last commit:
commit b25c5f68042b0c81bf611d59a1bb8535e1c42497
Author: Xinghang Zhou
Date: Wed Oct 25 18:48:10 2017 +0800

    Synced validation's tolerances of GCSoftmax from cl side

Change-Id: Ibe72054205c1c8721845d679a31af7ed0a7c5cf6
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/93283
Reviewed-by: Anthony Barbier
Tested-by: Kaizen
---
 .../GLES_COMPUTE/cs_shaders/activation_layer.cs | 277 +++++++++++++++++++++
 1 file changed, 277 insertions(+)
 create mode 100644 src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs

(limited to 'src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs')

diff --git a/src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs
new file mode 100644
index 0000000000..fc9da114f7
--- /dev/null
+++ b/src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in;
+
+#include "helpers.h"
+
+// Default float precision. FP32 always uses highp; FP16 uses highp only for
+// LOGISTIC, TANH, SRELU and SQRT and mediump for every other activation.
+#ifdef DATA_TYPE_FP32
+precision highp float;
+#elif defined(DATA_TYPE_FP16)
+#if defined(LOGISTIC) || defined(TANH) || defined(SRELU) || defined(SQRT)
+precision highp float;
+#else /*LOGISTIC_TANH_SRELU_SQRT*/
+precision mediump float;
+#endif /*LOGISTIC_TANH_SRELU_SQRT*/
+#endif /*DATA_TYPE_FP32*/
+
+// Elementary operations the activation functions below are built from.
+#define ABS_OP(a) abs((a))
+#define ADD_OP(a, b) ((a) + (b))
+#define SUB_OP(a, b) ((a) - (b))
+#define MUL_OP(a, b) ((a) * (b))
+#define MLA_OP(a, b, c) ((b) * (c) + (a))
+#define DIV_OP(a, b) ((a) / (b))
+#define EXP_OP(a) exp((a))
+#define LOG_OP(a) log((a))
+#define SQRT_OP(a) sqrt((a))
+#define CONST_ONE (1.f)
+
+/** Logistic activation: 1 / (1 + exp(-x)). */
+float logistic_op(float x)
+{
+    return DIV_OP(CONST_ONE, ADD_OP(CONST_ONE, EXP_OP(-x)));
+}
+/** Hyperbolic tangent activation: A_VAL * tanh(B_VAL * x).
+ *
+ * When |B_VAL * x| exceeds 10 the saturated value +/-A_VAL is returned without
+ * calling tanh().
+ * NOTE(review): presumably this guards against overflow inside tanh() for large
+ * arguments, and the purpose of the 0.000001f bias is not evident from this
+ * file - confirm both before changing them.
+ */
+float tanh_op(float x)
+{
+    float tmp = float(B_VAL) * x;
+    if(tmp > 10.f)
+    {
+        return MUL_OP(float(A_VAL), 1.f);
+    }
+    else if(tmp < -10.f)
+    {
+        return MUL_OP(float(A_VAL), -1.f);
+    }
+    else
+    {
+        return MUL_OP(float(A_VAL), tanh(tmp + 0.000001f));
+    }
+}
+/** RELU activation: max(0, x). */
+float relu_op(float x)
+{
+    return max(0.f, x);
+}
+/** Bounded RELU activation: min(A_VAL, max(0, x)). */
+float brelu_op(float x)
+{
+    return min(float(A_VAL), max(float(0.0), x));
+}
+/** Lower-upper bounded RELU activation: min(max(x, B_VAL), A_VAL). */
+float lu_brelu_op(float x)
+{
+    return min(max(x, float(B_VAL)), float(A_VAL));
+}
+/** Leaky RELU activation: x when x > 0, otherwise A_VAL * x. */
+float lrelu_op(float x)
+{
+    return (x > float(0.0)) ? x : MUL_OP(float(A_VAL), x);
+}
+/** Soft RELU activation: log(1 + exp(x)). */
+float srelu_op(float x)
+{
+    return LOG_OP(ADD_OP(CONST_ONE, EXP_OP(x)));
+}
+/** Absolute value activation: |x|. */
+float abs_op(float x)
+{
+    return ABS_OP(x);
+}
+/** Square activation: x * x. */
+float square_op(float x)
+{
+    return MUL_OP(x, x);
+}
+/** Square-root activation: sqrt(x). */
+float sqrt_op(float x)
+{
+    return SQRT_OP(x);
+}
+/** Linear activation: A_VAL * x + B_VAL. */
+float linear_op(float x)
+{
+    return MLA_OP(float(B_VAL), float(A_VAL), x);
+}
+
+layout(std140) uniform shader_params
+{
+    TENSOR3D_PARAM_DECLARATION(src);
+    TENSOR3D_PARAM_DECLARATION(dst);
+};
+
+#ifdef DATA_TYPE_FP32
+BUFFER_DECLARATION(src, 1, float, readonly);
+BUFFER_DECLARATION(dst, 2, float, writeonly);
+
+/** Applies the selected activation function to F32 input, one element per invocation.
+ *
+ * @note Activation function should be given as a preprocessor argument using "#define act_name". e.g. "#define TANH"
+ * @note A, B variables required by some activation functions are set using A_VAL= and B_VAL= respectively.
+ *
+ * @param[in]  src_ptr                           Pointer to the source image. Supported data types: F32
+ * @param[in]  src_stride_x                      Stride of the source image in X dimension (in bytes)
+ * @param[in]  src_step_x                        src_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in]  src_stride_y                      Stride of the source image in Y dimension (in bytes)
+ * @param[in]  src_step_y                        src_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in]  src_stride_z                      Stride of the source tensor in Z dimension (in bytes)
+ * @param[in]  src_step_z                        src_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in]  src_offset_first_element_in_bytes The offset of the first element in the source image
+ * @param[out] dst_ptr                           Pointer to the destination image. Supported data types: same as @p src_ptr
+ * @param[in]  dst_stride_x                      Stride of the destination image in X dimension (in bytes)
+ * @param[in]  dst_step_x                        dst_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in]  dst_stride_y                      Stride of the destination image in Y dimension (in bytes)
+ * @param[in]  dst_step_y                        dst_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in]  dst_stride_z                      Stride of the destination tensor in Z dimension (in bytes)
+ * @param[in]  dst_step_z                        dst_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in]  dst_offset_first_element_in_bytes The offset of the first element in the destination image
+ */
+void main(void)
+{
+    Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
+    Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
+
+    float data     = src_ptr[src.current_offset];
+    float data_out = 0.f;
+    // Perform activation
+
+#ifdef LOGISTIC /*LOGISTIC*/
+    data_out = logistic_op(data);
+#elif defined(TANH) /*TANH*/
+    data_out = tanh_op(data);
+#elif defined(RELU) /*RELU*/
+    data_out = relu_op(data);
+#elif defined(BRELU) /*BRELU*/
+    data_out = brelu_op(data);
+#elif defined(LU_BRELU) /*LU_BRELU*/
+    data_out = lu_brelu_op(data);
+#elif defined(LRELU) /*LRELU*/
+    data_out = lrelu_op(data);
+#elif defined(SRELU) /*SRELU*/
+    data_out = srelu_op(data);
+#elif defined(ABS) /*ABS*/
+    data_out = abs_op(data);
+#elif defined(SQUARE) /*SQUARE*/
+    data_out = square_op(data);
+#elif defined(SQRT) /*SQRT*/
+    data_out = sqrt_op(data);
+#elif defined(LINEAR) /*LINEAR*/
+    data_out = linear_op(data);
+#else /*LOGISTIC*/
+#error Activation function not provided
+#endif /*LOGISTIC*/
+
+    dst_ptr[dst.current_offset] = data_out;
+}
+
+#elif defined(DATA_TYPE_FP16)
+BUFFER_DECLARATION(src, 1, uint, readonly);
+BUFFER_DECLARATION(dst, 2, uint, writeonly);
+
+/** Applies the selected activation function to F16 input, two elements per invocation.
+ *
+ * @note Activation function should be given as a preprocessor argument using "#define act_name". e.g. "#define TANH"
+ * @note A, B variables required by some activation functions are set using A_VAL= and B_VAL= respectively.
+ * @note Source and destination are accessed as 32-bit words that each hold two packed F16 values.
+ *
+ * @param[in]  src_ptr                           Pointer to the source image. Supported data types: F16
+ * @param[in]  src_stride_x                      Stride of the source image in X dimension (in bytes)
+ * @param[in]  src_step_x                        src_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in]  src_stride_y                      Stride of the source image in Y dimension (in bytes)
+ * @param[in]  src_step_y                        src_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in]  src_stride_z                      Stride of the source tensor in Z dimension (in bytes)
+ * @param[in]  src_step_z                        src_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in]  src_offset_first_element_in_bytes The offset of the first element in the source image
+ * @param[out] dst_ptr                           Pointer to the destination image. Supported data types: same as @p src_ptr
+ * @param[in]  dst_stride_x                      Stride of the destination image in X dimension (in bytes)
+ * @param[in]  dst_step_x                        dst_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in]  dst_stride_y                      Stride of the destination image in Y dimension (in bytes)
+ * @param[in]  dst_step_y                        dst_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in]  dst_stride_z                      Stride of the destination tensor in Z dimension (in bytes)
+ * @param[in]  dst_step_z                        dst_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in]  dst_offset_first_element_in_bytes The offset of the first element in the destination image
+ */
+void main(void)
+{
+    Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT_FP16(src);
+    Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_FP16(dst);
+
+    uint data = src_ptr[src.current_offset >> 2];
+    // Unpack both half floats of the 32-bit word once
+    vec2  data_in = unpackHalf2x16(data);
+    float a       = data_in.x;
+    float b       = data_in.y;
+    vec2  data_out;
+    // Perform activation
+#ifdef LOGISTIC /*LOGISTIC*/
+    data_out.x = logistic_op(a);
+    data_out.y = logistic_op(b);
+#elif defined(TANH) /*TANH*/
+    data_out.x = tanh_op(a);
+    data_out.y = tanh_op(b);
+#elif defined(RELU) /*RELU*/
+    data_out.x = relu_op(a);
+    data_out.y = relu_op(b);
+#elif defined(BRELU) /*BRELU*/
+    data_out.x = brelu_op(a);
+    data_out.y = brelu_op(b);
+#elif defined(LU_BRELU) /*LU_BRELU*/
+    data_out.x = lu_brelu_op(a);
+    data_out.y = lu_brelu_op(b);
+#elif defined(LRELU) /*LRELU*/
+    data_out.x = lrelu_op(a);
+    data_out.y = lrelu_op(b);
+#elif defined(SRELU) /*SRELU*/
+    data_out.x = srelu_op(a);
+    data_out.y = srelu_op(b);
+#elif defined(ABS) /*ABS*/
+    data_out.x = abs_op(a);
+    data_out.y = abs_op(b);
+#elif defined(SQUARE) /*SQUARE*/
+    data_out.x = square_op(a);
+    data_out.y = square_op(b);
+#elif defined(SQRT) /*SQRT*/
+    data_out.x = sqrt_op(a);
+    data_out.y = sqrt_op(b);
+#elif defined(LINEAR) /*LINEAR*/
+    data_out.x = linear_op(a);
+    data_out.y = linear_op(b);
+#else /*LOGISTIC*/
+#error Activation function not provided
+#endif /*LOGISTIC*/
+
+    dst_ptr[dst.current_offset >> 2] = packHalf2x16(data_out);
+}
+#else /*DATA_TYPE_FP32*/
+#error Data type not supported
+#endif /*DATA_TYPE_FP32*/
-- 
cgit v1.2.1