From e2428a013014feac81af60f65f9b16cc244327aa Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Thu, 9 May 2019 11:03:17 +0100 Subject: COMPMID-2194: Refactor activation function macro in OpenCL. New Macros for activation. This commit contains the new macros for activation. Only the activation_layer utilizes the new macros in this commit. Change-Id: I2fa8567cc876e8cb67a1e876652bc348b7ed23ea Signed-off-by: Usama Arif Reviewed-on: https://review.mlplatform.org/c/1104 Comments-Addressed: Arm Jenkins Reviewed-by: Gian Marco Iodice Tested-by: Arm Jenkins --- src/core/CL/CLKernelLibrary.cpp | 3 + src/core/CL/cl_kernels/activation_float_helpers.h | 70 ++++++++++++++++++++++ src/core/CL/cl_kernels/activation_layer.cl | 13 ++-- src/core/CL/cl_kernels/batchnormalization_layer.cl | 4 +- src/core/CL/cl_kernels/helpers.h | 5 +- .../CL/cl_kernels/winograd_output_transform.cl | 4 +- src/core/CL/kernels/CLActivationLayerKernel.cpp | 1 - 7 files changed, 89 insertions(+), 11 deletions(-) create mode 100644 src/core/CL/cl_kernels/activation_float_helpers.h diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp index df60001343..b2d7e23624 100644 --- a/src/core/CL/CLKernelLibrary.cpp +++ b/src/core/CL/CLKernelLibrary.cpp @@ -1036,6 +1036,9 @@ Kernel CLKernelLibrary::create_kernel(const std::string &kernel_name, const Stri concat_str += " -DARM_COMPUTE_DEBUG_ENABLED"; #endif // defined(ARM_COMPUTE_DEBUG_ENABLED) + GPUTarget gpu_arch = get_arch_from_target(get_target_from_device(_device)); + concat_str += " -DGPU_ARCH=" + support::cpp11::to_string( + static_cast::type>(gpu_arch)); if(fp16_supported()) { concat_str += " -DARM_COMPUTE_OPENCL_FP16_ENABLED=1 "; diff --git a/src/core/CL/cl_kernels/activation_float_helpers.h b/src/core/CL/cl_kernels/activation_float_helpers.h new file mode 100644 index 0000000000..fefbcab5df --- /dev/null +++ b/src/core/CL/cl_kernels/activation_float_helpers.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "helpers.h" + +#if GPU_ARCH == GPU_ARCH_BIFROST +#define MLA(a, b, c) (fma(c, b, a)) +#else // GPU_ARCH == GPU_ARCH_BIFROST +#define MLA(a, b, c) ((b) * (c) + (a)) +#endif // GPU_ARCH == GPU_ARCH_BIFROST +// Logistic Activation +#define logistic_op(DATA_TYPE, x, A_VAL, B_VAL) ((DATA_TYPE)1.0 / ((DATA_TYPE)1.0 + exp(-x))) + +// Hyperbolic Tangent Activation +#define tanh_op(DATA_TYPE, x, A_VAL, B_VAL) ((DATA_TYPE)A_VAL * tanh((DATA_TYPE)B_VAL * x)) + +// RELU Tangent Activation +#define relu_op(DATA_TYPE, x, A_VAL, B_VAL) (max((DATA_TYPE)0.0, x)) + +// Bounded RELU Activation +#define brelu_op(DATA_TYPE, x, A_VAL, B_VAL) (min((DATA_TYPE)A_VAL, max((DATA_TYPE)0.0, x))) + +// Lower Upper Bounded RELU Activation +#define lu_brelu_op(DATA_TYPE, x, A_VAL, B_VAL) (min(max(x, (DATA_TYPE)B_VAL), (DATA_TYPE)A_VAL)) + +// Leaky RELU Activation +#define lrelu_op(DATA_TYPE, x, A_VAL, B_VAL) ((min(x, (DATA_TYPE)0.0) * (DATA_TYPE)A_VAL) + max(x, (DATA_TYPE)0.0)) + +// Soft RELU Activation +#define srelu_op(DATA_TYPE, x, A_VAL, B_VAL) (log((DATA_TYPE)1.0 + exp(x))) + +// Absolute Activation +#define abs_op(DATA_TYPE, x, A_VAL, B_VAL) (fabs(x)) + +// Square Activation +#define square_op(DATA_TYPE, x, A_VAL, B_VAL) (x * x) + +// Square-root Activation +#define sqrt_op(DATA_TYPE, x, A_VAL, B_VAL) (sqrt(x)) + +// Linear Activation +#define linear_op(DATA_TYPE, x, A_VAL, B_VAL) (MLA((DATA_TYPE)B_VAL, (DATA_TYPE)A_VAL, x)) + +// Identity Activation +#define _op(DATA_TYPE, x, A_VAL, B_VAL) (x) + +#define OP(op, DATA_TYPE, x, A_VAL, B_VAL) op##_op(x, DATA_TYPE, A_VAL, B_VAL) + +#define ACTIVATION(op, DATA_TYPE, x, A_VAL, B_VAL) OP(op, x, DATA_TYPE, A_VAL, B_VAL) diff --git a/src/core/CL/cl_kernels/activation_layer.cl b/src/core/CL/cl_kernels/activation_layer.cl index cf1f434972..d820753b57 100644 --- a/src/core/CL/cl_kernels/activation_layer.cl +++ b/src/core/CL/cl_kernels/activation_layer.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -21,12 +21,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#define TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) -#define SELECT_TYPE VEC_DATA_TYPE(SELECT_DATA_TYPE, VEC_SIZE) +#if defined(ACT) && defined(DATA_TYPE) && defined(VEC_SIZE) -#include "activation_helpers.h" +#define TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) -#if defined(ACT) +#include "activation_float_helpers.h" /** This performs an activation function floating point inputs. * @@ -74,11 +73,11 @@ __kernel void activation_layer( TYPE data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr); // Perform activation - data = ACTIVATION_OP(ACT, data); + data = ACTIVATION(ACT, DATA_TYPE, data, A_VAL, B_VAL); // Store result VSTORE(VEC_SIZE) (data, 0, (__global DATA_TYPE *)output.ptr); } -#endif /* defined(ACT) */ \ No newline at end of file +#endif /* defined(ACT) */ diff --git a/src/core/CL/cl_kernels/batchnormalization_layer.cl b/src/core/CL/cl_kernels/batchnormalization_layer.cl index 60307bc9a7..0bb6cd7c5f 100644 --- a/src/core/CL/cl_kernels/batchnormalization_layer.cl +++ b/src/core/CL/cl_kernels/batchnormalization_layer.cl @@ -32,7 +32,9 @@ #if defined(VEC_SIZE) && defined(DATA_TYPE) #if defined(FUSED_ACTIVATION) -#include "activation_layer.cl" +#define SELECT_TYPE VEC_DATA_TYPE(SELECT_DATA_TYPE, VEC_SIZE) +#define TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) +#include "activation_helpers.h" #define ACTIVATION_FUNC(x) ACTIVATION_OP(FUSED_ACTIVATION, x) #else /* defined(FUSED_ACTIVATION) */ #define ACTIVATION_FUNC(x) (x) diff --git a/src/core/CL/cl_kernels/helpers.h b/src/core/CL/cl_kernels/helpers.h index 180bd50528..792544dc61 100644 --- a/src/core/CL/cl_kernels/helpers.h +++ b/src/core/CL/cl_kernels/helpers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -40,6 +40,9 @@ #pragma OPENCL EXTENSION cl_arm_printf : enable #endif // defined(ARM_COMPUTE_DEBUG_ENABLED) && defined(cl_arm_printf) +#define GPU_ARCH_MIDGARD 0x100 +#define GPU_ARCH_BIFROST 0x200 + #define EXPAND(x) x #define CLAMP(x, min_val, max_val) min(max(x, min_val), max_val) diff --git a/src/core/CL/cl_kernels/winograd_output_transform.cl b/src/core/CL/cl_kernels/winograd_output_transform.cl index cffc12d6ed..f24bf06c50 100644 --- a/src/core/CL/cl_kernels/winograd_output_transform.cl +++ b/src/core/CL/cl_kernels/winograd_output_transform.cl @@ -24,7 +24,9 @@ #include "helpers.h" #if defined(FUSED_ACTIVATION) -#include "activation_layer.cl" +#define SELECT_TYPE VEC_DATA_TYPE(SELECT_DATA_TYPE, VEC_SIZE) +#define TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) +#include "activation_helpers.h" #define ACTIVATION_FUNC(x) ACTIVATION_OP(FUSED_ACTIVATION, x) #else /* defined(FUSED_ACTIVATION) */ #define ACTIVATION_FUNC(x) (x) diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp index 100184d2f3..d601dfc20d 100644 --- a/src/core/CL/kernels/CLActivationLayerKernel.cpp +++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp @@ -134,7 +134,6 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act CLBuildOptions build_opts; build_opts.add_option_if(!is_logistic_activation_quantized, "-DACT=" + lower_string(string_from_activation_func(act_info.activation()))); build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(dt))); - build_opts.add_option(("-DSELECT_DATA_TYPE=" + get_cl_select_type_from_data_type(dt))); build_opts.add_option(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); if(is_data_type_quantized(dt)) -- cgit v1.2.1