From 48a60f9f7b0b7b5cf38253b7a2ac576aac43ef78 Mon Sep 17 00:00:00 2001
From: Pablo Palmier
Date: Wed, 18 Oct 2017 11:03:08 +0100
Subject: IVGCVSW-632 CL support for Softmax beta parameter

Change-Id: I21da48d2f40aa900301235eaced54b7eb644b0b2
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/91307
Tested-by: Kaizen
Reviewed-by: Anthony Barbier
---
 src/core/CL/cl_kernels/softmax_layer.cl      | 15 +++++++++++++++
 src/core/CL/kernels/CLSoftmaxLayerKernel.cpp |  8 +++++++-
 src/runtime/CL/functions/CLSoftmaxLayer.cpp  |  4 ++--
 3 files changed, 24 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/core/CL/cl_kernels/softmax_layer.cl b/src/core/CL/cl_kernels/softmax_layer.cl
index 9b24380393..010135eb7b 100644
--- a/src/core/CL/cl_kernels/softmax_layer.cl
+++ b/src/core/CL/cl_kernels/softmax_layer.cl
@@ -29,6 +29,7 @@
 #define MAX_OP(x, y, type, size) MAX_OP_EXPAND(x, y, type, size)
 #define ADD_OP(x, y, type, size) ADD_SAT_OP_EXPAND((x), (y), type, size)
 #define SUB_OP(x, y, type, size) SUB_SAT_OP_EXPAND((x), (y), type, size)
+#define MUL_OP(x, y, type, size) MUL_SAT_OP_EXPAND((x), (y), type, size, FIXED_POINT_POSITION)
 #define DIV_OP(x, y, type, size) DIV_SAT_OP_VEC_EXPAND((x), (y), type, size, FIXED_POINT_POSITION)
 #define EXP_OP(x, type, size) EXP_OP_EXPAND((x), type, size, FIXED_POINT_POSITION)
 
@@ -42,6 +43,7 @@
 #define MAX_OP(x, y, type, size) max((x), (y))
 #define ADD_OP(x, y, type, size) ((x) + (y))
 #define SUB_OP(x, y, type, size) ((x) - (y))
+#define MUL_OP(x, y, type, size) ((x) * (y))
 #define DIV_OP(x, y, type, size) ((x) / (y))
 #define EXP_OP(x, type, size) exp((x))
 
@@ -128,6 +130,7 @@ __kernel void softmax_layer_max(
  * @note Datatype must be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
  * @note Fixed point position must be given as a preprocessor argument using -DFIXED_POINT_POSITION=pos. e.g. DFIXED_POINT_POSITION=4
  * @note In case the input is not multiple of 16 -DNON_MULTIPLE_OF_16 must be passed.
+ * @note Beta can be optionally passed at compile time using -DBETA (if undefined, assume it equals 1.0)
  *
  * @param[in] src_ptr      Pointer to the source tensor slice. Supported data types: QS8/QS16/F16/F32
  * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
@@ -175,6 +178,12 @@ __kernel void softmax_layer_shift_exp_sum(
     Image max = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(max);
     Image sum = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(sum);
 
+#ifdef BETA
+    // Initialize beta
+    VEC_DATA_TYPE(DATA_TYPE, 16)
+    beta = (VEC_DATA_TYPE(DATA_TYPE, 16))BETA;
+#endif /* BETA */
+
     // Load max value of 1D logits vector (row)
     DATA_TYPE max_val = *((__global DATA_TYPE *)offset(&max, 0, 0));
 
@@ -189,6 +198,9 @@ __kernel void softmax_layer_shift_exp_sum(
         VEC_DATA_TYPE(DATA_TYPE, 16)
         data = vload16(0, (__global DATA_TYPE *)offset(&src, i << 4, 0));
         data = SUB_OP(data, max_val, DATA_TYPE, 16);
+#ifdef BETA
+        data = MUL_OP(data, beta, DATA_TYPE, 16);
+#endif /* BETA */
         data = EXP_OP(data, DATA_TYPE, 16);
         vstore16(data, 0, (__global DATA_TYPE *)offset(&dst, i << 4, 0));
         sum1D = ADD_OP(sum1D, data, DATA_TYPE, 16);
@@ -199,6 +211,9 @@
     VEC_DATA_TYPE(DATA_TYPE, 16)
     data = vload16(0, (__global DATA_TYPE *)offset(&src, width4 << 4, 0));
     data = SUB_OP(data, max_val, DATA_TYPE, 16);
+#ifdef BETA
+    data = MUL_OP(data, beta, DATA_TYPE, 16);
+#endif /* BETA */
     data = EXP_OP(data, DATA_TYPE, 16);
     VEC_DATA_TYPE(SELECT_DATA_TYPE, 16)
     widx = CONVERT(((uint16)(width4 << 4) + idx16) < width, VEC_DATA_TYPE(SELECT_DATA_TYPE, 16));
diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
index da3b9423d5..fb066bc645 100644
--- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
@@ -105,10 +105,11 @@ CLLogits1DShiftExpSumKernel::CLLogits1DShiftExpSumKernel()
 {
 }
 
-void CLLogits1DShiftExpSumKernel::configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum)
+void CLLogits1DShiftExpSumKernel::configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
     ARM_COMPUTE_ERROR_ON_NULLPTR(max, sum, output);
+    ARM_COMPUTE_ERROR_ON(beta != 1.0f && input->info()->data_type() != DataType::F32);
 
     // Output auto initialization if not yet initialized
     auto_init_if_empty(*sum->info(), max->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
@@ -145,6 +146,11 @@ void CLLogits1DShiftExpSumKernel::configure(const ICLTensor *input, const ICLTen
         build_opts.emplace("-DNON_MULTIPLE_OF_16");
     }
 
+    if(beta != 1.0f)
+    {
+        build_opts.emplace(("-DBETA=" + float_to_string_with_full_precision(beta)));
+    }
+
     // Create kernel
     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("softmax_layer_shift_exp_sum", build_opts));
 
diff --git a/src/runtime/CL/functions/CLSoftmaxLayer.cpp b/src/runtime/CL/functions/CLSoftmaxLayer.cpp
index 7505a2c974..fa324ee61d 100644
--- a/src/runtime/CL/functions/CLSoftmaxLayer.cpp
+++ b/src/runtime/CL/functions/CLSoftmaxLayer.cpp
@@ -35,7 +35,7 @@ CLSoftmaxLayer::CLSoftmaxLayer(std::shared_ptr<IMemoryManager> memory_manager)
 {
 }
 
-void CLSoftmaxLayer::configure(const ICLTensor *input, ICLTensor *output)
+void CLSoftmaxLayer::configure(const ICLTensor *input, ICLTensor *output, float beta)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
 
@@ -55,7 +55,7 @@ void CLSoftmaxLayer::configure(const ICLTensor *input, ICLTensor *output)
 
     // Configure Kernels
     _max_kernel.configure(input, &_max);
-    _shift_exp_sum_kernel.configure(input, &_max, &_tmp, &_sum);
+    _shift_exp_sum_kernel.configure(input, &_max, &_tmp, &_sum, beta);
     _norm_kernel.configure(&_tmp, &_sum, output);
 
     // Allocate intermediate buffers
--
cgit v1.2.1
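
Editor's note on the semantics (not part of the patch): the shift_exp_sum
kernel already subtracts the per-row maximum for numerical stability, and the
change above multiplies the shifted logits by beta before exponentiation.
Since a constant shift cancels out of the softmax ratio, each row now computes

    softmax(beta * x)_i = exp(beta * (x_i - max_j x_j)) / sum_j exp(beta * (x_j - max_j x_j))

so beta acts as an inverse temperature: beta > 1 sharpens the output
distribution, beta < 1 flattens it, and beta == 1 (no -DBETA define)
reproduces the standard softmax.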
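
A minimal usage sketch of the extended API (editor's illustration, not part of
the patch). It assumes the public CLSoftmaxLayer header, changed outside the
'src' filter shown above, declares the new beta parameter (presumably
defaulted to 1.0f); the tensor shape and beta value are arbitrary:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h"

    using namespace arm_compute;

    int main()
    {
        // Create the default OpenCL context and queue for the CL backend
        CLScheduler::get().default_init();

        // One row of 1000 logits; beta != 1.0f requires F32 input
        // (enforced by the new ARM_COMPUTE_ERROR_ON in the kernel's configure)
        CLTensor input, output;
        input.allocator()->init(TensorInfo(TensorShape(1000U), 1, DataType::F32));
        output.allocator()->init(TensorInfo(TensorShape(1000U), 1, DataType::F32));

        // Configure softmax with a scaling factor beta = 0.5
        CLSoftmaxLayer softmax;
        softmax.configure(&input, &output, 0.5f);

        // Allocate backing CL buffers once configuration is done
        input.allocator()->allocate();
        output.allocator()->allocate();

        // ... fill `input`, e.g. via input.map() / input.unmap() ...

        softmax.run();             // enqueue max, shift/exp/sum and norm kernels
        CLScheduler::get().sync(); // wait for the queued kernels to finish
        return 0;
    }

Passing beta to the kernel as a compile-time define (-DBETA) rather than as a
kernel argument keeps the common beta == 1.0f path free of the extra multiply:
MUL_OP is only compiled into softmax_layer_shift_exp_sum when the define is
present.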