author     Pablo Palmier <Pablo.Palmier@arm.com>        2017-10-18 11:03:08 +0100
committer  Anthony Barbier <anthony.barbier@arm.com>    2018-11-02 16:35:24 +0000
commit     48a60f9f7b0b7b5cf38253b7a2ac576aac43ef78 (patch)
tree       0e0b519f8c21d61a0bb63c45eca92b5057e351cb /src
parent     c9938d26cc12896c4d37eabac185b4fbe3365f93 (diff)
download   ComputeLibrary-48a60f9f7b0b7b5cf38253b7a2ac576aac43ef78.tar.gz
IVGCVSW-632 CL support for Softmax beta parameter
Change-Id: I21da48d2f40aa900301235eaced54b7eb644b0b2
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/91307
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src')
-rw-r--r--  src/core/CL/cl_kernels/softmax_layer.cl       | 15
-rw-r--r--  src/core/CL/kernels/CLSoftmaxLayerKernel.cpp  |  8
-rw-r--r--  src/runtime/CL/functions/CLSoftmaxLayer.cpp   |  4
3 files changed, 24 insertions, 3 deletions
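For context (not part of the patch): the beta parameter scales the max-shifted logits before exponentiation, i.e. y_i = exp(beta * (x_i - max(x))) / sum_j exp(beta * (x_j - max(x))). A minimal scalar C++ sketch of the computation the three kernels implement together, assuming plain float input:

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Scalar reference for softmax with beta (a sketch, not library code).
// Subtracting the row max before exp() keeps it from overflowing.
std::vector<float> softmax_ref(const std::vector<float> &x, float beta)
{
    const float max_val = *std::max_element(x.begin(), x.end());
    std::vector<float> y(x.size());
    float sum = 0.f;
    for(std::size_t i = 0; i < x.size(); ++i)
    {
        y[i] = std::exp(beta * (x[i] - max_val)); // shift, scale, exponentiate
        sum += y[i];
    }
    for(float &v : y)
    {
        v /= sum; // normalization (the final _norm_kernel stage below)
    }
    return y;
}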
diff --git a/src/core/CL/cl_kernels/softmax_layer.cl b/src/core/CL/cl_kernels/softmax_layer.cl
index 9b24380393..010135eb7b 100644
--- a/src/core/CL/cl_kernels/softmax_layer.cl
+++ b/src/core/CL/cl_kernels/softmax_layer.cl
@@ -29,6 +29,7 @@
#define MAX_OP(x, y, type, size) MAX_OP_EXPAND(x, y, type, size)
#define ADD_OP(x, y, type, size) ADD_SAT_OP_EXPAND((x), (y), type, size)
#define SUB_OP(x, y, type, size) SUB_SAT_OP_EXPAND((x), (y), type, size)
+#define MUL_OP(x, y, type, size) MUL_SAT_OP_EXPAND((x), (y), type, size, FIXED_POINT_POSITION)
#define DIV_OP(x, y, type, size) DIV_SAT_OP_VEC_EXPAND((x), (y), type, size, FIXED_POINT_POSITION)
#define EXP_OP(x, type, size) EXP_OP_EXPAND((x), type, size, FIXED_POINT_POSITION)
@@ -42,6 +43,7 @@
#define MAX_OP(x, y, type, size) max((x), (y))
#define ADD_OP(x, y, type, size) ((x) + (y))
#define SUB_OP(x, y, type, size) ((x) - (y))
+#define MUL_OP(x, y, type, size) ((x) * (y))
#define DIV_OP(x, y, type, size) ((x) / (y))
#define EXP_OP(x, type, size) exp((x))
@@ -128,6 +130,7 @@ __kernel void softmax_layer_max(
* @note Datatype must be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
 * @note Fixed point position must be given as a preprocessor argument using -DFIXED_POINT_POSITION=pos. e.g. -DFIXED_POINT_POSITION=4
* @note In case the input is not multiple of 16 -DNON_MULTIPLE_OF_16 must be passed.
+ * @note Beta can optionally be passed at compile time using -DBETA (if undefined, it is assumed to be 1.0)
*
* @param[in] src_ptr Pointer to the source tensor slice. Supported data types: QS8/QS16/F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
@@ -175,6 +178,12 @@ __kernel void softmax_layer_shift_exp_sum(
Image max = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(max);
Image sum = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(sum);
+#ifdef BETA
+ // Initialize beta
+ VEC_DATA_TYPE(DATA_TYPE, 16)
+ beta = (VEC_DATA_TYPE(DATA_TYPE, 16))BETA;
+#endif /* BETA */
+
// Load max value of 1D logits vector (row)
DATA_TYPE max_val = *((__global DATA_TYPE *)offset(&max, 0, 0));
@@ -189,6 +198,9 @@ __kernel void softmax_layer_shift_exp_sum(
VEC_DATA_TYPE(DATA_TYPE, 16)
data = vload16(0, (__global DATA_TYPE *)offset(&src, i << 4, 0));
data = SUB_OP(data, max_val, DATA_TYPE, 16);
+#ifdef BETA
+ data = MUL_OP(data, beta, DATA_TYPE, 16);
+#endif /* BETA */
data = EXP_OP(data, DATA_TYPE, 16);
vstore16(data, 0, (__global DATA_TYPE *)offset(&dst, i << 4, 0));
sum1D = ADD_OP(sum1D, data, DATA_TYPE, 16);
@@ -199,6 +211,9 @@ __kernel void softmax_layer_shift_exp_sum(
VEC_DATA_TYPE(DATA_TYPE, 16)
data = vload16(0, (__global DATA_TYPE *)offset(&src, width4 << 4, 0));
data = SUB_OP(data, max_val, DATA_TYPE, 16);
+#ifdef BETA
+ data = MUL_OP(data, beta, DATA_TYPE, 16);
+#endif /* BETA */
data = EXP_OP(data, DATA_TYPE, 16);
VEC_DATA_TYPE(SELECT_DATA_TYPE, 16)
widx = CONVERT(((uint16)(width4 << 4) + idx16) < width, VEC_DATA_TYPE(SELECT_DATA_TYPE, 16));
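That concludes the kernel changes: because the multiply is guarded by #ifdef BETA, it only exists in builds where the host passed -DBETA, so the common beta == 1 path pays no extra cost per element. A C++ sketch of the same compile-time specialization pattern (illustrative, not library code):

#include <cmath>

// When BETA is not defined at compile time, no multiply is emitted;
// when it is, the compiler sees beta as a literal constant.
inline float shift_exp(float x, float max_val)
{
#ifdef BETA
    return std::exp(BETA * (x - max_val)); // beta baked in at compile time
#else
    return std::exp(x - max_val);          // default path, beta == 1
#endif
}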
diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
index da3b9423d5..fb066bc645 100644
--- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
@@ -105,10 +105,11 @@ CLLogits1DShiftExpSumKernel::CLLogits1DShiftExpSumKernel()
{
}
-void CLLogits1DShiftExpSumKernel::configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum)
+void CLLogits1DShiftExpSumKernel::configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_NULLPTR(max, sum, output);
+ ARM_COMPUTE_ERROR_ON(beta != 1.0f && input->info()->data_type() != DataType::F32);
// Output auto initialization if not yet initialized
auto_init_if_empty(*sum->info(), max->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
@@ -145,6 +146,11 @@ void CLLogits1DShiftExpSumKernel::configure(const ICLTensor *input, const ICLTen
build_opts.emplace("-DNON_MULTIPLE_OF_16");
}
+ if(beta != 1.0f)
+ {
+ build_opts.emplace(("-DBETA=" + float_to_string_with_full_precision(beta)));
+ }
+
// Create kernel
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("softmax_layer_shift_exp_sum", build_opts));
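The -DBETA build option bakes beta into the kernel binary, so the value must survive the round trip through a string. A guess at what a helper like float_to_string_with_full_precision does (an assumption about the utility, not its actual source):

#include <limits>
#include <sstream>
#include <string>

// Assumed behaviour: print with max_digits10 significant digits so the
// float value round-trips exactly through the OpenCL compiler.
std::string float_to_string_full(float v)
{
    std::stringstream ss;
    ss.precision(std::numeric_limits<float>::max_digits10);
    ss << v;
    return ss.str();
}
// The build option then becomes e.g. "-DBETA=" + float_to_string_full(beta).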
diff --git a/src/runtime/CL/functions/CLSoftmaxLayer.cpp b/src/runtime/CL/functions/CLSoftmaxLayer.cpp
index 7505a2c974..fa324ee61d 100644
--- a/src/runtime/CL/functions/CLSoftmaxLayer.cpp
+++ b/src/runtime/CL/functions/CLSoftmaxLayer.cpp
@@ -35,7 +35,7 @@ CLSoftmaxLayer::CLSoftmaxLayer(std::shared_ptr<IMemoryManager> memory_manager)
{
}
-void CLSoftmaxLayer::configure(const ICLTensor *input, ICLTensor *output)
+void CLSoftmaxLayer::configure(const ICLTensor *input, ICLTensor *output, float beta)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
@@ -55,7 +55,7 @@ void CLSoftmaxLayer::configure(const ICLTensor *input, ICLTensor *output)
// Configure Kernels
_max_kernel.configure(input, &_max);
- _shift_exp_sum_kernel.configure(input, &_max, &_tmp, &_sum);
+ _shift_exp_sum_kernel.configure(input, &_max, &_tmp, &_sum, beta);
_norm_kernel.configure(&_tmp, &_sum, output);
// Allocate intermediate buffers
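With the function-level change above, a caller can pass beta straight through the public API. A minimal usage sketch (tensor shape, beta value, and init boilerplate are illustrative, not from the patch):

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h"

using namespace arm_compute;

int main()
{
    CLScheduler::get().default_init(); // set up CL context and queue

    CLTensor input, output;
    input.allocator()->init(TensorInfo(TensorShape(128U), 1, DataType::F32));
    output.allocator()->init(TensorInfo(TensorShape(128U), 1, DataType::F32));

    CLSoftmaxLayer softmax;
    softmax.configure(&input, &output, 0.5f); // the new beta argument

    input.allocator()->allocate();
    output.allocator()->allocate();

    // ... fill input ...
    softmax.run();
    CLScheduler::get().sync();
    return 0;
}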