 arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h |  3 ++-
 arm_compute/runtime/CL/functions/CLSoftmaxLayer.h  |  3 ++-
 src/core/CL/cl_kernels/softmax_layer.cl            | 15 +++++++++++++++
 src/core/CL/kernels/CLSoftmaxLayerKernel.cpp       |  8 +++++++-
 src/runtime/CL/functions/CLSoftmaxLayer.cpp        |  4 ++--
 5 files changed, 28 insertions(+), 5 deletions(-)
diff --git a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
index 1e641b48d8..60d555019d 100644
--- a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
@@ -60,10 +60,11 @@ public:
*
* @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32
* @param[in] max Max values tensor. Data types supported: same as @p input
+ * @param[in] beta A scaling factor for the exponent. QS8/QS16/F16 only support a beta value of 1.
* @param[out] output Destination tensor. Data types supported: same as @p input
* @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input
*/
- void configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum);
+ void configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta = 1.0f);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
index 70a265c1ae..e87deb6d15 100644
--- a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
@@ -54,9 +54,10 @@ public:
/** Set the input and output tensors.
*
* @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32
+ * @param[in] beta A scaling factor for the exponent. QS8/QS16/F16 only support a beta value of 1.
* @param[out] output Destination tensor. Data types supported: same as @p input
*/
- void configure(const ICLTensor *input, ICLTensor *output);
+ void configure(const ICLTensor *input, ICLTensor *output, float beta = 1.0f);
// Inherited methods overridden:
void run() override;
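
The default argument keeps existing callers source-compatible; only code that wants a non-default beta has to change. Below is a minimal caller-side sketch (the tensor shape, beta value, and variable names are hypothetical, not part of this patch); note that per the documentation above, beta != 1.0f is only valid for F32 input:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init();

        // Hypothetical 1D logits row; beta != 1.0f requires F32 data.
        CLTensor input, output;
        input.allocator()->init(TensorInfo(TensorShape(1000U), 1, DataType::F32));
        output.allocator()->init(TensorInfo(TensorShape(1000U), 1, DataType::F32));

        CLSoftmaxLayer softmax;
        softmax.configure(&input, &output, 0.5f); // computes softmax(0.5f * x)

        // Allocate after configure, per the library's usual pattern.
        input.allocator()->allocate();
        output.allocator()->allocate();

        // ... fill input ...
        softmax.run();
        CLScheduler::get().sync();
        return 0;
    }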
diff --git a/src/core/CL/cl_kernels/softmax_layer.cl b/src/core/CL/cl_kernels/softmax_layer.cl
index 9b24380393..010135eb7b 100644
--- a/src/core/CL/cl_kernels/softmax_layer.cl
+++ b/src/core/CL/cl_kernels/softmax_layer.cl
@@ -29,6 +29,7 @@
#define MAX_OP(x, y, type, size) MAX_OP_EXPAND(x, y, type, size)
#define ADD_OP(x, y, type, size) ADD_SAT_OP_EXPAND((x), (y), type, size)
#define SUB_OP(x, y, type, size) SUB_SAT_OP_EXPAND((x), (y), type, size)
+#define MUL_OP(x, y, type, size) MUL_SAT_OP_EXPAND((x), (y), type, size, FIXED_POINT_POSITION)
#define DIV_OP(x, y, type, size) DIV_SAT_OP_VEC_EXPAND((x), (y), type, size, FIXED_POINT_POSITION)
#define EXP_OP(x, type, size) EXP_OP_EXPAND((x), type, size, FIXED_POINT_POSITION)
@@ -42,6 +43,7 @@
#define MAX_OP(x, y, type, size) max((x), (y))
#define ADD_OP(x, y, type, size) ((x) + (y))
#define SUB_OP(x, y, type, size) ((x) - (y))
+#define MUL_OP(x, y, type, size) ((x) * (y))
#define DIV_OP(x, y, type, size) ((x) / (y))
#define EXP_OP(x, type, size) exp((x))
@@ -128,6 +130,7 @@ __kernel void softmax_layer_max(
* @note Datatype must be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
* @note Fixed point position must be given as a preprocessor argument using -DFIXED_POINT_POSITION=pos. e.g. -DFIXED_POINT_POSITION=4
* @note In case the input is not multiple of 16 -DNON_MULTIPLE_OF_16 must be passed.
+ * @note Beta can optionally be passed at compile time using -DBETA (if undefined, it is assumed to be 1.0)
*
* @param[in] src_ptr Pointer to the source tensor slice. Supported data types: QS8/QS16/F16/F32
* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
@@ -175,6 +178,12 @@ __kernel void softmax_layer_shift_exp_sum(
Image max = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(max);
Image sum = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(sum);
+#ifdef BETA
+ // Initialize beta
+ VEC_DATA_TYPE(DATA_TYPE, 16)
+ beta = (VEC_DATA_TYPE(DATA_TYPE, 16))BETA;
+#endif /* BETA */
+
// Load max value of 1D logits vector (row)
DATA_TYPE max_val = *((__global DATA_TYPE *)offset(&max, 0, 0));
@@ -189,6 +198,9 @@ __kernel void softmax_layer_shift_exp_sum(
VEC_DATA_TYPE(DATA_TYPE, 16)
data = vload16(0, (__global DATA_TYPE *)offset(&src, i << 4, 0));
data = SUB_OP(data, max_val, DATA_TYPE, 16);
+#ifdef BETA
+ data = MUL_OP(data, beta, DATA_TYPE, 16);
+#endif /* BETA */
data = EXP_OP(data, DATA_TYPE, 16);
vstore16(data, 0, (__global DATA_TYPE *)offset(&dst, i << 4, 0));
sum1D = ADD_OP(sum1D, data, DATA_TYPE, 16);
@@ -199,6 +211,9 @@ __kernel void softmax_layer_shift_exp_sum(
VEC_DATA_TYPE(DATA_TYPE, 16)
data = vload16(0, (__global DATA_TYPE *)offset(&src, width4 << 4, 0));
data = SUB_OP(data, max_val, DATA_TYPE, 16);
+#ifdef BETA
+ data = MUL_OP(data, beta, DATA_TYPE, 16);
+#endif /* BETA */
data = EXP_OP(data, DATA_TYPE, 16);
VEC_DATA_TYPE(SELECT_DATA_TYPE, 16)
widx = CONVERT(((uint16)(width4 << 4) + idx16) < width, VEC_DATA_TYPE(SELECT_DATA_TYPE, 16));
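
Mathematically, the kernel now computes a temperature-scaled (beta-scaled) softmax. Applying the scale after the max subtraction is safe because the shift by the row maximum m cancels out of the quotient; the identity the shift-exp-sum stage relies on is:

    \[
      \operatorname{softmax}_{\beta}(x)_i
        = \frac{e^{\beta x_i}}{\sum_j e^{\beta x_j}}
        = \frac{e^{\beta (x_i - m)}}{\sum_j e^{\beta (x_j - m)}},
      \qquad m = \max_k x_k,
    \]

which follows by multiplying numerator and denominator by e^{-beta*m}. For beta > 0 the shift keeps every exponent non-positive, so the exponentials cannot overflow.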
diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
index da3b9423d5..fb066bc645 100644
--- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
@@ -105,10 +105,11 @@ CLLogits1DShiftExpSumKernel::CLLogits1DShiftExpSumKernel()
{
}
-void CLLogits1DShiftExpSumKernel::configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum)
+void CLLogits1DShiftExpSumKernel::configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_NULLPTR(max, sum, output);
+ ARM_COMPUTE_ERROR_ON(beta != 1.0f && input->info()->data_type() != DataType::F32);
// Output auto initialization if not yet initialized
auto_init_if_empty(*sum->info(), max->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
@@ -145,6 +146,11 @@ void CLLogits1DShiftExpSumKernel::configure(const ICLTensor *input, const ICLTen
build_opts.emplace("-DNON_MULTIPLE_OF_16");
}
+ if(beta != 1.0f)
+ {
+ build_opts.emplace(("-DBETA=" + float_to_string_with_full_precision(beta)));
+ }
+
// Create kernel
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("softmax_layer_shift_exp_sum", build_opts));
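
Passing beta as a -DBETA build option bakes it into the kernel source at compile time, so the default beta == 1.0f path carries no extra multiply at all: the #ifdef BETA blocks are preprocessed away. float_to_string_with_full_precision is the library's own helper; purely as an illustration of the round-trip requirement it has to meet (not the actual implementation), a stand-in could look like this:

    #include <iomanip>
    #include <limits>
    #include <sstream>
    #include <string>

    // Illustrative stand-in only: serialize beta with enough digits
    // (max_digits10) that the OpenCL compiler parses back the exact value.
    std::string beta_build_option(float beta)
    {
        std::stringstream ss;
        ss << "-DBETA=" << std::setprecision(std::numeric_limits<float>::max_digits10) << beta;
        return ss.str();
    }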
diff --git a/src/runtime/CL/functions/CLSoftmaxLayer.cpp b/src/runtime/CL/functions/CLSoftmaxLayer.cpp
index 7505a2c974..fa324ee61d 100644
--- a/src/runtime/CL/functions/CLSoftmaxLayer.cpp
+++ b/src/runtime/CL/functions/CLSoftmaxLayer.cpp
@@ -35,7 +35,7 @@ CLSoftmaxLayer::CLSoftmaxLayer(std::shared_ptr<IMemoryManager> memory_manager)
{
}
-void CLSoftmaxLayer::configure(const ICLTensor *input, ICLTensor *output)
+void CLSoftmaxLayer::configure(const ICLTensor *input, ICLTensor *output, float beta)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
@@ -55,7 +55,7 @@ void CLSoftmaxLayer::configure(const ICLTensor *input, ICLTensor *output)
// Configure Kernels
_max_kernel.configure(input, &_max);
- _shift_exp_sum_kernel.configure(input, &_max, &_tmp, &_sum);
+ _shift_exp_sum_kernel.configure(input, &_max, &_tmp, &_sum, beta);
_norm_kernel.configure(&_tmp, &_sum, output);
// Allocate intermediate buffers
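
End to end, the three kernels configured above implement the classic shifted softmax in three passes: a row-max reduction, a shift-scale-exponentiate pass that also accumulates the row sum, and a final normalization. A plain C++ scalar reference of the same computation, useful for checking the OpenCL path on a single row (the function name is made up for illustration):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Stage 1: row max; stage 2: exp(beta * (x - max)) plus its sum;
    // stage 3: divide by the sum. Assumes x is non-empty.
    std::vector<float> softmax_ref(const std::vector<float> &x, float beta)
    {
        const float m = *std::max_element(x.begin(), x.end());
        std::vector<float> y(x.size());
        float sum = 0.f;
        for(std::size_t i = 0; i < x.size(); ++i)
        {
            y[i] = std::exp(beta * (x[i] - m));
            sum += y[i];
        }
        for(float &v : y)
        {
            v /= sum;
        }
        return y;
    }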