From 388d3ec5289d5aa7415d6599137a74c4e5eaeeaf Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Thu, 2 Nov 2017 12:17:56 +0000
Subject: COMPMID-556: Support beta for all softmax data types.

Change-Id: I4c0ca033dc53829fb7ac3dd7c7469d143be74e73
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/94251
Tested-by: Kaizen
Reviewed-by: Anthony Barbier
---
 src/core/CL/CLKernelLibrary.cpp                 | 28 ++++++++++++++++++++
 src/core/CL/kernels/CLActivationLayerKernel.cpp |  6 ++---
 src/core/CL/kernels/CLSoftmaxLayerKernel.cpp    | 34 +++++++++----------------
 3 files changed, 43 insertions(+), 25 deletions(-)

(limited to 'src/core')

diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 9e2b5bd600..f9142f4f40 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -35,6 +35,34 @@
 
 using namespace arm_compute;
 
+CLBuildOptions::CLBuildOptions()
+    : _build_opts()
+{
+}
+
+void CLBuildOptions::add_option(std::string option)
+{
+    _build_opts.emplace(std::move(option));
+}
+
+void CLBuildOptions::add_option_if(bool cond, std::string option)
+{
+    if(cond)
+    {
+        add_option(std::move(option));
+    }
+}
+
+void CLBuildOptions::add_option_if_else(bool cond, std::string option_true, std::string option_false)
+{
+    (cond) ? add_option(std::move(option_true)) : add_option(std::move(option_false));
+}
+
+CLBuildOptions::StringSet CLBuildOptions::options() const
+{
+    return _build_opts;
+}
+
 Program::Program()
     : _context(), _device(), _is_binary(false), _name(), _source(), _binary()
 {
diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp
index 42f577cb7d..ca6760d8c2 100644
--- a/src/core/CL/kernels/CLActivationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp
@@ -70,7 +70,7 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
     }
 
     const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size();
-    DataType dt = input->info()->data_type();
+    const DataType dt = input->info()->data_type();
     const int fixed_point_position = input->info()->fixed_point_position();
     float a_const = act_info.a();
     float b_const = act_info.b();
@@ -104,7 +104,7 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
         build_opts.emplace(("-DB_VAL=" + support::cpp11::to_string(b_const_int)));
 
         // Set scale and offset of the input and output
-        if(is_data_type_assymetric(dt))
+        if(is_data_type_quantized_assymetric(dt))
         {
             float s1 = input->info()->quantization_info().scale;
             int o1 = input->info()->quantization_info().offset;
@@ -130,7 +130,7 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
     }
 
     // Create kernel
-    std::string kernel_name = is_data_type_assymetric(dt) ? std::string("activation_layer_qa8") : std::string("activation_layer");
+    std::string kernel_name = is_data_type_quantized_assymetric(dt) ? std::string("activation_layer_qa8") : std::string("activation_layer");
     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
 
     // Make sure _kernel is initialized before calling the parent's configure
diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
index fb066bc645..1b89161e24 100644
--- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
@@ -109,7 +109,6 @@ void CLLogits1DShiftExpSumKernel::configure(const ICLTensor *input, const ICLTen
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
     ARM_COMPUTE_ERROR_ON_NULLPTR(max, sum, output);
-    ARM_COMPUTE_ERROR_ON(beta != 1.0f && input->info()->data_type() != DataType::F32);
 
     // Output auto initialization if not yet initialized
     auto_init_if_empty(*sum->info(), max->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
@@ -125,34 +124,25 @@ void CLLogits1DShiftExpSumKernel::configure(const ICLTensor *input, const ICLTen
     _output = output;
     _sum = sum;
 
+    const DataType dt = input->info()->data_type();
+    auto beta_int = static_cast<int>(lround(beta * (1 << input->info()->fixed_point_position())));
+
     // The kernel loops over all elements in steps of 16
     const unsigned int num_elems_processed_per_iteration = ceil_to_multiple(input->info()->dimension(0), 16);
 
     // Set build options
-    std::set<std::string> build_opts;
-    build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
-    if(is_data_type_fixed_point(input->info()->data_type()))
-    {
-        build_opts.emplace(("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position())));
-    }
-    else if(input->info()->data_type() == DataType::F16)
-    {
-        build_opts.emplace("-DUSE_F16");
-    }
-
+    CLBuildOptions build_opts;
+    build_opts.add_option(std::string("-DDATA_TYPE=" + get_cl_type_from_data_type(dt)));
+    build_opts.add_option_if(is_data_type_fixed_point(dt),
+                             std::string("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position())));
+    build_opts.add_option_if(dt == DataType::F16, std::string("-DUSE_F16"));
     // Tell the kernel that the width is not a multiple of 16
-    if((input->info()->dimension(0) % max_cl_vector_width) != 0)
-    {
-        build_opts.emplace("-DNON_MULTIPLE_OF_16");
-    }
-
-    if(beta != 1.0f)
-    {
-        build_opts.emplace(("-DBETA=" + float_to_string_with_full_precision(beta)));
-    }
+    build_opts.add_option_if((input->info()->dimension(0) % max_cl_vector_width) != 0, std::string("-DNON_MULTIPLE_OF_16"));
+    build_opts.add_option_if(is_data_type_fixed_point(dt) && (beta != 1.0f), std::string("-DBETA=" + support::cpp11::to_string(beta_int)));
+    build_opts.add_option_if(is_data_type_float(dt) && (beta != 1.0f), std::string("-DBETA=" + float_to_string_with_full_precision(beta)));
 
     // Create kernel
-    _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("softmax_layer_shift_exp_sum", build_opts));
+    _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("softmax_layer_shift_exp_sum", build_opts.options()));
 
     // Set fixed arguments
     unsigned int idx = 4 * num_arguments_per_3D_tensor(); //Skip the input and output parameters