 arm_compute/core/CL/CLKernelLibrary.h              | 35
 arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h |  2
 arm_compute/core/Helpers.inl                       |  2
 arm_compute/core/Utils.h                           |  2
 arm_compute/runtime/CL/functions/CLSoftmaxLayer.h  |  4
 src/core/CL/CLKernelLibrary.cpp                    | 28
 src/core/CL/kernels/CLActivationLayerKernel.cpp    |  6
 src/core/CL/kernels/CLSoftmaxLayerKernel.cpp       | 34
 8 files changed, 83 insertions(+), 30 deletions(-)
diff --git a/arm_compute/core/CL/CLKernelLibrary.h b/arm_compute/core/CL/CLKernelLibrary.h
index fc131cdcfe..d433a740ac 100644
--- a/arm_compute/core/CL/CLKernelLibrary.h
+++ b/arm_compute/core/CL/CLKernelLibrary.h
@@ -33,6 +33,41 @@
namespace arm_compute
{
+/** Build options */
+class CLBuildOptions
+{
+ using StringSet = std::set<std::string>;
+
+public:
+ /** Default constructor. */
+ CLBuildOptions();
+ /** Adds option to the existing build option list
+ *
+ * @param[in] option Option to add
+ */
+ void add_option(std::string option);
+ /** Adds option if a given condition is true
+ *
+ * @param[in] cond Condition to check
+ * @param[in] option Option to add if condition is true
+ */
+ void add_option_if(bool cond, std::string option);
+ /** Adds first option if condition is true else the second one
+ *
+ * @param[in] cond Condition to check
+ * @param[in] option_true Option to add if condition is true
+ * @param[in] option_false Option to add if condition is false
+ */
+ void add_option_if_else(bool cond, std::string option_true, std::string option_false);
+ /** Gets the current options list set
+ *
+ * @return Build options set
+ */
+ StringSet options() const;
+
+private:
+ StringSet _build_opts; /**< Build options set */
+};
/** Program class */
class Program
{
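
For context, a minimal usage sketch of the new CLBuildOptions helper (illustrative only; the option strings and the use_f16/unroll flags are assumptions, not part of this patch):

    // Collect OpenCL build options conditionally, then hand the set to
    // CLKernelLibrary::create_kernel() as shown later in this patch.
    CLBuildOptions build_opts;
    build_opts.add_option("-DDATA_TYPE=float");                         // unconditionally added
    build_opts.add_option_if(use_f16, "-DUSE_F16");                     // added only when use_f16 is true
    build_opts.add_option_if_else(unroll, "-DUNROLL=4", "-DUNROLL=1");  // exactly one of the two is added
    std::set<std::string> opts = build_opts.options();                  // returns a copy of the accumulated set
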
diff --git a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
index 60d555019d..1e079cbb06 100644
--- a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
@@ -60,7 +60,7 @@ public:
*
* @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32
* @param[in] max Max values tensor. Data types supported: same as @p input
- * @param[in] beta A scaling factor for the exponent. QS8/QS16/F16 only support a beta value of 1.
+ * @param[in] beta A scaling factor for the exponent.
* @param[out] output Destination tensor. Data types supported: same as @p input
* @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input
*/
diff --git a/arm_compute/core/Helpers.inl b/arm_compute/core/Helpers.inl
index 1a27684c9c..acdb9567db 100644
--- a/arm_compute/core/Helpers.inl
+++ b/arm_compute/core/Helpers.inl
@@ -263,7 +263,7 @@ inline bool set_fixed_point_position_if_zero(ITensorInfo &info, int fixed_point_
inline bool set_quantization_info_if_empty(ITensorInfo &info, QuantizationInfo quantization_info)
{
- if(info.quantization_info().empty() && (is_data_type_assymetric(info.data_type())))
+ if(info.quantization_info().empty() && (is_data_type_quantized_assymetric(info.data_type())))
{
info.set_quantization_info(quantization_info);
return true;
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index 8e15a0a988..a77df030e6 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -755,7 +755,7 @@ inline bool is_data_type_fixed_point(DataType dt)
*
* @return True if data type is of asymmetric quantized type, else false.
*/
-inline bool is_data_type_assymetric(DataType dt)
+inline bool is_data_type_quantized_assymetric(DataType dt)
{
switch(dt)
{
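
For context, a sketch of the kind of check the renamed predicate performs (illustrative only; the exact set of data types handled is not shown in this hunk, and DataType::QASYMM8 is an assumed example):

    // Returns true only for asymmetric quantized data types.
    inline bool is_asymmetric_quantized_sketch(DataType dt)
    {
        switch(dt)
        {
            case DataType::QASYMM8: // assumed asymmetric quantized 8-bit type
                return true;
            default:
                return false;
        }
    }
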
diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
index e87deb6d15..d84297e9a1 100644
--- a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
@@ -39,7 +39,7 @@ class ICLTensor;
/** Basic function to compute a SoftmaxLayer.
*
* Softmax is calculated by :
- * @f[ out = exp(x - max(x)) / sum(exp(x - max(x))) @f]
+ * @f[ out = exp((x - max(x)) * beta) / sum(exp((x - max(x)) * beta)) @f]
*
* This function runs the following kernels:
* -# @ref CLLogits1DMaxKernel
@@ -54,7 +54,7 @@ public:
/** Set the input and output tensors.
*
* @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32
- * @param[in] beta A scaling factor for the exponent. QS8/QS16/F16 only support a beta value of 1.
+ * @param[in] beta A scaling factor for the exponent.
* @param[out] output Destination tensor. Data types supported: same as @p input
*/
void configure(const ICLTensor *input, ICLTensor *output, float beta = 1.0f);
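
For reference, a scalar sketch of the softmax-with-beta computation documented above (plain C++ on std::vector, not the OpenCL implementation):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // out[i] = exp((x[i] - max(x)) * beta) / sum_j exp((x[j] - max(x)) * beta)
    std::vector<float> softmax(const std::vector<float> &x, float beta = 1.0f)
    {
        const float max_val = *std::max_element(x.begin(), x.end());
        std::vector<float> out(x.size());
        float sum = 0.f;
        for(std::size_t i = 0; i < x.size(); ++i)
        {
            out[i] = std::exp((x[i] - max_val) * beta); // subtracting max(x) keeps exp() numerically stable
            sum += out[i];
        }
        for(float &v : out)
        {
            v /= sum; // normalise so the outputs sum to 1
        }
        return out;
    }
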
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 9e2b5bd600..f9142f4f40 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -35,6 +35,34 @@
using namespace arm_compute;
+CLBuildOptions::CLBuildOptions()
+ : _build_opts()
+{
+}
+
+void CLBuildOptions::add_option(std::string option)
+{
+ _build_opts.emplace(std::move(option));
+}
+
+void CLBuildOptions::add_option_if(bool cond, std::string option)
+{
+ if(cond)
+ {
+ add_option(std::move(option));
+ }
+}
+
+void CLBuildOptions::add_option_if_else(bool cond, std::string option_true, std::string option_false)
+{
+ (cond) ? add_option(std::move(option_true)) : add_option(std::move(option_false));
+}
+
+CLBuildOptions::StringSet CLBuildOptions::options() const
+{
+ return _build_opts;
+}
+
Program::Program()
: _context(), _device(), _is_binary(false), _name(), _source(), _binary()
{
diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp
index 42f577cb7d..ca6760d8c2 100644
--- a/src/core/CL/kernels/CLActivationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp
@@ -70,7 +70,7 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
}
const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size();
- DataType dt = input->info()->data_type();
+ const DataType dt = input->info()->data_type();
const int fixed_point_position = input->info()->fixed_point_position();
float a_const = act_info.a();
float b_const = act_info.b();
@@ -104,7 +104,7 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
build_opts.emplace(("-DB_VAL=" + support::cpp11::to_string(b_const_int)));
// Set scale and offset of the input and output
- if(is_data_type_assymetric(dt))
+ if(is_data_type_quantized_assymetric(dt))
{
float s1 = input->info()->quantization_info().scale;
int o1 = input->info()->quantization_info().offset;
@@ -130,7 +130,7 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
}
// Create kernel
- std::string kernel_name = is_data_type_assymetric(dt) ? std::string("activation_layer_qa8") : std::string("activation_layer");
+ std::string kernel_name = is_data_type_quantized_assymetric(dt) ? std::string("activation_layer_qa8") : std::string("activation_layer");
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
// Make sure _kernel is initialized before calling the parent's configure
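
For context, the scale and offset read above are the parameters of the asymmetric quantization mapping real = scale * (quantized - offset). A scalar sketch of that mapping (illustrative only; the kernel performs the equivalent arithmetic in OpenCL C):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // real = scale * (q - offset)
    float dequantize(std::uint8_t q, float scale, int offset)
    {
        return scale * (static_cast<int>(q) - offset);
    }

    // q = round(real / scale) + offset, clamped to the 8-bit range
    std::uint8_t quantize(float value, float scale, int offset)
    {
        const int q = static_cast<int>(std::lround(value / scale)) + offset;
        return static_cast<std::uint8_t>(std::min(255, std::max(0, q)));
    }
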
diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
index fb066bc645..1b89161e24 100644
--- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
@@ -109,7 +109,6 @@ void CLLogits1DShiftExpSumKernel::configure(const ICLTensor *input, const ICLTen
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_NULLPTR(max, sum, output);
- ARM_COMPUTE_ERROR_ON(beta != 1.0f && input->info()->data_type() != DataType::F32);
// Output auto initialization if not yet initialized
auto_init_if_empty(*sum->info(), max->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
@@ -125,34 +124,25 @@ void CLLogits1DShiftExpSumKernel::configure(const ICLTensor *input, const ICLTen
_output = output;
_sum = sum;
+ const DataType dt = input->info()->data_type();
+ auto beta_int = static_cast<int>(lround(beta * (1 << input->info()->fixed_point_position())));
+
// The kernel loops over all elements in steps of 16
const unsigned int num_elems_processed_per_iteration = ceil_to_multiple(input->info()->dimension(0), 16);
// Set build options
- std::set<std::string> build_opts;
- build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
- if(is_data_type_fixed_point(input->info()->data_type()))
- {
- build_opts.emplace(("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position())));
- }
- else if(input->info()->data_type() == DataType::F16)
- {
- build_opts.emplace("-DUSE_F16");
- }
-
+ CLBuildOptions build_opts;
+ build_opts.add_option(std::string("-DDATA_TYPE=" + get_cl_type_from_data_type(dt)));
+ build_opts.add_option_if(is_data_type_fixed_point(dt),
+ std::string("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position())));
+ build_opts.add_option_if(dt == DataType::F16, std::string("-DUSE_F16"));
// Tell the kernel that the width is not a multiple of 16
- if((input->info()->dimension(0) % max_cl_vector_width) != 0)
- {
- build_opts.emplace("-DNON_MULTIPLE_OF_16");
- }
-
- if(beta != 1.0f)
- {
- build_opts.emplace(("-DBETA=" + float_to_string_with_full_precision(beta)));
- }
+ build_opts.add_option_if((input->info()->dimension(0) % max_cl_vector_width) != 0, std::string("-DNON_MULTIPLE_OF_16"));
+ build_opts.add_option_if(is_data_type_fixed_point(dt) && (beta != 1.0f), std::string("-DBETA=" + support::cpp11::to_string(beta_int)));
+ build_opts.add_option_if(is_data_type_float(dt) && (beta != 1.0f), std::string("-DBETA=" + float_to_string_with_full_precision(beta)));
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("softmax_layer_shift_exp_sum", build_opts));
+ _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("softmax_layer_shift_exp_sum", build_opts.options()));
// Set fixed arguments
unsigned int idx = 4 * num_arguments_per_3D_tensor(); //Skip the input and output parameters
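
For context, a sketch of the conversion used for beta_int above: beta is scaled by 2^fixed_point_position and rounded, so fixed-point kernels receive -DBETA as an integer while float kernels receive the full-precision string (illustrative only):

    #include <cmath>

    // Scale a float by 2^frac_bits and round to the nearest integer.
    int to_fixed_point(float value, int frac_bits)
    {
        return static_cast<int>(std::lround(value * (1 << frac_bits)));
    }

    // Example: beta = 1.5f with fixed_point_position = 5 -> 1.5 * 32 = 48.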