author    Michel Iwaniec <michel.iwaniec@arm.com>    2017-10-12 14:14:15 +0100
committer Anthony Barbier <anthony.barbier@arm.com>  2018-11-02 16:35:24 +0000
commit    0063380ca6e43d04722707c707e610b59e1f8dde (patch)
tree      c60f6e5b380851cefd5aa994b75d3e4ab3484055 /src/core
parent    27c9efb922832e5e6785a492e84a46934d9a47f8 (diff)
IVGCVSW-619: Support for Cl u8 bounded Relu
Change-Id: I3c39ecbd36f06d5376c35ed4eb38dd73533ef97e
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/93686
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core')
-rw-r--r--  src/core/CL/CLHelpers.cpp                        |   3
-rw-r--r--  src/core/CL/CLKernelLibrary.cpp                  |   5
-rw-r--r--  src/core/CL/cl_kernels/activation_layer_qa8.cl   | 100
-rw-r--r--  src/core/CL/kernels/CLActivationLayerKernel.cpp  |  48
-rw-r--r--  src/core/TensorInfo.cpp                          |  10
5 files changed, 161 insertions, 5 deletions
diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp
index 821fb4c051..09ec329e4c 100644
--- a/src/core/CL/CLHelpers.cpp
+++ b/src/core/CL/CLHelpers.cpp
@@ -72,6 +72,8 @@ std::string get_cl_type_from_data_type(const DataType &dt)
return "qs8";
case DataType::S8:
return "char";
+ case DataType::QASYMM8:
+ return "uchar";
case DataType::U16:
return "ushort";
case DataType::S16:
@@ -105,6 +107,7 @@ std::string get_data_size_from_data_type(const DataType &dt)
case DataType::U8:
case DataType::QS8:
case DataType::S8:
+ case DataType::QASYMM8:
return "8";
case DataType::U16:
case DataType::S16:
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 6e5e802538..62ef2593e7 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -107,6 +107,7 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "accumulate_squared", "accumulate.cl" },
{ "accumulate_weighted", "accumulate.cl" },
{ "activation_layer", "activation_layer.cl" },
+ { "activation_layer_qa8", "activation_layer_qa8.cl" },
{ "arithmetic_add", "arithmetic_op.cl" },
{ "arithmetic_sub", "arithmetic_op.cl" },
{ "bitwise_or", "bitwise_op.cl" },
@@ -306,6 +307,10 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
#include "./cl_kernels/activation_layer.clembed"
},
{
+ "activation_layer_qa8.cl",
+#include "./cl_kernels/activation_layer_qa8.clembed"
+ },
+ {
"arithmetic_op.cl",
#include "./cl_kernels/arithmetic_op.clembed"
},
diff --git a/src/core/CL/cl_kernels/activation_layer_qa8.cl b/src/core/CL/cl_kernels/activation_layer_qa8.cl
new file mode 100644
index 0000000000..4d9bf0efad
--- /dev/null
+++ b/src/core/CL/cl_kernels/activation_layer_qa8.cl
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "helpers.h"
+
+#define TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+
+// Bounded RELU Activation
+inline TYPE brelu_op(TYPE x)
+{
+ return min((TYPE)A_VAL, max(x, (TYPE)0));
+}
+// Lower Upper Bounded RELU Activation
+inline TYPE lu_brelu_op(TYPE x)
+{
+ return min(max(x, (TYPE)B_VAL), (TYPE)A_VAL);
+}
+
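+// Note: two expansion levels are needed so that the ACT argument is expanded
+// to its value before ## pasting forms the <act>_op function name.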
+#define ACTIVATION_OP2(op, x) op##_op(x)
+#define ACTIVATION_OP(op, x) ACTIVATION_OP2(op, x)
+
+/** This performs an activation function on QASYMM8 inputs.
+ *
+ * @note In order to perform the activation function "in-place", the pre-processor -DIN_PLACE must be passed at compile time
+ *
+ * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=uchar
+ * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
+ * @note Activation function should be given as a preprocessor argument using -DACT=name. e.g. -DACT=lu_brelu
+ * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively.
+ * @note Quantization scales of the input/output tensors are passed in with -DS1_VAL= and -DS2_VAL= respectively.
+ * @note Quantization offsets of the input/output tensors are passed in with -DO1_VAL= and -DO2_VAL= respectively.
+ *
+ * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8
+ * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
+ * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
+ * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
+ * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
+ * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr
+ * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
+ * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
+ * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes)
+ * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
+ */
+__kernel void activation_layer_qa8(
+ TENSOR3D_DECLARATION(input)
+#ifndef IN_PLACE
+ ,
+ TENSOR3D_DECLARATION(output)
+#endif /* not IN_PLACE */
+)
+{
+ // Get pixels pointer
+ Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
+#ifdef IN_PLACE
+ Tensor3D output = input;
+#else /* IN_PLACE */
+ Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
+#endif /* IN_PLACE */
+
+ // Load data
+ TYPE data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr);
+
+ // Perform activation
+ data = ACTIVATION_OP(ACT, data);
+
+ // Requantize to the output quantization space
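+ // Note: the convert_float16/convert_uchar16 calls below assume VEC_SIZE == 16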
+ float16 fdata = convert_float16(data);
+ fdata = round((fdata - O1_VAL) * (S1_VAL / S2_VAL) + O2_VAL);
+ uchar16 qdata = convert_uchar16(fdata);
+
+ // Store result
+ VSTORE(VEC_SIZE)
+ (qdata, 0, (__global DATA_TYPE *)output.ptr);
+}
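For reference, the requantization step above maps a value from the input quantization space (scale S1_VAL, offset O1_VAL) into the output space (S2_VAL, O2_VAL). A minimal C++ sketch of the same per-element formula, with illustrative scale/offset values (the clamp is added here for safety; the kernel itself relies on the result fitting in uchar):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <cstdio>

    // Mirror of: round((fdata - O1_VAL) * (S1_VAL / S2_VAL) + O2_VAL)
    std::uint8_t requantize(std::uint8_t q, float s1, float o1, float s2, float o2)
    {
        const float r = std::round((static_cast<float>(q) - o1) * (s1 / s2) + o2);
        return static_cast<std::uint8_t>(std::min(255.0f, std::max(0.0f, r)));
    }

    int main()
    {
        // Input space: scale 0.5, offset 10. Output space: scale 0.25, offset 20.
        // q = 30 represents (30 - 10) * 0.5 = 10.0, which requantizes to
        // 10.0 / 0.25 + 20 = 60.
        std::printf("%d\n", static_cast<int>(requantize(30, 0.5f, 10.0f, 0.25f, 20.0f))); // prints 60
    }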
diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp
index 18202c1c5b..bed407a3d1 100644
--- a/src/core/CL/kernels/CLActivationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp
@@ -34,6 +34,9 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
#include "support/ToolchainSupport.h"
#include <cmath>
@@ -47,7 +50,14 @@ CLActivationLayerKernel::CLActivationLayerKernel()
void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32, DataType::QASYMM8);
+
+ // For QASYMM8, only lower/upper bounded relu is supported
+ if(input->info()->data_type() == DataType::QASYMM8)
+ {
+ ARM_COMPUTE_ERROR_ON_MSG(act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
+ "For QASYMM8 only lower/upper bounded relu is supported");
+ }
if(output != nullptr)
{
@@ -74,8 +84,22 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
build_opts.emplace(("-DACT=" + lower_string(string_from_activation_func(act_info.activation()))));
build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
build_opts.emplace(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
- build_opts.emplace(("-DA_VAL=" + support::cpp11::to_string(a_const)));
- build_opts.emplace(("-DB_VAL=" + support::cpp11::to_string(b_const)));
+
+ if(input->info()->data_type() == DataType::QASYMM8)
+ {
+ // For lower/upper bounded relu make sure that the min/max values are in the quantized input space
+ int a_const_u8 = input->info()->quantization_info().quantize(a_const);
+ int b_const_u8 = input->info()->quantization_info().quantize(b_const);
+
+ build_opts.emplace(("-DA_VAL=" + support::cpp11::to_string(a_const_u8)));
+ build_opts.emplace(("-DB_VAL=" + support::cpp11::to_string(b_const_u8)));
+ }
+ else
+ {
+ build_opts.emplace(("-DA_VAL=" + support::cpp11::to_string(a_const)));
+ build_opts.emplace(("-DB_VAL=" + support::cpp11::to_string(b_const)));
+ }
+
build_opts.emplace(output == nullptr ? "-DIN_PLACE" : "");
if(is_data_type_fixed_point(input->info()->data_type()))
{
@@ -83,7 +107,23 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
}
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("activation_layer", build_opts));
+ if(input->info()->data_type() == DataType::QASYMM8)
+ {
+ float s1 = input->info()->quantization_info().scale;
+ float o1 = input->info()->quantization_info().offset;
+ // If output is nullptr, assume same quantization scale/offset as input
+ float s2 = output != nullptr ? output->info()->quantization_info().scale : s1;
+ float o2 = output != nullptr ? output->info()->quantization_info().offset : o1;
+ build_opts.emplace(("-DS1_VAL=" + support::cpp11::to_string(s1)));
+ build_opts.emplace(("-DS2_VAL=" + support::cpp11::to_string(s2)));
+ build_opts.emplace(("-DO1_VAL=" + support::cpp11::to_string(o1)));
+ build_opts.emplace(("-DO2_VAL=" + support::cpp11::to_string(o2)));
+ _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("activation_layer_qa8", build_opts));
+ }
+ else
+ {
+ _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("activation_layer", build_opts));
+ }
// Make sure _kernel is initialized before calling the parent's configure
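To make the QASYMM8 branch above concrete: a sketch of how the activation bounds end up as build options, assuming QuantizationInfo::quantize follows the usual asymmetric mapping round(value / scale) + offset (the exact rounding policy is not visible in this diff) and illustrative quantization parameters:

    #include <cmath>
    #include <cstdio>

    // Hypothetical stand-in for QuantizationInfo::quantize.
    int quantize(float value, float scale, int offset)
    {
        return static_cast<int>(std::round(value / scale)) + offset;
    }

    int main()
    {
        // Input tensor quantized with scale = 0.1, offset = 128.
        // LU_BOUNDED_RELU with a = 6.0 and b = 0.0 yields:
        std::printf("-DA_VAL=%d -DB_VAL=%d\n",
                    quantize(6.0f, 0.1f, 128),   // 60 + 128 = 188
                    quantize(0.0f, 0.1f, 128));  // 0 + 128 = 128
    }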
diff --git a/src/core/TensorInfo.cpp b/src/core/TensorInfo.cpp
index 91a35315dc..f3cd776497 100644
--- a/src/core/TensorInfo.cpp
+++ b/src/core/TensorInfo.cpp
@@ -26,13 +26,14 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/HOGInfo.h"
#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
using namespace arm_compute;
TensorInfo::TensorInfo()
: _total_size(0), _fixed_point_position(0), _offset_first_element_in_bytes(0), _strides_in_bytes(), _num_channels(0), _tensor_shape(), _data_type(DataType::UNKNOWN), _format(Format::UNKNOWN),
- _is_resizable{ true }, _valid_region{ Coordinates(), _tensor_shape }, _padding{ 0 }
+ _is_resizable{ true }, _valid_region{ Coordinates(), _tensor_shape }, _padding{ 0 }, _quantization_info()
{
}
@@ -80,6 +81,13 @@ TensorInfo::TensorInfo(const TensorShape &tensor_shape, size_t num_channels, Dat
init(tensor_shape, num_channels, data_type, fixed_point_position);
}
+TensorInfo::TensorInfo(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, QuantizationInfo quantization_info)
+ : TensorInfo()
+{
+ init(tensor_shape, num_channels, data_type, 0);
+ _quantization_info = quantization_info;
+}
+
TensorInfo::TensorInfo(const HOGInfo &hog_info, unsigned int width, unsigned int height)
: TensorInfo()
{