aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arm_compute/core/ITensorInfo.h12
-rw-r--r--arm_compute/core/SubTensorInfo.h10
-rw-r--r--arm_compute/core/TensorInfo.h43
-rw-r--r--arm_compute/core/Types.h41
-rw-r--r--arm_compute/core/Utils.h2
-rw-r--r--src/core/CL/CLHelpers.cpp3
-rw-r--r--src/core/CL/CLKernelLibrary.cpp5
-rw-r--r--src/core/CL/cl_kernels/activation_layer_qa8.cl100
-rw-r--r--src/core/CL/kernels/CLActivationLayerKernel.cpp48
-rw-r--r--src/core/TensorInfo.cpp10
-rw-r--r--src/runtime/CL/functions/CLActivationLayer.cpp1
11 files changed, 259 insertions, 16 deletions
diff --git a/arm_compute/core/ITensorInfo.h b/arm_compute/core/ITensorInfo.h
index bb3ac6e35e..09351522dd 100644
--- a/arm_compute/core/ITensorInfo.h
+++ b/arm_compute/core/ITensorInfo.h
@@ -190,6 +190,18 @@ public:
* @param[in] valid_region Valid region to set.
*/
virtual void set_valid_region(ValidRegion valid_region) = 0;
+
+ /** Get the quantization settings (scale and offset) of the tensor.
+ *
+ * @return A QuantizationInfo containing the scale and offset.
+ */
+ virtual QuantizationInfo quantization_info() const = 0;
+
+ /** Set the quantization settings (scale and offset) of the tensor.
+ *
+ * @param[in] quantization_info QuantizationInfo containing the scale and offset.
+ */
+ virtual void set_quantization_info(QuantizationInfo quantization_info) = 0;
};
}
#endif /*__ARM_COMPUTE_TENSORINFO_H__ */
diff --git a/arm_compute/core/SubTensorInfo.h b/arm_compute/core/SubTensorInfo.h
index 81a27026e7..3a88ebae5a 100644
--- a/arm_compute/core/SubTensorInfo.h
+++ b/arm_compute/core/SubTensorInfo.h
@@ -186,6 +186,16 @@ public:
}
_valid_region = std::move(valid_region);
}
+ QuantizationInfo quantization_info() const override
+ {
+ ARM_COMPUTE_ERROR_ON(_parent == nullptr);
+ return _parent->quantization_info();
+ }
+ void set_quantization_info(QuantizationInfo quantization_info) override
+ {
+ ARM_COMPUTE_ERROR_ON(_parent == nullptr);
+ _parent->set_quantization_info(quantization_info);
+ }
private:
ITensorInfo *_parent;
diff --git a/arm_compute/core/TensorInfo.h b/arm_compute/core/TensorInfo.h
index 35b9ccb9ff..5d1ee7c578 100644
--- a/arm_compute/core/TensorInfo.h
+++ b/arm_compute/core/TensorInfo.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/Coordinates.h"
#include "arm_compute/core/Strides.h"
#include "arm_compute/core/TensorShape.h"
@@ -97,6 +98,16 @@ public:
* @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16.
*/
TensorInfo(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, int fixed_point_position = 0);
+
+ /** Constructor
+ *
+ * @param[in] tensor_shape It specifies the size for each dimension of the tensor in number of elements.
+ * @param[in] num_channels It indicates the number of channels for each tensor element
+ * @param[in] data_type Data type to use for each tensor element
+ * @param[in] quantization_info The quantization settings for the tensor data.
+ */
+ TensorInfo(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, QuantizationInfo quantization_info);
+
/** Constructor
*
* @param[in] hog_info HOG's metadata used to allocate normalized HOG space
@@ -147,6 +158,7 @@ public:
* @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16.
*/
void init(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, int fixed_point_position = 0);
+
/** Initialize the metadata structure with the given parameters
*
* @param[in] tensor_shape Size for each dimension of the tensor in number of elements.
@@ -276,6 +288,14 @@ public:
{
_valid_region = std::move(valid_region);
}
+ QuantizationInfo quantization_info() const override
+ {
+ return _quantization_info;
+ }
+ void set_quantization_info(QuantizationInfo quantization_info) override
+ {
+ _quantization_info = quantization_info;
+ }
private:
/** Calculates strides, offset and total size resulting from the specified padding around the XY plane.
@@ -284,17 +304,18 @@ private:
*/
std::tuple<Strides, size_t, size_t> calculate_padding_requirements(const PaddingSize &padding);
- size_t _total_size;
- int _fixed_point_position;
- size_t _offset_first_element_in_bytes;
- Strides _strides_in_bytes;
- size_t _num_channels;
- TensorShape _tensor_shape;
- DataType _data_type;
- Format _format;
- bool _is_resizable;
- ValidRegion _valid_region;
- PaddingSize _padding;
+ size_t _total_size;
+ int _fixed_point_position;
+ size_t _offset_first_element_in_bytes;
+ Strides _strides_in_bytes;
+ size_t _num_channels;
+ TensorShape _tensor_shape;
+ DataType _data_type;
+ Format _format;
+ bool _is_resizable;
+ ValidRegion _valid_region;
+ PaddingSize _padding;
+ QuantizationInfo _quantization_info;
};
}
#endif /*__ARM_COMPUTE_TENSORINFO_H__ */
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index f52dd12597..e567bac860 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -67,6 +67,7 @@ enum class DataType
U8,
S8,
QS8,
+ QASYMM8,
U16,
S16,
QS16,
@@ -90,6 +91,46 @@ constexpr float SCALE_PYRAMID_HALF = 0.5f;
/* Constant value used to indicate a ORB scaled pyramid */
constexpr float SCALE_PYRAMID_ORB = 8.408964152537146130583778358414e-01;
+/** Quantization settings (used for QASYMM8 data type) */
+struct QuantizationInfo
+{
+    QuantizationInfo()
+        : scale(0.0f), offset(0)
+    {
+    }
+
+    QuantizationInfo(float scale, int offset)
+        : scale(scale), offset(offset)
+    {
+    }
+
+    float scale;  /**< Size of one quantized step in the real domain */
+    int   offset; /**< Quantized value that represents real 0 */
+
+    /** Quantizes a real value to [0, 255] using scale/offset, rounding to nearest (not truncating) */
+    uint8_t quantize(float value) const
+    {
+        ARM_COMPUTE_ERROR_ON_MSG(scale == 0, "QuantizationInfo::quantize: scale == 0");
+        int quantized = static_cast<int>(value / scale + offset + 0.5f);
+        quantized     = std::max(0, std::min(quantized, 255));
+        return quantized;
+    }
+
+    /** Dequantizes a value using the scale/offset in this QuantizationInfo */
+    float dequantize(uint8_t value) const
+    {
+        ARM_COMPUTE_ERROR_ON_MSG(scale == 0, "QuantizationInfo::dequantize: scale == 0");
+        float dequantized = (value - offset) * scale;
+        return dequantized;
+    }
+
+    /** Indicates whether this QuantizationInfo has valid settings or not */
+    bool empty() const
+    {
+        return scale == 0;
+    }
+};
+
struct ValidRegion
{
ValidRegion()
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index 7f53bec2c5..149e404f5b 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -92,6 +92,7 @@ inline size_t data_size_from_type(DataType data_type)
case DataType::U8:
case DataType::S8:
case DataType::QS8:
+ case DataType::QASYMM8:
return 1;
case DataType::U16:
case DataType::S16:
@@ -166,6 +167,7 @@ inline size_t element_size_from_data_type(DataType dt)
case DataType::S8:
case DataType::U8:
case DataType::QS8:
+ case DataType::QASYMM8:
return 1;
case DataType::U16:
case DataType::S16:
diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp
index 821fb4c051..09ec329e4c 100644
--- a/src/core/CL/CLHelpers.cpp
+++ b/src/core/CL/CLHelpers.cpp
@@ -72,6 +72,8 @@ std::string get_cl_type_from_data_type(const DataType &dt)
return "qs8";
case DataType::S8:
return "char";
+ case DataType::QASYMM8:
+ return "uchar";
case DataType::U16:
return "ushort";
case DataType::S16:
@@ -105,6 +107,7 @@ std::string get_data_size_from_data_type(const DataType &dt)
case DataType::U8:
case DataType::QS8:
case DataType::S8:
+ case DataType::QASYMM8:
return "8";
case DataType::U16:
case DataType::S16:
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 6e5e802538..62ef2593e7 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -107,6 +107,7 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "accumulate_squared", "accumulate.cl" },
{ "accumulate_weighted", "accumulate.cl" },
{ "activation_layer", "activation_layer.cl" },
+ { "activation_layer_qa8", "activation_layer_qa8.cl" },
{ "arithmetic_add", "arithmetic_op.cl" },
{ "arithmetic_sub", "arithmetic_op.cl" },
{ "bitwise_or", "bitwise_op.cl" },
@@ -306,6 +307,10 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
#include "./cl_kernels/activation_layer.clembed"
},
{
+ "activation_layer_qa8.cl",
+#include "./cl_kernels/activation_layer_qa8.clembed"
+ },
+ {
"arithmetic_op.cl",
#include "./cl_kernels/arithmetic_op.clembed"
},
diff --git a/src/core/CL/cl_kernels/activation_layer_qa8.cl b/src/core/CL/cl_kernels/activation_layer_qa8.cl
new file mode 100644
index 0000000000..4d9bf0efad
--- /dev/null
+++ b/src/core/CL/cl_kernels/activation_layer_qa8.cl
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "helpers.h"
+
+#define TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+
+// Bounded RELU Activation: f(x) = min(a, max(0, x)); 0 must be cast to TYPE for the vector max overload
+inline TYPE brelu_op(TYPE x)
+{
+    return min((TYPE)A_VAL, max((TYPE)0, x));
+}
+// Lower Upper Bounded RELU Activation: f(x) = min(a, max(b, x)); A_VAL/B_VAL are already quantized to the input space
+inline TYPE lu_brelu_op(TYPE x)
+{
+    return min(max(x, (TYPE)B_VAL), (TYPE)A_VAL);
+}
+
+#define ACTIVATION_OP2(op, x) op##_op(x)
+#define ACTIVATION_OP(op, x) ACTIVATION_OP2(op, x)
+
+/** This performs an activation function on QASYMM8 inputs.
+ *
+ * @note In order to perform the activation function "in-place", the pre-processor -DIN_PLACE must be passed at compile time
+ *
+ * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=uchar
+ * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
+ * @note Activation function should be given as a preprocessor argument using -DACT=name. e.g. -DACT=TANH
+ * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively.
+ * @note Quantization scales of the input/output tensors are passed in with -DS1_VAL= and -DS2_VAL= respectively.
+ * @note Quantization offsets of the input/output tensors are passed in with -DO1_VAL= and -DO2_VAL= respectively.
+ *
+ * @param[in]  input_ptr                            Pointer to the source image. Supported data types: QASYMM8
+ * @param[in]  input_stride_x                       Stride of the source image in X dimension (in bytes)
+ * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in]  input_stride_y                       Stride of the source image in Y dimension (in bytes)
+ * @param[in]  input_step_y                         input_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in]  input_stride_z                       Stride of the source tensor in Z dimension (in bytes)
+ * @param[in]  input_step_z                         input_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in]  input_offset_first_element_in_bytes  The offset of the first element in the source image
+ * @param[out] output_ptr                           Pointer to the destination image. Supported data types: same as @p input_ptr
+ * @param[in]  output_stride_x                      Stride of the destination image in X dimension (in bytes)
+ * @param[in]  output_step_x                        output_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in]  output_stride_y                      Stride of the destination image in Y dimension (in bytes)
+ * @param[in]  output_step_y                        output_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in]  output_stride_z                      Stride of the source tensor in Z dimension (in bytes)
+ * @param[in]  output_step_z                        output_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in]  output_offset_first_element_in_bytes The offset of the first element in the destination image
+ */
+__kernel void activation_layer_qa8(
+    TENSOR3D_DECLARATION(input)
+#ifndef IN_PLACE
+    ,
+    TENSOR3D_DECLARATION(output)
+#endif /* not IN_PLACE */
+)
+{
+    // Get pixels pointer
+    Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
+#ifdef IN_PLACE
+    Tensor3D output = input;
+#else /* IN_PLACE */
+    Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
+#endif /* IN_PLACE */
+
+    // Load data
+    TYPE data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr);
+
+    // Perform activation
+    data = ACTIVATION_OP(ACT, data);
+
+    // Requantize to the output space; use VEC_SIZE-generic conversions instead of hardcoded float16/uchar16
+    VEC_DATA_TYPE(float, VEC_SIZE)
+    fdata = round((CONVERT(data, VEC_DATA_TYPE(float, VEC_SIZE)) - (float)O1_VAL) * ((float)S1_VAL / (float)S2_VAL) + (float)O2_VAL);
+    TYPE qdata = CONVERT_SAT(fdata, TYPE);
+
+    // Store result
+    VSTORE(VEC_SIZE)
+    (qdata, 0, (__global DATA_TYPE *)output.ptr);
+}
diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp
index 18202c1c5b..bed407a3d1 100644
--- a/src/core/CL/kernels/CLActivationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp
@@ -34,6 +34,9 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
#include "support/ToolchainSupport.h"
#include <cmath>
@@ -47,7 +50,14 @@ CLActivationLayerKernel::CLActivationLayerKernel()
void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32, DataType::QASYMM8);
+
+ // For QA8 only lower/upper bounded relu is supported
+ if(input->info()->data_type() == DataType::QASYMM8)
+ {
+ ARM_COMPUTE_ERROR_ON_MSG(act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
+ "For QASYMM8 only lower/upper bounded relu is supported");
+ }
if(output != nullptr)
{
@@ -74,8 +84,22 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
build_opts.emplace(("-DACT=" + lower_string(string_from_activation_func(act_info.activation()))));
build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
build_opts.emplace(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
- build_opts.emplace(("-DA_VAL=" + support::cpp11::to_string(a_const)));
- build_opts.emplace(("-DB_VAL=" + support::cpp11::to_string(b_const)));
+
+ if(input->info()->data_type() == DataType::QASYMM8)
+ {
+ // For lower/upper bounded relu make sure that the min/max values are in the quantized input space
+ int a_const_u8 = input->info()->quantization_info().quantize(a_const);
+ int b_const_u8 = input->info()->quantization_info().quantize(b_const);
+
+ build_opts.emplace(("-DA_VAL=" + support::cpp11::to_string(a_const_u8)));
+ build_opts.emplace(("-DB_VAL=" + support::cpp11::to_string(b_const_u8)));
+ }
+ else
+ {
+ build_opts.emplace(("-DA_VAL=" + support::cpp11::to_string(a_const)));
+ build_opts.emplace(("-DB_VAL=" + support::cpp11::to_string(b_const)));
+ }
+
build_opts.emplace(output == nullptr ? "-DIN_PLACE" : "");
if(is_data_type_fixed_point(input->info()->data_type()))
{
@@ -83,7 +107,23 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
}
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("activation_layer", build_opts));
+ if(input->info()->data_type() == DataType::QASYMM8)
+ {
+        const float s1 = input->info()->quantization_info().scale;
+        const int   o1 = input->info()->quantization_info().offset;
+        // If output is nullptr, assume same quantization scale/offset as input
+        const float s2 = (output != nullptr) ? output->info()->quantization_info().scale : s1;
+        const int   o2 = (output != nullptr) ? output->info()->quantization_info().offset : o1;
+        build_opts.emplace(("-DS1_VAL=" + support::cpp11::to_string(s1) + "f"));
+        build_opts.emplace(("-DS2_VAL=" + support::cpp11::to_string(s2) + "f"));
+        build_opts.emplace(("-DO1_VAL=" + support::cpp11::to_string(o1)));
+        build_opts.emplace(("-DO2_VAL=" + support::cpp11::to_string(o2)));
+ _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("activation_layer_qa8", build_opts));
+ }
+ else
+ {
+ _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("activation_layer", build_opts));
+ }
// Make sure _kernel is initialized before calling the parent's configure
diff --git a/src/core/TensorInfo.cpp b/src/core/TensorInfo.cpp
index 91a35315dc..f3cd776497 100644
--- a/src/core/TensorInfo.cpp
+++ b/src/core/TensorInfo.cpp
@@ -26,13 +26,14 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/HOGInfo.h"
#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
using namespace arm_compute;
TensorInfo::TensorInfo()
: _total_size(0), _fixed_point_position(0), _offset_first_element_in_bytes(0), _strides_in_bytes(), _num_channels(0), _tensor_shape(), _data_type(DataType::UNKNOWN), _format(Format::UNKNOWN),
- _is_resizable{ true }, _valid_region{ Coordinates(), _tensor_shape }, _padding{ 0 }
+ _is_resizable{ true }, _valid_region{ Coordinates(), _tensor_shape }, _padding{ 0 }, _quantization_info()
{
}
@@ -80,6 +81,13 @@ TensorInfo::TensorInfo(const TensorShape &tensor_shape, size_t num_channels, Dat
init(tensor_shape, num_channels, data_type, fixed_point_position);
}
+TensorInfo::TensorInfo(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, QuantizationInfo quantization_info)
+ : TensorInfo()
+{
+ init(tensor_shape, num_channels, data_type, 0);
+ _quantization_info = quantization_info;
+}
+
TensorInfo::TensorInfo(const HOGInfo &hog_info, unsigned int width, unsigned int height)
: TensorInfo()
{
diff --git a/src/runtime/CL/functions/CLActivationLayer.cpp b/src/runtime/CL/functions/CLActivationLayer.cpp
index b64739a008..fbb90d9a8b 100644
--- a/src/runtime/CL/functions/CLActivationLayer.cpp
+++ b/src/runtime/CL/functions/CLActivationLayer.cpp
@@ -24,6 +24,7 @@
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
+#include "arm_compute/core/Types.h"
#include "support/ToolchainSupport.h"
using namespace arm_compute;