diff options
-rw-r--r-- | arm_compute/core/ITensorInfo.h | 12 | ||||
-rw-r--r-- | arm_compute/core/SubTensorInfo.h | 10 | ||||
-rw-r--r-- | arm_compute/core/TensorInfo.h | 43 | ||||
-rw-r--r-- | arm_compute/core/Types.h | 41 | ||||
-rw-r--r-- | arm_compute/core/Utils.h | 2 | ||||
-rw-r--r-- | src/core/CL/CLHelpers.cpp | 3 | ||||
-rw-r--r-- | src/core/CL/CLKernelLibrary.cpp | 5 | ||||
-rw-r--r-- | src/core/CL/cl_kernels/activation_layer_qa8.cl | 100 | ||||
-rw-r--r-- | src/core/CL/kernels/CLActivationLayerKernel.cpp | 48 | ||||
-rw-r--r-- | src/core/TensorInfo.cpp | 10 | ||||
-rw-r--r-- | src/runtime/CL/functions/CLActivationLayer.cpp | 1 |
11 files changed, 259 insertions, 16 deletions
diff --git a/arm_compute/core/ITensorInfo.h b/arm_compute/core/ITensorInfo.h index bb3ac6e35e..09351522dd 100644 --- a/arm_compute/core/ITensorInfo.h +++ b/arm_compute/core/ITensorInfo.h @@ -190,6 +190,18 @@ public: * @param[in] valid_region Valid region to set. */ virtual void set_valid_region(ValidRegion valid_region) = 0; + + /** Get the quantization settings (scale and offset) of the tensor. + * + * @return A QuantizationInfo containing the scale and offset. + */ + virtual QuantizationInfo quantization_info() const = 0; + + /** Set the quantization settings (scale and offset) of the tensor. + * + * @param[in] quantization_info QuantizationInfo containing the scale and offset. + */ + virtual void set_quantization_info(QuantizationInfo quantization_info) = 0; }; } #endif /*__ARM_COMPUTE_TENSORINFO_H__ */ diff --git a/arm_compute/core/SubTensorInfo.h b/arm_compute/core/SubTensorInfo.h index 81a27026e7..3a88ebae5a 100644 --- a/arm_compute/core/SubTensorInfo.h +++ b/arm_compute/core/SubTensorInfo.h @@ -186,6 +186,16 @@ public: } _valid_region = std::move(valid_region); } + QuantizationInfo quantization_info() const override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + return _parent->quantization_info(); + } + void set_quantization_info(QuantizationInfo quantization_info) override + { + ARM_COMPUTE_ERROR_ON(_parent == nullptr); + _parent->set_quantization_info(quantization_info); + } private: ITensorInfo *_parent; diff --git a/arm_compute/core/TensorInfo.h b/arm_compute/core/TensorInfo.h index 35b9ccb9ff..5d1ee7c578 100644 --- a/arm_compute/core/TensorInfo.h +++ b/arm_compute/core/TensorInfo.h @@ -26,6 +26,7 @@ #include "arm_compute/core/ITensorInfo.h" +#include "ITensorInfo.h" #include "arm_compute/core/Coordinates.h" #include "arm_compute/core/Strides.h" #include "arm_compute/core/TensorShape.h" @@ -97,6 +98,16 @@ public: * @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16. */ TensorInfo(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, int fixed_point_position = 0); + + /** Constructor + * + * @param[in] tensor_shape It specifies the size for each dimension of the tensor in number of elements. + * @param[in] num_channels It indicates the number of channels for each tensor element + * @param[in] data_type Data type to use for each tensor element + * @param[in] quantization_info The quantization settings for the tensor data. + */ + TensorInfo(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, QuantizationInfo quantization_info); + /** Constructor * * @param[in] hog_info HOG's metadata used to allocate normalized HOG space @@ -147,6 +158,7 @@ public: * @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16. */ void init(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, int fixed_point_position = 0); + /** Initialize the metadata structure with the given parameters * * @param[in] tensor_shape Size for each dimension of the tensor in number of elements. @@ -276,6 +288,14 @@ public: { _valid_region = std::move(valid_region); } + QuantizationInfo quantization_info() const override + { + return _quantization_info; + } + void set_quantization_info(QuantizationInfo quantization_info) override + { + _quantization_info = quantization_info; + } private: /** Calculates strides, offset and total size resulting from the specified padding around the XY plane. @@ -284,17 +304,18 @@ private: */ std::tuple<Strides, size_t, size_t> calculate_padding_requirements(const PaddingSize &padding); - size_t _total_size; - int _fixed_point_position; - size_t _offset_first_element_in_bytes; - Strides _strides_in_bytes; - size_t _num_channels; - TensorShape _tensor_shape; - DataType _data_type; - Format _format; - bool _is_resizable; - ValidRegion _valid_region; - PaddingSize _padding; + size_t _total_size; + int _fixed_point_position; + size_t _offset_first_element_in_bytes; + Strides _strides_in_bytes; + size_t _num_channels; + TensorShape _tensor_shape; + DataType _data_type; + Format _format; + bool _is_resizable; + ValidRegion _valid_region; + PaddingSize _padding; + QuantizationInfo _quantization_info; }; } #endif /*__ARM_COMPUTE_TENSORINFO_H__ */ diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index f52dd12597..e567bac860 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -67,6 +67,7 @@ enum class DataType U8, S8, QS8, + QASYMM8, U16, S16, QS16, @@ -90,6 +91,46 @@ constexpr float SCALE_PYRAMID_HALF = 0.5f; /* Constant value used to indicate a ORB scaled pyramid */ constexpr float SCALE_PYRAMID_ORB = 8.408964152537146130583778358414e-01; +/** Quantization settings (used for QASYMM8 data type) */ +struct QuantizationInfo +{ + QuantizationInfo() + : scale(0.0f), offset(0) + { + } + + QuantizationInfo(float scale, int offset) + : scale(scale), offset(offset) + { + } + + float scale; /**< scale */ + int offset; /**< offset */ + + /** Quantizes a value using the scale/offset in this QuantizationInfo */ + uint8_t quantize(float value) const + { + ARM_COMPUTE_ERROR_ON_MSG(scale == 0, "QuantizationInfo::quantize: scale == 0"); + int quantized = static_cast<int>(value / scale + offset); + quantized = std::max(0, std::min(quantized, 255)); + return quantized; + } + + /** Dequantizes a value using the scale/offset in this QuantizationInfo */ + float dequantize(uint8_t value) const + { + ARM_COMPUTE_ERROR_ON_MSG(scale == 0, "QuantizationInfo::dequantize: scale == 0"); + float dequantized = (value - offset) * scale; + return dequantized; + } + + /** Indicates whether this QuantizationInfo has valid settings or not */ + bool empty() const + { + return scale == 0; + } +}; + struct ValidRegion { ValidRegion() diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h index 7f53bec2c5..149e404f5b 100644 --- a/arm_compute/core/Utils.h +++ b/arm_compute/core/Utils.h @@ -92,6 +92,7 @@ inline size_t data_size_from_type(DataType data_type) case DataType::U8: case DataType::S8: case DataType::QS8: + case DataType::QASYMM8: return 1; case DataType::U16: case DataType::S16: @@ -166,6 +167,7 @@ inline size_t element_size_from_data_type(DataType dt) case DataType::S8: case DataType::U8: case DataType::QS8: + case DataType::QASYMM8: return 1; case DataType::U16: case DataType::S16: diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp index 821fb4c051..09ec329e4c 100644 --- a/src/core/CL/CLHelpers.cpp +++ b/src/core/CL/CLHelpers.cpp @@ -72,6 +72,8 @@ std::string get_cl_type_from_data_type(const DataType &dt) return "qs8"; case DataType::S8: return "char"; + case DataType::QASYMM8: + return "uchar"; case DataType::U16: return "ushort"; case DataType::S16: @@ -105,6 +107,7 @@ std::string get_data_size_from_data_type(const DataType &dt) case DataType::U8: case DataType::QS8: case DataType::S8: + case DataType::QASYMM8: return "8"; case DataType::U16: case DataType::S16: diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp index 6e5e802538..62ef2593e7 100644 --- a/src/core/CL/CLKernelLibrary.cpp +++ b/src/core/CL/CLKernelLibrary.cpp @@ -107,6 +107,7 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map = { "accumulate_squared", "accumulate.cl" }, { "accumulate_weighted", "accumulate.cl" }, { "activation_layer", "activation_layer.cl" }, + { "activation_layer_qa8", "activation_layer_qa8.cl" }, { "arithmetic_add", "arithmetic_op.cl" }, { "arithmetic_sub", "arithmetic_op.cl" }, { "bitwise_or", "bitwise_op.cl" }, @@ -306,6 +307,10 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map = #include "./cl_kernels/activation_layer.clembed" }, { + "activation_layer_qa8.cl", +#include "./cl_kernels/activation_layer_qa8.clembed" + }, + { "arithmetic_op.cl", #include "./cl_kernels/arithmetic_op.clembed" }, diff --git a/src/core/CL/cl_kernels/activation_layer_qa8.cl b/src/core/CL/cl_kernels/activation_layer_qa8.cl new file mode 100644 index 0000000000..4d9bf0efad --- /dev/null +++ b/src/core/CL/cl_kernels/activation_layer_qa8.cl @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "helpers.h" + +#define TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) + +// Bounded RELU Activation +inline TYPE brelu_op(TYPE x) +{ + return min((TYPE)A_VAL, max(0, x)); +} +// Lower Upper Bounded RELU Activation +inline TYPE lu_brelu_op(TYPE x) +{ + return min(max(x, (TYPE)B_VAL), (TYPE)A_VAL); +} + +#define ACTIVATION_OP2(op, x) op##_op(x) +#define ACTIVATION_OP(op, x) ACTIVATION_OP2(op, x) + +/** This performs an activation function on QASYMM8 inputs. + * + * @note In order to perform the activation function "in-place", the pre-processor -DIN_PLACE must be passed at compile time + * + * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short + * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 + * @note Activation function should be given as a preprocessor argument using -DACT=name. e.g. -DACT=TANH + * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively. + * @note Quantization scales of the input/output tensors are passed in with -DS1_VAL= and -DS2_VAL= respectively. + * @note Quantization offsets of the input/output tensors are passed in with -DO1_VAL= and -DO2_VAL= respectively. + * + * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8 + * @param[in] input_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr + * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image + */ +__kernel void activation_layer_qa8( + TENSOR3D_DECLARATION(input) +#ifndef IN_PLACE + , + TENSOR3D_DECLARATION(output) +#endif /* not IN_PLACE */ +) +{ + // Get pixels pointer + Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); +#ifdef IN_PLACE + Tensor3D output = input; +#else /* IN_PLACE */ + Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); +#endif /* IN_PLACE */ + + // Load data + TYPE data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr); + + // Perform activation + data = ACTIVATION_OP(ACT, data); + + // requantize to output space + float16 fdata = convert_float16(data); + fdata = round((fdata - O1_VAL) * (S1_VAL / S2_VAL) + O2_VAL); + uchar16 qdata = convert_uchar16(fdata); + + // Store result + VSTORE(VEC_SIZE) + (qdata, 0, (__global DATA_TYPE *)output.ptr); +} diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp index 18202c1c5b..bed407a3d1 100644 --- a/src/core/CL/kernels/CLActivationLayerKernel.cpp +++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp @@ -34,6 +34,9 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" #include "support/ToolchainSupport.h" #include <cmath> @@ -47,7 +50,14 @@ CLActivationLayerKernel::CLActivationLayerKernel() void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32, DataType::QASYMM8); + + // For QA8 only lower/upper bounded relu is supported + if(input->info()->data_type() == DataType::QASYMM8) + { + ARM_COMPUTE_ERROR_ON_MSG(act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, + "For QASYMM8 only lower/upper bounded relu is supported"); + } if(output != nullptr) { @@ -74,8 +84,22 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act build_opts.emplace(("-DACT=" + lower_string(string_from_activation_func(act_info.activation())))); build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); build_opts.emplace(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); - build_opts.emplace(("-DA_VAL=" + support::cpp11::to_string(a_const))); - build_opts.emplace(("-DB_VAL=" + support::cpp11::to_string(b_const))); + + if(input->info()->data_type() == DataType::QASYMM8) + { + // For lower/upper bounded relu make sure that the min/max values are in the quantized input space + int a_const_u8 = input->info()->quantization_info().quantize(a_const); + int b_const_u8 = input->info()->quantization_info().quantize(b_const); + + build_opts.emplace(("-DA_VAL=" + support::cpp11::to_string(a_const_u8))); + build_opts.emplace(("-DB_VAL=" + support::cpp11::to_string(b_const_u8))); + } + else + { + build_opts.emplace(("-DA_VAL=" + support::cpp11::to_string(a_const))); + build_opts.emplace(("-DB_VAL=" + support::cpp11::to_string(b_const))); + } + build_opts.emplace(output == nullptr ? "-DIN_PLACE" : ""); if(is_data_type_fixed_point(input->info()->data_type())) { @@ -83,7 +107,23 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act } // Create kernel - _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("activation_layer", build_opts)); + if(input->info()->data_type() == DataType::QASYMM8) + { + float s1 = input->info()->quantization_info().scale; + float o1 = input->info()->quantization_info().offset; + // If output is nullptr, assume same quantization scale/offset as input + float s2 = output != nullptr ? output->info()->quantization_info().scale : s1; + float o2 = output != nullptr ? output->info()->quantization_info().offset : o1; + build_opts.emplace(("-DS1_VAL=" + support::cpp11::to_string(s1))); + build_opts.emplace(("-DS2_VAL=" + support::cpp11::to_string(s2))); + build_opts.emplace(("-DO1_VAL=" + support::cpp11::to_string(o1))); + build_opts.emplace(("-DO2_VAL=" + support::cpp11::to_string(o2))); + _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("activation_layer_qa8", build_opts)); + } + else + { + _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("activation_layer", build_opts)); + } // Make sure _kernel is initialized before calling the parent's configure diff --git a/src/core/TensorInfo.cpp b/src/core/TensorInfo.cpp index 91a35315dc..f3cd776497 100644 --- a/src/core/TensorInfo.cpp +++ b/src/core/TensorInfo.cpp @@ -26,13 +26,14 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/HOGInfo.h" #include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" using namespace arm_compute; TensorInfo::TensorInfo() : _total_size(0), _fixed_point_position(0), _offset_first_element_in_bytes(0), _strides_in_bytes(), _num_channels(0), _tensor_shape(), _data_type(DataType::UNKNOWN), _format(Format::UNKNOWN), - _is_resizable{ true }, _valid_region{ Coordinates(), _tensor_shape }, _padding{ 0 } + _is_resizable{ true }, _valid_region{ Coordinates(), _tensor_shape }, _padding{ 0 }, _quantization_info() { } @@ -80,6 +81,13 @@ TensorInfo::TensorInfo(const TensorShape &tensor_shape, size_t num_channels, Dat init(tensor_shape, num_channels, data_type, fixed_point_position); } +TensorInfo::TensorInfo(const TensorShape &tensor_shape, size_t num_channels, DataType data_type, QuantizationInfo quantization_info) + : TensorInfo() +{ + init(tensor_shape, num_channels, data_type, 0); + _quantization_info = quantization_info; +} + TensorInfo::TensorInfo(const HOGInfo &hog_info, unsigned int width, unsigned int height) : TensorInfo() { diff --git a/src/runtime/CL/functions/CLActivationLayer.cpp b/src/runtime/CL/functions/CLActivationLayer.cpp index b64739a008..fbb90d9a8b 100644 --- a/src/runtime/CL/functions/CLActivationLayer.cpp +++ b/src/runtime/CL/functions/CLActivationLayer.cpp @@ -24,6 +24,7 @@ #include "arm_compute/runtime/CL/functions/CLActivationLayer.h" #include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h" +#include "arm_compute/core/Types.h" #include "support/ToolchainSupport.h" using namespace arm_compute; |