Diffstat (limited to 'src/core/CL')
-rw-r--r--  src/core/CL/CLHelpers.cpp                        |   3 +
-rw-r--r--  src/core/CL/CLKernelLibrary.cpp                  |   5 +
-rw-r--r--  src/core/CL/cl_kernels/activation_layer_qa8.cl   | 100 ++++++
-rw-r--r--  src/core/CL/kernels/CLActivationLayerKernel.cpp  |  48 +++-
4 files changed, 152 insertions(+), 4 deletions(-)
diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp
index 821fb4c051..09ec329e4c 100644
--- a/src/core/CL/CLHelpers.cpp
+++ b/src/core/CL/CLHelpers.cpp
@@ -72,6 +72,8 @@ std::string get_cl_type_from_data_type(const DataType &dt)
             return "qs8";
         case DataType::S8:
             return "char";
+        case DataType::QASYMM8:
+            return "uchar";
         case DataType::U16:
             return "ushort";
         case DataType::S16:
@@ -105,6 +107,7 @@ std::string get_data_size_from_data_type(const DataType &dt)
         case DataType::U8:
         case DataType::QS8:
         case DataType::S8:
+        case DataType::QASYMM8:
            return "8";
        case DataType::U16:
        case DataType::S16:
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 6e5e802538..62ef2593e7 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -107,6 +107,7 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
     { "accumulate_squared", "accumulate.cl" },
     { "accumulate_weighted", "accumulate.cl" },
     { "activation_layer", "activation_layer.cl" },
+    { "activation_layer_qa8", "activation_layer_qa8.cl" },
     { "arithmetic_add", "arithmetic_op.cl" },
     { "arithmetic_sub", "arithmetic_op.cl" },
     { "bitwise_or", "bitwise_op.cl" },
@@ -306,6 +307,10 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map =
 #include "./cl_kernels/activation_layer.clembed"
     },
     {
+        "activation_layer_qa8.cl",
+#include "./cl_kernels/activation_layer_qa8.clembed"
+    },
+    {
         "arithmetic_op.cl",
 #include "./cl_kernels/arithmetic_op.clembed"
     },
diff --git a/src/core/CL/cl_kernels/activation_layer_qa8.cl b/src/core/CL/cl_kernels/activation_layer_qa8.cl
new file mode 100644
index 0000000000..4d9bf0efad
--- /dev/null
+++ b/src/core/CL/cl_kernels/activation_layer_qa8.cl
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "helpers.h"
+
+#define TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+
+// Bounded RELU Activation
+inline TYPE brelu_op(TYPE x)
+{
+    return min((TYPE)A_VAL, max(x, (TYPE)0));
+}
+// Lower Upper Bounded RELU Activation
+inline TYPE lu_brelu_op(TYPE x)
+{
+    return min(max(x, (TYPE)B_VAL), (TYPE)A_VAL);
+}
+
+#define ACTIVATION_OP2(op, x) op##_op(x)
+#define ACTIVATION_OP(op, x) ACTIVATION_OP2(op, x)
+
+/** This performs an activation function on QASYMM8 inputs.
+ *
+ * @note In order to perform the activation function "in-place", the preprocessor option -DIN_PLACE must be passed at compile time
+ *
+ * @note Data type should be given as a preprocessor argument using -DDATA_TYPE=type, e.g. -DDATA_TYPE=uchar
+ * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size, e.g. -DVEC_SIZE=16
+ * @note Activation function should be given as a preprocessor argument using -DACT=name, e.g. -DACT=lu_brelu
+ * @note The A and B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively
+ * @note Quantization scales of the input/output tensors are passed in with -DS1_VAL= and -DS2_VAL= respectively
+ * @note Quantization offsets of the input/output tensors are passed in with -DO1_VAL= and -DO2_VAL= respectively
+ *
+ * @param[in]  input_ptr                            Pointer to the source tensor. Supported data types: QASYMM8
+ * @param[in]  input_stride_x                       Stride of the source tensor in X dimension (in bytes)
+ * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem (in bytes)
+ * @param[in]  input_stride_y                       Stride of the source tensor in Y dimension (in bytes)
+ * @param[in]  input_step_y                         input_stride_y * number of elements along Y processed per workitem (in bytes)
+ * @param[in]  input_stride_z                       Stride of the source tensor in Z dimension (in bytes)
+ * @param[in]  input_step_z                         input_stride_z * number of elements along Z processed per workitem (in bytes)
+ * @param[in]  input_offset_first_element_in_bytes  The offset of the first element in the source tensor
+ * @param[out] output_ptr                           Pointer to the destination tensor. Supported data types: same as @p input_ptr
+ * @param[in]  output_stride_x                      Stride of the destination tensor in X dimension (in bytes)
+ * @param[in]  output_step_x                        output_stride_x * number of elements along X processed per workitem (in bytes)
+ * @param[in]  output_stride_y                      Stride of the destination tensor in Y dimension (in bytes)
+ * @param[in]  output_step_y                        output_stride_y * number of elements along Y processed per workitem (in bytes)
+ * @param[in]  output_stride_z                      Stride of the destination tensor in Z dimension (in bytes)
+ * @param[in]  output_step_z                        output_stride_z * number of elements along Z processed per workitem (in bytes)
+ * @param[in]  output_offset_first_element_in_bytes The offset of the first element in the destination tensor
+ */
+__kernel void activation_layer_qa8(
+    TENSOR3D_DECLARATION(input)
+#ifndef IN_PLACE
+    ,
+    TENSOR3D_DECLARATION(output)
+#endif /* not IN_PLACE */
+)
+{
+    // Get pixels pointer
+    Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
+#ifdef IN_PLACE
+    Tensor3D output = input;
+#else /* IN_PLACE */
+    Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
+#endif /* IN_PLACE */
+
+    // Load data
+    TYPE data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr);
+
+    // Perform activation in the quantized input space
+    data = ACTIVATION_OP(ACT, data);
+
+    // Requantize to the output quantization space
+    float16 fdata = convert_float16(data);
+    fdata         = round((fdata - O1_VAL) * (S1_VAL / S2_VAL) + O2_VAL);
+    uchar16 qdata = convert_uchar16(fdata);
+
+    // Store result
+    VSTORE(VEC_SIZE)
+    (qdata, 0, (__global DATA_TYPE *)output.ptr);
+}
diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp
index 18202c1c5b..bed407a3d1 100644
--- a/src/core/CL/kernels/CLActivationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp
@@ -34,6 +34,9 @@
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/core/Window.h"
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
 #include "support/ToolchainSupport.h"
 
 #include <cmath>
@@ -47,7 +50,14 @@ CLActivationLayerKernel::CLActivationLayerKernel()
 
 void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32, DataType::QASYMM8);
+
+    // For QASYMM8 only lower/upper bounded relu is supported
+    if(input->info()->data_type() == DataType::QASYMM8)
+    {
+        ARM_COMPUTE_ERROR_ON_MSG(act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
+                                 "For QASYMM8 only lower/upper bounded relu is supported");
+    }
 
     if(output != nullptr)
     {
@@ -74,8 +84,22 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
     build_opts.emplace(("-DACT=" + lower_string(string_from_activation_func(act_info.activation()))));
     build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
     build_opts.emplace(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
-    build_opts.emplace(("-DA_VAL=" + support::cpp11::to_string(a_const)));
-    build_opts.emplace(("-DB_VAL=" + support::cpp11::to_string(b_const)));
+
+    if(input->info()->data_type() == DataType::QASYMM8)
+    {
+        // For lower/upper bounded relu make sure that the min/max values are in the quantized input space
+        int a_const_u8 = input->info()->quantization_info().quantize(a_const);
+        int b_const_u8 = input->info()->quantization_info().quantize(b_const);
+
+        build_opts.emplace(("-DA_VAL=" + support::cpp11::to_string(a_const_u8)));
+        build_opts.emplace(("-DB_VAL=" + support::cpp11::to_string(b_const_u8)));
+    }
+    else
+    {
+        build_opts.emplace(("-DA_VAL=" + support::cpp11::to_string(a_const)));
+        build_opts.emplace(("-DB_VAL=" + support::cpp11::to_string(b_const)));
+    }
+
     build_opts.emplace(output == nullptr ? "-DIN_PLACE" : "");
     if(is_data_type_fixed_point(input->info()->data_type()))
     {
@@ -83,7 +107,23 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act
     }
 
     // Create kernel
-    _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("activation_layer", build_opts));
+    if(input->info()->data_type() == DataType::QASYMM8)
+    {
+        float s1 = input->info()->quantization_info().scale;
+        float o1 = input->info()->quantization_info().offset;
+        // If output is nullptr, assume same quantization scale/offset as input
+        float s2 = output != nullptr ? output->info()->quantization_info().scale : s1;
+        float o2 = output != nullptr ? output->info()->quantization_info().offset : o1;
+        build_opts.emplace(("-DS1_VAL=" + support::cpp11::to_string(s1)));
+        build_opts.emplace(("-DS2_VAL=" + support::cpp11::to_string(s2)));
+        build_opts.emplace(("-DO1_VAL=" + support::cpp11::to_string(o1)));
+        build_opts.emplace(("-DO2_VAL=" + support::cpp11::to_string(o2)));
+        _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("activation_layer_qa8", build_opts));
+    }
+    else
+    {
+        _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("activation_layer", build_opts));
+    }
 
     // Make sure _kernel is initialized before calling the parent's configure