diff options
author | Michele Di Giorgio <michele.digiorgio@arm.com> | 2018-09-12 10:18:54 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:54:54 +0000 |
commit | d63dfa2fc61a33b4e675ec6bc7458d8700174134 (patch) | |
tree | 2a85a8258aaa9a5762eb589f34b3f2868705dfb5 /src/core | |
parent | 20c246a60869bada4051bd14eb9a3862be5330d7 (diff) | |
download | ComputeLibrary-d63dfa2fc61a33b4e675ec6bc7458d8700174134.tar.gz |
COMPMID-1568: Add support for QASYMM8 to CLNormalizePlanarYUV
Change-Id: Id7ea6e7f57179478e5ba0e9231274e98fa089590
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/148028
Tested-by: bsgcomp <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/CL/CLKernelLibrary.cpp | 6 | ||||
-rw-r--r-- | src/core/CL/cl_kernels/normalize_planar_yuv_layer.cl | 12 | ||||
-rw-r--r-- | src/core/CL/cl_kernels/normalize_planar_yuv_layer_quantized.cl | 158 | ||||
-rw-r--r-- | src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp | 18 |
4 files changed, 184 insertions, 10 deletions
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp index 8a309ec757..1bc1036bed 100644 --- a/src/core/CL/CLKernelLibrary.cpp +++ b/src/core/CL/CLKernelLibrary.cpp @@ -316,6 +316,8 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map = { "normalization_layer_in_map", "normalization_layer.cl" }, { "normalize_planar_yuv_layer_nchw", "normalize_planar_yuv_layer.cl" }, { "normalize_planar_yuv_layer_nhwc", "normalize_planar_yuv_layer.cl" }, + { "normalize_planar_yuv_layer_q8_nchw", "normalize_planar_yuv_layer_quantized.cl" }, + { "normalize_planar_yuv_layer_q8_nhwc", "normalize_planar_yuv_layer_quantized.cl" }, { "NV12_to_IYUV_bt709", "color_convert.cl" }, { "NV12_to_RGB888_bt709", "color_convert.cl" }, { "NV12_to_RGBA8888_bt709", "color_convert.cl" }, @@ -698,6 +700,10 @@ const std::map<std::string, std::string> CLKernelLibrary::_program_source_map = #include "./cl_kernels/normalize_planar_yuv_layer.clembed" }, { + "normalize_planar_yuv_layer_quantized.cl", +#include "./cl_kernels/normalize_planar_yuv_layer_quantized.clembed" + }, + { "batchnormalization_layer.cl", #include "./cl_kernels/batchnormalization_layer.clembed" }, diff --git a/src/core/CL/cl_kernels/normalize_planar_yuv_layer.cl b/src/core/CL/cl_kernels/normalize_planar_yuv_layer.cl index dc6652449e..a105968a7b 100644 --- a/src/core/CL/cl_kernels/normalize_planar_yuv_layer.cl +++ b/src/core/CL/cl_kernels/normalize_planar_yuv_layer.cl @@ -27,7 +27,7 @@ #define TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) -/** Apply normalize_planar_yuv layer on tensors with NCHW format. +/** Apply normalize_planar_yuv layer on tensors with NCHW data layout. * * @note Data type should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE e.g. -DVEC_SIZE=8 @@ -70,8 +70,8 @@ __kernel void normalize_planar_yuv_layer_nchw(TENSOR3D_DECLARATION(src), const uint current_slice = get_global_id(2) % NUM_CHANNELS; - const DATA_TYPE curr_mean = *((__global DATA_TYPE *)(mean.ptr + current_slice * mean.stride_x)); - const DATA_TYPE curr_std = *((__global DATA_TYPE *)(std.ptr + current_slice * std.stride_x)); + const DATA_TYPE curr_mean = *((__global DATA_TYPE *)(mean.ptr + current_slice * sizeof(DATA_TYPE))); + const DATA_TYPE curr_std = *((__global DATA_TYPE *)(std.ptr + current_slice * sizeof(DATA_TYPE))); TYPE data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr); TYPE res = (data - curr_mean) / curr_std; @@ -80,7 +80,7 @@ __kernel void normalize_planar_yuv_layer_nchw(TENSOR3D_DECLARATION(src), (res, 0, (__global DATA_TYPE *)dst.ptr); } -/** Apply normalize_planar_yuv layer on tensors with NHWC format. +/** Apply normalize_planar_yuv layer on tensors with NHWC data layout. * * @note Data type should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE e.g. -DVEC_SIZE=8 @@ -122,8 +122,8 @@ __kernel void normalize_planar_yuv_layer_nhwc(TENSOR3D_DECLARATION(src), const uint current_slice = get_global_id(0); - const TYPE curr_mean = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(mean.ptr + current_slice * VEC_SIZE * mean.stride_x)); - const TYPE curr_std = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(std.ptr + current_slice * VEC_SIZE * std.stride_x)); + const TYPE curr_mean = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(mean.ptr + current_slice * VEC_SIZE * sizeof(DATA_TYPE))); + const TYPE curr_std = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(std.ptr + current_slice * VEC_SIZE * sizeof(DATA_TYPE))); TYPE data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr); TYPE res = (data - curr_mean) / curr_std; diff --git a/src/core/CL/cl_kernels/normalize_planar_yuv_layer_quantized.cl b/src/core/CL/cl_kernels/normalize_planar_yuv_layer_quantized.cl new file mode 100644 index 0000000000..925975d2ba --- /dev/null +++ b/src/core/CL/cl_kernels/normalize_planar_yuv_layer_quantized.cl @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "helpers.h" + +#if defined(DATA_TYPE) && defined(VEC_SIZE) && defined(OFFSET) && defined(SCALE) + +#define TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) +#define OFFSET_FLT ((float)OFFSET) +#define SCALE_FLT ((float)SCALE) + +#if defined(NUM_CHANNELS) + +/** Apply normalize_planar_yuv layer on tensors with NCHW data layout. + * + * @note Data type should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float + * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE e.g. -DVEC_SIZE=8 + * @note The depth of the input tensor should be given as a preprocessor argument using -DNUM_CHANNELS e.g. -DNUM_CHANNELS=8 + * @note The quantization offset should be given as a preprocessor argument using -DOFFSET e.g. -DOFFSET=8 + * @note The quantization scale should be given as a preprocessor argument using -DSCALE e.g. -DSCALE=8 + * + * @param[in] src_ptr Pointer to the first source tensor. Supported data types: QASYMM8 + * @param[in] src_stride_x Stride of the first source tensor in X dimension (in bytes) + * @param[in] src_step_x input_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the first source tensor in Y dimension (in bytes) + * @param[in] src_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the first source tensor in Z dimension (in bytes) + * @param[in] src_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the first source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x output_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[in] mean_ptr Pointer to the mean source tensor. Supported data types: same as @p src_ptr + * @param[in] mean_stride_x Stride of the mean source tensor in X dimension (in bytes) + * @param[in] mean_step_x mean_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] mean_offset_first_element_in_bytes The offset of the first element in the mean source tensor + * @param[in] std_ptr Pointer to the std tensor. Supported data types: same as @p src_ptr + * @param[in] std_stride_x Stride of the std tensor in X dimension (in bytes) + * @param[in] std_step_x std_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] std_offset_first_element_in_bytes The offset of the first element in the var source tensor + */ +__kernel void normalize_planar_yuv_layer_q8_nchw(TENSOR3D_DECLARATION(src), + TENSOR3D_DECLARATION(dst), + VECTOR_DECLARATION(mean), + VECTOR_DECLARATION(std)) +{ + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + Vector mean = CONVERT_TO_VECTOR_STRUCT(mean); + Vector std = CONVERT_TO_VECTOR_STRUCT(std); + + const uint current_slice = get_global_id(2) % NUM_CHANNELS; + + float16 curr_mean_flt = (float16)(*((__global DATA_TYPE *)(mean.ptr + current_slice * sizeof(DATA_TYPE)))); + curr_mean_flt = round(curr_mean_flt - OFFSET_FLT) * SCALE_FLT; + + float16 curr_std_flt = (float16)(*((__global DATA_TYPE *)(std.ptr + current_slice * sizeof(DATA_TYPE)))); + curr_std_flt = round(curr_std_flt - OFFSET_FLT) * SCALE_FLT; + + float16 data_flt = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr), float16); + data_flt = round(data_flt - OFFSET_FLT) * SCALE_FLT; + + // Perform normalization + float16 res_flt = (data_flt - curr_mean_flt) / curr_std_flt; + + const TYPE res_u8 = CONVERT_SAT(round(res_flt / SCALE_FLT) + OFFSET_FLT, TYPE); + VSTORE(VEC_SIZE) + (res_u8, 0, (__global DATA_TYPE *)dst.ptr); +} + +#endif // defined(NUM_CHANNELS) + +/** Apply normalize_planar_yuv layer on tensors with NHWC data layout. + * + * @note Data type should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float + * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE e.g. -DVEC_SIZE=8 + * @note The quantization offset should be given as a preprocessor argument using -DOFFSET e.g. -DOFFSET=8 + * @note The quantization scale should be given as a preprocessor argument using -DSCALE e.g. -DSCALE=8 + * + * @param[in] src_ptr Pointer to the first source tensor. Supported data types: QASYMM8 + * @param[in] src_stride_x Stride of the first source tensor in X dimension (in bytes) + * @param[in] src_step_x input_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the first source tensor in Y dimension (in bytes) + * @param[in] src_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the first source tensor in Z dimension (in bytes) + * @param[in] src_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the first source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x output_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[in] mean_ptr Pointer to the mean source tensor. Supported data types: same as @p src_ptr + * @param[in] mean_stride_x Stride of the mean source tensor in X dimension (in bytes) + * @param[in] mean_step_x mean_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] mean_offset_first_element_in_bytes The offset of the first element in the mean source tensor + * @param[in] std_ptr Pointer to the std tensor. Supported data types: same as @p src_ptr + * @param[in] std_stride_x Stride of the std tensor in X dimension (in bytes) + * @param[in] std_step_x std_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] std_offset_first_element_in_bytes The offset of the first element in the var source tensor + */ +__kernel void normalize_planar_yuv_layer_q8_nhwc(TENSOR3D_DECLARATION(src), + TENSOR3D_DECLARATION(dst), + VECTOR_DECLARATION(mean), + VECTOR_DECLARATION(std)) +{ + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + Vector mean = CONVERT_TO_VECTOR_STRUCT(mean); + Vector std = CONVERT_TO_VECTOR_STRUCT(std); + + const uint current_slice = get_global_id(0); + + float16 curr_mean_flt = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(mean.ptr + current_slice * VEC_SIZE * sizeof(DATA_TYPE))), float16); + curr_mean_flt = round(curr_mean_flt - OFFSET_FLT) * SCALE_FLT; + + float16 curr_std_flt = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(std.ptr + current_slice * VEC_SIZE * sizeof(DATA_TYPE))), float16); + curr_std_flt = round(curr_std_flt - OFFSET_FLT) * SCALE_FLT; + + float16 data_flt = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr), float16); + data_flt = round(data_flt - OFFSET_FLT) * (SCALE_FLT); + + // Perform normalization + float16 res_flt = (data_flt - curr_mean_flt) / curr_std_flt; + + const TYPE res_u8 = CONVERT_SAT(round(res_flt / SCALE_FLT) + OFFSET_FLT, TYPE); + VSTORE(VEC_SIZE) + (res_u8, 0, (__global DATA_TYPE *)dst.ptr); +} +#endif // defined(DATA_TYPE) && defined(VEC_SIZE) && defined(OFFSET) && defined(SCALE) diff --git a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp index 31451ef422..a44507b0c6 100644 --- a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp +++ b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp @@ -42,7 +42,7 @@ namespace Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std) { ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, mean, std); @@ -111,15 +111,25 @@ void CLNormalizePlanarYUVLayerKernel::configure(const ICLTensor *input, ICLTenso const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size(); const unsigned int channel_idx = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::CHANNEL); + const DataType dt = input->info()->data_type(); // Set build options CLBuildOptions build_opts; - build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); + build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(dt))); build_opts.add_option(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); build_opts.add_option(("-DNUM_CHANNELS=" + support::cpp11::to_string(input->info()->dimension(channel_idx)))); + std::string kernel_name = "normalize_planar_yuv_layer_"; + if(is_data_type_quantized(dt)) + { + build_opts.add_option(("-DOFFSET=" + support::cpp11::to_string(input->info()->quantization_info().offset))); + build_opts.add_option(("-DSCALE=" + support::cpp11::to_string(input->info()->quantization_info().scale))); + kernel_name += "q8_"; + } + // Create kernel - _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("normalize_planar_yuv_layer_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options())); + kernel_name += lower_string(string_from_data_layout(input->info()->data_layout())); + _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info(), mean->info(), std->info()); @@ -130,7 +140,7 @@ void CLNormalizePlanarYUVLayerKernel::configure(const ICLTensor *input, ICLTenso _config_id = "normalize_planar_yuv_layer_"; _config_id += lower_string(string_from_data_layout(input->info()->data_layout())); _config_id += "_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); + _config_id += lower_string(string_from_data_type(dt)); _config_id += "_"; _config_id += support::cpp11::to_string(input->info()->dimension(0)); _config_id += "_"; |