From 303f0dbebf631b3db00d9d64e71018abbbe9d4fe Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Mon, 19 Nov 2018 11:56:51 +0000 Subject: COMPMID-1718: Extend DepthConvert to support Cast Change-Id: I6ee2c0b670727fc808fa636c53ddfaec3a0036c9 --- src/core/CL/cl_kernels/depth_convert.cl | 6 +-- src/core/CL/kernels/CLDepthConvertLayerKernel.cpp | 57 +++++++---------------- src/core/NEON/kernels/NESoftmaxLayerKernel.cpp | 4 +- src/runtime/CL/functions/CLCast.cpp | 44 +++++++++++++++++ src/runtime/CL/functions/CLDepthConvertLayer.cpp | 5 +- 5 files changed, 69 insertions(+), 47 deletions(-) create mode 100644 src/runtime/CL/functions/CLCast.cpp (limited to 'src') diff --git a/src/core/CL/cl_kernels/depth_convert.cl b/src/core/CL/cl_kernels/depth_convert.cl index 611449e614..7b03273b7b 100644 --- a/src/core/CL/cl_kernels/depth_convert.cl +++ b/src/core/CL/cl_kernels/depth_convert.cl @@ -69,8 +69,7 @@ __kernel void convert_depth_down( in_data = vload16(0, (__global DATA_TYPE_IN *)in.ptr); #if defined(IS_DATA_TYPE_FLOAT) - const DATA_TYPE_IN scale = (DATA_TYPE_IN)(1 << shift); - vstore16(CONVERT_DOWN(in_data / scale, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr); + vstore16(CONVERT_DOWN(in_data, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr); #else /* defined(IS_DATA_TYPE_FLOAT) */ vstore16(CONVERT_DOWN(in_data >> shift, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr); #endif /* defined(IS_DATA_TYPE_FLOAT) */ @@ -109,8 +108,7 @@ __kernel void convert_depth_up( in_data = vload16(0, (__global DATA_TYPE_IN *)in.ptr); #if defined(IS_DATA_TYPE_FLOAT) - const DATA_TYPE_OUT scale = (DATA_TYPE_OUT)(1 << shift); - vstore16(CONVERT_UP(in_data, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)) * scale, 0, (__global DATA_TYPE_OUT *)out.ptr); + vstore16(CONVERT_UP(in_data, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr); #else /* defined(IS_DATA_TYPE_FLOAT) */ vstore16(CONVERT_UP(in_data, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)) << shift, 0, (__global DATA_TYPE_OUT *)out.ptr); #endif /* defined(IS_DATA_TYPE_FLOAT) */ diff --git a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp index ffbd295646..b0c21624ed 100644 --- a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp @@ -37,8 +37,8 @@ #include #include -using namespace arm_compute; - +namespace arm_compute +{ namespace { Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift) @@ -46,42 +46,20 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, C ARM_COMPUTE_UNUSED(policy); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); ARM_COMPUTE_RETURN_ERROR_ON(input == output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, - DataType::U16, DataType::U32, DataType::S32, - DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, - DataType::U16, DataType::U32, DataType::S32, - DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, + 1, + DataType::U8, DataType::S8, DataType::S16, + DataType::U16, DataType::U32, DataType::S32, DataType::F16, + DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, + 1, + DataType::U8, DataType::S8, DataType::S16, + DataType::U16, DataType::U32, DataType::S32, DataType::F16, + DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == output->data_type(), "Input and output data types must be different"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_float(input->data_type()) && shift != 0, "Shift is used only with integer inputs"); ARM_COMPUTE_RETURN_ERROR_ON(shift >= 8); - // Check if convertion is supported - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U8 && (output->data_type() != DataType::U16 && output->data_type() != DataType::S16 - && output->data_type() != DataType::U32 && output->data_type() != DataType::S32), - "Only data types supported [in] U8 -> [out] U16, S16, U32, S32"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U16 && (output->data_type() != DataType::U8 && output->data_type() != DataType::U32 - && output->data_type() != DataType::S32), - "Only data types supported [in] U16 -> [out] U8, U32, S32"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::S16 && (output->data_type() != DataType::U8 && output->data_type() != DataType::U32 - && output->data_type() != DataType::S32), - "Only data types supported [in] S16 -> [out] U8, U32, S32"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U32 && (output->data_type() != DataType::U8 && output->data_type() != DataType::U16 - && output->data_type() != DataType::S16), - "Only data types supported [in] U32 -> [out] U8, U16, S16"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::S32 && (output->data_type() != DataType::U8 && output->data_type() != DataType::U16 - && output->data_type() != DataType::S16), - "Only data types supported [in] S32 -> [out] U8, U16, S16"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F16 && output->data_type() != DataType::F32, - "Only data types supported [in] F16 -> [out] F32"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F32 && output->data_type() != DataType::F16, - "Only data types supported [in] F32 -> [out] F16"); - // Validate in case of configured output if(output->total_size() > 0) { @@ -109,12 +87,12 @@ void CLDepthConvertLayerKernel::configure(const ICLTensor *input, ICLTensor *out CLBuildOptions build_opts; build_opts.add_option("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type())); build_opts.add_option("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type())); - // Down conversions from float always SATURATE as out-of-bounds conversion from float->integer is implementation defined - build_opts.add_option_if(input_size > output_size, ((policy == ConvertPolicy::WRAP) && !is_data_type_float(input->info()->data_type())) ? "-DWRAP" : "-DSATURATE"); - build_opts.add_option_if(is_data_type_float(input->info()->data_type()), "-DIS_DATA_TYPE_FLOAT"); + // Conversions from float always SATURATE as out-of-bounds conversion from float->integer is implementation defined + build_opts.add_option_if(is_data_type_float(input->info()->data_type()) || policy == ConvertPolicy::SATURATE, "-DSATURATE"); + build_opts.add_option_if(is_data_type_float(input->info()->data_type()) || is_data_type_float(output->info()->data_type()), "-DIS_DATA_TYPE_FLOAT"); // Create kernel - const std::string kernel_name = (input_size > output_size) ? "convert_depth_down" : "convert_depth_up"; + const std::string kernel_name = (input_size >= output_size) ? "convert_depth_down" : "convert_depth_up"; _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); // Set shift arg @@ -132,3 +110,4 @@ Status CLDepthConvertLayerKernel::validate(const ITensorInfo *input, const ITens return Status{}; } +} // namespace arm_compute \ No newline at end of file diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp index 0f416defab..e9417ece44 100644 --- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp +++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp @@ -34,7 +34,7 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/misc/Utility.h" +#include "arm_compute/core/utils/misc/SaturateCast.h" #include #include @@ -667,7 +667,7 @@ void logits_1d_softmax_qasymm8(const ITensor &in, const ITensor &max, void *cons /* Run remaining elements */ for(; i < input_width; ++i) { - out_ptr[i] = utility::saturate_cast(tmp_ptr[i] * sum_inversed); + out_ptr[i] = utils::cast::saturate_cast(tmp_ptr[i] * sum_inversed); } } }, diff --git a/src/runtime/CL/functions/CLCast.cpp b/src/runtime/CL/functions/CLCast.cpp new file mode 100644 index 0000000000..e0ffcdb09b --- /dev/null +++ b/src/runtime/CL/functions/CLCast.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLCast.h" + +#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h" +#include "support/ToolchainSupport.h" + +#include + +namespace arm_compute +{ +void CLCast::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input, output, policy, 0); + _kernel = std::move(k); +} + +Status CLCast::validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy) +{ + return CLDepthConvertLayerKernel::validate(input, output, policy, 0); +} +} // namespace arm_compute diff --git a/src/runtime/CL/functions/CLDepthConvertLayer.cpp b/src/runtime/CL/functions/CLDepthConvertLayer.cpp index 2e52e8aadc..dbf71ac1e0 100644 --- a/src/runtime/CL/functions/CLDepthConvertLayer.cpp +++ b/src/runtime/CL/functions/CLDepthConvertLayer.cpp @@ -28,8 +28,8 @@ #include -using namespace arm_compute; - +namespace arm_compute +{ void CLDepthConvertLayer::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift) { auto k = arm_compute::support::cpp14::make_unique(); @@ -41,3 +41,4 @@ Status CLDepthConvertLayer::validate(const ITensorInfo *input, const ITensorInfo { return CLDepthConvertLayerKernel::validate(input, output, policy, shift); } +} // namespace arm_compute -- cgit v1.2.1