From 205ba243309baaec4bccfc82229139978d1a354e Mon Sep 17 00:00:00 2001 From: Pablo Marquez Tello Date: Wed, 12 Jul 2023 14:29:58 +0100 Subject: Added S64/U64 support for the input in CLCast * Partially resolves MLCE-1089 Change-Id: Ie3d2fc2f755ae99cdb17b57cc90bb3f99a1843e0 Signed-off-by: Pablo Marquez Tello Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9909 Tested-by: Arm Jenkins Reviewed-by: Gunes Bayir Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- arm_compute/core/Utils.h | 9 +++-- arm_compute/runtime/CL/functions/CLCast.h | 33 +++++------------- docs/user_guide/operator_list.dox | 7 ++-- docs/user_guide/release_version_and_change_log.dox | 2 +- src/gpu/cl/kernels/ClCastKernel.cpp | 4 +-- src/gpu/cl/kernels/ClCastKernel.h | 10 +++--- tests/validation/CL/Cast.cpp | 40 +++++++++++++++++++++- tests/validation/fixtures/CastFixture.h | 10 ++++++ tests/validation/reference/DepthConvertLayer.cpp | 19 +++++++++- 9 files changed, 96 insertions(+), 38 deletions(-) diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h index 188ae8cf5b..6fa983d24e 100644 --- a/arm_compute/core/Utils.h +++ b/arm_compute/core/Utils.h @@ -164,6 +164,9 @@ inline size_t element_size_from_data_type(DataType dt) case DataType::S32: case DataType::F32: return 4; + case DataType::U64: + case DataType::S64: + return 8; default: ARM_COMPUTE_ERROR("Undefined element size for given data type"); return 0; @@ -712,7 +715,7 @@ QuantizationInfo get_softmax_output_quantization_info(DataType input_type, bool * * @return The pair with minimum and maximum values */ -std::pair get_quantized_activation_min_max(const ActivationLayerInfo& act_info, DataType data_type, UniformQuantizationInfo oq_info); +std::pair get_quantized_activation_min_max(const ActivationLayerInfo &act_info, DataType data_type, UniformQuantizationInfo oq_info); /** Convert a tensor format into a string. 
* @@ -749,7 +752,7 @@ const std::string &string_from_data_type(DataType dt); * * @return The string describing the activation function. */ -const std::string &string_from_activation_func(const ActivationFunction& act); +const std::string &string_from_activation_func(const ActivationFunction &act); /** Translates a given interpolation policy to a string. * * @param[in] policy @ref InterpolationPolicy to be translated to string. @@ -798,7 +801,7 @@ bool is_pool_3d_region_entirely_outside_input(const Pooling3dLayerInfo &info); * * @return True if padding is symmetric */ -inline bool is_symmetric(const Padding3D& info) +inline bool is_symmetric(const Padding3D &info) { return ((info.left == info.right) && (info.top == info.bottom) && (info.front == info.back)); } diff --git a/arm_compute/runtime/CL/functions/CLCast.h b/arm_compute/runtime/CL/functions/CLCast.h index d2cea7a8a2..650cd11b9b 100644 --- a/arm_compute/runtime/CL/functions/CLCast.h +++ b/arm_compute/runtime/CL/functions/CLCast.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -61,43 +61,28 @@ public: * |src |dst | * |:--------------|:--------------------------------------| * |U8 | S8, U16, S16, U32, S32, F16, F32 | + * |S8 | U8, U16, S16, U32, S32, F16, F32 | * |U16 | U8, S8, S16, U32, S32, F16, F32 | * |S16 | U8, S8, U16, U32, S32, F16, F32 | * |U32 | U8, S8, U16, S16, S32, F16, F32 | * |S32 | U8, S8, U16, S16, U32, F16, F32 | - * |F16 | U8, S8, U16, S16, U32, F32 | - * |F32 | U8, S8, U16, S16, U32, F16 | + * |U64 | U8, S8, U16, S16, U32, S32, F16, F32 | + * |S64 | U8, S8, U16, S16, U32, S32, F16, F32 | + * |F16 | U8, S8, U16, S16, S32, U32, F32 | + * |F32 | U8, S8, U16, S16, S32, U32, F16 | * * Input data type must be different than output data type. * - * @param[in] input The input tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. + * @param[in] input The input tensor to convert. 
Data types supported: U8/S8/U16/S16/U32/S32/U64/S64/F16/F32. * @param[out] output The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. * @param[in] policy Conversion policy. */ void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy); - /** Initialize the function's source, destination - * - * Input data type must be different than output data type. - * - * Valid conversions Input -> Output : - * - * - U8 -> S8, U16, S16, U32, S32, F16, F32 - * - U16 -> U8, S8, S16, U32, S32, F16, F32 - * - S16 -> U8, S8, U16, U32, S32, F16, F32 - * - U32 -> U8, S8, U16, S16, S32, F16, F32 - * - S32 -> U8, S8, U16, S16, U32, F16, F32 - * - F16 -> U8, S8, U16, S16, U32, F32 - * - F32 -> U8, S8, U16, S16, U32, F16 - * - * @param[in] compile_context The compile context to be used. - * @param[in] input The input tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. - * @param[out] output The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. - * @param[in] policy Conversion policy. - */ + // Initialize the function's source, destination void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy); /** Static function to check if given info will lead to a valid configuration of @ref CLCast * - * @param[in] input Source tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. + * @param[in] input Source tensor info. Data types supported: U8/S8/U16/S16/U32/S32/U64/S64/F16/F32. * @param[in] output Destination tensor info. Data type supported: U8/S8/U16/S16/U32/S32/F16/F32. * @param[in] policy Conversion policy. * diff --git a/docs/user_guide/operator_list.dox b/docs/user_guide/operator_list.dox index 8d34a763a5..66b8988d29 100644 --- a/docs/user_guide/operator_list.dox +++ b/docs/user_guide/operator_list.dox @@ -437,12 +437,15 @@ where N = batches, C = channels, H = height, W = width, D = depth
<tr><th>src<th>dst
<tr><td>U8<td>S8, U16, S16, U32, S32, F16, F32 +
<tr><td>S8<td>U8, U16, S16, U32, S32, F16, F32
<tr><td>U16<td>U8, S8, S16, U32, S32, F16, F32
<tr><td>S16<td>U8, S8, U16, U32, S32, F16, F32
<tr><td>U32<td>U8, S8, U16, S16, S32, F16, F32
<tr><td>S32<td>U8, S8, U16, S16, U32, F16, F32 -
<tr><td>F16<td>U8, S8, U16, S16, U32, F32 -
<tr><td>F32<td>U8, S8, U16, S16, U32, F16 +
<tr><td>U64<td>U8, S8, U16, S16, U32, S32, F16, F32 +
<tr><td>S64<td>U8, S8, U16, S16, U32, S32, F16, F32 +
<tr><td>F16<td>U8, S8, U16, S16, S32, U32, F32 +
<tr><td>F32<td>U8, S8, U16, S16, S32, U32, F16
ChannelShuffleLayer diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox index ec96f6096a..ce96370305 100644 --- a/docs/user_guide/release_version_and_change_log.dox +++ b/docs/user_guide/release_version_and_change_log.dox @@ -48,7 +48,7 @@ v23.08 Public major release - Add new OpenCL™ kernels: - @ref opencl::kernels::ClMatMulNativeMMULKernel support for FP32 and FP16, with batch support - Enable transposed convolution with non-square kernels on CPU and GPU. - + - Added support for input data type U64/S64 in CLCast. v23.05.1 Public patch release - Enable CMake and Bazel option to build multi_isa without FP16 support. - Fix compilation error in NEReorderLayer (aarch64 only). diff --git a/src/gpu/cl/kernels/ClCastKernel.cpp b/src/gpu/cl/kernels/ClCastKernel.cpp index 6baa31e710..991867d1f3 100644 --- a/src/gpu/cl/kernels/ClCastKernel.cpp +++ b/src/gpu/cl/kernels/ClCastKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2022 Arm Limited. + * Copyright (c) 2016-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -54,7 +54,7 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, Conver 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, - DataType::F32); + DataType::F32, DataType::S64, DataType::U64); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::S16, diff --git a/src/gpu/cl/kernels/ClCastKernel.h b/src/gpu/cl/kernels/ClCastKernel.h index 7fadfa73d0..a021b3c78c 100644 --- a/src/gpu/cl/kernels/ClCastKernel.h +++ b/src/gpu/cl/kernels/ClCastKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2022 Arm Limited. + * Copyright (c) 2016-2023 Arm Limited.
* * SPDX-License-Identifier: MIT * @@ -53,12 +53,14 @@ public: * - U16 -> U8, S8, S16, U32, S32, F16, F32 * - S16 -> U8, S8, U16, U32, S32, F16, F32 * - U32 -> U8, S8, U16, S16, S32, F16, F32 + * - S64 -> U8, S8, U16, S16, U32, S32, F16, F32 + * - U64 -> U8, S8, U16, S16, U32, S32, F16, F32 * - S32 -> U8, S8, U16, S16, U32, F16, F32 - * - F16 -> U8, S8, U16, S16, U32, F32 - * - F32 -> U8, S8, U16, S16, U32, F16 + * - F16 -> U8, S8, U16, S16, U32, S32, F32 + * - F32 -> U8, S8, U16, S16, U32, S32, F16 * * @param[in] compile_context The compile context to be used. - * @param[in] src The source tensor to convert. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32. + * @param[in] src The source tensor to convert. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/U64/S64/F16/F32. * @param[out] dst The destination tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32. * @param[in] policy Conversion policy */ diff --git a/tests/validation/CL/Cast.cpp b/tests/validation/CL/Cast.cpp index 84455ba941..3d04b80799 100644 --- a/tests/validation/CL/Cast.cpp +++ b/tests/validation/CL/Cast.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, 2022 Arm Limited. + * Copyright (c) 2018-2020, 2022-2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -122,6 +122,26 @@ const auto CastF32toS16Dataset = combine(framework::dataset::make("DataType", Da const auto CastF32toU32Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::U32)); const auto CastF32toS32Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::S32)); const auto CastF32toF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16)); + +// U64 +const auto CastU64toU8Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::U8)); +const auto CastU64toS8Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::S8)); +const auto CastU64toU16Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::U16)); +const auto CastU64toS16Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::S16)); +const auto CastU64toU32Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::U32)); +const auto CastU64toS32Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::S32)); +const auto CastU64toF16Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::F16)); +const auto CastU64toF32Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::F32)); + +// S64 +const auto CastS64toU8Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::U8)); +const auto CastS64toS8Dataset = 
combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::S8)); +const auto CastS64toU16Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::U16)); +const auto CastS64toS16Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::S16)); +const auto CastS64toU32Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::U32)); +const auto CastS64toS32Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::S32)); +const auto CastS64toF16Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::F16)); +const auto CastS64toF32Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::F32)); } // namespace TEST_SUITE(CL) @@ -227,6 +247,24 @@ CAST_SUITE(F32_to_U32, DataType::F32, DataType::U32, CLCastToU32Fixture, CAST_SUITE(F32_to_S32, DataType::F32, DataType::S32, CLCastToS32Fixture, CastF32toS32Dataset, one_tolerance) CAST_SUITE(F32_to_F16, DataType::F32, DataType::F16, CLCastToF16Fixture, CastF32toF16Dataset, zero_tolerance) +// S64 +CAST_SUITE(S64_to_U8, DataType::S64, DataType::U8, CLCastToU8Fixture, CastS64toU8Dataset, one_tolerance) +CAST_SUITE(S64_to_S8, DataType::S64, DataType::S8, CLCastToS8Fixture, CastS64toS8Dataset, one_tolerance) +CAST_SUITE(S64_to_U16, DataType::S64, DataType::U16, CLCastToU16Fixture, CastS64toU16Dataset, one_tolerance) +CAST_SUITE(S64_to_S16, DataType::S64, DataType::S16, CLCastToS16Fixture, CastS64toS16Dataset, one_tolerance) +CAST_SUITE(S64_to_U32, DataType::S64, DataType::U32, CLCastToU32Fixture, CastS64toU32Dataset, one_tolerance) +CAST_SUITE(S64_to_S32, DataType::S64, DataType::S32, CLCastToS32Fixture, CastS64toS32Dataset, 
one_tolerance) +CAST_SUITE(S64_to_F16, DataType::S64, DataType::F16, CLCastToF16Fixture, CastS64toF16Dataset, zero_tolerance) + +// U64 +CAST_SUITE(U64_to_U8, DataType::U64, DataType::U8, CLCastToU8Fixture, CastU64toU8Dataset, one_tolerance) +CAST_SUITE(U64_to_S8, DataType::U64, DataType::S8, CLCastToS8Fixture, CastU64toS8Dataset, one_tolerance) +CAST_SUITE(U64_to_U16, DataType::U64, DataType::U16, CLCastToU16Fixture, CastU64toU16Dataset, one_tolerance) +CAST_SUITE(U64_to_S16, DataType::U64, DataType::S16, CLCastToS16Fixture, CastU64toS16Dataset, one_tolerance) +CAST_SUITE(U64_to_U32, DataType::U64, DataType::U32, CLCastToU32Fixture, CastU64toU32Dataset, one_tolerance) +CAST_SUITE(U64_to_S32, DataType::U64, DataType::S32, CLCastToS32Fixture, CastU64toS32Dataset, one_tolerance) +CAST_SUITE(U64_to_F16, DataType::U64, DataType::F16, CLCastToF16Fixture, CastU64toF16Dataset, zero_tolerance) + TEST_SUITE_END() // Cast TEST_SUITE_END() // CL } // namespace validation diff --git a/tests/validation/fixtures/CastFixture.h b/tests/validation/fixtures/CastFixture.h index 60b4c2b314..e9d624e6f3 100644 --- a/tests/validation/fixtures/CastFixture.h +++ b/tests/validation/fixtures/CastFixture.h @@ -85,6 +85,16 @@ protected: library->fill_tensor_uniform(tensor, i, static_cast(signed_min), static_cast(signed_max)); break; } + case DataType::U64: + { + library->fill_tensor_uniform(tensor, i, static_cast(unsigned_min), static_cast(unsigned_max)); + break; + } + case DataType::S64: + { + library->fill_tensor_uniform(tensor, i, static_cast(signed_min), static_cast(signed_max)); + break; + } default: ARM_COMPUTE_ERROR("NOT SUPPORTED!"); } diff --git a/tests/validation/reference/DepthConvertLayer.cpp b/tests/validation/reference/DepthConvertLayer.cpp index 94c719ade7..8797722f00 100644 --- a/tests/validation/reference/DepthConvertLayer.cpp +++ b/tests/validation/reference/DepthConvertLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. 
+ * Copyright (c) 2017-2020, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -186,6 +186,23 @@ template SimpleTensor depth_convert(const SimpleTensor &src, Dat template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +// S64 +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); + +// U64 +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const 
SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); } // namespace reference } // namespace validation } // namespace test -- cgit v1.2.1