diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2018-11-19 11:56:51 +0000 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2018-11-22 12:41:32 +0000 |
commit | 303f0dbebf631b3db00d9d64e71018abbbe9d4fe (patch) | |
tree | 631e70c9a8141f1262752829a64b3e33c7f1ee93 | |
parent | 9d3a831d4131f8a8b37f127f11d36848d33e8496 (diff) | |
download | ComputeLibrary-303f0dbebf631b3db00d9d64e71018abbbe9d4fe.tar.gz |
COMPMID-1718: Extend DepthConvert to support Cast
Change-Id: I6ee2c0b670727fc808fa636c53ddfaec3a0036c9
19 files changed, 1151 insertions, 188 deletions
diff --git a/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h index b70a6a993a..c155cb2982 100644 --- a/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h @@ -43,24 +43,24 @@ public: * * Valid conversions Input -> Output : * - * - U8 -> U16, S16, U32, S32 - * - U16 -> U8, U32, S32 - * - S16 -> U8, U32, S32 - * - U32 -> U8, U16, S16 - * - S32 -> U8, U16, S16 - * - F16 -> F32 - * - F32 -> F16 + * - U8 -> S8, U16, S16, U32, S32, F16, F32 + * - U16 -> U8, S8, S16, U32, S32, F16, F32 + * - S16 -> U8, S8, U16, U32, S32, F16, F32 + * - U32 -> U8, S8, U16, S16, S32, F16, F32 + * - S32 -> U8, S8, U16, S16, U32, F16, F32 + * - F16 -> U8, S8, U16, S16, U32, F32 + * - F32 -> U8, S8, U16, S16, U32, F16 * - * @param[in] input The input tensor to convert. Data types supported: U8/U16/S16/U32/S32/F16/F32. - * @param[out] output The output tensor. Data types supported: U8/U16/S16/U32/S32/F16/F32. + * @param[in] input The input tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. + * @param[out] output The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. * @param[in] policy Conversion policy * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. */ void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift); /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConvertLayerKernel * - * @param[in] input Source tensor info. Data types supported: U8/U16/S16/U32/S32/F16/F32. - * @param[in] output Destination tensor info. Data type supported: U8/U16/S16/U32/S32/F16/F32. + * @param[in] input Source tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. + * @param[in] output Destination tensor info. Data type supported: U8/S8/U16/S16/U32/S32/F16/F32. 
 * @param[in] policy Conversion policy * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. * diff --git a/arm_compute/core/utils/misc/Requires.h b/arm_compute/core/utils/misc/Requires.h new file mode 100644 index 0000000000..2852300bb1 --- /dev/null +++ b/arm_compute/core/utils/misc/Requires.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_UTILS_REQUIRES_H__ +#define __ARM_COMPUTE_UTILS_REQUIRES_H__ + +namespace arm_compute +{ +namespace utils +{ +namespace requires +{ +// *INDENT-OFF* +// clang-format off +namespace detail +{ +enum class enabler +{ +}; +} // namespace detail + +/** Requirements as template */ +#define REQUIRES_T(...) template <bool Cond = (__VA_ARGS__), typename std::enable_if<Cond, int>::type = 0> +/** Requirements as template argument */ +#define REQUIRES_TA(...) 
typename = typename std::enable_if<(__VA_ARGS__), arm_compute::utils::requires::detail::enabler>::type +// clang-format on +// *INDENT-ON* +} // namespace requires +} // namespace utils +} // namespace arm_compute +#endif /*__ARM_COMPUTE_UTILS_REQUIRES_H__ */ diff --git a/arm_compute/core/utils/misc/Rounding.h b/arm_compute/core/utils/misc/Rounding.h new file mode 100644 index 0000000000..3f86cc8db3 --- /dev/null +++ b/arm_compute/core/utils/misc/Rounding.h @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_UTILS_ROUNDING_H__ +#define __ARM_COMPUTE_UTILS_ROUNDING_H__ + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/utils/misc/Requires.h" +#include "arm_compute/core/utils/misc/Traits.h" +#include "support/ToolchainSupport.h" + +#include <cmath> + +namespace arm_compute +{ +namespace utils +{ +namespace rounding +{ +/** Rounding mode */ +enum class RoundingMode +{ + TO_ZERO, /**< Round towards zero */ + AWAY_FROM_ZERO, /**< Round away from zero */ + HALF_TO_ZERO, /**< Round half towards zero */ + HALF_AWAY_FROM_ZERO, /**< Round half away from zero */ + HALF_UP, /**< Round half towards positive infinity */ + HALF_DOWN, /**< Round half towards negative infinity */ + HALF_EVEN /**< Round half towards nearest even */ +}; + +/** Round floating-point value with round to zero + * + * @tparam T Parameter type. Should be of floating point type. + * + * @param[in] value floating-point value to be rounded. + * + * @return Floating-point value of rounded @p value. + */ +template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)> +inline T round_to_zero(T value) +{ + T res = std::floor(std::fabs(value)); + return (value < 0.f) ? -res : res; +} + +/** Round floating-point value with round away from zero + * + * @tparam T Parameter type. Should be of floating point type. + * + * @param[in] value floating-point value to be rounded. + * + * @return Floating-point value of rounded @p value. + */ +template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)> +inline T round_away_from_zero(T value) +{ + T res = std::ceil(std::fabs(value)); + return (value < 0.f) ? -res : res; +} + +/** Round floating-point value with half value rounding towards zero. + * + * @tparam T Parameter type. Should be of floating point type. + * + * @param[in] value floating-point value to be rounded. + * + * @return Floating-point value of rounded @p value. 
+ */ +template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)> +inline T round_half_to_zero(T value) +{ + T res = T(std::ceil(std::fabs(value) - 0.5f)); + return (value < 0.f) ? -res : res; +} + +/** Round floating-point value with half value rounding away from zero. + * + * @tparam T Parameter type. Should be of floating point type. + * + * @param[in] value floating-point value to be rounded. + * + * @return Floating-point value of rounded @p value. + */ +template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)> +inline T round_half_away_from_zero(T value) +{ + T res = T(std::floor(std::fabs(value) + 0.5f)); + return (value < 0.f) ? -res : res; +} + +/** Round floating-point value with half value rounding to positive infinity. + * + * @tparam T Parameter type. Should be of floating point type. + * + * @param[in] value floating-point value to be rounded. + * + * @return Floating-point value of rounded @p value. + */ +template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)> +inline T round_half_up(T value) +{ + return std::floor(value + 0.5f); +} + +/** Round floating-point value with half value rounding to negative infinity. + * + * @tparam T Parameter type. Should be of floating point type. + * + * @param[in] value floating-point value to be rounded. + * + * @return Floating-point value of rounded @p value. + */ +template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)> +inline T round_half_down(T value) +{ + return std::ceil(value - 0.5f); +} + +/** Round floating-point value with half value rounding to nearest even. + * + * @tparam T Parameter type. Should be of floating point type. + * + * @param[in] value floating-point value to be rounded. + * @param[in] epsilon precision. + * + * @return Floating-point value of rounded @p value. 
+ */ +template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)> +inline T round_half_even(T value, T epsilon = std::numeric_limits<T>::epsilon()) +{ + T positive_value = std::abs(value); + T ipart = 0; + std::modf(positive_value, &ipart); + // If 'value' is exactly halfway between two integers + if(std::abs(positive_value - (ipart + 0.5f)) < epsilon) + { + // If 'ipart' is even then return 'ipart' + if(std::fmod(ipart, 2.f) < epsilon) + { + return support::cpp11::copysign(ipart, value); + } + // Else return the nearest even integer + return support::cpp11::copysign(std::ceil(ipart + 0.5f), value); + } + // Otherwise use the usual round to closest + return support::cpp11::copysign(support::cpp11::round(positive_value), value); +} + +/** Round floating-point value given a rounding mode + * + * @tparam T Parameter type. Should be of floating point type. + * + * @param[in] value floating-point value to be rounded. + * @param[in] rounding_mode Rounding mode to use. + * + * @return Floating-point value of rounded @p value. 
+ */ +template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)> +inline T round(T value, RoundingMode rounding_mode) +{ + switch(rounding_mode) + { + case RoundingMode::TO_ZERO: + return round_to_zero(value); + case RoundingMode::AWAY_FROM_ZERO: + return round_away_from_zero(value); + case RoundingMode::HALF_TO_ZERO: + return round_half_to_zero(value); + case RoundingMode::HALF_AWAY_FROM_ZERO: + return round_half_away_from_zero(value); + case RoundingMode::HALF_UP: + return round_half_up(value); + case RoundingMode::HALF_DOWN: + return round_half_down(value); + case RoundingMode::HALF_EVEN: + return round_half_even(value); + default: + ARM_COMPUTE_ERROR("Unsupported rounding mode!"); + } +} +} // namespace rounding +} // namespace utils +} // namespace arm_compute +#endif /*__ARM_COMPUTE_UTILS_ROUNDING_H__ */ diff --git a/arm_compute/core/utils/misc/SaturateCast.h b/arm_compute/core/utils/misc/SaturateCast.h new file mode 100644 index 0000000000..b9e4787d9a --- /dev/null +++ b/arm_compute/core/utils/misc/SaturateCast.h @@ -0,0 +1,218 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_UTILS_CAST_SATURATE_CAST_H__ +#define __ARM_COMPUTE_UTILS_CAST_SATURATE_CAST_H__ + +#include "arm_compute/core/utils/misc/Rounding.h" +#include "arm_compute/core/utils/misc/Traits.h" +#include "arm_compute/core/utils/misc/Utility.h" + +namespace arm_compute +{ +namespace utils +{ +namespace cast +{ +// *INDENT-OFF* +// clang-format off +// same type +template<typename T, + typename U, + typename std::enable_if<std::is_same<T, U>::value, int >::type = 0 > +T saturate_cast(U v) +{ + return v; +} + +// signed -> signed widening/same_width +template<typename T, + typename U, + typename std::enable_if<std::is_integral<T>::value && + std::is_integral<U>::value && + std::is_signed<U>() && + std::is_signed<T>() && + !std::is_same<T, U>::value && + sizeof(T) >= sizeof(U), + int >::type = 0 > +inline T saturate_cast(U v) +{ + return static_cast<T>(v); +} +// signed -> signed narrowing +template<typename T, + typename U, + typename std::enable_if<std::is_integral<T>::value && + std::is_integral<U>::value && + std::is_signed<U>() && + std::is_signed<T>() && + !std::is_same<T, U>::value && + sizeof(T) < sizeof(U), + int >::type = 0 > +inline T saturate_cast(U v) +{ + return static_cast<T>(utility::clamp<U>(v, std::numeric_limits<T>::lowest(), std::numeric_limits<T>::max())); +} + +// unsigned -> signed widening +template<typename T, + typename U, + typename std::enable_if<std::is_integral<T>::value && + std::is_integral<U>::value && + std::is_unsigned<U>() && + std::is_signed<T>() && + !std::is_same<T, U>::value && + (sizeof(T) > sizeof(U)), + int >::type = 0 > +inline T saturate_cast(U v) +{ + return static_cast<T>(v); +} +// unsigned -> signed narrowing +template<typename T, + 
typename U, + typename std::enable_if<std::is_integral<T>::value && + std::is_integral<U>::value && + std::is_unsigned<U>() && + std::is_signed<T>() && + !std::is_same<T, U>::value && + sizeof(T) < sizeof(U), + int >::type = 0 > +inline T saturate_cast(U v) +{ + return static_cast<T>(std::min<U>(v, std::numeric_limits<T>::max())); +} +// unsigned -> signed same_width +template<typename T, + typename U, + typename std::enable_if<std::is_integral<T>::value && + std::is_integral<U>::value && + std::is_unsigned<U>() && + std::is_signed<T>() && + !std::is_same<T, U>::value && + sizeof(T) == sizeof(U), + int >::type = 0 > +inline T saturate_cast(U v) +{ + return static_cast<T>(std::min<U>(v, std::numeric_limits<T>::max())); +} + +// signed -> unsigned widening/same width +template<typename T, + typename U, + typename std::enable_if<std::is_integral<T>::value && + std::is_integral<U>::value && + std::is_signed<U>() && + std::is_unsigned<T>() && + !std::is_same<T, U>::value && + sizeof(T) >= sizeof(U), + int >::type = 0 > +inline T saturate_cast(U v) +{ + return static_cast<T>(std::max<U>(0, v)); +} + +// signed -> unsigned narrowing +template<typename T, + typename U, + typename std::enable_if<std::is_integral<T>::value && + std::is_integral<U>::value && + std::is_signed<U>() && + std::is_unsigned<T>() && + !std::is_same<T, U>::value && + sizeof(T) < sizeof(U), + int >::type = 0 > +inline T saturate_cast(U v) +{ + return static_cast<T>(utility::clamp<U>(v, 0, std::numeric_limits<T>::max())); +} + +// unsigned -> unsigned widening/same width +template<typename T, + typename U, + typename std::enable_if<std::is_integral<T>::value && + std::is_integral<U>::value && + std::is_unsigned<T>() && + std::is_unsigned<U>() && + !std::is_same<T, U>::value && + sizeof(T) >= sizeof(U), + int >::type = 0 > +inline T saturate_cast(U v) +{ + return static_cast<T>(v); +} + +// unsigned -> unsigned narrowing +template<typename T, + typename U, + typename 
std::enable_if<std::is_integral<T>::value && + std::is_integral<U>::value && + std::is_unsigned<T>() && + std::is_unsigned<U>() && + !std::is_same<T, U>::value && + sizeof(T) < sizeof(U), + int >::type = 0 > +inline T saturate_cast(U v) +{ + return static_cast<T>(utility::clamp<U>(v, std::numeric_limits<T>::lowest(), std::numeric_limits<T>::max())); +} + +// float -> int +template<typename T, + typename U, + typename std::enable_if<std::is_integral<T>::value && + traits::is_floating_point<U>::value, + int >::type = 0 > +inline T saturate_cast(U v) +{ + int32_t vi = utils::rounding::round_half_away_from_zero(v); + return saturate_cast<T>(vi); +} + +// int -> float +template<typename T, + typename U, + typename std::enable_if<traits::is_floating_point<T>::value && + std::is_integral<U>::value, + int >::type = 0 > +inline T saturate_cast(U v) +{ + return static_cast<T>(v); +} + +// float -> float +template<typename T, + typename U, + typename std::enable_if<traits::is_floating_point<T>::value && + traits::is_floating_point<U>::value, + int >::type = 0 > +inline T saturate_cast(U v) +{ + return static_cast<T>(v); +} +// clang-format on +// *INDENT-ON* +} // namespace cast +} // namespace utils +} // namespace arm_compute +#endif /* __ARM_COMPUTE_UTILS_CAST_SATURATE_CAST_H__ */ diff --git a/arm_compute/core/utils/misc/Traits.h b/arm_compute/core/utils/misc/Traits.h new file mode 100644 index 0000000000..9d86dd1b3c --- /dev/null +++ b/arm_compute/core/utils/misc/Traits.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_UTILS_TRAITS_TRAITS_H__ +#define __ARM_COMPUTE_UTILS_TRAITS_TRAITS_H__ + +#include <type_traits> + +namespace arm_compute +{ +namespace utils +{ +namespace traits +{ +template <typename T> +struct is_floating_point : public std::is_floating_point<T> +{ +}; + +template <> +struct is_floating_point<half> : public std::true_type +{ +}; +} // namespace traits +} // namespace utils +} // namespace arm_compute +#endif /* __ARM_COMPUTE_UTILS_TRAITS_TRAITS_H__ */ diff --git a/arm_compute/core/utils/misc/Utility.h b/arm_compute/core/utils/misc/Utility.h index 0a9f180b4c..37c8b66e06 100644 --- a/arm_compute/core/utils/misc/Utility.h +++ b/arm_compute/core/utils/misc/Utility.h @@ -128,22 +128,6 @@ inline auto foldl(F &&func, T &&initial, U &&value, Us &&... 
values) -> decltype return foldl(std::forward<F>(func), func(std::forward<T>(initial), std::forward<U>(value)), std::forward<Us>(values)...); } -/** Type cast with saturation. - * - * @param[in] val Value of type U to cast. - * - * @return Original value clamped to numeric limits of T and converted to type T. - * - * @warning Numeric limits of T must be representable without loss in type U. - */ -template <typename T, typename U> -T saturate_cast(U val) -{ - const auto low = static_cast<U>(std::numeric_limits<T>::lowest()); - const auto high = static_cast<U>(std::numeric_limits<T>::max()); - return static_cast<T>(clamp(val, low, high)); -} - /** Perform an index sort of a given vector. * * @param[in] v Vector to sort diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h index 9d4aa5b6a2..2a8294f28b 100644 --- a/arm_compute/runtime/CL/CLFunctions.h +++ b/arm_compute/runtime/CL/CLFunctions.h @@ -40,6 +40,7 @@ #include "arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h" #include "arm_compute/runtime/CL/functions/CLBox3x3.h" #include "arm_compute/runtime/CL/functions/CLCannyEdge.h" +#include "arm_compute/runtime/CL/functions/CLCast.h" #include "arm_compute/runtime/CL/functions/CLChannelCombine.h" #include "arm_compute/runtime/CL/functions/CLChannelExtract.h" #include "arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h" diff --git a/arm_compute/runtime/CL/functions/CLCast.h b/arm_compute/runtime/CL/functions/CLCast.h new file mode 100644 index 0000000000..ca50cf1691 --- /dev/null +++ b/arm_compute/runtime/CL/functions/CLCast.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLCAST_H__ +#define __ARM_COMPUTE_CLCAST_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/ICLSimpleFunction.h" + +#include <cstdint> + +namespace arm_compute +{ +class ICLTensor; + +/** Basic function to run @ref CLDepthConvertLayerKernel. */ +class CLCast : public ICLSimpleFunction +{ +public: + /** Initialize the function's source, destination + * + * Input data type must be different than output data type. 
+ * + * Valid conversions Input -> Output : + * + * - U8 -> S8, U16, S16, U32, S32, F16, F32 + * - U16 -> U8, S8, S16, U32, S32, F16, F32 + * - S16 -> U8, S8, U16, U32, S32, F16, F32 + * - U32 -> U8, S8, U16, S16, S32, F16, F32 + * - S32 -> U8, S8, U16, S16, U32, F16, F32 + * - F16 -> U8, S8, U16, S16, U32, F32 + * - F32 -> U8, S8, U16, S16, U32, F16 + * + * @param[in] input The input tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. + * @param[out] output The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. + * @param[in] policy Conversion policy. + */ + void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy); + /** Static function to check if given info will lead to a valid configuration of @ref CLCast + * + * @param[in] input Source tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. + * @param[in] output Destination tensor info. Data type supported: U8/S8/U16/S16/U32/S32/F16/F32. + * @param[in] policy Conversion policy. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy); +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLCAST_H__*/ diff --git a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h index 40ae907805..a1985562da 100644 --- a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h +++ b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h @@ -43,24 +43,24 @@ public: * * Valid conversions Input -> Output : * - * - U8 -> U16, S16, U32, S32 - * - U16 -> U8, U32, S32 - * - S16 -> U8, U32, S32 - * - U32 -> U8, U16, S16 - * - S32 -> U8, U16, S16 - * - F16 -> F32 - * - F32 -> F16 + * - U8 -> S8, U16, S16, U32, S32, F16, F32 + * - U16 -> U8, S8, S16, U32, S32, F16, F32 + * - S16 -> U8, S8, U16, U32, S32, F16, F32 + * - U32 -> U8, S8, U16, S16, S32, F16, F32 + * - S32 -> U8, S8, U16, S16, U32, F16, F32 + * - F16 -> U8, S8, U16, S16, U32, F32 + * - F32 -> U8, S8, U16, S16, U32, F16 * - * @param[in] input The input tensor to convert. Data types supported: U8/U16/S16/U32/S32/F16/F32. - * @param[out] output The output tensor. Data types supported: U8/U16/S16/U32/S32/F16/F32. + * @param[in] input The input tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. + * @param[out] output The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. * @param[in] policy Conversion policy. * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. */ void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift); /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConvertLayer * - * @param[in] input Source tensor info. Data types supported: U8/U16/S16/U32/S32/F16/F32. - * @param[in] output Destination tensor info. Data type supported: U8/U16/S16/U32/S32/F16/F32. + * @param[in] input Source tensor info. 
Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. + * @param[in] output Destination tensor info. Data type supported: U8/S8/U16/S16/U32/S32/F16/F32. * @param[in] policy Conversion policy. * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. * @@ -68,5 +68,5 @@ public: */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift); }; -} +} // namespace arm_compute #endif /*__ARM_COMPUTE_CLDEPTHCONVERT_H__*/ diff --git a/src/core/CL/cl_kernels/depth_convert.cl b/src/core/CL/cl_kernels/depth_convert.cl index 611449e614..7b03273b7b 100644 --- a/src/core/CL/cl_kernels/depth_convert.cl +++ b/src/core/CL/cl_kernels/depth_convert.cl @@ -69,8 +69,7 @@ __kernel void convert_depth_down( in_data = vload16(0, (__global DATA_TYPE_IN *)in.ptr); #if defined(IS_DATA_TYPE_FLOAT) - const DATA_TYPE_IN scale = (DATA_TYPE_IN)(1 << shift); - vstore16(CONVERT_DOWN(in_data / scale, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr); + vstore16(CONVERT_DOWN(in_data, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr); #else /* defined(IS_DATA_TYPE_FLOAT) */ vstore16(CONVERT_DOWN(in_data >> shift, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr); #endif /* defined(IS_DATA_TYPE_FLOAT) */ @@ -109,8 +108,7 @@ __kernel void convert_depth_up( in_data = vload16(0, (__global DATA_TYPE_IN *)in.ptr); #if defined(IS_DATA_TYPE_FLOAT) - const DATA_TYPE_OUT scale = (DATA_TYPE_OUT)(1 << shift); - vstore16(CONVERT_UP(in_data, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)) * scale, 0, (__global DATA_TYPE_OUT *)out.ptr); + vstore16(CONVERT_UP(in_data, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr); #else /* defined(IS_DATA_TYPE_FLOAT) */ vstore16(CONVERT_UP(in_data, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)) << shift, 0, (__global DATA_TYPE_OUT *)out.ptr); #endif /* defined(IS_DATA_TYPE_FLOAT) */ diff --git a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp 
b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp index ffbd295646..b0c21624ed 100644 --- a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp @@ -37,8 +37,8 @@ #include <set> #include <string> -using namespace arm_compute; - +namespace arm_compute +{ namespace { Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift) @@ -46,42 +46,20 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, C ARM_COMPUTE_UNUSED(policy); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); ARM_COMPUTE_RETURN_ERROR_ON(input == output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, - DataType::U16, DataType::U32, DataType::S32, - DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, - DataType::U16, DataType::U32, DataType::S32, - DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, + 1, + DataType::U8, DataType::S8, DataType::S16, + DataType::U16, DataType::U32, DataType::S32, DataType::F16, + DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, + 1, + DataType::U8, DataType::S8, DataType::S16, + DataType::U16, DataType::U32, DataType::S32, DataType::F16, + DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == output->data_type(), "Input and output data types must be different"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_float(input->data_type()) && shift != 0, "Shift is used only with integer inputs"); ARM_COMPUTE_RETURN_ERROR_ON(shift >= 8); - // Check if convertion is supported - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U8 && (output->data_type() != DataType::U16 && output->data_type() != DataType::S16 - && output->data_type() != DataType::U32 && output->data_type() != DataType::S32), - "Only data types supported 
[in] U8 -> [out] U16, S16, U32, S32"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U16 && (output->data_type() != DataType::U8 && output->data_type() != DataType::U32 - && output->data_type() != DataType::S32), - "Only data types supported [in] U16 -> [out] U8, U32, S32"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::S16 && (output->data_type() != DataType::U8 && output->data_type() != DataType::U32 - && output->data_type() != DataType::S32), - "Only data types supported [in] S16 -> [out] U8, U32, S32"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U32 && (output->data_type() != DataType::U8 && output->data_type() != DataType::U16 - && output->data_type() != DataType::S16), - "Only data types supported [in] U32 -> [out] U8, U16, S16"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::S32 && (output->data_type() != DataType::U8 && output->data_type() != DataType::U16 - && output->data_type() != DataType::S16), - "Only data types supported [in] S32 -> [out] U8, U16, S16"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F16 && output->data_type() != DataType::F32, - "Only data types supported [in] F16 -> [out] F32"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F32 && output->data_type() != DataType::F16, - "Only data types supported [in] F32 -> [out] F16"); - // Validate in case of configured output if(output->total_size() > 0) { @@ -109,12 +87,12 @@ void CLDepthConvertLayerKernel::configure(const ICLTensor *input, ICLTensor *out CLBuildOptions build_opts; build_opts.add_option("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type())); build_opts.add_option("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type())); - // Down conversions from float always SATURATE as out-of-bounds conversion from float->integer is implementation defined - build_opts.add_option_if(input_size > output_size, ((policy == 
ConvertPolicy::WRAP) && !is_data_type_float(input->info()->data_type())) ? "-DWRAP" : "-DSATURATE"); - build_opts.add_option_if(is_data_type_float(input->info()->data_type()), "-DIS_DATA_TYPE_FLOAT"); + // Conversions from float always SATURATE as out-of-bounds conversion from float->integer is implementation defined + build_opts.add_option_if(is_data_type_float(input->info()->data_type()) || policy == ConvertPolicy::SATURATE, "-DSATURATE"); + build_opts.add_option_if(is_data_type_float(input->info()->data_type()) || is_data_type_float(output->info()->data_type()), "-DIS_DATA_TYPE_FLOAT"); // Create kernel - const std::string kernel_name = (input_size > output_size) ? "convert_depth_down" : "convert_depth_up"; + const std::string kernel_name = (input_size >= output_size) ? "convert_depth_down" : "convert_depth_up"; _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); // Set shift arg @@ -132,3 +110,4 @@ Status CLDepthConvertLayerKernel::validate(const ITensorInfo *input, const ITens return Status{}; } +} // namespace arm_compute
\ No newline at end of file diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp index 0f416defab..e9417ece44 100644 --- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp +++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp @@ -34,7 +34,7 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/misc/Utility.h" +#include "arm_compute/core/utils/misc/SaturateCast.h" #include <algorithm> #include <arm_neon.h> @@ -667,7 +667,7 @@ void logits_1d_softmax_qasymm8(const ITensor &in, const ITensor &max, void *cons /* Run remaining elements */ for(; i < input_width; ++i) { - out_ptr[i] = utility::saturate_cast<qasymm8_t>(tmp_ptr[i] * sum_inversed); + out_ptr[i] = utils::cast::saturate_cast<qasymm8_t>(tmp_ptr[i] * sum_inversed); } } }, diff --git a/src/runtime/CL/functions/CLCast.cpp b/src/runtime/CL/functions/CLCast.cpp new file mode 100644 index 0000000000..e0ffcdb09b --- /dev/null +++ b/src/runtime/CL/functions/CLCast.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/CL/functions/CLCast.h" + +#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h" +#include "support/ToolchainSupport.h" + +#include <utility> + +namespace arm_compute +{ +void CLCast::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy) +{ + auto k = arm_compute::support::cpp14::make_unique<CLDepthConvertLayerKernel>(); + k->configure(input, output, policy, 0); + _kernel = std::move(k); +} + +Status CLCast::validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy) +{ + return CLDepthConvertLayerKernel::validate(input, output, policy, 0); +} +} // namespace arm_compute diff --git a/src/runtime/CL/functions/CLDepthConvertLayer.cpp b/src/runtime/CL/functions/CLDepthConvertLayer.cpp index 2e52e8aadc..dbf71ac1e0 100644 --- a/src/runtime/CL/functions/CLDepthConvertLayer.cpp +++ b/src/runtime/CL/functions/CLDepthConvertLayer.cpp @@ -28,8 +28,8 @@ #include <utility> -using namespace arm_compute; - +namespace arm_compute +{ void CLDepthConvertLayer::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift) { auto k = arm_compute::support::cpp14::make_unique<CLDepthConvertLayerKernel>(); @@ -41,3 +41,4 @@ Status CLDepthConvertLayer::validate(const ITensorInfo *input, const ITensorInfo { return CLDepthConvertLayerKernel::validate(input, output, policy, shift); } +} // namespace arm_compute diff --git a/tests/validation/CL/Cast.cpp b/tests/validation/CL/Cast.cpp new file mode 100644 index 0000000000..107edcd11e --- /dev/null +++ b/tests/validation/CL/Cast.cpp @@ -0,0 +1,244 @@ +/* + * Copyright (c) 2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLCast.h" +#include "tests/CL/CLAccessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/ConvertPolicyDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/CastFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +// Tolerance +constexpr AbsoluteTolerance<float> one_tolerance(1); +constexpr AbsoluteTolerance<float> zero_tolerance(0); + +/** Input data sets **/ +// U8 +const auto CastU8toS8Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S8)); +const auto CastU8toU16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16)); +const auto CastU8toS16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16)); +const auto CastU8toU32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U32)); +const auto CastU8toS32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S32)); +const auto CastU8toF16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::F16)); +const auto CastU8toF32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::F32)); + +// S8 +const auto CastS8toU8Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", 
DataType::U8)); +const auto CastS8toU16Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::U16)); +const auto CastS8toS16Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::S16)); +const auto CastS8toU32Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::U32)); +const auto CastS8toS32Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::S32)); +const auto CastS8toF16Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::F16)); +const auto CastS8toF32Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::F32)); + +// U16 +const auto CastU16toU8Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U8)); +const auto CastU16toS8Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::S8)); +const auto CastU16toS16Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::S16)); +const auto CastU16toU32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32)); +const auto CastU16toS32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::S32)); +const auto CastU16toF16Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::F16)); +const auto CastU16toF32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::F32)); + +// S16 +const auto CastS16toU8Dataset = 
combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8)); +const auto CastS16toS8Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S8)); +const auto CastS16toU16Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U16)); +const auto CastS16toU32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U32)); +const auto CastS16toS32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32)); +const auto CastS16toF16Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::F16)); +const auto CastS16toF32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::F32)); + +// U32 +const auto CastU32toU8Dataset = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::U8)); +const auto CastU32toS8Dataset = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::S8)); +const auto CastU32toU16Dataset = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::U16)); +const auto CastU32toS16Dataset = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::S16)); +const auto CastU32toS32Dataset = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::S32)); +const auto CastU32toF16Dataset = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::F16)); +const auto CastU32toF32Dataset = combine(framework::dataset::make("DataType", 
DataType::U32), framework::dataset::make("DataType", DataType::F32)); + +// S32 +const auto CastS32toU8Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::U8)); +const auto CastS32toS8Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::S8)); +const auto CastS32toU16Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::U16)); +const auto CastS32toS16Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::S16)); +const auto CastS32toU32Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::U32)); +const auto CastS32toF16Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::F16)); +const auto CastS32toF32Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::F32)); + +// F16 +const auto CastF16toU8Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::U8)); +const auto CastF16toS8Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::S8)); +const auto CastF16toU16Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::U16)); +const auto CastF16toS16Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::S16)); +const auto CastF16toU32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::U32)); +const auto CastF16toS32Dataset = combine(framework::dataset::make("DataType", DataType::F16), 
framework::dataset::make("DataType", DataType::S32)); +const auto CastF16toF32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F32)); + +// F32 +const auto CastF32toU8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::U8)); +const auto CastF32toS8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::S8)); +const auto CastF32toU16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::U16)); +const auto CastF32toS16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::S16)); +const auto CastF32toU32Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::U32)); +const auto CastF32toS32Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::S32)); +const auto CastF32toF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16)); +} // namespace + +TEST_SUITE(CL) +TEST_SUITE(Cast) +template <typename T> +using CLCastToU8Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T, uint8_t>; +template <typename T> +using CLCastToS8Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T, int8_t>; +template <typename T> +using CLCastToU16Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T, uint16_t>; +template <typename T> +using CLCastToS16Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T, int16_t>; +template <typename T> +using CLCastToU32Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T, uint32_t>; +template <typename T> +using CLCastToS32Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T, int32_t>; 
+template <typename T> +using CLCastToF16Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T, half>; +template <typename T> +using CLCastToF32Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T, float>; + +#define CAST_SUITE(NAME, idt, odt, type, dataset, tolerance) \ + TEST_SUITE(NAME) \ + DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), datasets::ConvertPolicies()), \ + shape, policy) \ + { \ + CLTensor src = create_tensor<CLTensor>(shape, idt, 1); \ + CLTensor dst = create_tensor<CLTensor>(shape, odt, 1); \ + \ + CLCast cast; \ + cast.configure(&src, &dst, policy); \ + \ + const ValidRegion valid_region = shape_to_valid_region(shape); \ + validate(dst.info()->valid_region(), valid_region); \ + \ + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); \ + validate(src.info()->padding(), padding); \ + validate(dst.info()->padding(), padding); \ + } \ + FIXTURE_DATA_TEST_CASE(RunSmall, type, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), dataset), \ + datasets::ConvertPolicies())) \ + { \ + validate(CLAccessor(_target), _reference, tolerance); \ + } \ + TEST_SUITE_END() + +// U8 +CAST_SUITE(U8_to_S8, DataType::U8, DataType::S8, CLCastToS8Fixture<uint8_t>, CastU8toS8Dataset, zero_tolerance) +CAST_SUITE(U8_to_U16, DataType::U8, DataType::U16, CLCastToU16Fixture<uint8_t>, CastU8toU16Dataset, zero_tolerance) +CAST_SUITE(U8_to_S16, DataType::U8, DataType::S16, CLCastToS16Fixture<uint8_t>, CastU8toS16Dataset, zero_tolerance) +CAST_SUITE(U8_to_U32, DataType::U8, DataType::U32, CLCastToU32Fixture<uint8_t>, CastU8toU32Dataset, zero_tolerance) +CAST_SUITE(U8_to_S32, DataType::U8, DataType::S32, CLCastToS32Fixture<uint8_t>, CastU8toS32Dataset, zero_tolerance) +CAST_SUITE(U8_to_F16, DataType::U8, DataType::F16, CLCastToF16Fixture<uint8_t>, CastU8toF16Dataset, zero_tolerance) +CAST_SUITE(U8_to_F32, DataType::U8, DataType::F32, CLCastToF32Fixture<uint8_t>, 
CastU8toF32Dataset, zero_tolerance) + +// S8 +CAST_SUITE(S8_to_U8, DataType::S8, DataType::U8, CLCastToU8Fixture<int8_t>, CastS8toU8Dataset, zero_tolerance) +CAST_SUITE(S8_to_U16, DataType::S8, DataType::U16, CLCastToU16Fixture<int8_t>, CastS8toU16Dataset, zero_tolerance) +CAST_SUITE(S8_to_S16, DataType::S8, DataType::S16, CLCastToS16Fixture<int8_t>, CastS8toS16Dataset, zero_tolerance) +CAST_SUITE(S8_to_U32, DataType::S8, DataType::U32, CLCastToU32Fixture<int8_t>, CastS8toU32Dataset, zero_tolerance) +CAST_SUITE(S8_to_S32, DataType::S8, DataType::S32, CLCastToS32Fixture<int8_t>, CastS8toS32Dataset, zero_tolerance) +CAST_SUITE(S8_to_F16, DataType::S8, DataType::F16, CLCastToF16Fixture<int8_t>, CastS8toF16Dataset, zero_tolerance) +CAST_SUITE(S8_to_F32, DataType::S8, DataType::F32, CLCastToF32Fixture<int8_t>, CastS8toF32Dataset, zero_tolerance) + +// U16 +CAST_SUITE(U16_to_U8, DataType::U16, DataType::U8, CLCastToU8Fixture<uint16_t>, CastU16toU8Dataset, zero_tolerance) +CAST_SUITE(U16_to_S8, DataType::U16, DataType::S8, CLCastToS8Fixture<uint16_t>, CastU16toS8Dataset, zero_tolerance) +CAST_SUITE(U16_to_S16, DataType::U16, DataType::S16, CLCastToS16Fixture<uint16_t>, CastU16toS16Dataset, zero_tolerance) +CAST_SUITE(U16_to_U32, DataType::U16, DataType::U32, CLCastToU32Fixture<uint16_t>, CastU16toU32Dataset, zero_tolerance) +CAST_SUITE(U16_to_S32, DataType::U16, DataType::S32, CLCastToS32Fixture<uint16_t>, CastU16toS32Dataset, zero_tolerance) +CAST_SUITE(U16_to_F16, DataType::U16, DataType::F16, CLCastToF16Fixture<uint16_t>, CastU16toF16Dataset, zero_tolerance) +CAST_SUITE(U16_to_F32, DataType::U16, DataType::F32, CLCastToF32Fixture<uint16_t>, CastU16toF32Dataset, zero_tolerance) + +// S16 +CAST_SUITE(S16_to_U8, DataType::S16, DataType::U8, CLCastToU8Fixture<int16_t>, CastS16toU8Dataset, zero_tolerance) +CAST_SUITE(S16_to_S8, DataType::S16, DataType::S8, CLCastToS8Fixture<int16_t>, CastS16toS8Dataset, zero_tolerance) +CAST_SUITE(S16_to_U16, DataType::S16, DataType::U16, 
CLCastToU16Fixture<int16_t>, CastS16toU16Dataset, zero_tolerance) +CAST_SUITE(S16_to_U32, DataType::S16, DataType::U32, CLCastToU32Fixture<int16_t>, CastS16toU32Dataset, zero_tolerance) +CAST_SUITE(S16_to_S32, DataType::S16, DataType::S32, CLCastToS32Fixture<int16_t>, CastS16toS32Dataset, zero_tolerance) +CAST_SUITE(S16_to_F16, DataType::S16, DataType::F16, CLCastToF16Fixture<int16_t>, CastS16toF16Dataset, zero_tolerance) +CAST_SUITE(S16_to_F32, DataType::S16, DataType::F32, CLCastToF32Fixture<int16_t>, CastS16toF32Dataset, zero_tolerance) + +// U32 +CAST_SUITE(U32_to_U8, DataType::U32, DataType::U8, CLCastToU8Fixture<uint32_t>, CastU32toU8Dataset, zero_tolerance) +CAST_SUITE(U32_to_S8, DataType::U32, DataType::S8, CLCastToS8Fixture<uint32_t>, CastU32toS8Dataset, zero_tolerance) +CAST_SUITE(U32_to_U16, DataType::U32, DataType::U16, CLCastToU16Fixture<uint32_t>, CastU32toU16Dataset, zero_tolerance) +CAST_SUITE(U32_to_S16, DataType::U32, DataType::S16, CLCastToS16Fixture<uint32_t>, CastU32toS16Dataset, zero_tolerance) +CAST_SUITE(U32_to_S32, DataType::U32, DataType::S32, CLCastToS32Fixture<uint32_t>, CastU32toS32Dataset, zero_tolerance) +CAST_SUITE(U32_to_F16, DataType::U32, DataType::F16, CLCastToF16Fixture<uint32_t>, CastU32toF16Dataset, zero_tolerance) +CAST_SUITE(U32_to_F32, DataType::U32, DataType::F32, CLCastToF32Fixture<uint32_t>, CastU32toF32Dataset, zero_tolerance) + +// S32 +CAST_SUITE(S32_to_U8, DataType::S32, DataType::U8, CLCastToU8Fixture<int32_t>, CastS32toU8Dataset, zero_tolerance) +CAST_SUITE(S32_to_S8, DataType::S32, DataType::S8, CLCastToS8Fixture<int32_t>, CastS32toS8Dataset, zero_tolerance) +CAST_SUITE(S32_to_U16, DataType::S32, DataType::U16, CLCastToU16Fixture<int32_t>, CastS32toU16Dataset, zero_tolerance) +CAST_SUITE(S32_to_S16, DataType::S32, DataType::S16, CLCastToS16Fixture<int32_t>, CastS32toS16Dataset, zero_tolerance) +CAST_SUITE(S32_to_U32, DataType::S32, DataType::U32, CLCastToU32Fixture<int32_t>, CastS32toU32Dataset, zero_tolerance) 
+CAST_SUITE(S32_to_F16, DataType::S32, DataType::F16, CLCastToF16Fixture<int32_t>, CastS32toF16Dataset, zero_tolerance) +CAST_SUITE(S32_to_F32, DataType::S32, DataType::F32, CLCastToF32Fixture<int32_t>, CastS32toF32Dataset, zero_tolerance) + +// F16 +CAST_SUITE(F16_to_U8, DataType::F16, DataType::U8, CLCastToU8Fixture<half>, CastF16toU8Dataset, one_tolerance) +CAST_SUITE(F16_to_S8, DataType::F16, DataType::S8, CLCastToS8Fixture<half>, CastF16toS8Dataset, one_tolerance) +CAST_SUITE(F16_to_U16, DataType::F16, DataType::U16, CLCastToU16Fixture<half>, CastF16toU16Dataset, one_tolerance) +CAST_SUITE(F16_to_S16, DataType::F16, DataType::S16, CLCastToS16Fixture<half>, CastF16toS16Dataset, one_tolerance) +CAST_SUITE(F16_to_U32, DataType::F16, DataType::U32, CLCastToU32Fixture<half>, CastF16toU32Dataset, one_tolerance) +CAST_SUITE(F16_to_S32, DataType::F16, DataType::S32, CLCastToS32Fixture<half>, CastF16toS32Dataset, one_tolerance) +CAST_SUITE(F16_to_F32, DataType::F16, DataType::F32, CLCastToF32Fixture<half>, CastF16toF32Dataset, one_tolerance) + +// F32 +CAST_SUITE(F32_to_U8, DataType::F32, DataType::U8, CLCastToU8Fixture<float>, CastF32toU8Dataset, one_tolerance) +CAST_SUITE(F32_to_S8, DataType::F32, DataType::S8, CLCastToS8Fixture<float>, CastF32toS8Dataset, one_tolerance) +CAST_SUITE(F32_to_U16, DataType::F32, DataType::U16, CLCastToU16Fixture<float>, CastF32toU16Dataset, one_tolerance) +CAST_SUITE(F32_to_S16, DataType::F32, DataType::S16, CLCastToS16Fixture<float>, CastF32toS16Dataset, one_tolerance) +CAST_SUITE(F32_to_U32, DataType::F32, DataType::U32, CLCastToU32Fixture<float>, CastF32toU32Dataset, one_tolerance) +CAST_SUITE(F32_to_S32, DataType::F32, DataType::S32, CLCastToS32Fixture<float>, CastF32toS32Dataset, one_tolerance) +CAST_SUITE(F32_to_F16, DataType::F32, DataType::F16, CLCastToF16Fixture<float>, CastF32toF16Dataset, one_tolerance) + +TEST_SUITE_END() // Cast +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace 
arm_compute diff --git a/tests/validation/CL/DepthConvertLayer.cpp b/tests/validation/CL/DepthConvertLayer.cpp index fe46313568..7d7b5f2176 100644 --- a/tests/validation/CL/DepthConvertLayer.cpp +++ b/tests/validation/CL/DepthConvertLayer.cpp @@ -348,84 +348,6 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToS32Fixture<int16_t>, frame } TEST_SUITE_END() -TEST_SUITE(F16_to_F32) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDataset), - shape, policy, shift) -{ - // Create tensors - CLTensor src = create_tensor<CLTensor>(shape, DataType::F16, 1); - CLTensor dst = create_tensor<CLTensor>(shape, DataType::F32, 1); - - // Create and Configure function - CLDepthConvertLayer depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToF32Fixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerF16toF32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToF32Fixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF16toF32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, 
ConvertPolicy::WRAP })), - DepthConvertLayerShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(F32_to_F16) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDataset), - shape, policy, shift) -{ - // Create tensors - CLTensor src = create_tensor<CLTensor>(shape, DataType::F32, 1); - CLTensor dst = create_tensor<CLTensor>(shape, DataType::F16, 1); - - // Create and Configure function - CLDepthConvertLayer depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToF16Fixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerF32toF16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToF16Fixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF32toF16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertLayerShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() - TEST_SUITE_END() TEST_SUITE_END() } // namespace validation diff --git 
a/tests/validation/fixtures/CastFixture.h b/tests/validation/fixtures/CastFixture.h new file mode 100644 index 0000000000..3a6efa22af --- /dev/null +++ b/tests/validation/fixtures/CastFixture.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_TEST_CAST_FIXTURE +#define ARM_COMPUTE_TEST_CAST_FIXTURE + +#include "tests/validation/fixtures/DepthConvertLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2> +class CastValidationFixture : public framework::Fixture +{ +public: + template <typename...> + void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy) + { + _target = compute_target(shape, dt_in, dt_out, policy); + _reference = compute_reference(shape, dt_in, dt_out, policy); + } + +protected: + template <typename U> + void fill(U &&tensor, int i, DataType dt_in, DataType dt_out) + { + // Restricting range to avoid inf values + if(dt_out == DataType::F16) + { + const int signed_min = -32000; + const int signed_max = 32000; + const int unsigned_min = 0; + const int unsigned_max = 65000; + + switch(dt_in) + { + case DataType::U8: + case DataType::QASYMM8: + case DataType::S8: + case DataType::F32: + { + library->fill_tensor_uniform(tensor, i); + break; + } + case DataType::U16: + { + library->fill_tensor_uniform(tensor, i, static_cast<uint16_t>(unsigned_min), static_cast<uint16_t>(unsigned_max)); + break; + } + case DataType::S16: + { + library->fill_tensor_uniform(tensor, i, static_cast<int16_t>(signed_min), static_cast<int16_t>(signed_max)); + break; + } + case DataType::U32: + { + library->fill_tensor_uniform(tensor, i, static_cast<uint32_t>(unsigned_min), static_cast<uint32_t>(unsigned_max)); + break; + } + case DataType::S32: + { + library->fill_tensor_uniform(tensor, i, static_cast<int32_t>(signed_min), static_cast<int32_t>(signed_max)); + break; + } + default: + ARM_COMPUTE_ERROR("NOT SUPPORTED!"); + } + } + else + { + library->fill_tensor_uniform(tensor, i); + } + } + + TensorType compute_target(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy) + { + // Create tensors + 
TensorType src = create_tensor<TensorType>(shape, dt_in, 1); + TensorType dst = create_tensor<TensorType>(shape, dt_out, 1); + + // Create and configure function + FunctionType cast; + cast.configure(&src, &dst, policy); + + ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Fill tensors + fill(AccessorType(src), 0, dt_in, dt_out); + + // Compute function + cast.run(); + + return dst; + } + + SimpleTensor<T2> compute_reference(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy) + { + // Create reference + SimpleTensor<T1> src{ shape, dt_in, 1 }; + + // Fill reference + fill(src, 0, dt_in, dt_out); + + return reference::depth_convert<T1, T2>(src, dt_out, policy, 0); + } + + TensorType _target{}; + SimpleTensor<T2> _reference{}; +}; +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_CAST_FIXTURE */ diff --git a/tests/validation/reference/DepthConvertLayer.cpp b/tests/validation/reference/DepthConvertLayer.cpp index fd2e0ae378..4d5b97b478 100644 --- a/tests/validation/reference/DepthConvertLayer.cpp +++ b/tests/validation/reference/DepthConvertLayer.cpp @@ -25,6 +25,9 @@ #include "tests/validation/Helpers.h" +#include "arm_compute/core/utils/misc/Rounding.h" +#include "arm_compute/core/utils/misc/SaturateCast.h" + #include "tests/Types.h" namespace arm_compute @@ -35,13 +38,13 @@ namespace validation { namespace reference { -template < typename T1, typename T2, typename std::enable_if < std::is_integral<T1>::value &&std::is_integral<T2>::value &&!std::is_same<T1, T2>::value, int >::type > +template < typename T1, typename T2, typename 
std::enable_if < std::is_integral<T1>::value &&!std::is_same<T1, T2>::value, int >::type > SimpleTensor<T2> depth_convert(const SimpleTensor<T1> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift) { SimpleTensor<T2> result(src.shape(), dt_out); // Up-casting - if(src.data_type() <= dt_out) + if(element_size_from_data_type(src.data_type()) < element_size_from_data_type(dt_out)) { for(int i = 0; i < src.num_elements(); ++i) { @@ -54,48 +57,100 @@ SimpleTensor<T2> depth_convert(const SimpleTensor<T1> &src, DataType dt_out, Con for(int i = 0; i < src.num_elements(); ++i) { T1 val = src[i] >> shift; - result[i] = (policy == ConvertPolicy::SATURATE) ? saturate_cast<T2>(val) : static_cast<T2>(val); + result[i] = (policy == ConvertPolicy::SATURATE) ? utils::cast::saturate_cast<T2>(val) : static_cast<T2>(val); } } return result; } -template < typename T1, typename T2, typename std::enable_if < is_floating_point<T1>::value &&is_floating_point<T2>::value &&!std::is_same<T1, T2>::value, int >::type > +template < typename T1, typename T2, typename std::enable_if < is_floating_point<T1>::value &&!std::is_same<T1, T2>::value, int >::type > SimpleTensor<T2> depth_convert(const SimpleTensor<T1> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift) { SimpleTensor<T2> result(src.shape(), dt_out); + ARM_COMPUTE_ERROR_ON(shift != 0); + ARM_COMPUTE_UNUSED(policy, shift); - const uint32_t scale = 1 << shift; - - // Up-casting - if(src.data_type() <= dt_out) + // Always saturate on floats + for(int i = 0; i < src.num_elements(); ++i) { - for(int i = 0; i < src.num_elements(); ++i) - { - result[i] = src[i] * static_cast<T2>(scale); - } - } - // Down-casting - else - { - for(int i = 0; i < src.num_elements(); ++i) - { - T1 val = src[i] / static_cast<T1>(scale); - result[i] = (policy == ConvertPolicy::SATURATE) ? 
saturate_cast<T2>(val) : static_cast<T2>(val); - } + T1 val = utils::rounding::round_half_away_from_zero(src[i]); + result[i] = utils::cast::saturate_cast<T2>(val); } return result; } +// U8 +template SimpleTensor<int8_t> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); template SimpleTensor<int16_t> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); template SimpleTensor<int32_t> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<half> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<float> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); + +// S8 +template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int16_t> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int32_t> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<half> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<float> 
depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); + +// U16 template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int8_t> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int16_t> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int32_t> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<half> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<float> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); + +// S16 template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int8_t> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); template SimpleTensor<int32_t> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -template SimpleTensor<half> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<half> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy 
policy, uint32_t shift); +template SimpleTensor<float> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); + +// U32 +template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int8_t> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int16_t> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int32_t> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<half> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<float> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); + +// S32 +template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int8_t> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int16_t> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<half> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<float> depth_convert(const 
SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); + +// F16 +template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int8_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int16_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int32_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); template SimpleTensor<float> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); + +// F32 +template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int8_t> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int16_t> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<int32_t> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor<half> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); + } // namespace reference } 
// namespace validation } // namespace test diff --git a/tests/validation/reference/DepthConvertLayer.h b/tests/validation/reference/DepthConvertLayer.h index 5d97c73b3c..2113593e9d 100644 --- a/tests/validation/reference/DepthConvertLayer.h +++ b/tests/validation/reference/DepthConvertLayer.h @@ -35,10 +35,10 @@ namespace validation { namespace reference { -template < typename T1, typename T2, typename std::enable_if < std::is_integral<T1>::value &&std::is_integral<T2>::value &&!std::is_same<T1, T2>::value, int >::type = 0 > +template < typename T1, typename T2, typename std::enable_if < std::is_integral<T1>::value &&!std::is_same<T1, T2>::value, int >::type = 0 > SimpleTensor<T2> depth_convert(const SimpleTensor<T1> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -template < typename T1, typename T2, typename std::enable_if < is_floating_point<T1>::value &&is_floating_point<T2>::value &&!std::is_same<T1, T2>::value, int >::type = 0 > +template < typename T1, typename T2, typename std::enable_if < is_floating_point<T1>::value &&!std::is_same<T1, T2>::value, int >::type = 0 > SimpleTensor<T2> depth_convert(const SimpleTensor<T1> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); } // namespace reference } // namespace validation |