aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2018-11-19 11:56:51 +0000
committer Georgios Pinitas <georgios.pinitas@arm.com> 2018-11-22 12:41:32 +0000
commit 303f0dbebf631b3db00d9d64e71018abbbe9d4fe (patch)
tree 631e70c9a8141f1262752829a64b3e33c7f1ee93
parent 9d3a831d4131f8a8b37f127f11d36848d33e8496 (diff)
download ComputeLibrary-303f0dbebf631b3db00d9d64e71018abbbe9d4fe.tar.gz
COMPMID-1718: Extend DepthConvert to support Cast
Change-Id: I6ee2c0b670727fc808fa636c53ddfaec3a0036c9
-rw-r--r-- arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h 22
-rw-r--r-- arm_compute/core/utils/misc/Requires.h 51
-rw-r--r-- arm_compute/core/utils/misc/Rounding.h 205
-rw-r--r-- arm_compute/core/utils/misc/SaturateCast.h 218
-rw-r--r-- arm_compute/core/utils/misc/Traits.h 47
-rw-r--r-- arm_compute/core/utils/misc/Utility.h 16
-rw-r--r-- arm_compute/runtime/CL/CLFunctions.h 1
-rw-r--r-- arm_compute/runtime/CL/functions/CLCast.h 70
-rw-r--r-- arm_compute/runtime/CL/functions/CLDepthConvertLayer.h 24
-rw-r--r-- src/core/CL/cl_kernels/depth_convert.cl 6
-rw-r--r-- src/core/CL/kernels/CLDepthConvertLayerKernel.cpp 57
-rw-r--r-- src/core/NEON/kernels/NESoftmaxLayerKernel.cpp 4
-rw-r--r-- src/runtime/CL/functions/CLCast.cpp 44
-rw-r--r-- src/runtime/CL/functions/CLDepthConvertLayer.cpp 5
-rw-r--r-- tests/validation/CL/Cast.cpp 244
-rw-r--r-- tests/validation/CL/DepthConvertLayer.cpp 78
-rw-r--r-- tests/validation/fixtures/CastFixture.h 144
-rw-r--r-- tests/validation/reference/DepthConvertLayer.cpp 99
-rw-r--r-- tests/validation/reference/DepthConvertLayer.h 4
19 files changed, 1151 insertions, 188 deletions
diff --git a/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h
index b70a6a993a..c155cb2982 100644
--- a/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h
@@ -43,24 +43,24 @@ public:
*
* Valid conversions Input -> Output :
*
- * - U8 -> U16, S16, U32, S32
- * - U16 -> U8, U32, S32
- * - S16 -> U8, U32, S32
- * - U32 -> U8, U16, S16
- * - S32 -> U8, U16, S16
- * - F16 -> F32
- * - F32 -> F16
+ * - U8 -> S8, U16, S16, U32, S32, F16, F32
+ * - U16 -> U8, S8, S16, U32, S32, F16, F32
+ * - S16 -> U8, S8, U16, U32, S32, F16, F32
+ * - U32 -> U8, S8, U16, S16, S32, F16, F32
+ * - S32 -> U8, S8, U16, S16, U32, F16, F32
+ * - F16 -> U8, S8, U16, S16, U32, F32
+ * - F32 -> U8, S8, U16, S16, U32, F16
*
- * @param[in] input The input tensor to convert. Data types supported: U8/U16/S16/U32/S32/F16/F32.
- * @param[out] output The output tensor. Data types supported: U8/U16/S16/U32/S32/F16/F32.
+ * @param[in] input The input tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+ * @param[out] output The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
* @param[in] policy Conversion policy
* @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8.
*/
void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
/** Static function to check if given info will lead to a valid configuration of @ref CLDepthConvertLayerKernel
*
- * @param[in] input Source tensor info. Data types supported: U8/U16/S16/U32/S32/F16/F32.
- * @param[in] output Destination tensor info. Data type supported: U8/U16/S16/U32/S32/F16/F32.
+ * @param[in] input Source tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+ * @param[in] output Destination tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
* @param[in] policy Conversion policy
* @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8.
*
diff --git a/arm_compute/core/utils/misc/Requires.h b/arm_compute/core/utils/misc/Requires.h
new file mode 100644
index 0000000000..2852300bb1
--- /dev/null
+++ b/arm_compute/core/utils/misc/Requires.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_UTILS_REQUIRES_H__
+#define __ARM_COMPUTE_UTILS_REQUIRES_H__
+
+namespace arm_compute
+{
+namespace utils
+{
+// NOTE(review): `requires` is a reserved keyword from C++20 onwards, so this namespace name only compiles with earlier standards.
+namespace requires
+{
+// *INDENT-OFF*
+// clang-format off
+namespace detail
+{
+/** Empty tag type used by REQUIRES_TA as the result type of std::enable_if */
+enum class enabler
+{
+};
+} // namespace detail
+
+// NOTE(review): both macros expand to std::enable_if, but this header does not include <type_traits> itself.
+/** Requirements as template: expands to a complete template header that is only viable when the condition holds */
+#define REQUIRES_T(...) template <bool Cond = (__VA_ARGS__), typename std::enable_if<Cond, int>::type = 0>
+/** Requirements as template argument: expands to an unnamed, defaulted type parameter that is only valid when the condition holds */
+#define REQUIRES_TA(...) typename = typename std::enable_if<(__VA_ARGS__), arm_compute::utils::requires::detail::enabler>::type
+// clang-format on
+// *INDENT-ON*
+} // namespace requires
+} // namespace utils
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_UTILS_REQUIRES_H__ */
diff --git a/arm_compute/core/utils/misc/Rounding.h b/arm_compute/core/utils/misc/Rounding.h
new file mode 100644
index 0000000000..3f86cc8db3
--- /dev/null
+++ b/arm_compute/core/utils/misc/Rounding.h
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_UTILS_ROUNDING_H__
+#define __ARM_COMPUTE_UTILS_ROUNDING_H__
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/utils/misc/Requires.h"
+#include "arm_compute/core/utils/misc/Traits.h"
+#include "support/ToolchainSupport.h"
+
+#include <cmath>
+
+namespace arm_compute
+{
+namespace utils
+{
+namespace rounding
+{
+/** Rounding mode selecting which rounding function @ref round dispatches to */
+enum class RoundingMode
+{
+ TO_ZERO, /**< Round towards zero */
+ AWAY_FROM_ZERO, /**< Round away from zero */
+ HALF_TO_ZERO, /**< Round half towards zero */
+ HALF_AWAY_FROM_ZERO, /**< Round half away from zero */
+ HALF_UP, /**< Round half towards positive infinity */
+ HALF_DOWN, /**< Round half towards negative infinity */
+ HALF_EVEN /**< Round half towards nearest even */
+};
+
+/** Round a floating-point value towards zero.
+ *
+ * The magnitude is floored and the sign of the input is re-applied afterwards.
+ *
+ * @tparam T Parameter type. Should be of floating point type.
+ *
+ * @param[in] value floating-point value to be rounded.
+ *
+ * @return Floating-point value of rounded @p value.
+ */
+template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
+inline T round_to_zero(T value)
+{
+ const T magnitude = std::floor(std::fabs(value));
+ if(value < 0.f)
+ {
+ return -magnitude;
+ }
+ return magnitude;
+}
+
+/** Round a floating-point value away from zero.
+ *
+ * The magnitude is rounded up and the sign of the input is re-applied afterwards.
+ *
+ * @tparam T Parameter type. Should be of floating point type.
+ *
+ * @param[in] value floating-point value to be rounded.
+ *
+ * @return Floating-point value of rounded @p value.
+ */
+template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
+inline T round_away_from_zero(T value)
+{
+ const T magnitude = std::ceil(std::fabs(value));
+ if(value < 0.f)
+ {
+ return -magnitude;
+ }
+ return magnitude;
+}
+
+/** Round a floating-point value to the nearest integer, with exact halves going towards zero.
+ *
+ * @tparam T Parameter type. Should be of floating point type.
+ *
+ * @param[in] value floating-point value to be rounded.
+ *
+ * @return Floating-point value of rounded @p value.
+ */
+template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
+inline T round_half_to_zero(T value)
+{
+ // Work on the magnitude so that halves move towards zero for both signs
+ const T magnitude = T(std::ceil(std::fabs(value) - 0.5f));
+ if(value < 0.f)
+ {
+ return -magnitude;
+ }
+ return magnitude;
+}
+
+/** Round a floating-point value to the nearest integer, with exact halves going away from zero.
+ *
+ * @tparam T Parameter type. Should be of floating point type.
+ *
+ * @param[in] value floating-point value to be rounded.
+ *
+ * @return Floating-point value of rounded @p value.
+ */
+template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
+inline T round_half_away_from_zero(T value)
+{
+ // Work on the magnitude so that halves move away from zero for both signs
+ const T magnitude = T(std::floor(std::fabs(value) + 0.5f));
+ if(value < 0.f)
+ {
+ return -magnitude;
+ }
+ return magnitude;
+}
+
+/** Round a floating-point value to the nearest integer, with exact halves going to positive infinity.
+ *
+ * @tparam T Parameter type. Should be of floating point type.
+ *
+ * @param[in] value floating-point value to be rounded.
+ *
+ * @return Floating-point value of rounded @p value.
+ */
+template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
+inline T round_half_up(T value)
+{
+ const auto shifted = value + 0.5f;
+ return std::floor(shifted);
+}
+
+/** Round a floating-point value to the nearest integer, with exact halves going to negative infinity.
+ *
+ * @tparam T Parameter type. Should be of floating point type.
+ *
+ * @param[in] value floating-point value to be rounded.
+ *
+ * @return Floating-point value of rounded @p value.
+ */
+template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
+inline T round_half_down(T value)
+{
+ const auto shifted = value - 0.5f;
+ return std::ceil(shifted);
+}
+
+/** Round a floating-point value to the nearest integer, with exact halves going to the nearest even integer.
+ *
+ * @tparam T Parameter type. Should be of floating point type.
+ *
+ * @param[in] value floating-point value to be rounded.
+ * @param[in] epsilon tolerance used when testing for an exact half and for an even integer part.
+ *
+ * @return Floating-point value of rounded @p value.
+ */
+template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
+inline T round_half_even(T value, T epsilon = std::numeric_limits<T>::epsilon())
+{
+ const T magnitude = std::abs(value);
+ T whole = 0;
+ std::modf(magnitude, &whole);
+
+ // Anything that is not exactly halfway between two integers uses the usual round to closest
+ const bool is_halfway = std::abs(magnitude - (whole + 0.5f)) < epsilon;
+ if(!is_halfway)
+ {
+ return support::cpp11::copysign(support::cpp11::round(magnitude), value);
+ }
+
+ // Halfway case: keep the integer part when it is already even
+ if(std::fmod(whole, 2.f) < epsilon)
+ {
+ return support::cpp11::copysign(whole, value);
+ }
+ // Odd integer part: step up to the next (even) integer
+ return support::cpp11::copysign(std::ceil(whole + 0.5f), value);
+}
+
+/** Round a floating-point value using the rounding function selected by @p rounding_mode.
+ *
+ * An unrecognised mode is reported through ARM_COMPUTE_ERROR.
+ *
+ * @tparam T Parameter type. Should be of floating point type.
+ *
+ * @param[in] value floating-point value to be rounded.
+ * @param[in] rounding_mode Rounding mode to use.
+ *
+ * @return Floating-point value of rounded @p value.
+ */
+template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
+inline T round(T value, RoundingMode rounding_mode)
+{
+ switch(rounding_mode)
+ {
+ case RoundingMode::TO_ZERO:
+ return round_to_zero(value);
+ case RoundingMode::AWAY_FROM_ZERO:
+ return round_away_from_zero(value);
+ case RoundingMode::HALF_TO_ZERO:
+ return round_half_to_zero(value);
+ case RoundingMode::HALF_AWAY_FROM_ZERO:
+ return round_half_away_from_zero(value);
+ case RoundingMode::HALF_UP:
+ return round_half_up(value);
+ case RoundingMode::HALF_DOWN:
+ return round_half_down(value);
+ case RoundingMode::HALF_EVEN:
+ return round_half_even(value);
+ default:
+ break;
+ }
+ // Only reached when the mode matches none of the enumerators above
+ ARM_COMPUTE_ERROR("Unsupported rounding mode!");
+}
+} // namespace rounding
+} // namespace utils
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_UTILS_ROUNDING_H__ */
diff --git a/arm_compute/core/utils/misc/SaturateCast.h b/arm_compute/core/utils/misc/SaturateCast.h
new file mode 100644
index 0000000000..b9e4787d9a
--- /dev/null
+++ b/arm_compute/core/utils/misc/SaturateCast.h
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_UTILS_CAST_SATURATE_CAST_H__
+#define __ARM_COMPUTE_UTILS_CAST_SATURATE_CAST_H__
+
+#include "arm_compute/core/utils/misc/Rounding.h"
+#include "arm_compute/core/utils/misc/Traits.h"
+#include "arm_compute/core/utils/misc/Utility.h"
+
+#include <cmath>
+#include <limits>
+
+namespace arm_compute
+{
+namespace utils
+{
+namespace cast
+{
+// *INDENT-OFF*
+// clang-format off
+// same type
+/** Saturating cast between identical types: the value is returned unchanged.
+ *
+ * @tparam T Destination type.
+ * @tparam U Source type. Must be the same as @p T.
+ *
+ * @param[in] v Value to cast.
+ *
+ * @return @p v unchanged.
+ */
+template<typename T,
+ typename U,
+ typename std::enable_if<std::is_same<T, U>::value, int >::type = 0 >
+inline T saturate_cast(U v)
+{
+ return v;
+}
+
+// signed -> signed widening/same_width
+/** Saturating cast from a signed integer to a different signed integer that is at least as wide.
+ *
+ * Every source value is representable in the destination, so no clamping is required.
+ *
+ * @param[in] v Value to cast.
+ *
+ * @return @p v converted to @p T.
+ */
+template<typename T,
+ typename U,
+ typename std::enable_if<!std::is_same<T, U>::value &&
+ std::is_integral<U>::value && std::is_signed<U>::value &&
+ std::is_integral<T>::value && std::is_signed<T>::value &&
+ sizeof(U) <= sizeof(T),
+ int >::type = 0 >
+inline T saturate_cast(U v)
+{
+ const T widened = static_cast<T>(v);
+ return widened;
+}
+// signed -> signed narrowing
+/** Saturating cast from a signed integer to a narrower signed integer.
+ *
+ * The value is clamped, in the source type, to the limits of @p T before the conversion.
+ *
+ * @param[in] v Value to cast.
+ *
+ * @return @p v clamped to the range of @p T.
+ */
+template<typename T,
+ typename U,
+ typename std::enable_if<!std::is_same<T, U>::value &&
+ std::is_integral<U>::value && std::is_signed<U>::value &&
+ std::is_integral<T>::value && std::is_signed<T>::value &&
+ sizeof(T) < sizeof(U),
+ int >::type = 0 >
+inline T saturate_cast(U v)
+{
+ const U lower_bound = std::numeric_limits<T>::lowest();
+ const U upper_bound = std::numeric_limits<T>::max();
+ return static_cast<T>(utility::clamp<U>(v, lower_bound, upper_bound));
+}
+
+// unsigned -> signed widening
+/** Saturating cast from an unsigned integer to a strictly wider signed integer.
+ *
+ * Every source value is representable in the destination, so no clamping is required.
+ *
+ * @param[in] v Value to cast.
+ *
+ * @return @p v converted to @p T.
+ */
+template<typename T,
+ typename U,
+ typename std::enable_if<!std::is_same<T, U>::value &&
+ std::is_integral<U>::value && std::is_unsigned<U>::value &&
+ std::is_integral<T>::value && std::is_signed<T>::value &&
+ sizeof(U) < sizeof(T),
+ int >::type = 0 >
+inline T saturate_cast(U v)
+{
+ const T widened = static_cast<T>(v);
+ return widened;
+}
+// unsigned -> signed narrowing
+/** Saturating cast from an unsigned integer to a narrower signed integer.
+ *
+ * Only the upper bound can be exceeded, so the value is capped at the maximum of @p T.
+ *
+ * @param[in] v Value to cast.
+ *
+ * @return @p v capped at the maximum of @p T.
+ */
+template<typename T,
+ typename U,
+ typename std::enable_if<!std::is_same<T, U>::value &&
+ std::is_integral<U>::value && std::is_unsigned<U>::value &&
+ std::is_integral<T>::value && std::is_signed<T>::value &&
+ sizeof(T) < sizeof(U),
+ int >::type = 0 >
+inline T saturate_cast(U v)
+{
+ const U upper_bound = std::numeric_limits<T>::max();
+ return static_cast<T>(std::min(v, upper_bound));
+}
+// unsigned -> signed same_width
+/** Saturating cast from an unsigned integer to a signed integer of the same width.
+ *
+ * Only the upper bound can be exceeded, so the value is capped at the maximum of @p T.
+ *
+ * @param[in] v Value to cast.
+ *
+ * @return @p v capped at the maximum of @p T.
+ */
+template<typename T,
+ typename U,
+ typename std::enable_if<!std::is_same<T, U>::value &&
+ std::is_integral<U>::value && std::is_unsigned<U>::value &&
+ std::is_integral<T>::value && std::is_signed<T>::value &&
+ sizeof(T) == sizeof(U),
+ int >::type = 0 >
+inline T saturate_cast(U v)
+{
+ const U upper_bound = std::numeric_limits<T>::max();
+ return static_cast<T>(std::min(v, upper_bound));
+}
+
+// signed -> unsigned widening/same width
+/** Saturating cast from a signed integer to an unsigned integer that is at least as wide.
+ *
+ * Only the lower bound can be exceeded, so negative values become zero.
+ *
+ * @param[in] v Value to cast.
+ *
+ * @return Zero for negative @p v, otherwise @p v converted to @p T.
+ */
+template<typename T,
+ typename U,
+ typename std::enable_if<!std::is_same<T, U>::value &&
+ std::is_integral<U>::value && std::is_signed<U>::value &&
+ std::is_integral<T>::value && std::is_unsigned<T>::value &&
+ sizeof(U) <= sizeof(T),
+ int >::type = 0 >
+inline T saturate_cast(U v)
+{
+ const U non_negative = (v < 0) ? static_cast<U>(0) : v;
+ return static_cast<T>(non_negative);
+}
+
+// signed -> unsigned narrowing
+/** Saturating cast from a signed integer to a narrower unsigned integer.
+ *
+ * The value is clamped, in the source type, to the range from zero to the maximum of @p T.
+ *
+ * @param[in] v Value to cast.
+ *
+ * @return @p v clamped to the range of @p T.
+ */
+template<typename T,
+ typename U,
+ typename std::enable_if<!std::is_same<T, U>::value &&
+ std::is_integral<U>::value && std::is_signed<U>::value &&
+ std::is_integral<T>::value && std::is_unsigned<T>::value &&
+ sizeof(T) < sizeof(U),
+ int >::type = 0 >
+inline T saturate_cast(U v)
+{
+ const U lower_bound = 0;
+ const U upper_bound = std::numeric_limits<T>::max();
+ return static_cast<T>(utility::clamp<U>(v, lower_bound, upper_bound));
+}
+
+// unsigned -> unsigned widening/same width
+/** Saturating cast from an unsigned integer to a different unsigned integer that is at least as wide.
+ *
+ * Every source value is representable in the destination, so no clamping is required.
+ *
+ * @param[in] v Value to cast.
+ *
+ * @return @p v converted to @p T.
+ */
+template<typename T,
+ typename U,
+ typename std::enable_if<!std::is_same<T, U>::value &&
+ std::is_integral<U>::value && std::is_unsigned<U>::value &&
+ std::is_integral<T>::value && std::is_unsigned<T>::value &&
+ sizeof(U) <= sizeof(T),
+ int >::type = 0 >
+inline T saturate_cast(U v)
+{
+ const T widened = static_cast<T>(v);
+ return widened;
+}
+
+// unsigned -> unsigned narrowing
+/** Saturating cast from an unsigned integer to a narrower unsigned integer.
+ *
+ * The value is clamped, in the source type, to the limits of @p T before the conversion.
+ *
+ * @param[in] v Value to cast.
+ *
+ * @return @p v clamped to the range of @p T.
+ */
+template<typename T,
+ typename U,
+ typename std::enable_if<!std::is_same<T, U>::value &&
+ std::is_integral<U>::value && std::is_unsigned<U>::value &&
+ std::is_integral<T>::value && std::is_unsigned<T>::value &&
+ sizeof(T) < sizeof(U),
+ int >::type = 0 >
+inline T saturate_cast(U v)
+{
+ const U lower_bound = std::numeric_limits<T>::lowest();
+ const U upper_bound = std::numeric_limits<T>::max();
+ return static_cast<T>(utility::clamp<U>(v, lower_bound, upper_bound));
+}
+
+// float -> int
+/** Saturating cast from a floating-point value to an integer.
+ *
+ * The value is rounded half away from zero and then clamped to the range of @p T while still
+ * in the floating-point domain. Converting an out-of-range floating-point value to an integer is
+ * undefined behaviour, so the clamp has to happen before the conversion. Going through an
+ * intermediate 32-bit signed integer would also make unsigned 32-bit results above INT32_MAX
+ * unreachable.
+ *
+ * @param[in] v Value to cast.
+ *
+ * @return Rounded @p v clamped to the range of @p T, or zero when @p v is NaN.
+ */
+template<typename T,
+ typename U,
+ typename std::enable_if<std::is_integral<T>::value &&
+ traits::is_floating_point<U>::value,
+ int >::type = 0 >
+inline T saturate_cast(U v)
+{
+ // Widen to double so that the bounds of every integer type up to 32 bits compare exactly
+ const double rounded = static_cast<double>(utils::rounding::round_half_away_from_zero(v));
+ const double lower_bound = static_cast<double>(std::numeric_limits<T>::lowest());
+ const double upper_bound = static_cast<double>(std::numeric_limits<T>::max());
+
+ // NaN has no meaningful integer value; map it to zero instead of invoking undefined behaviour
+ if(std::isnan(rounded))
+ {
+ return static_cast<T>(0);
+ }
+ if(rounded <= lower_bound)
+ {
+ return std::numeric_limits<T>::lowest();
+ }
+ // '>=' also covers 64-bit maxima, which round up when converted to double
+ if(rounded >= upper_bound)
+ {
+ return std::numeric_limits<T>::max();
+ }
+ return static_cast<T>(rounded);
+}
+
+// int -> float
+/** Cast from an integer to a floating-point type.
+ *
+ * The value is converted with a plain static_cast; no clamping is applied.
+ *
+ * @param[in] v Value to cast.
+ *
+ * @return @p v converted to @p T.
+ */
+template<typename T,
+ typename U,
+ typename std::enable_if<std::is_integral<U>::value &&
+ traits::is_floating_point<T>::value,
+ int >::type = 0 >
+inline T saturate_cast(U v)
+{
+ const T converted = static_cast<T>(v);
+ return converted;
+}
+
+// float -> float
+/** Cast between two different floating-point types.
+ *
+ * Identical types are excluded here because they are already handled by the same-type overload;
+ * without the exclusion a call such as saturate_cast<float>(1.f) matches both overloads and is ambiguous.
+ *
+ * @param[in] v Value to cast.
+ *
+ * @return @p v converted to @p T.
+ */
+template<typename T,
+ typename U,
+ typename std::enable_if<traits::is_floating_point<T>::value &&
+ traits::is_floating_point<U>::value &&
+ !std::is_same<T, U>::value,
+ int >::type = 0 >
+inline T saturate_cast(U v)
+{
+ return static_cast<T>(v);
+}
+// clang-format on
+// *INDENT-ON*
+} // namespace cast
+} // namespace utils
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_UTILS_CAST_SATURATE_CAST_H__ */
diff --git a/arm_compute/core/utils/misc/Traits.h b/arm_compute/core/utils/misc/Traits.h
new file mode 100644
index 0000000000..9d86dd1b3c
--- /dev/null
+++ b/arm_compute/core/utils/misc/Traits.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_UTILS_TRAITS_TRAITS_H__
+#define __ARM_COMPUTE_UTILS_TRAITS_TRAITS_H__
+
+#include <type_traits>
+
+namespace arm_compute
+{
+namespace utils
+{
+namespace traits
+{
+/** Floating-point type trait; by default it reports the same result as std::is_floating_point for @p T */
+template <typename T>
+struct is_floating_point : public std::is_floating_point<T>
+{
+};
+
+/** Specialization that reports the half type as floating point */
+// NOTE(review): `half` is neither declared nor included by this header; presumably it must be visible before inclusion - confirm.
+template <>
+struct is_floating_point<half> : public std::true_type
+{
+};
+} // namespace traits
+} // namespace utils
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_UTILS_TRAITS_TRAITS_H__ */
diff --git a/arm_compute/core/utils/misc/Utility.h b/arm_compute/core/utils/misc/Utility.h
index 0a9f180b4c..37c8b66e06 100644
--- a/arm_compute/core/utils/misc/Utility.h
+++ b/arm_compute/core/utils/misc/Utility.h
@@ -128,22 +128,6 @@ inline auto foldl(F &&func, T &&initial, U &&value, Us &&... values) -> decltype
return foldl(std::forward<F>(func), func(std::forward<T>(initial), std::forward<U>(value)), std::forward<Us>(values)...);
}
-/** Type cast with saturation.
- *
- * @param[in] val Value of type U to cast.
- *
- * @return Original value clamped to numeric limits of T and converted to type T.
- *
- * @warning Numeric limits of T must be representable without loss in type U.
- */
-template <typename T, typename U>
-T saturate_cast(U val)
-{
- const auto low = static_cast<U>(std::numeric_limits<T>::lowest());
- const auto high = static_cast<U>(std::numeric_limits<T>::max());
- return static_cast<T>(clamp(val, low, high));
-}
-
/** Perform an index sort of a given vector.
*
* @param[in] v Vector to sort
diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h
index 9d4aa5b6a2..2a8294f28b 100644
--- a/arm_compute/runtime/CL/CLFunctions.h
+++ b/arm_compute/runtime/CL/CLFunctions.h
@@ -40,6 +40,7 @@
#include "arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h"
#include "arm_compute/runtime/CL/functions/CLBox3x3.h"
#include "arm_compute/runtime/CL/functions/CLCannyEdge.h"
+#include "arm_compute/runtime/CL/functions/CLCast.h"
#include "arm_compute/runtime/CL/functions/CLChannelCombine.h"
#include "arm_compute/runtime/CL/functions/CLChannelExtract.h"
#include "arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h"
diff --git a/arm_compute/runtime/CL/functions/CLCast.h b/arm_compute/runtime/CL/functions/CLCast.h
new file mode 100644
index 0000000000..ca50cf1691
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLCast.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLCAST_H__
+#define __ARM_COMPUTE_CLCAST_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLDepthConvertLayerKernel.
+ *
+ * The interface takes a conversion policy only; there is no shift parameter.
+ */
+class CLCast : public ICLSimpleFunction
+{
+public:
+ /** Initialize the function's source and destination
+ *
+ * Input data type must be different than output data type.
+ *
+ * Valid conversions Input -> Output :
+ *
+ * - U8 -> S8, U16, S16, U32, S32, F16, F32
+ * - U16 -> U8, S8, S16, U32, S32, F16, F32
+ * - S16 -> U8, S8, U16, U32, S32, F16, F32
+ * - U32 -> U8, S8, U16, S16, S32, F16, F32
+ * - S32 -> U8, S8, U16, S16, U32, F16, F32
+ * - F16 -> U8, S8, U16, S16, U32, F32
+ * - F32 -> U8, S8, U16, S16, U32, F16
+ *
+ * @param[in] input The input tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+ * @param[out] output The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+ * @param[in] policy Conversion policy.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLCast
+ *
+ * @param[in] input Source tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+ * @param[in] output Destination tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+ * @param[in] policy Conversion policy.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy);
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLCAST_H__*/
diff --git a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
index 40ae907805..a1985562da 100644
--- a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
@@ -43,24 +43,24 @@ public:
*
* Valid conversions Input -> Output :
*
- * - U8 -> U16, S16, U32, S32
- * - U16 -> U8, U32, S32
- * - S16 -> U8, U32, S32
- * - U32 -> U8, U16, S16
- * - S32 -> U8, U16, S16
- * - F16 -> F32
- * - F32 -> F16
+ * - U8 -> S8, U16, S16, U32, S32, F16, F32
+ * - U16 -> U8, S8, S16, U32, S32, F16, F32
+ * - S16 -> U8, S8, U16, U32, S32, F16, F32
+ * - U32 -> U8, S8, U16, S16, S32, F16, F32
+ * - S32 -> U8, S8, U16, S16, U32, F16, F32
+ * - F16 -> U8, S8, U16, S16, U32, F32
+ * - F32 -> U8, S8, U16, S16, U32, F16
*
- * @param[in] input The input tensor to convert. Data types supported: U8/U16/S16/U32/S32/F16/F32.
- * @param[out] output The output tensor. Data types supported: U8/U16/S16/U32/S32/F16/F32.
+ * @param[in] input The input tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+ * @param[out] output The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
* @param[in] policy Conversion policy.
* @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8.
*/
void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
/** Static function to check if given info will lead to a valid configuration of @ref CLDepthConvertLayer
*
- * @param[in] input Source tensor info. Data types supported: U8/U16/S16/U32/S32/F16/F32.
- * @param[in] output Destination tensor info. Data type supported: U8/U16/S16/U32/S32/F16/F32.
+ * @param[in] input Source tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+ * @param[in] output Destination tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
* @param[in] policy Conversion policy.
* @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8.
*
@@ -68,5 +68,5 @@ public:
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift);
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_CLDEPTHCONVERT_H__*/
diff --git a/src/core/CL/cl_kernels/depth_convert.cl b/src/core/CL/cl_kernels/depth_convert.cl
index 611449e614..7b03273b7b 100644
--- a/src/core/CL/cl_kernels/depth_convert.cl
+++ b/src/core/CL/cl_kernels/depth_convert.cl
@@ -69,8 +69,7 @@ __kernel void convert_depth_down(
in_data = vload16(0, (__global DATA_TYPE_IN *)in.ptr);
#if defined(IS_DATA_TYPE_FLOAT)
- const DATA_TYPE_IN scale = (DATA_TYPE_IN)(1 << shift);
- vstore16(CONVERT_DOWN(in_data / scale, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr);
+ vstore16(CONVERT_DOWN(in_data, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr);
#else /* defined(IS_DATA_TYPE_FLOAT) */
vstore16(CONVERT_DOWN(in_data >> shift, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr);
#endif /* defined(IS_DATA_TYPE_FLOAT) */
@@ -109,8 +108,7 @@ __kernel void convert_depth_up(
in_data = vload16(0, (__global DATA_TYPE_IN *)in.ptr);
#if defined(IS_DATA_TYPE_FLOAT)
- const DATA_TYPE_OUT scale = (DATA_TYPE_OUT)(1 << shift);
- vstore16(CONVERT_UP(in_data, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)) * scale, 0, (__global DATA_TYPE_OUT *)out.ptr);
+ vstore16(CONVERT_UP(in_data, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr);
#else /* defined(IS_DATA_TYPE_FLOAT) */
vstore16(CONVERT_UP(in_data, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)) << shift, 0, (__global DATA_TYPE_OUT *)out.ptr);
#endif /* defined(IS_DATA_TYPE_FLOAT) */
diff --git a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
index ffbd295646..b0c21624ed 100644
--- a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
@@ -37,8 +37,8 @@
#include <set>
#include <string>
-using namespace arm_compute;
-
+namespace arm_compute
+{
namespace
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift)
@@ -46,42 +46,20 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, C
ARM_COMPUTE_UNUSED(policy);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON(input == output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16,
- DataType::U16, DataType::U32, DataType::S32,
- DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16,
- DataType::U16, DataType::U32, DataType::S32,
- DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input,
+ 1,
+ DataType::U8, DataType::S8, DataType::S16,
+ DataType::U16, DataType::U32, DataType::S32, DataType::F16,
+ DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output,
+ 1,
+ DataType::U8, DataType::S8, DataType::S16,
+ DataType::U16, DataType::U32, DataType::S32, DataType::F16,
+ DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == output->data_type(), "Input and output data types must be different");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_float(input->data_type()) && shift != 0, "Shift is used only with integer inputs");
ARM_COMPUTE_RETURN_ERROR_ON(shift >= 8);
- // Check if convertion is supported
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U8 && (output->data_type() != DataType::U16 && output->data_type() != DataType::S16
- && output->data_type() != DataType::U32 && output->data_type() != DataType::S32),
- "Only data types supported [in] U8 -> [out] U16, S16, U32, S32");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U16 && (output->data_type() != DataType::U8 && output->data_type() != DataType::U32
- && output->data_type() != DataType::S32),
- "Only data types supported [in] U16 -> [out] U8, U32, S32");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::S16 && (output->data_type() != DataType::U8 && output->data_type() != DataType::U32
- && output->data_type() != DataType::S32),
- "Only data types supported [in] S16 -> [out] U8, U32, S32");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U32 && (output->data_type() != DataType::U8 && output->data_type() != DataType::U16
- && output->data_type() != DataType::S16),
- "Only data types supported [in] U32 -> [out] U8, U16, S16");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::S32 && (output->data_type() != DataType::U8 && output->data_type() != DataType::U16
- && output->data_type() != DataType::S16),
- "Only data types supported [in] S32 -> [out] U8, U16, S16");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F16 && output->data_type() != DataType::F32,
- "Only data types supported [in] F16 -> [out] F32");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F32 && output->data_type() != DataType::F16,
- "Only data types supported [in] F32 -> [out] F16");
-
// Validate in case of configured output
if(output->total_size() > 0)
{
@@ -109,12 +87,12 @@ void CLDepthConvertLayerKernel::configure(const ICLTensor *input, ICLTensor *out
CLBuildOptions build_opts;
build_opts.add_option("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type()));
build_opts.add_option("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()));
- // Down conversions from float always SATURATE as out-of-bounds conversion from float->integer is implementation defined
- build_opts.add_option_if(input_size > output_size, ((policy == ConvertPolicy::WRAP) && !is_data_type_float(input->info()->data_type())) ? "-DWRAP" : "-DSATURATE");
- build_opts.add_option_if(is_data_type_float(input->info()->data_type()), "-DIS_DATA_TYPE_FLOAT");
+ // Conversions from float always SATURATE as out-of-bounds conversion from float->integer is implementation defined
+ build_opts.add_option_if(is_data_type_float(input->info()->data_type()) || policy == ConvertPolicy::SATURATE, "-DSATURATE");
+ build_opts.add_option_if(is_data_type_float(input->info()->data_type()) || is_data_type_float(output->info()->data_type()), "-DIS_DATA_TYPE_FLOAT");
// Create kernel
- const std::string kernel_name = (input_size > output_size) ? "convert_depth_down" : "convert_depth_up";
+ const std::string kernel_name = (input_size >= output_size) ? "convert_depth_down" : "convert_depth_up";
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
// Set shift arg
@@ -132,3 +110,4 @@ Status CLDepthConvertLayerKernel::validate(const ITensorInfo *input, const ITens
return Status{};
}
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
index 0f416defab..e9417ece44 100644
--- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
+++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
@@ -34,7 +34,7 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
-#include "arm_compute/core/utils/misc/Utility.h"
+#include "arm_compute/core/utils/misc/SaturateCast.h"
#include <algorithm>
#include <arm_neon.h>
@@ -667,7 +667,7 @@ void logits_1d_softmax_qasymm8(const ITensor &in, const ITensor &max, void *cons
/* Run remaining elements */
for(; i < input_width; ++i)
{
- out_ptr[i] = utility::saturate_cast<qasymm8_t>(tmp_ptr[i] * sum_inversed);
+ out_ptr[i] = utils::cast::saturate_cast<qasymm8_t>(tmp_ptr[i] * sum_inversed);
}
}
},
diff --git a/src/runtime/CL/functions/CLCast.cpp b/src/runtime/CL/functions/CLCast.cpp
new file mode 100644
index 0000000000..e0ffcdb09b
--- /dev/null
+++ b/src/runtime/CL/functions/CLCast.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLCast.h"
+
+#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "support/ToolchainSupport.h"
+
+#include <utility>
+
+namespace arm_compute
+{
+void CLCast::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy)
+{
+ auto k = arm_compute::support::cpp14::make_unique<CLDepthConvertLayerKernel>();
+ k->configure(input, output, policy, 0);
+ _kernel = std::move(k);
+}
+
+Status CLCast::validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy)
+{
+ return CLDepthConvertLayerKernel::validate(input, output, policy, 0);
+}
+} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLDepthConvertLayer.cpp b/src/runtime/CL/functions/CLDepthConvertLayer.cpp
index 2e52e8aadc..dbf71ac1e0 100644
--- a/src/runtime/CL/functions/CLDepthConvertLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthConvertLayer.cpp
@@ -28,8 +28,8 @@
#include <utility>
-using namespace arm_compute;
-
+namespace arm_compute
+{
void CLDepthConvertLayer::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift)
{
auto k = arm_compute::support::cpp14::make_unique<CLDepthConvertLayerKernel>();
@@ -41,3 +41,4 @@ Status CLDepthConvertLayer::validate(const ITensorInfo *input, const ITensorInfo
{
return CLDepthConvertLayerKernel::validate(input, output, policy, shift);
}
+} // namespace arm_compute
diff --git a/tests/validation/CL/Cast.cpp b/tests/validation/CL/Cast.cpp
new file mode 100644
index 0000000000..107edcd11e
--- /dev/null
+++ b/tests/validation/CL/Cast.cpp
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLCast.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ConvertPolicyDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/CastFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+// Tolerance
+constexpr AbsoluteTolerance<float> one_tolerance(1);
+constexpr AbsoluteTolerance<float> zero_tolerance(0);
+
+/** Input data sets **/
+// U8
+const auto CastU8toS8Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S8));
+const auto CastU8toU16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16));
+const auto CastU8toS16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16));
+const auto CastU8toU32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U32));
+const auto CastU8toS32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S32));
+const auto CastU8toF16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::F16));
+const auto CastU8toF32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::F32));
+
+// S8
+const auto CastS8toU8Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::U8));
+const auto CastS8toU16Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::U16));
+const auto CastS8toS16Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::S16));
+const auto CastS8toU32Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::U32));
+const auto CastS8toS32Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::S32));
+const auto CastS8toF16Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::F16));
+const auto CastS8toF32Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::F32));
+
+// U16
+const auto CastU16toU8Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U8));
+const auto CastU16toS8Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::S8));
+const auto CastU16toS16Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::S16));
+const auto CastU16toU32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32));
+const auto CastU16toS32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::S32));
+const auto CastU16toF16Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::F16));
+const auto CastU16toF32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::F32));
+
+// S16
+const auto CastS16toU8Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8));
+const auto CastS16toS8Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S8));
+const auto CastS16toU16Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U16));
+const auto CastS16toU32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U32));
+const auto CastS16toS32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32));
+const auto CastS16toF16Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::F16));
+const auto CastS16toF32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::F32));
+
+// U32
+const auto CastU32toU8Dataset = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::U8));
+const auto CastU32toS8Dataset = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::S8));
+const auto CastU32toU16Dataset = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::U16));
+const auto CastU32toS16Dataset = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::S16));
+const auto CastU32toS32Dataset = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::S32));
+const auto CastU32toF16Dataset = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::F16));
+const auto CastU32toF32Dataset = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::F32));
+
+// S32
+const auto CastS32toU8Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::U8));
+const auto CastS32toS8Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::S8));
+const auto CastS32toU16Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::U16));
+const auto CastS32toS16Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::S16));
+const auto CastS32toU32Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::U32));
+const auto CastS32toF16Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::F16));
+const auto CastS32toF32Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::F32));
+
+// F16
+const auto CastF16toU8Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::U8));
+const auto CastF16toS8Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::S8));
+const auto CastF16toU16Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::U16));
+const auto CastF16toS16Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::S16));
+const auto CastF16toU32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::U32));
+const auto CastF16toS32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::S32));
+const auto CastF16toF32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F32));
+
+// F32
+const auto CastF32toU8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::U8));
+const auto CastF32toS8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::S8));
+const auto CastF32toU16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::U16));
+const auto CastF32toS16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::S16));
+const auto CastF32toU32Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::U32));
+const auto CastF32toS32Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::S32));
+const auto CastF32toF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16));
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(Cast)
+template <typename T>
+using CLCastToU8Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T, uint8_t>;
+template <typename T>
+using CLCastToS8Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T, int8_t>;
+template <typename T>
+using CLCastToU16Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T, uint16_t>;
+template <typename T>
+using CLCastToS16Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T, int16_t>;
+template <typename T>
+using CLCastToU32Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T, uint32_t>;
+template <typename T>
+using CLCastToS32Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T, int32_t>;
+template <typename T>
+using CLCastToF16Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T, half>;
+template <typename T>
+using CLCastToF32Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T, float>;
+
+#define CAST_SUITE(NAME, idt, odt, type, dataset, tolerance) \
+ TEST_SUITE(NAME) \
+ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), datasets::ConvertPolicies()), \
+ shape, policy) \
+ { \
+ CLTensor src = create_tensor<CLTensor>(shape, idt, 1); \
+ CLTensor dst = create_tensor<CLTensor>(shape, odt, 1); \
+ \
+ CLCast cast; \
+ cast.configure(&src, &dst, policy); \
+ \
+ const ValidRegion valid_region = shape_to_valid_region(shape); \
+ validate(dst.info()->valid_region(), valid_region); \
+ \
+ const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); \
+ validate(src.info()->padding(), padding); \
+ validate(dst.info()->padding(), padding); \
+ } \
+ FIXTURE_DATA_TEST_CASE(RunSmall, type, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), dataset), \
+ datasets::ConvertPolicies())) \
+ { \
+ validate(CLAccessor(_target), _reference, tolerance); \
+ } \
+ TEST_SUITE_END()
+
+// U8
+CAST_SUITE(U8_to_S8, DataType::U8, DataType::S8, CLCastToS8Fixture<uint8_t>, CastU8toS8Dataset, zero_tolerance)
+CAST_SUITE(U8_to_U16, DataType::U8, DataType::U16, CLCastToU16Fixture<uint8_t>, CastU8toU16Dataset, zero_tolerance)
+CAST_SUITE(U8_to_S16, DataType::U8, DataType::S16, CLCastToS16Fixture<uint8_t>, CastU8toS16Dataset, zero_tolerance)
+CAST_SUITE(U8_to_U32, DataType::U8, DataType::U32, CLCastToU32Fixture<uint8_t>, CastU8toU32Dataset, zero_tolerance)
+CAST_SUITE(U8_to_S32, DataType::U8, DataType::S32, CLCastToS32Fixture<uint8_t>, CastU8toS32Dataset, zero_tolerance)
+CAST_SUITE(U8_to_F16, DataType::U8, DataType::F16, CLCastToF16Fixture<uint8_t>, CastU8toF16Dataset, zero_tolerance)
+CAST_SUITE(U8_to_F32, DataType::U8, DataType::F32, CLCastToF32Fixture<uint8_t>, CastU8toF32Dataset, zero_tolerance)
+
+// S8
+CAST_SUITE(S8_to_U8, DataType::S8, DataType::U8, CLCastToU8Fixture<int8_t>, CastS8toU8Dataset, zero_tolerance)
+CAST_SUITE(S8_to_U16, DataType::S8, DataType::U16, CLCastToU16Fixture<int8_t>, CastS8toU16Dataset, zero_tolerance)
+CAST_SUITE(S8_to_S16, DataType::S8, DataType::S16, CLCastToS16Fixture<int8_t>, CastS8toS16Dataset, zero_tolerance)
+CAST_SUITE(S8_to_U32, DataType::S8, DataType::U32, CLCastToU32Fixture<int8_t>, CastS8toU32Dataset, zero_tolerance)
+CAST_SUITE(S8_to_S32, DataType::S8, DataType::S32, CLCastToS32Fixture<int8_t>, CastS8toS32Dataset, zero_tolerance)
+CAST_SUITE(S8_to_F16, DataType::S8, DataType::F16, CLCastToF16Fixture<int8_t>, CastS8toF16Dataset, zero_tolerance)
+CAST_SUITE(S8_to_F32, DataType::S8, DataType::F32, CLCastToF32Fixture<int8_t>, CastS8toF32Dataset, zero_tolerance)
+
+// U16
+CAST_SUITE(U16_to_U8, DataType::U16, DataType::U8, CLCastToU8Fixture<uint16_t>, CastU16toU8Dataset, zero_tolerance)
+CAST_SUITE(U16_to_S8, DataType::U16, DataType::S8, CLCastToS8Fixture<uint16_t>, CastU16toS8Dataset, zero_tolerance)
+CAST_SUITE(U16_to_S16, DataType::U16, DataType::S16, CLCastToS16Fixture<uint16_t>, CastU16toS16Dataset, zero_tolerance)
+CAST_SUITE(U16_to_U32, DataType::U16, DataType::U32, CLCastToU32Fixture<uint16_t>, CastU16toU32Dataset, zero_tolerance)
+CAST_SUITE(U16_to_S32, DataType::U16, DataType::S32, CLCastToS32Fixture<uint16_t>, CastU16toS32Dataset, zero_tolerance)
+CAST_SUITE(U16_to_F16, DataType::U16, DataType::F16, CLCastToF16Fixture<uint16_t>, CastU16toF16Dataset, zero_tolerance)
+CAST_SUITE(U16_to_F32, DataType::U16, DataType::F32, CLCastToF32Fixture<uint16_t>, CastU16toF32Dataset, zero_tolerance)
+
+// S16
+CAST_SUITE(S16_to_U8, DataType::S16, DataType::U8, CLCastToU8Fixture<int16_t>, CastS16toU8Dataset, zero_tolerance)
+CAST_SUITE(S16_to_S8, DataType::S16, DataType::S8, CLCastToS8Fixture<int16_t>, CastS16toS8Dataset, zero_tolerance)
+CAST_SUITE(S16_to_U16, DataType::S16, DataType::U16, CLCastToU16Fixture<int16_t>, CastS16toU16Dataset, zero_tolerance)
+CAST_SUITE(S16_to_U32, DataType::S16, DataType::U32, CLCastToU32Fixture<int16_t>, CastS16toU32Dataset, zero_tolerance)
+CAST_SUITE(S16_to_S32, DataType::S16, DataType::S32, CLCastToS32Fixture<int16_t>, CastS16toS32Dataset, zero_tolerance)
+CAST_SUITE(S16_to_F16, DataType::S16, DataType::F16, CLCastToF16Fixture<int16_t>, CastS16toF16Dataset, zero_tolerance)
+CAST_SUITE(S16_to_F32, DataType::S16, DataType::F32, CLCastToF32Fixture<int16_t>, CastS16toF32Dataset, zero_tolerance)
+
+// U32
+CAST_SUITE(U32_to_U8, DataType::U32, DataType::U8, CLCastToU8Fixture<uint32_t>, CastU32toU8Dataset, zero_tolerance)
+CAST_SUITE(U32_to_S8, DataType::U32, DataType::S8, CLCastToS8Fixture<uint32_t>, CastU32toS8Dataset, zero_tolerance)
+CAST_SUITE(U32_to_U16, DataType::U32, DataType::U16, CLCastToU16Fixture<uint32_t>, CastU32toU16Dataset, zero_tolerance)
+CAST_SUITE(U32_to_S16, DataType::U32, DataType::S16, CLCastToS16Fixture<uint32_t>, CastU32toS16Dataset, zero_tolerance)
+CAST_SUITE(U32_to_S32, DataType::U32, DataType::S32, CLCastToS32Fixture<uint32_t>, CastU32toS32Dataset, zero_tolerance)
+CAST_SUITE(U32_to_F16, DataType::U32, DataType::F16, CLCastToF16Fixture<uint32_t>, CastU32toF16Dataset, zero_tolerance)
+CAST_SUITE(U32_to_F32, DataType::U32, DataType::F32, CLCastToF32Fixture<uint32_t>, CastU32toF32Dataset, zero_tolerance)
+
+// S32
+CAST_SUITE(S32_to_U8, DataType::S32, DataType::U8, CLCastToU8Fixture<int32_t>, CastS32toU8Dataset, zero_tolerance)
+CAST_SUITE(S32_to_S8, DataType::S32, DataType::S8, CLCastToS8Fixture<int32_t>, CastS32toS8Dataset, zero_tolerance)
+CAST_SUITE(S32_to_U16, DataType::S32, DataType::U16, CLCastToU16Fixture<int32_t>, CastS32toU16Dataset, zero_tolerance)
+CAST_SUITE(S32_to_S16, DataType::S32, DataType::S16, CLCastToS16Fixture<int32_t>, CastS32toS16Dataset, zero_tolerance)
+CAST_SUITE(S32_to_U32, DataType::S32, DataType::U32, CLCastToU32Fixture<int32_t>, CastS32toU32Dataset, zero_tolerance)
+CAST_SUITE(S32_to_F16, DataType::S32, DataType::F16, CLCastToF16Fixture<int32_t>, CastS32toF16Dataset, zero_tolerance)
+CAST_SUITE(S32_to_F32, DataType::S32, DataType::F32, CLCastToF32Fixture<int32_t>, CastS32toF32Dataset, zero_tolerance)
+
+// F16
+CAST_SUITE(F16_to_U8, DataType::F16, DataType::U8, CLCastToU8Fixture<half>, CastF16toU8Dataset, one_tolerance)
+CAST_SUITE(F16_to_S8, DataType::F16, DataType::S8, CLCastToS8Fixture<half>, CastF16toS8Dataset, one_tolerance)
+CAST_SUITE(F16_to_U16, DataType::F16, DataType::U16, CLCastToU16Fixture<half>, CastF16toU16Dataset, one_tolerance)
+CAST_SUITE(F16_to_S16, DataType::F16, DataType::S16, CLCastToS16Fixture<half>, CastF16toS16Dataset, one_tolerance)
+CAST_SUITE(F16_to_U32, DataType::F16, DataType::U32, CLCastToU32Fixture<half>, CastF16toU32Dataset, one_tolerance)
+CAST_SUITE(F16_to_S32, DataType::F16, DataType::S32, CLCastToS32Fixture<half>, CastF16toS32Dataset, one_tolerance)
+CAST_SUITE(F16_to_F32, DataType::F16, DataType::F32, CLCastToF32Fixture<half>, CastF16toF32Dataset, one_tolerance)
+
+// F32
+CAST_SUITE(F32_to_U8, DataType::F32, DataType::U8, CLCastToU8Fixture<float>, CastF32toU8Dataset, one_tolerance)
+CAST_SUITE(F32_to_S8, DataType::F32, DataType::S8, CLCastToS8Fixture<float>, CastF32toS8Dataset, one_tolerance)
+CAST_SUITE(F32_to_U16, DataType::F32, DataType::U16, CLCastToU16Fixture<float>, CastF32toU16Dataset, one_tolerance)
+CAST_SUITE(F32_to_S16, DataType::F32, DataType::S16, CLCastToS16Fixture<float>, CastF32toS16Dataset, one_tolerance)
+CAST_SUITE(F32_to_U32, DataType::F32, DataType::U32, CLCastToU32Fixture<float>, CastF32toU32Dataset, one_tolerance)
+CAST_SUITE(F32_to_S32, DataType::F32, DataType::S32, CLCastToS32Fixture<float>, CastF32toS32Dataset, one_tolerance)
+CAST_SUITE(F32_to_F16, DataType::F32, DataType::F16, CLCastToF16Fixture<float>, CastF32toF16Dataset, one_tolerance)
+
+TEST_SUITE_END() // Cast
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/DepthConvertLayer.cpp b/tests/validation/CL/DepthConvertLayer.cpp
index fe46313568..7d7b5f2176 100644
--- a/tests/validation/CL/DepthConvertLayer.cpp
+++ b/tests/validation/CL/DepthConvertLayer.cpp
@@ -348,84 +348,6 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToS32Fixture<int16_t>, frame
}
TEST_SUITE_END()
-TEST_SUITE(F16_to_F32)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
- DepthConvertLayerShiftDataset),
- shape, policy, shift)
-{
- // Create tensors
- CLTensor src = create_tensor<CLTensor>(shape, DataType::F16, 1);
- CLTensor dst = create_tensor<CLTensor>(shape, DataType::F32, 1);
-
- // Create and Configure function
- CLDepthConvertLayer depth_convert;
- depth_convert.configure(&src, &dst, policy, shift);
-
- // Validate valid region
- const ValidRegion valid_region = shape_to_valid_region(shape);
- validate(dst.info()->valid_region(), valid_region);
-
- // Validate padding
- const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
- validate(src.info()->padding(), padding);
- validate(dst.info()->padding(), padding);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToF32Fixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerF16toF32Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
- DepthConvertLayerShiftDataset))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToF32Fixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF16toF32Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
- DepthConvertLayerShiftDataset))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END()
-
-TEST_SUITE(F32_to_F16)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
- DepthConvertLayerShiftDataset),
- shape, policy, shift)
-{
- // Create tensors
- CLTensor src = create_tensor<CLTensor>(shape, DataType::F32, 1);
- CLTensor dst = create_tensor<CLTensor>(shape, DataType::F16, 1);
-
- // Create and Configure function
- CLDepthConvertLayer depth_convert;
- depth_convert.configure(&src, &dst, policy, shift);
-
- // Validate valid region
- const ValidRegion valid_region = shape_to_valid_region(shape);
- validate(dst.info()->valid_region(), valid_region);
-
- // Validate padding
- const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
- validate(src.info()->padding(), padding);
- validate(dst.info()->padding(), padding);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToF16Fixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerF32toF16Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
- DepthConvertLayerShiftDataset))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToF16Fixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF32toF16Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
- DepthConvertLayerShiftDataset))
-{
- // Validate output
- validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END()
-
TEST_SUITE_END()
TEST_SUITE_END()
} // namespace validation
diff --git a/tests/validation/fixtures/CastFixture.h b/tests/validation/fixtures/CastFixture.h
new file mode 100644
index 0000000000..3a6efa22af
--- /dev/null
+++ b/tests/validation/fixtures/CastFixture.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_CAST_FIXTURE
+#define ARM_COMPUTE_TEST_CAST_FIXTURE
+
+#include "tests/validation/fixtures/DepthConvertLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
+class CastValidationFixture : public framework::Fixture
+{
+public:
+ template <typename...>
+ void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy)
+ {
+ _target = compute_target(shape, dt_in, dt_out, policy);
+ _reference = compute_reference(shape, dt_in, dt_out, policy);
+ }
+
+protected:
+ template <typename U>
+ void fill(U &&tensor, int i, DataType dt_in, DataType dt_out)
+ {
+ // Restricting range to avoid inf values
+ if(dt_out == DataType::F16)
+ {
+ const int signed_min = -32000;
+ const int signed_max = 32000;
+ const int unsigned_min = 0;
+ const int unsigned_max = 65000;
+
+ switch(dt_in)
+ {
+ case DataType::U8:
+ case DataType::QASYMM8:
+ case DataType::S8:
+ case DataType::F32:
+ {
+ library->fill_tensor_uniform(tensor, i);
+ break;
+ }
+ case DataType::U16:
+ {
+ library->fill_tensor_uniform(tensor, i, static_cast<uint16_t>(unsigned_min), static_cast<uint16_t>(unsigned_max));
+ break;
+ }
+ case DataType::S16:
+ {
+ library->fill_tensor_uniform(tensor, i, static_cast<int16_t>(signed_min), static_cast<int16_t>(signed_max));
+ break;
+ }
+ case DataType::U32:
+ {
+ library->fill_tensor_uniform(tensor, i, static_cast<uint32_t>(unsigned_min), static_cast<uint32_t>(unsigned_max));
+ break;
+ }
+ case DataType::S32:
+ {
+ library->fill_tensor_uniform(tensor, i, static_cast<int32_t>(signed_min), static_cast<int32_t>(signed_max));
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("NOT SUPPORTED!");
+ }
+ }
+ else
+ {
+ library->fill_tensor_uniform(tensor, i);
+ }
+ }
+
+ TensorType compute_target(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy)
+ {
+ // Create tensors
+ TensorType src = create_tensor<TensorType>(shape, dt_in, 1);
+ TensorType dst = create_tensor<TensorType>(shape, dt_out, 1);
+
+ // Create and configure function
+ FunctionType cast;
+ cast.configure(&src, &dst, policy);
+
+ ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+ // Allocate tensors
+ src.allocator()->allocate();
+ dst.allocator()->allocate();
+
+ ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+ // Fill tensors
+ fill(AccessorType(src), 0, dt_in, dt_out);
+
+ // Compute function
+ cast.run();
+
+ return dst;
+ }
+
+ SimpleTensor<T2> compute_reference(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy)
+ {
+ // Create reference
+ SimpleTensor<T1> src{ shape, dt_in, 1 };
+
+ // Fill reference
+ fill(src, 0, dt_in, dt_out);
+
+ return reference::depth_convert<T1, T2>(src, dt_out, policy, 0);
+ }
+
+ TensorType _target{};
+ SimpleTensor<T2> _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_CAST_FIXTURE */
diff --git a/tests/validation/reference/DepthConvertLayer.cpp b/tests/validation/reference/DepthConvertLayer.cpp
index fd2e0ae378..4d5b97b478 100644
--- a/tests/validation/reference/DepthConvertLayer.cpp
+++ b/tests/validation/reference/DepthConvertLayer.cpp
@@ -25,6 +25,9 @@
#include "tests/validation/Helpers.h"
+#include "arm_compute/core/utils/misc/Rounding.h"
+#include "arm_compute/core/utils/misc/SaturateCast.h"
+
#include "tests/Types.h"
namespace arm_compute
@@ -35,13 +38,13 @@ namespace validation
{
namespace reference
{
-template < typename T1, typename T2, typename std::enable_if < std::is_integral<T1>::value &&std::is_integral<T2>::value &&!std::is_same<T1, T2>::value, int >::type >
+template < typename T1, typename T2, typename std::enable_if < std::is_integral<T1>::value &&!std::is_same<T1, T2>::value, int >::type >
SimpleTensor<T2> depth_convert(const SimpleTensor<T1> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift)
{
SimpleTensor<T2> result(src.shape(), dt_out);
// Up-casting
- if(src.data_type() <= dt_out)
+ if(element_size_from_data_type(src.data_type()) < element_size_from_data_type(dt_out))
{
for(int i = 0; i < src.num_elements(); ++i)
{
@@ -54,48 +57,100 @@ SimpleTensor<T2> depth_convert(const SimpleTensor<T1> &src, DataType dt_out, Con
for(int i = 0; i < src.num_elements(); ++i)
{
T1 val = src[i] >> shift;
- result[i] = (policy == ConvertPolicy::SATURATE) ? saturate_cast<T2>(val) : static_cast<T2>(val);
+ result[i] = (policy == ConvertPolicy::SATURATE) ? utils::cast::saturate_cast<T2>(val) : static_cast<T2>(val);
}
}
return result;
}
-template < typename T1, typename T2, typename std::enable_if < is_floating_point<T1>::value &&is_floating_point<T2>::value &&!std::is_same<T1, T2>::value, int >::type >
+// Reference conversion for floating-point sources (T1 floating point, T1 != T2).
+// - shift is not supported on this path and must be 0; policy is not consulted.
+// - Floating-point destinations get a plain value conversion, so fractional values survive.
+// - Every other destination is rounded half away from zero and then saturated to the range of T2.
+template < typename T1, typename T2, typename std::enable_if < is_floating_point<T1>::value &&!std::is_same<T1, T2>::value, int >::type >
SimpleTensor<T2> depth_convert(const SimpleTensor<T1> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift)
{
SimpleTensor<T2> result(src.shape(), dt_out);
+ ARM_COMPUTE_ERROR_ON(shift != 0);
+ ARM_COMPUTE_UNUSED(policy, shift);
- const uint32_t scale = 1 << shift;
-
- // Up-casting
- if(src.data_type() <= dt_out)
+ if(is_floating_point<T2>::value)
+ {
+ // Float to float: rounding to an integer here would discard the fractional part
+ for(int i = 0; i < src.num_elements(); ++i)
+ {
+ result[i] = static_cast<T2>(src[i]);
+ }
+ }
+ else
+ {
+ // Float to integer: always saturate on floats
+ for(int i = 0; i < src.num_elements(); ++i)
{
- for(int i = 0; i < src.num_elements(); ++i)
- {
- result[i] = src[i] * static_cast<T2>(scale);
- }
- }
- // Down-casting
- else
- {
- for(int i = 0; i < src.num_elements(); ++i)
- {
- T1 val = src[i] / static_cast<T1>(scale);
- result[i] = (policy == ConvertPolicy::SATURATE) ? saturate_cast<T2>(val) : static_cast<T2>(val);
- }
+ T1 val = utils::rounding::round_half_away_from_zero(src[i]);
+ result[i] = utils::cast::saturate_cast<T2>(val);
}
+ }
return result;
}
+// Explicit instantiations, grouped by source element type.
+// Each group lists the seven other element types as destinations; same-type pairs are
+// excluded by the !std::is_same<T1, T2> constraint on the templates.
+
+// Source U8 (uint8_t)
+template SimpleTensor<int8_t> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
template SimpleTensor<int16_t> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
template SimpleTensor<int32_t> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<half> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<float> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
+// Source S8 (int8_t)
+template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int16_t> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int32_t> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<half> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<float> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
+// Source U16 (uint16_t)
template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int8_t> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int16_t> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int32_t> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<half> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<float> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
+// Source S16 (int16_t)
template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int8_t> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
template SimpleTensor<int32_t> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
-template SimpleTensor<half> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<half> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<float> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
+// Source U32 (uint32_t)
+template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int8_t> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int16_t> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int32_t> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<half> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<float> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
+// Source S32 (int32_t)
+template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int8_t> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int16_t> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<half> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<float> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
+// Source F16 (half)
+template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int8_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int16_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int32_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
template SimpleTensor<float> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
+// Source F32 (float)
+template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int8_t> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int16_t> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int32_t> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<half> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/DepthConvertLayer.h b/tests/validation/reference/DepthConvertLayer.h
index 5d97c73b3c..2113593e9d 100644
--- a/tests/validation/reference/DepthConvertLayer.h
+++ b/tests/validation/reference/DepthConvertLayer.h
@@ -35,10 +35,10 @@ namespace validation
{
namespace reference
{
-template < typename T1, typename T2, typename std::enable_if < std::is_integral<T1>::value &&std::is_integral<T2>::value &&!std::is_same<T1, T2>::value, int >::type = 0 >
+/** Reference depth conversion for integral source element types.
+ *
+ * Selected when T1 is integral and T1 differs from T2.
+ *
+ * @param[in] src Source tensor
+ * @param[in] dt_out Data type given to the returned tensor; also compared by element size against the
+ * source data type to choose between the up-casting and the down-casting path
+ * @param[in] policy Conversion policy; on the down-casting path SATURATE selects a saturating cast,
+ * any other value a plain static_cast
+ * @param[in] shift Shift amount; on the down-casting path each source value is right-shifted by it before the cast
+ *
+ * @return Tensor with the shape of @p src holding the converted values
+ */
+template < typename T1, typename T2, typename std::enable_if < std::is_integral<T1>::value &&!std::is_same<T1, T2>::value, int >::type = 0 >
SimpleTensor<T2> depth_convert(const SimpleTensor<T1> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
-template < typename T1, typename T2, typename std::enable_if < is_floating_point<T1>::value &&is_floating_point<T2>::value &&!std::is_same<T1, T2>::value, int >::type = 0 >
+/** Reference depth conversion for floating-point source element types.
+ *
+ * Selected when T1 is a floating-point type and T1 differs from T2.
+ *
+ * @param[in] src Source tensor
+ * @param[in] dt_out Data type given to the returned tensor
+ * @param[in] policy Not consulted by this overload
+ * @param[in] shift Must be 0; the definition raises an error otherwise
+ *
+ * @return Tensor with the shape of @p src holding the converted values
+ */
+template < typename T1, typename T2, typename std::enable_if < is_floating_point<T1>::value &&!std::is_same<T1, T2>::value, int >::type = 0 >
SimpleTensor<T2> depth_convert(const SimpleTensor<T1> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
} // namespace reference
} // namespace validation