COMPMID-1718: Extend DepthConvert to support Cast

Change-Id: I6ee2c0b670727fc808fa636c53ddfaec3a0036c9
author: Georgios Pinitas <georgios.pinitas@arm.com> 2018-11-19 11:56:51 +0000
committer: Georgios Pinitas <georgios.pinitas@arm.com> 2018-11-22 12:41:32 +0000
commit: 303f0dbebf631b3db00d9d64e71018abbbe9d4fe (patch)
tree: 631e70c9a8141f1262752829a64b3e33c7f1ee93
parent: 9d3a831d4131f8a8b37f127f11d36848d33e8496 (diff)
download: ComputeLibrary-303f0dbebf631b3db00d9d64e71018abbbe9d4fe.tar.gz
19 files changed, 1151 insertions, 188 deletions
diff --git a/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h
index b70a6a993a..c155cb2982 100644
--- a/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h
@@ -43,24 +43,24 @@ public:
      *
      * Valid conversions Input -> Output :
      *
-     *   - U8 -> U16, S16, U32, S32
-     *   - U16 -> U8, U32, S32
-     *   - S16 -> U8, U32, S32
-     *   - U32 -> U8, U16, S16
-     *   - S32 -> U8, U16, S16
-     *   - F16 -> F32
-     *   - F32 -> F16
+     *   - U8  -> S8, U16, S16, U32, S32, F16, F32
+     *   - U16 -> U8, S8, S16, U32, S32, F16, F32
+     *   - S16 -> U8, S8, U16, U32, S32, F16, F32
+     *   - U32 -> U8, S8, U16, S16, S32, F16, F32
+     *   - S32 -> U8, S8, U16, S16, U32, F16, F32
+     *   - F16 -> U8, S8, U16, S16, U32, F32
+     *   - F32 -> U8, S8, U16, S16, U32, F16
      *
-     * @param[in]  input  The input tensor to convert. Data types supported: U8/U16/S16/U32/S32/F16/F32.
-     * @param[out] output The output tensor. Data types supported: U8/U16/S16/U32/S32/F16/F32.
+     * @param[in]  input  The input tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+     * @param[out] output The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
      * @param[in]  policy Conversion policy
      * @param[in]  shift  Value for down/up conversions. Must be 0 <= shift < 8.
      */
     void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
     /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConvertLayerKernel
      *
-     * @param[in] input  Source tensor info. Data types supported: U8/U16/S16/U32/S32/F16/F32.
-     * @param[in] output Destination tensor info. Data type supported: U8/U16/S16/U32/S32/F16/F32.
+     * @param[in] input  Source tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+     * @param[in] output Destination tensor info. Data type supported: U8/S8/U16/S16/U32/S32/F16/F32.
      * @param[in] policy Conversion policy
      * @param[in] shift  Value for down/up conversions. Must be 0 <= shift < 8.
      *
diff --git a/arm_compute/core/utils/misc/Requires.h b/arm_compute/core/utils/misc/Requires.h
new file mode 100644
index 0000000000..2852300bb1
--- /dev/null
+++ b/arm_compute/core/utils/misc/Requires.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_UTILS_REQUIRES_H__
+#define __ARM_COMPUTE_UTILS_REQUIRES_H__
+
+namespace arm_compute
+{
+namespace utils
+{
+namespace requires
+{
+// *INDENT-OFF*
+// clang-format off
+namespace detail
+{
+enum class enabler
+{
+};
+} // namespace arm_compute
+
+/** Requirements as template */
+#define REQUIRES_T(...) template <bool Cond = (__VA_ARGS__), typename std::enable_if<Cond, int>::type = 0>
+/** Requirements as template argument */
+#define REQUIRES_TA(...) typename = typename std::enable_if<(__VA_ARGS__), arm_compute::utils::requires::detail::enabler>::type
+// clang-format on
+// *INDENT-ON*
+} // namespace requires
+} // namespace utils
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_UTILS_REQUIRES_H__ */
diff --git a/arm_compute/core/utils/misc/Rounding.h b/arm_compute/core/utils/misc/Rounding.h
new file mode 100644
index 0000000000..3f86cc8db3
--- /dev/null
+++ b/arm_compute/core/utils/misc/Rounding.h
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_UTILS_ROUNDING_H__
+#define __ARM_COMPUTE_UTILS_ROUNDING_H__
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/utils/misc/Requires.h"
+#include "arm_compute/core/utils/misc/Traits.h"
+#include "support/ToolchainSupport.h"
+
+#include <cmath>
+
+namespace arm_compute
+{
+namespace utils
+{
+namespace rounding
+{
+/** Rounding mode */
+enum class RoundingMode
+{
+    TO_ZERO,             /**< Round towards zero */
+    AWAY_FROM_ZERO,      /**< Round away from zero */
+    HALF_TO_ZERO,        /**< Round half towards from zero */
+    HALF_AWAY_FROM_ZERO, /**< Round half away from zero */
+    HALF_UP,             /**< Round half towards positive infinity */
+    HALF_DOWN,           /**< Round half towards negative infinity */
+    HALF_EVEN            /**< Round half towards nearest even */
+};
+
+/** Round floating-point value with round to zero
+ *
+ * @tparam T Parameter type. Should be of floating point type.
+ *
+ * @param[in] value floating-point value to be rounded.
+ *
+ * @return Floating-point value of rounded @p value.
+ */
+template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
+inline T round_to_zero(T value)
+{
+    T res = std::floor(std::fabs(value));
+    return (value < 0.f) ? -res : res;
+}
+
+/** Round floating-point value with round away from zero
+ *
+ * @tparam T Parameter type. Should be of floating point type.
+ *
+ * @param[in] value floating-point value to be rounded.
+ *
+ * @return Floating-point value of rounded @p value.
+ */
+template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
+inline T round_away_from_zero(T value)
+{
+    T res = std::ceil(std::fabs(value));
+    return (value < 0.f) ? -res : res;
+}
+
+/** Round floating-point value with half value rounding towards zero.
+ *
+ * @tparam T Parameter type. Should be of floating point type.
+ *
+ * @param[in] value floating-point value to be rounded.
+ *
+ * @return Floating-point value of rounded @p value.
+ */
+template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
+inline T round_half_to_zero(T value)
+{
+    T res = T(std::ceil(std::fabs(value) - 0.5f));
+    return (value < 0.f) ? -res : res;
+}
+
+/** Round floating-point value with half value rounding away from zero.
+ *
+ * @tparam T Parameter type. Should be of floating point type.
+ *
+ * @param[in] value floating-point value to be rounded.
+ *
+ * @return Floating-point value of rounded @p value.
+ */
+template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
+inline T round_half_away_from_zero(T value)
+{
+    T res = T(std::floor(std::fabs(value) + 0.5f));
+    return (value < 0.f) ? -res : res;
+}
+
+/** Round floating-point value with half value rounding to positive infinity.
+ *
+ * @tparam T Parameter type. Should be of floating point type.
+ *
+ * @param[in] value floating-point value to be rounded.
+ *
+ * @return Floating-point value of rounded @p value.
+ */
+template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
+inline T round_half_up(T value)
+{
+    return std::floor(value + 0.5f);
+}
+
+/** Round floating-point value with half value rounding to negative infinity.
+ *
+ * @tparam T Parameter type. Should be of floating point type.
+ *
+ * @param[in] value floating-point value to be rounded.
+ *
+ * @return Floating-point value of rounded @p value.
+ */
+template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
+inline T round_half_down(T value)
+{
+    return std::ceil(value - 0.5f);
+}
+
+/** Round floating-point value with half value rounding to nearest even.
+ *
+ * @tparam T Parameter type. Should be of floating point type.
+ *
+ * @param[in] value   floating-point value to be rounded.
+ * @param[in] epsilon precision.
+ *
+ * @return Floating-point value of rounded @p value.
+ */
+template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
+inline T round_half_even(T value, T epsilon = std::numeric_limits<T>::epsilon())
+{
+    T positive_value = std::abs(value);
+    T ipart          = 0;
+    std::modf(positive_value, &ipart);
+    // If 'value' is exactly halfway between two integers
+    if(std::abs(positive_value - (ipart + 0.5f)) < epsilon)
+    {
+        // If 'ipart' is even then return 'ipart'
+        if(std::fmod(ipart, 2.f) < epsilon)
+        {
+            return support::cpp11::copysign(ipart, value);
+        }
+        // Else return the nearest even integer
+        return support::cpp11::copysign(std::ceil(ipart + 0.5f), value);
+    }
+    // Otherwise use the usual round to closest
+    return support::cpp11::copysign(support::cpp11::round(positive_value), value);
+}
+
+/** Round floating-point value given a rounding mode
+ *
+ * @tparam T Parameter type. Should be of floating point type.
+ *
+ * @param[in] value         floating-point value to be rounded.
+ * @param[in] rounding_mode Rounding mode to use.
+ *
+ * @return Floating-point value of rounded @p value.
+ */
+template <typename T, REQUIRES_TA(traits::is_floating_point<T>::value)>
+inline T round(T value, RoundingMode rounding_mode)
+{
+    switch(rounding_mode)
+    {
+        case RoundingMode::TO_ZERO:
+            return round_to_zero(value);
+        case RoundingMode::AWAY_FROM_ZERO:
+            return round_away_from_zero(value);
+        case RoundingMode::HALF_TO_ZERO:
+            return round_half_to_zero(value);
+        case RoundingMode::HALF_AWAY_FROM_ZERO:
+            return round_half_away_from_zero(value);
+        case RoundingMode::HALF_UP:
+            return round_half_up(value);
+        case RoundingMode::HALF_DOWN:
+            return round_half_down(value);
+        case RoundingMode::HALF_EVEN:
+            return round_half_even(value);
+        default:
+            ARM_COMPUTE_ERROR("Unsupported rounding mode!");
+    }
+}
+} // namespace rounding
+} // namespace utils
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_UTILS_ROUNDING_H__ */
diff --git a/arm_compute/core/utils/misc/SaturateCast.h b/arm_compute/core/utils/misc/SaturateCast.h
new file mode 100644
index 0000000000..b9e4787d9a
--- /dev/null
+++ b/arm_compute/core/utils/misc/SaturateCast.h
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_UTILS_CAST_SATURATE_CAST_H__
+#define __ARM_COMPUTE_UTILS_CAST_SATURATE_CAST_H__
+
+#include "arm_compute/core/utils/misc/Rounding.h"
+#include "arm_compute/core/utils/misc/Traits.h"
+#include "arm_compute/core/utils/misc/Utility.h"
+
+namespace arm_compute
+{
+namespace utils
+{
+namespace cast
+{
+// *INDENT-OFF*
+// clang-format off
+// same type
+template<typename T,
+         typename U,
+         typename std::enable_if<std::is_same<T, U>::value, int >::type = 0 >
+T saturate_cast(U v)
+{
+    return v;
+}
+
+// signed -> signed widening/same_width
+template<typename T,
+         typename U,
+         typename std::enable_if<std::is_integral<T>::value &&
+                                 std::is_integral<U>::value &&
+                                 std::is_signed<U>() &&
+                                 std::is_signed<T>() &&
+                                 !std::is_same<T, U>::value &&
+                                 sizeof(T) >= sizeof(U),
+                  int >::type = 0 >
+inline T saturate_cast(U v)
+{
+    return static_cast<T>(v);
+}
+// signed -> signed narrowing
+template<typename T,
+         typename U,
+         typename std::enable_if<std::is_integral<T>::value &&
+                                 std::is_integral<U>::value &&
+                                 std::is_signed<U>() &&
+                                 std::is_signed<T>() &&
+                                 !std::is_same<T, U>::value &&
+                                 sizeof(T) < sizeof(U),
+                  int >::type = 0 >
+inline T saturate_cast(U v)
+{
+    return static_cast<T>(utility::clamp<U>(v, std::numeric_limits<T>::lowest(), std::numeric_limits<T>::max()));
+}
+
+// unsigned -> signed widening
+template<typename T,
+         typename U,
+         typename std::enable_if<std::is_integral<T>::value &&
+                                 std::is_integral<U>::value &&
+                                 std::is_unsigned<U>() &&
+                                 std::is_signed<T>() &&
+                                 !std::is_same<T, U>::value &&
+                                 (sizeof(T) > sizeof(U)),
+                  int >::type = 0 >
+inline T saturate_cast(U v)
+{
+    return static_cast<T>(v);
+}
+// unsigned -> signed narrowing
+template<typename T,
+         typename U,
+         typename std::enable_if<std::is_integral<T>::value &&
+                                 std::is_integral<U>::value &&
+                                 std::is_unsigned<U>() &&
+                                 std::is_signed<T>() &&
+                                 !std::is_same<T, U>::value &&
+                                 sizeof(T) < sizeof(U),
+                  int >::type = 0 >
+inline T saturate_cast(U v)
+{
+    return static_cast<T>(std::min<U>(v, std::numeric_limits<T>::max()));
+}
+// unsigned -> signed same_width
+template<typename T,
+         typename U,
+         typename std::enable_if<std::is_integral<T>::value &&
+                                 std::is_integral<U>::value &&
+                                 std::is_unsigned<U>() &&
+                                 std::is_signed<T>() &&
+                                 !std::is_same<T, U>::value &&
+                                 sizeof(T) == sizeof(U),
+                  int >::type = 0 >
+inline T saturate_cast(U v)
+{
+    return static_cast<T>(std::min<U>(v, std::numeric_limits<T>::max()));
+}
+
+// signed -> unsigned widening/same width
+template<typename T,
+         typename U,
+         typename std::enable_if<std::is_integral<T>::value &&
+                                 std::is_integral<U>::value &&
+                                 std::is_signed<U>() &&
+                                 std::is_unsigned<T>() &&
+                                 !std::is_same<T, U>::value &&
+                                 sizeof(T) >= sizeof(U),
+                  int >::type = 0 >
+inline T saturate_cast(U v)
+{
+    return static_cast<T>(std::max<U>(0, v));
+}
+
+// signed -> unsigned narrowing
+template<typename T,
+         typename U,
+         typename std::enable_if<std::is_integral<T>::value &&
+                                 std::is_integral<U>::value &&
+                                 std::is_signed<U>() &&
+                                 std::is_unsigned<T>() &&
+                                 !std::is_same<T, U>::value &&
+                                 sizeof(T) < sizeof(U),
+                  int >::type = 0 >
+inline T saturate_cast(U v)
+{
+    return static_cast<T>(utility::clamp<U>(v, 0, std::numeric_limits<T>::max()));
+}
+
+// unsigned -> unsigned widening/same width
+template<typename T,
+         typename U,
+         typename std::enable_if<std::is_integral<T>::value &&
+                                 std::is_integral<U>::value &&
+                                 std::is_unsigned<T>() &&
+                                 std::is_unsigned<U>() &&
+                                 !std::is_same<T, U>::value &&
+                                 sizeof(T) >= sizeof(U),
+                  int >::type = 0 >
+inline T saturate_cast(U v)
+{
+    return static_cast<T>(v);
+}
+
+// unsigned -> unsigned narrowing
+template<typename T,
+         typename U,
+         typename std::enable_if<std::is_integral<T>::value &&
+                                 std::is_integral<U>::value &&
+                                 std::is_unsigned<T>() &&
+                                 std::is_unsigned<U>() &&
+                                 !std::is_same<T, U>::value &&
+                                 sizeof(T) < sizeof(U),
+                  int >::type = 0 >
+inline T saturate_cast(U v)
+{
+    return static_cast<T>(utility::clamp<U>(v, std::numeric_limits<T>::lowest(), std::numeric_limits<T>::max()));
+}
+
+// float -> int
+template<typename T,
+         typename U,
+         typename std::enable_if<std::is_integral<T>::value &&
+                                 traits::is_floating_point<U>::value,
+                  int >::type = 0 >
+inline T saturate_cast(U v)
+{
+    int32_t vi = utils::rounding::round_half_away_from_zero(v);
+    return saturate_cast<T>(vi);
+}
+
+// int -> float
+template<typename T,
+         typename U,
+         typename std::enable_if<traits::is_floating_point<T>::value &&
+                                 std::is_integral<U>::value,
+                  int >::type = 0 >
+inline T saturate_cast(U v)
+{
+    return static_cast<T>(v);
+}
+
+// float -> float
+template<typename T,
+        typename U,
+        typename std::enable_if<traits::is_floating_point<T>::value &&
+                                traits::is_floating_point<U>::value,
+                int >::type = 0 >
+inline T saturate_cast(U v)
+{
+    return static_cast<T>(v);
+}
+// clang-format on
+// *INDENT-ON*
+} // namespace cast
+} // namespace utils
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_UTILS_CAST_SATURATE_CAST_H__ */
diff --git a/arm_compute/core/utils/misc/Traits.h b/arm_compute/core/utils/misc/Traits.h
new file mode 100644
index 0000000000..9d86dd1b3c
--- /dev/null
+++ b/arm_compute/core/utils/misc/Traits.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_UTILS_TRAITS_TRAITS_H__
+#define __ARM_COMPUTE_UTILS_TRAITS_TRAITS_H__
+
+#include <type_traits>
+
+namespace arm_compute
+{
+namespace utils
+{
+namespace traits
+{
+template <typename T>
+struct is_floating_point : public std::is_floating_point<T>
+{
+};
+
+template <>
+struct is_floating_point<half> : public std::true_type
+{
+};
+} // namespace traits
+} // namespace utils
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_UTILS_TRAITS_TRAITS_H__ */
diff --git a/arm_compute/core/utils/misc/Utility.h b/arm_compute/core/utils/misc/Utility.h
index 0a9f180b4c..37c8b66e06 100644
--- a/arm_compute/core/utils/misc/Utility.h
+++ b/arm_compute/core/utils/misc/Utility.h
@@ -128,22 +128,6 @@ inline auto foldl(F &&func, T &&initial, U &&value, Us &&... values) -> decltype
     return foldl(std::forward<F>(func), func(std::forward<T>(initial), std::forward<U>(value)), std::forward<Us>(values)...);
 }
 
-/** Type cast with saturation.
- *
- * @param[in] val Value of type U to cast.
- *
- * @return Original value clamped to numeric limits of T and converted to type T.
- *
- * @warning Numeric limits of T must be representable without loss in type U.
- */
-template <typename T, typename U>
-T saturate_cast(U val)
-{
-    const auto low  = static_cast<U>(std::numeric_limits<T>::lowest());
-    const auto high = static_cast<U>(std::numeric_limits<T>::max());
-    return static_cast<T>(clamp(val, low, high));
-}
-
 /** Perform an index sort of a given vector.
  *
  * @param[in] v Vector to sort
diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h
index 9d4aa5b6a2..2a8294f28b 100644
--- a/arm_compute/runtime/CL/CLFunctions.h
+++ b/arm_compute/runtime/CL/CLFunctions.h
@@ -40,6 +40,7 @@
 #include "arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h"
 #include "arm_compute/runtime/CL/functions/CLBox3x3.h"
 #include "arm_compute/runtime/CL/functions/CLCannyEdge.h"
+#include "arm_compute/runtime/CL/functions/CLCast.h"
 #include "arm_compute/runtime/CL/functions/CLChannelCombine.h"
 #include "arm_compute/runtime/CL/functions/CLChannelExtract.h"
 #include "arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h"
diff --git a/arm_compute/runtime/CL/functions/CLCast.h b/arm_compute/runtime/CL/functions/CLCast.h
new file mode 100644
index 0000000000..ca50cf1691
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLCast.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLCAST_H__
+#define __ARM_COMPUTE_CLCAST_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLDepthConvertLayerKernel. */
+class CLCast : public ICLSimpleFunction
+{
+public:
+    /** Initialize the function's source, destination
+     *
+     * Input data type must be different than output data type.
+     *
+     * Valid conversions Input -> Output :
+     *
+     *   - U8  -> S8, U16, S16, U32, S32, F16, F32
+     *   - U16 -> U8, S8, S16, U32, S32, F16, F32
+     *   - S16 -> U8, S8, U16, U32, S32, F16, F32
+     *   - U32 -> U8, S8, U16, S16, S32, F16, F32
+     *   - S32 -> U8, S8, U16, S16, U32, F16, F32
+     *   - F16 -> U8, S8, U16, S16, U32, F32
+     *   - F32 -> U8, S8, U16, S16, U32, F16
+     *
+     * @param[in]  input  The input tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+     * @param[out] output The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+     * @param[in]  policy Conversion policy.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLCast
+     *
+     * @param[in] input  Source tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+     * @param[in] output Destination tensor info. Data type supported: U8/S8/U16/S16/U32/S32/F16/F32.
+     * @param[in] policy Conversion policy.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy);
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLCAST_H__*/
diff --git a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
index 40ae907805..a1985562da 100644
--- a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
@@ -43,24 +43,24 @@ public:
      *
      * Valid conversions Input -> Output :
      *
-     *   - U8 -> U16, S16, U32, S32
-     *   - U16 -> U8, U32, S32
-     *   - S16 -> U8, U32, S32
-     *   - U32 -> U8, U16, S16
-     *   - S32 -> U8, U16, S16
-     *   - F16 -> F32
-     *   - F32 -> F16
+     *   - U8  -> S8, U16, S16, U32, S32, F16, F32
+     *   - U16 -> U8, S8, S16, U32, S32, F16, F32
+     *   - S16 -> U8, S8, U16, U32, S32, F16, F32
+     *   - U32 -> U8, S8, U16, S16, S32, F16, F32
+     *   - S32 -> U8, S8, U16, S16, U32, F16, F32
+     *   - F16 -> U8, S8, U16, S16, U32, F32
+     *   - F32 -> U8, S8, U16, S16, U32, F16
      *
-     * @param[in]  input  The input tensor to convert. Data types supported: U8/U16/S16/U32/S32/F16/F32.
-     * @param[out] output The output tensor. Data types supported: U8/U16/S16/U32/S32/F16/F32.
+     * @param[in]  input  The input tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+     * @param[out] output The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
      * @param[in]  policy Conversion policy.
      * @param[in]  shift  Value for down/up conversions. Must be 0 <= shift < 8.
      */
     void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
     /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConvertLayer
      *
-     * @param[in] input  Source tensor info. Data types supported: U8/U16/S16/U32/S32/F16/F32.
-     * @param[in] output Destination tensor info. Data type supported: U8/U16/S16/U32/S32/F16/F32.
+     * @param[in] input  Source tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+     * @param[in] output Destination tensor info. Data type supported: U8/S8/U16/S16/U32/S32/F16/F32.
      * @param[in] policy Conversion policy.
      * @param[in] shift  Value for down/up conversions. Must be 0 <= shift < 8.
      *
@@ -68,5 +68,5 @@ public:
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift);
 };
-}
+} // namespace arm_compute
 #endif /*__ARM_COMPUTE_CLDEPTHCONVERT_H__*/
diff --git a/src/core/CL/cl_kernels/depth_convert.cl b/src/core/CL/cl_kernels/depth_convert.cl
index 611449e614..7b03273b7b 100644
--- a/src/core/CL/cl_kernels/depth_convert.cl
+++ b/src/core/CL/cl_kernels/depth_convert.cl
@@ -69,8 +69,7 @@ __kernel void convert_depth_down(
     in_data = vload16(0, (__global DATA_TYPE_IN *)in.ptr);
 
 #if defined(IS_DATA_TYPE_FLOAT)
-    const DATA_TYPE_IN scale = (DATA_TYPE_IN)(1 << shift);
-    vstore16(CONVERT_DOWN(in_data / scale, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr);
+    vstore16(CONVERT_DOWN(in_data, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr);
 #else  /* defined(IS_DATA_TYPE_FLOAT) */
     vstore16(CONVERT_DOWN(in_data >> shift, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr);
 #endif /* defined(IS_DATA_TYPE_FLOAT) */
@@ -109,8 +108,7 @@ __kernel void convert_depth_up(
     in_data = vload16(0, (__global DATA_TYPE_IN *)in.ptr);
 
 #if defined(IS_DATA_TYPE_FLOAT)
-    const DATA_TYPE_OUT scale = (DATA_TYPE_OUT)(1 << shift);
-    vstore16(CONVERT_UP(in_data, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)) * scale, 0, (__global DATA_TYPE_OUT *)out.ptr);
+    vstore16(CONVERT_UP(in_data, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr);
 #else  /* defined(IS_DATA_TYPE_FLOAT) */
     vstore16(CONVERT_UP(in_data, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)) << shift, 0, (__global DATA_TYPE_OUT *)out.ptr);
 #endif /* defined(IS_DATA_TYPE_FLOAT) */
diff --git a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
index ffbd295646..b0c21624ed 100644
--- a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
@@ -37,8 +37,8 @@
 #include <set>
 #include <string>
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 namespace
 {
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift)
@@ -46,42 +46,20 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, C
     ARM_COMPUTE_UNUSED(policy);
     ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
     ARM_COMPUTE_RETURN_ERROR_ON(input == output);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16,
-                                                         DataType::U16, DataType::U32, DataType::S32,
-                                                         DataType::F16, DataType::F32);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16,
-                                                         DataType::U16, DataType::U32, DataType::S32,
-                                                         DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input,
+                                                         1,
+                                                         DataType::U8, DataType::S8, DataType::S16,
+                                                         DataType::U16, DataType::U32, DataType::S32, DataType::F16,
+                                                         DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output,
+                                                         1,
+                                                         DataType::U8, DataType::S8, DataType::S16,
+                                                         DataType::U16, DataType::U32, DataType::S32, DataType::F16,
+                                                         DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == output->data_type(), "Input and output data types must be different");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_float(input->data_type()) && shift != 0, "Shift is used only with integer inputs");
     ARM_COMPUTE_RETURN_ERROR_ON(shift >= 8);
 
-    // Check if convertion is supported
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U8 && (output->data_type() != DataType::U16 && output->data_type() != DataType::S16
-                                                                           && output->data_type() != DataType::U32 && output->data_type() != DataType::S32),
-                                    "Only data types supported [in] U8 -> [out] U16, S16, U32, S32");
-
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U16 && (output->data_type() != DataType::U8 && output->data_type() != DataType::U32
-                                                                            && output->data_type() != DataType::S32),
-                                    "Only data types supported [in] U16 ->  [out] U8, U32, S32");
-
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::S16 && (output->data_type() != DataType::U8 && output->data_type() != DataType::U32
-                                                                            && output->data_type() != DataType::S32),
-                                    "Only data types supported [in] S16 ->  [out] U8, U32, S32");
-
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U32 && (output->data_type() != DataType::U8 && output->data_type() != DataType::U16
-                                                                            && output->data_type() != DataType::S16),
-                                    "Only data types supported [in] U32 ->  [out] U8, U16, S16");
-
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::S32 && (output->data_type() != DataType::U8 && output->data_type() != DataType::U16
-                                                                            && output->data_type() != DataType::S16),
-                                    "Only data types supported [in] S32 ->  [out] U8, U16, S16");
-
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F16 && output->data_type() != DataType::F32,
-                                    "Only data types supported [in] F16 ->  [out] F32");
-
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F32 && output->data_type() != DataType::F16,
-                                    "Only data types supported [in] F32 ->  [out] F16");
-
     // Validate in case of configured output
     if(output->total_size() > 0)
     {
@@ -109,12 +87,12 @@ void CLDepthConvertLayerKernel::configure(const ICLTensor *input, ICLTensor *out
     CLBuildOptions build_opts;
     build_opts.add_option("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type()));
     build_opts.add_option("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()));
-    // Down conversions from float always SATURATE as out-of-bounds conversion from float->integer is implementation defined
-    build_opts.add_option_if(input_size > output_size, ((policy == ConvertPolicy::WRAP) && !is_data_type_float(input->info()->data_type())) ? "-DWRAP" : "-DSATURATE");
-    build_opts.add_option_if(is_data_type_float(input->info()->data_type()), "-DIS_DATA_TYPE_FLOAT");
+    // Conversions from float always SATURATE as out-of-bounds conversion from float->integer is implementation defined
+    build_opts.add_option_if(is_data_type_float(input->info()->data_type()) || policy == ConvertPolicy::SATURATE, "-DSATURATE");
+    build_opts.add_option_if(is_data_type_float(input->info()->data_type()) || is_data_type_float(output->info()->data_type()), "-DIS_DATA_TYPE_FLOAT");
 
     // Create kernel
-    const std::string kernel_name = (input_size > output_size) ? "convert_depth_down" : "convert_depth_up";
+    const std::string kernel_name = (input_size >= output_size) ? "convert_depth_down" : "convert_depth_up";
     _kernel                       = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
 
     // Set shift arg
@@ -132,3 +110,4 @@ Status CLDepthConvertLayerKernel::validate(const ITensorInfo *input, const ITens
 
     return Status{};
 }
+} // namespace arm_compute
+\ No newline at end of file
diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
index 0f416defab..e9417ece44 100644
--- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
+++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
@@ -34,7 +34,7 @@
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/core/Window.h"
-#include "arm_compute/core/utils/misc/Utility.h"
+#include "arm_compute/core/utils/misc/SaturateCast.h"
 
 #include <algorithm>
 #include <arm_neon.h>
@@ -667,7 +667,7 @@ void logits_1d_softmax_qasymm8(const ITensor &in, const ITensor &max, void *cons
             /* Run remaining elements */
             for(; i < input_width; ++i)
             {
-                out_ptr[i] = utility::saturate_cast<qasymm8_t>(tmp_ptr[i] * sum_inversed);
+                out_ptr[i] = utils::cast::saturate_cast<qasymm8_t>(tmp_ptr[i] * sum_inversed);
             }
         }
     },
diff --git a/src/runtime/CL/functions/CLCast.cpp b/src/runtime/CL/functions/CLCast.cpp
new file mode 100644
index 0000000000..e0ffcdb09b
--- /dev/null
+++ b/src/runtime/CL/functions/CLCast.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLCast.h"
+
+#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "support/ToolchainSupport.h"
+
+#include <utility>
+
+namespace arm_compute
+{
+void CLCast::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy)
+{
+    auto k = arm_compute::support::cpp14::make_unique<CLDepthConvertLayerKernel>();
+    k->configure(input, output, policy, 0);
+    _kernel = std::move(k);
+}
+
+Status CLCast::validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy)
+{
+    return CLDepthConvertLayerKernel::validate(input, output, policy, 0);
+}
+} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLDepthConvertLayer.cpp b/src/runtime/CL/functions/CLDepthConvertLayer.cpp
index 2e52e8aadc..dbf71ac1e0 100644
--- a/src/runtime/CL/functions/CLDepthConvertLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthConvertLayer.cpp
@@ -28,8 +28,8 @@
 
 #include <utility>
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 void CLDepthConvertLayer::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift)
 {
     auto k = arm_compute::support::cpp14::make_unique<CLDepthConvertLayerKernel>();
@@ -41,3 +41,4 @@ Status CLDepthConvertLayer::validate(const ITensorInfo *input, const ITensorInfo
 {
     return CLDepthConvertLayerKernel::validate(input, output, policy, shift);
 }
+} // namespace arm_compute
diff --git a/tests/validation/CL/Cast.cpp b/tests/validation/CL/Cast.cpp
new file mode 100644
index 0000000000..107edcd11e
--- /dev/null
+++ b/tests/validation/CL/Cast.cpp
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONCLCTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLCast.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ConvertPolicyDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/CastFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+// Tolerance
+constexpr AbsoluteTolerance<float> one_tolerance(1);
+constexpr AbsoluteTolerance<float> zero_tolerance(0);
+
+/** Input data sets **/
+// U8
+const auto CastU8toS8Dataset  = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S8));
+const auto CastU8toU16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16));
+const auto CastU8toS16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16));
+const auto CastU8toU32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U32));
+const auto CastU8toS32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S32));
+const auto CastU8toF16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::F16));
+const auto CastU8toF32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::F32));
+
+// S8
+const auto CastS8toU8Dataset  = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::U8));
+const auto CastS8toU16Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::U16));
+const auto CastS8toS16Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::S16));
+const auto CastS8toU32Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::U32));
+const auto CastS8toS32Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::S32));
+const auto CastS8toF16Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::F16));
+const auto CastS8toF32Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::F32));
+
+// U16
+const auto CastU16toU8Dataset  = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U8));
+const auto CastU16toS8Dataset  = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::S8));
+const auto CastU16toS16Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::S16));
+const auto CastU16toU32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32));
+const auto CastU16toS32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::S32));
+const auto CastU16toF16Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::F16));
+const auto CastU16toF32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::F32));
+
+// S16
+const auto CastS16toU8Dataset  = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8));
+const auto CastS16toS8Dataset  = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S8));
+const auto CastS16toU16Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U16));
+const auto CastS16toU32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U32));
+const auto CastS16toS32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32));
+const auto CastS16toF16Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::F16));
+const auto CastS16toF32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::F32));
+
+// U32
+const auto CastU32toU8Dataset  = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::U8));
+const auto CastU32toS8Dataset  = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::S8));
+const auto CastU32toU16Dataset = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::U16));
+const auto CastU32toS16Dataset = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::S16));
+const auto CastU32toS32Dataset = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::S32));
+const auto CastU32toF16Dataset = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::F16));
+const auto CastU32toF32Dataset = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::F32));
+
+// S32
+const auto CastS32toU8Dataset  = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::U8));
+const auto CastS32toS8Dataset  = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::S8));
+const auto CastS32toU16Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::U16));
+const auto CastS32toS16Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::S16));
+const auto CastS32toU32Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::U32));
+const auto CastS32toF16Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::F16));
+const auto CastS32toF32Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::F32));
+
+// F16
+const auto CastF16toU8Dataset  = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::U8));
+const auto CastF16toS8Dataset  = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::S8));
+const auto CastF16toU16Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::U16));
+const auto CastF16toS16Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::S16));
+const auto CastF16toU32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::U32));
+const auto CastF16toS32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::S32));
+const auto CastF16toF32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F32));
+
+// F32
+const auto CastF32toU8Dataset  = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::U8));
+const auto CastF32toS8Dataset  = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::S8));
+const auto CastF32toU16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::U16));
+const auto CastF32toS16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::S16));
+const auto CastF32toU32Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::U32));
+const auto CastF32toS32Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::S32));
+const auto CastF32toF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16));
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(Cast)
+template <typename T>
+using CLCastToU8Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T, uint8_t>;
+template <typename T>
+using CLCastToS8Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T, int8_t>;
+template <typename T>
+using CLCastToU16Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T, uint16_t>;
+template <typename T>
+using CLCastToS16Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T, int16_t>;
+template <typename T>
+using CLCastToU32Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T, uint32_t>;
+template <typename T>
+using CLCastToS32Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T, int32_t>;
+template <typename T>
+using CLCastToF16Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T, half>;
+template <typename T>
+using CLCastToF32Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T, float>;
+
+#define CAST_SUITE(NAME, idt, odt, type, dataset, tolerance)                                                                     \
+    TEST_SUITE(NAME)                                                                                                             \
+    DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), datasets::ConvertPolicies()),    \
+                   shape, policy)                                                                                                \
+    {                                                                                                                            \
+        CLTensor src = create_tensor<CLTensor>(shape, idt, 1);                                                                   \
+        CLTensor dst = create_tensor<CLTensor>(shape, odt, 1);                                                                   \
+        \
+        CLCast cast;                                                                                                             \
+        cast.configure(&src, &dst, policy);                                                                                      \
+        \
+        const ValidRegion valid_region = shape_to_valid_region(shape);                                                           \
+        validate(dst.info()->valid_region(), valid_region);                                                                      \
+        \
+        const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();                                         \
+        validate(src.info()->padding(), padding);                                                                                \
+        validate(dst.info()->padding(), padding);                                                                                \
+    }                                                                                                                            \
+    FIXTURE_DATA_TEST_CASE(RunSmall, type, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), dataset), \
+                                                                                      datasets::ConvertPolicies()))              \
+    {                                                                                                                            \
+        validate(CLAccessor(_target), _reference, tolerance);                                                                    \
+    }                                                                                                                            \
+    TEST_SUITE_END()
+
+// U8
+CAST_SUITE(U8_to_S8, DataType::U8, DataType::S8, CLCastToS8Fixture<uint8_t>, CastU8toS8Dataset, zero_tolerance)
+CAST_SUITE(U8_to_U16, DataType::U8, DataType::U16, CLCastToU16Fixture<uint8_t>, CastU8toU16Dataset, zero_tolerance)
+CAST_SUITE(U8_to_S16, DataType::U8, DataType::S16, CLCastToS16Fixture<uint8_t>, CastU8toS16Dataset, zero_tolerance)
+CAST_SUITE(U8_to_U32, DataType::U8, DataType::U32, CLCastToU32Fixture<uint8_t>, CastU8toU32Dataset, zero_tolerance)
+CAST_SUITE(U8_to_S32, DataType::U8, DataType::S32, CLCastToS32Fixture<uint8_t>, CastU8toS32Dataset, zero_tolerance)
+CAST_SUITE(U8_to_F16, DataType::U8, DataType::F16, CLCastToF16Fixture<uint8_t>, CastU8toF16Dataset, zero_tolerance)
+CAST_SUITE(U8_to_F32, DataType::U8, DataType::F32, CLCastToF32Fixture<uint8_t>, CastU8toF32Dataset, zero_tolerance)
+
+// S8
+CAST_SUITE(S8_to_U8, DataType::S8, DataType::U8, CLCastToU8Fixture<int8_t>, CastS8toU8Dataset, zero_tolerance)
+CAST_SUITE(S8_to_U16, DataType::S8, DataType::U16, CLCastToU16Fixture<int8_t>, CastS8toU16Dataset, zero_tolerance)
+CAST_SUITE(S8_to_S16, DataType::S8, DataType::S16, CLCastToS16Fixture<int8_t>, CastS8toS16Dataset, zero_tolerance)
+CAST_SUITE(S8_to_U32, DataType::S8, DataType::U32, CLCastToU32Fixture<int8_t>, CastS8toU32Dataset, zero_tolerance)
+CAST_SUITE(S8_to_S32, DataType::S8, DataType::S32, CLCastToS32Fixture<int8_t>, CastS8toS32Dataset, zero_tolerance)
+CAST_SUITE(S8_to_F16, DataType::S8, DataType::F16, CLCastToF16Fixture<int8_t>, CastS8toF16Dataset, zero_tolerance)
+CAST_SUITE(S8_to_F32, DataType::S8, DataType::F32, CLCastToF32Fixture<int8_t>, CastS8toF32Dataset, zero_tolerance)
+
+// U16
+CAST_SUITE(U16_to_U8, DataType::U16, DataType::U8, CLCastToU8Fixture<uint16_t>, CastU16toU8Dataset, zero_tolerance)
+CAST_SUITE(U16_to_S8, DataType::U16, DataType::S8, CLCastToS8Fixture<uint16_t>, CastU16toS8Dataset, zero_tolerance)
+CAST_SUITE(U16_to_S16, DataType::U16, DataType::S16, CLCastToS16Fixture<uint16_t>, CastU16toS16Dataset, zero_tolerance)
+CAST_SUITE(U16_to_U32, DataType::U16, DataType::U32, CLCastToU32Fixture<uint16_t>, CastU16toU32Dataset, zero_tolerance)
+CAST_SUITE(U16_to_S32, DataType::U16, DataType::S32, CLCastToS32Fixture<uint16_t>, CastU16toS32Dataset, zero_tolerance)
+CAST_SUITE(U16_to_F16, DataType::U16, DataType::F16, CLCastToF16Fixture<uint16_t>, CastU16toF16Dataset, zero_tolerance)
+CAST_SUITE(U16_to_F32, DataType::U16, DataType::F32, CLCastToF32Fixture<uint16_t>, CastU16toF32Dataset, zero_tolerance)
+
+// S16
+CAST_SUITE(S16_to_U8, DataType::S16, DataType::U8, CLCastToU8Fixture<int16_t>, CastS16toU8Dataset, zero_tolerance)
+CAST_SUITE(S16_to_S8, DataType::S16, DataType::S8, CLCastToS8Fixture<int16_t>, CastS16toS8Dataset, zero_tolerance)
+CAST_SUITE(S16_to_U16, DataType::S16, DataType::U16, CLCastToU16Fixture<int16_t>, CastS16toU16Dataset, zero_tolerance)
+CAST_SUITE(S16_to_U32, DataType::S16, DataType::U32, CLCastToU32Fixture<int16_t>, CastS16toU32Dataset, zero_tolerance)
+CAST_SUITE(S16_to_S32, DataType::S16, DataType::S32, CLCastToS32Fixture<int16_t>, CastS16toS32Dataset, zero_tolerance)
+CAST_SUITE(S16_to_F16, DataType::S16, DataType::F16, CLCastToF16Fixture<int16_t>, CastS16toF16Dataset, zero_tolerance)
+CAST_SUITE(S16_to_F32, DataType::S16, DataType::F32, CLCastToF32Fixture<int16_t>, CastS16toF32Dataset, zero_tolerance)
+
+// U32
+CAST_SUITE(U32_to_U8, DataType::U32, DataType::U8, CLCastToU8Fixture<uint32_t>, CastU32toU8Dataset, zero_tolerance)
+CAST_SUITE(U32_to_S8, DataType::U32, DataType::S8, CLCastToS8Fixture<uint32_t>, CastU32toS8Dataset, zero_tolerance)
+CAST_SUITE(U32_to_U16, DataType::U32, DataType::U16, CLCastToU16Fixture<uint32_t>, CastU32toU16Dataset, zero_tolerance)
+CAST_SUITE(U32_to_S16, DataType::U32, DataType::S16, CLCastToS16Fixture<uint32_t>, CastU32toS16Dataset, zero_tolerance)
+CAST_SUITE(U32_to_S32, DataType::U32, DataType::S32, CLCastToS32Fixture<uint32_t>, CastU32toS32Dataset, zero_tolerance)
+CAST_SUITE(U32_to_F16, DataType::U32, DataType::F16, CLCastToF16Fixture<uint32_t>, CastU32toF16Dataset, zero_tolerance)
+CAST_SUITE(U32_to_F32, DataType::U32, DataType::F32, CLCastToF32Fixture<uint32_t>, CastU32toF32Dataset, zero_tolerance)
+
+// S32
+CAST_SUITE(S32_to_U8, DataType::S32, DataType::U8, CLCastToU8Fixture<int32_t>, CastS32toU8Dataset, zero_tolerance)
+CAST_SUITE(S32_to_S8, DataType::S32, DataType::S8, CLCastToS8Fixture<int32_t>, CastS32toS8Dataset, zero_tolerance)
+CAST_SUITE(S32_to_U16, DataType::S32, DataType::U16, CLCastToU16Fixture<int32_t>, CastS32toU16Dataset, zero_tolerance)
+CAST_SUITE(S32_to_S16, DataType::S32, DataType::S16, CLCastToS16Fixture<int32_t>, CastS32toS16Dataset, zero_tolerance)
+CAST_SUITE(S32_to_U32, DataType::S32, DataType::U32, CLCastToU32Fixture<int32_t>, CastS32toU32Dataset, zero_tolerance)
+CAST_SUITE(S32_to_F16, DataType::S32, DataType::F16, CLCastToF16Fixture<int32_t>, CastS32toF16Dataset, zero_tolerance)
+CAST_SUITE(S32_to_F32, DataType::S32, DataType::F32, CLCastToF32Fixture<int32_t>, CastS32toF32Dataset, zero_tolerance)
+
+// F16
+CAST_SUITE(F16_to_U8, DataType::F16, DataType::U8, CLCastToU8Fixture<half>, CastF16toU8Dataset, one_tolerance)
+CAST_SUITE(F16_to_S8, DataType::F16, DataType::S8, CLCastToS8Fixture<half>, CastF16toS8Dataset, one_tolerance)
+CAST_SUITE(F16_to_U16, DataType::F16, DataType::U16, CLCastToU16Fixture<half>, CastF16toU16Dataset, one_tolerance)
+CAST_SUITE(F16_to_S16, DataType::F16, DataType::S16, CLCastToS16Fixture<half>, CastF16toS16Dataset, one_tolerance)
+CAST_SUITE(F16_to_U32, DataType::F16, DataType::U32, CLCastToU32Fixture<half>, CastF16toU32Dataset, one_tolerance)
+CAST_SUITE(F16_to_S32, DataType::F16, DataType::S32, CLCastToS32Fixture<half>, CastF16toS32Dataset, one_tolerance)
+CAST_SUITE(F16_to_F32, DataType::F16, DataType::F32, CLCastToF32Fixture<half>, CastF16toF32Dataset, one_tolerance)
+
+// F32
+CAST_SUITE(F32_to_U8, DataType::F32, DataType::U8, CLCastToU8Fixture<float>, CastF32toU8Dataset, one_tolerance)
+CAST_SUITE(F32_to_S8, DataType::F32, DataType::S8, CLCastToS8Fixture<float>, CastF32toS8Dataset, one_tolerance)
+CAST_SUITE(F32_to_U16, DataType::F32, DataType::U16, CLCastToU16Fixture<float>, CastF32toU16Dataset, one_tolerance)
+CAST_SUITE(F32_to_S16, DataType::F32, DataType::S16, CLCastToS16Fixture<float>, CastF32toS16Dataset, one_tolerance)
+CAST_SUITE(F32_to_U32, DataType::F32, DataType::U32, CLCastToU32Fixture<float>, CastF32toU32Dataset, one_tolerance)
+CAST_SUITE(F32_to_S32, DataType::F32, DataType::S32, CLCastToS32Fixture<float>, CastF32toS32Dataset, one_tolerance)
+CAST_SUITE(F32_to_F16, DataType::F32, DataType::F16, CLCastToF16Fixture<float>, CastF32toF16Dataset, one_tolerance)
+
+TEST_SUITE_END() // Cast
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/DepthConvertLayer.cpp b/tests/validation/CL/DepthConvertLayer.cpp
index fe46313568..7d7b5f2176 100644
--- a/tests/validation/CL/DepthConvertLayer.cpp
+++ b/tests/validation/CL/DepthConvertLayer.cpp
@@ -348,84 +348,6 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToS32Fixture<int16_t>, frame
 }
 TEST_SUITE_END()
 
-TEST_SUITE(F16_to_F32)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                   DepthConvertLayerShiftDataset),
-               shape, policy, shift)
-{
-    // Create tensors
-    CLTensor src = create_tensor<CLTensor>(shape, DataType::F16, 1);
-    CLTensor dst = create_tensor<CLTensor>(shape, DataType::F32, 1);
-
-    // Create and Configure function
-    CLDepthConvertLayer depth_convert;
-    depth_convert.configure(&src, &dst, policy, shift);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
-    validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToF32Fixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerF16toF32Dataset),
-                                                                                                                   framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                   DepthConvertLayerShiftDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToF32Fixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF16toF32Dataset),
-                                                                                                                 framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                 DepthConvertLayerShiftDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END()
-
-TEST_SUITE(F32_to_F16)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                   DepthConvertLayerShiftDataset),
-               shape, policy, shift)
-{
-    // Create tensors
-    CLTensor src = create_tensor<CLTensor>(shape, DataType::F32, 1);
-    CLTensor dst = create_tensor<CLTensor>(shape, DataType::F16, 1);
-
-    // Create and Configure function
-    CLDepthConvertLayer depth_convert;
-    depth_convert.configure(&src, &dst, policy, shift);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // Validate padding
-    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
-    validate(src.info()->padding(), padding);
-    validate(dst.info()->padding(), padding);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToF16Fixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerF32toF16Dataset),
-                                                                                                                    framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                    DepthConvertLayerShiftDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToF16Fixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF32toF16Dataset),
-                                                                                                                  framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                  DepthConvertLayerShiftDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END()
-
 TEST_SUITE_END()
 TEST_SUITE_END()
 } // namespace validation
diff --git a/tests/validation/fixtures/CastFixture.h b/tests/validation/fixtures/CastFixture.h
new file mode 100644
index 0000000000..3a6efa22af
--- /dev/null
+++ b/tests/validation/fixtures/CastFixture.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_CAST_FIXTURE
+#define ARM_COMPUTE_TEST_CAST_FIXTURE
+
+#include "tests/validation/fixtures/DepthConvertLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
+class CastValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy)
+    {
+        _target    = compute_target(shape, dt_in, dt_out, policy);
+        _reference = compute_reference(shape, dt_in, dt_out, policy);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i, DataType dt_in, DataType dt_out)
+    {
+        // Restricting range to avoid inf values
+        if(dt_out == DataType::F16)
+        {
+            const int signed_min   = -32000;
+            const int signed_max   = 32000;
+            const int unsigned_min = 0;
+            const int unsigned_max = 65000;
+
+            switch(dt_in)
+            {
+                case DataType::U8:
+                case DataType::QASYMM8:
+                case DataType::S8:
+                case DataType::F32:
+                {
+                    library->fill_tensor_uniform(tensor, i);
+                    break;
+                }
+                case DataType::U16:
+                {
+                    library->fill_tensor_uniform(tensor, i, static_cast<uint16_t>(unsigned_min), static_cast<uint16_t>(unsigned_max));
+                    break;
+                }
+                case DataType::S16:
+                {
+                    library->fill_tensor_uniform(tensor, i, static_cast<int16_t>(signed_min), static_cast<int16_t>(signed_max));
+                    break;
+                }
+                case DataType::U32:
+                {
+                    library->fill_tensor_uniform(tensor, i, static_cast<uint32_t>(unsigned_min), static_cast<uint32_t>(unsigned_max));
+                    break;
+                }
+                case DataType::S32:
+                {
+                    library->fill_tensor_uniform(tensor, i, static_cast<int32_t>(signed_min), static_cast<int32_t>(signed_max));
+                    break;
+                }
+                default:
+                    ARM_COMPUTE_ERROR("NOT SUPPORTED!");
+            }
+        }
+        else
+        {
+            library->fill_tensor_uniform(tensor, i);
+        }
+    }
+
+    TensorType compute_target(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy)
+    {
+        // Create tensors
+        TensorType src = create_tensor<TensorType>(shape, dt_in, 1);
+        TensorType dst = create_tensor<TensorType>(shape, dt_out, 1);
+
+        // Create and configure function
+        FunctionType cast;
+        cast.configure(&src, &dst, policy);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src), 0, dt_in, dt_out);
+
+        // Compute function
+        cast.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T2> compute_reference(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy)
+    {
+        // Create reference
+        SimpleTensor<T1> src{ shape, dt_in, 1 };
+
+        // Fill reference
+        fill(src, 0, dt_in, dt_out);
+
+        return reference::depth_convert<T1, T2>(src, dt_out, policy, 0);
+    }
+
+    TensorType       _target{};
+    SimpleTensor<T2> _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_CAST_FIXTURE */
diff --git a/tests/validation/reference/DepthConvertLayer.cpp b/tests/validation/reference/DepthConvertLayer.cpp
index fd2e0ae378..4d5b97b478 100644
--- a/tests/validation/reference/DepthConvertLayer.cpp
+++ b/tests/validation/reference/DepthConvertLayer.cpp
@@ -25,6 +25,9 @@
 
 #include "tests/validation/Helpers.h"
 
+#include "arm_compute/core/utils/misc/Rounding.h"
+#include "arm_compute/core/utils/misc/SaturateCast.h"
+
 #include "tests/Types.h"
 
 namespace arm_compute
@@ -35,13 +38,13 @@ namespace validation
 {
 namespace reference
 {
-template < typename T1, typename T2, typename std::enable_if < std::is_integral<T1>::value &&std::is_integral<T2>::value &&!std::is_same<T1, T2>::value, int >::type >
+template < typename T1, typename T2, typename std::enable_if < std::is_integral<T1>::value &&!std::is_same<T1, T2>::value, int >::type >
 SimpleTensor<T2> depth_convert(const SimpleTensor<T1> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift)
 {
     SimpleTensor<T2> result(src.shape(), dt_out);
 
     // Up-casting
-    if(src.data_type() <= dt_out)
+    if(element_size_from_data_type(src.data_type()) < element_size_from_data_type(dt_out))
     {
         for(int i = 0; i < src.num_elements(); ++i)
         {
@@ -54,48 +57,100 @@ SimpleTensor<T2> depth_convert(const SimpleTensor<T1> &src, DataType dt_out, Con
         for(int i = 0; i < src.num_elements(); ++i)
         {
             T1 val    = src[i] >> shift;
-            result[i] = (policy == ConvertPolicy::SATURATE) ? saturate_cast<T2>(val) : static_cast<T2>(val);
+            result[i] = (policy == ConvertPolicy::SATURATE) ? utils::cast::saturate_cast<T2>(val) : static_cast<T2>(val);
         }
     }
     return result;
 }
 
-template < typename T1, typename T2, typename std::enable_if < is_floating_point<T1>::value &&is_floating_point<T2>::value &&!std::is_same<T1, T2>::value, int >::type >
+template < typename T1, typename T2, typename std::enable_if < is_floating_point<T1>::value &&!std::is_same<T1, T2>::value, int >::type >
 SimpleTensor<T2> depth_convert(const SimpleTensor<T1> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift)
 {
     SimpleTensor<T2> result(src.shape(), dt_out);
+    ARM_COMPUTE_ERROR_ON(shift != 0);
+    ARM_COMPUTE_UNUSED(policy, shift);
 
-    const uint32_t scale = 1 << shift;
-
-    // Up-casting
-    if(src.data_type() <= dt_out)
+    // Always saturate on floats
+    for(int i = 0; i < src.num_elements(); ++i)
     {
-        for(int i = 0; i < src.num_elements(); ++i)
-        {
-            result[i] = src[i] * static_cast<T2>(scale);
-        }
-    }
-    // Down-casting
-    else
-    {
-        for(int i = 0; i < src.num_elements(); ++i)
-        {
-            T1 val    = src[i] / static_cast<T1>(scale);
-            result[i] = (policy == ConvertPolicy::SATURATE) ? saturate_cast<T2>(val) : static_cast<T2>(val);
-        }
+        T1 val    = utils::rounding::round_half_away_from_zero(src[i]);
+        result[i] = utils::cast::saturate_cast<T2>(val);
     }
     return result;
 }
 
+// U8
+template SimpleTensor<int8_t> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
 template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
 template SimpleTensor<int16_t> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
 template SimpleTensor<int32_t> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<half> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<float> depth_convert(const SimpleTensor<uint8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
+// S8
+template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int16_t> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int32_t> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<half> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<float> depth_convert(const SimpleTensor<int8_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
+// U16
 template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int8_t> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int16_t> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
 template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int32_t> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<half> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<float> depth_convert(const SimpleTensor<uint16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
+// S16
 template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int8_t> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
 template SimpleTensor<int32_t> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
-template SimpleTensor<half> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<half> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<float> depth_convert(const SimpleTensor<int16_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
+// U32
+template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int8_t> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int16_t> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int32_t> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<half> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<float> depth_convert(const SimpleTensor<uint32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
+// S32
+template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int8_t> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int16_t> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<half> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<float> depth_convert(const SimpleTensor<int32_t> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
+// F16
+template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int8_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int16_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int32_t> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
 template SimpleTensor<float> depth_convert(const SimpleTensor<half> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
+// F32
+template SimpleTensor<uint8_t> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int8_t> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint16_t> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int16_t> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<uint32_t> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<int32_t> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+template SimpleTensor<half> depth_convert(const SimpleTensor<float> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
+
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/DepthConvertLayer.h b/tests/validation/reference/DepthConvertLayer.h
index 5d97c73b3c..2113593e9d 100644
--- a/tests/validation/reference/DepthConvertLayer.h
+++ b/tests/validation/reference/DepthConvertLayer.h
@@ -35,10 +35,10 @@ namespace validation
 {
 namespace reference
 {
-template < typename T1, typename T2, typename std::enable_if < std::is_integral<T1>::value &&std::is_integral<T2>::value &&!std::is_same<T1, T2>::value, int >::type = 0 >
+template < typename T1, typename T2, typename std::enable_if < std::is_integral<T1>::value &&!std::is_same<T1, T2>::value, int >::type = 0 >
 SimpleTensor<T2> depth_convert(const SimpleTensor<T1> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
 
-template < typename T1, typename T2, typename std::enable_if < is_floating_point<T1>::value &&is_floating_point<T2>::value &&!std::is_same<T1, T2>::value, int >::type = 0 >
+template < typename T1, typename T2, typename std::enable_if < is_floating_point<T1>::value &&!std::is_same<T1, T2>::value, int >::type = 0 >
 SimpleTensor<T2> depth_convert(const SimpleTensor<T1> &src, DataType dt_out, ConvertPolicy policy, uint32_t shift);
 } // namespace reference
 } // namespace validation
author	Georgios Pinitas <georgios.pinitas@arm.com>	2018-11-19 11:56:51 +0000
committer	Georgios Pinitas <georgios.pinitas@arm.com>	2018-11-22 12:41:32 +0000
commit	303f0dbebf631b3db00d9d64e71018abbbe9d4fe (patch)
tree	631e70c9a8141f1262752829a64b3e33c7f1ee93
parent	9d3a831d4131f8a8b37f127f11d36848d33e8496 (diff)
download	ComputeLibrary-303f0dbebf631b3db00d9d64e71018abbbe9d4fe.tar.gz