aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Adnan AlSinan <adnan.alsinan@arm.com> 2023-09-15 13:46:17 +0100
committer: Adnan AlSinan <adnan.alsinan@arm.com> 2023-09-15 16:42:15 +0000
commit: 40a9d3ea62d7dfed3fb42b5bc5c2ee5272fd89bf (patch)
tree: 7197f5334955bdcde27bd2ad5086a4f1dc58d949
parent: 2ffc85eb0dead5f623d503ed5d2f0a94aba57484 (diff)
download: ComputeLibrary-40a9d3ea62d7dfed3fb42b5bc5c2ee5272fd89bf.tar.gz
Remove deprecated support for BF16 in CpuCast
Resolves : [COMPMID-6212] Signed-off-by: Omar Al Khatib <omar.alkhatib@arm.com> Signed-off-by: Adnan AlSinan <adnan.alsinan@arm.com> Change-Id: I29bbd9a3d96af462faf7f0ee13b9849f75e05356 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10319 Benchmark: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
-rw-r--r-- Android.bp 1
-rw-r--r-- docs/user_guide/release_version_and_change_log.dox 1
-rw-r--r-- filelist.json 3
-rw-r--r-- src/BUILD.bazel 1
-rw-r--r-- src/CMakeLists.txt 1
-rw-r--r-- src/cpu/kernels/CpuCastKernel.cpp 43
-rw-r--r-- src/cpu/kernels/CpuCastKernel.h 14
-rw-r--r-- src/cpu/kernels/cast/generic/neon/bfloat16.cpp 146
-rw-r--r-- src/cpu/operators/CpuCast.h 9
-rw-r--r-- tests/validation/NEON/Cast.cpp 19
10 files changed, 21 insertions, 217 deletions
diff --git a/Android.bp b/Android.bp
index d7d900ccc4..a81bf87e62 100644
--- a/Android.bp
+++ b/Android.bp
@@ -481,7 +481,6 @@ cc_library_static {
"src/cpu/kernels/boundingboxtransform/generic/neon/fp32.cpp",
"src/cpu/kernels/boundingboxtransform/generic/neon/impl.cpp",
"src/cpu/kernels/boundingboxtransform/generic/neon/qsymm16.cpp",
- "src/cpu/kernels/cast/generic/neon/bfloat16.cpp",
"src/cpu/kernels/cast/generic/neon/fp16.cpp",
"src/cpu/kernels/crop/generic/neon/fp16.cpp",
"src/cpu/kernels/crop/generic/neon/fp32.cpp",
diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox
index 1a5add02fc..05a18c0d17 100644
--- a/docs/user_guide/release_version_and_change_log.dox
+++ b/docs/user_guide/release_version_and_change_log.dox
@@ -55,6 +55,7 @@ v23.11 Public major release
- @ref experimental::dynamic_fusion::GpuCkwPool2d
- Add new OpenCL™ kernels:
- @ref opencl::kernels::ClMatMulLowpNativeMMULKernel support for QASYMM8 and QASYMM8_SIGNED, with batch support
+ - Deprecate support for Bfloat16 in @ref cpu::CpuCast.
v23.08 Public major release
- Deprecate the legacy 'libarm_compute_core' library. This library is an artifact of Compute Library's legacy library architecture and no longer serves any purpose.
diff --git a/filelist.json b/filelist.json
index 2a88aec37b..23ee9cae22 100644
--- a/filelist.json
+++ b/filelist.json
@@ -1051,8 +1051,7 @@
"common": [
"src/cpu/operators/CpuCast.cpp",
"src/cpu/kernels/CpuCastKernel.cpp",
- "src/runtime/NEON/functions/NECast.cpp",
- "src/cpu/kernels/cast/generic/neon/bfloat16.cpp"
+ "src/runtime/NEON/functions/NECast.cpp"
],
"neon":{
"fp16":["src/cpu/kernels/cast/generic/neon/fp16.cpp"]
diff --git a/src/BUILD.bazel b/src/BUILD.bazel
index ab0ea66bdd..3b428393fa 100644
--- a/src/BUILD.bazel
+++ b/src/BUILD.bazel
@@ -732,7 +732,6 @@ filegroup(
"cpu/kernels/boundingboxtransform/generic/neon/fp32.cpp",
"cpu/kernels/boundingboxtransform/generic/neon/impl.cpp",
"cpu/kernels/boundingboxtransform/generic/neon/qsymm16.cpp",
- "cpu/kernels/cast/generic/neon/bfloat16.cpp",
"cpu/kernels/cast/generic/neon/fp16.cpp",
"cpu/kernels/crop/generic/neon/fp16.cpp",
"cpu/kernels/crop/generic/neon/fp32.cpp",
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index e6ef5bfc6a..0b3da44da9 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -724,7 +724,6 @@ target_sources(
cpu/kernels/boundingboxtransform/generic/neon/fp32.cpp
cpu/kernels/boundingboxtransform/generic/neon/impl.cpp
cpu/kernels/boundingboxtransform/generic/neon/qsymm16.cpp
- cpu/kernels/cast/generic/neon/bfloat16.cpp
cpu/kernels/cast/generic/neon/fp16.cpp
cpu/kernels/crop/generic/neon/fp16.cpp
cpu/kernels/crop/generic/neon/fp32.cpp
diff --git a/src/cpu/kernels/CpuCastKernel.cpp b/src/cpu/kernels/CpuCastKernel.cpp
index d478328d07..764a1ec71c 100644
--- a/src/cpu/kernels/CpuCastKernel.cpp
+++ b/src/cpu/kernels/CpuCastKernel.cpp
@@ -75,46 +75,34 @@ static const std::vector<CpuCastKernel::CastKernel> available_kernels =
REGISTER_FP16_NEON(arm_compute::cpu::neon_fp32_to_fp16_cast)
},
{
- "neon_fp32_to_bf16_cast",
- [](const CastDataTypeISASelectorData & data) { return data.src_dt == DataType::F32 && data.dst_dt == DataType::BFLOAT16 && data.isa.bf16; },
- REGISTER_BF16_NEON(arm_compute::cpu::neon_fp32_to_bfloat16_cast)
- },
- {
"neon_s32_cast",
[](const CastDataTypeISASelectorData & data) { return data.src_dt == DataType::S32 && data.dst_dt == DataType::F16 && data.isa.fp16; },
REGISTER_FP16_NEON(arm_compute::cpu::neon_s32_to_fp16_cast)
},
- {
- "neon_bf16_cast",
- [](const CastDataTypeISASelectorData & data) { return data.src_dt == DataType::BFLOAT16 && data.dst_dt == DataType::F32 && data.isa.bf16; },
- REGISTER_BF16_NEON(arm_compute::cpu::neon_bfloat16_to_fp32_cast)
- },
};
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, ConvertPolicy policy)
{
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src);
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(dst);
- ARM_COMPUTE_RETURN_ERROR_ON_CPU_BF16_UNSUPPORTED(src);
- ARM_COMPUTE_RETURN_ERROR_ON_CPU_BF16_UNSUPPORTED(dst);
ARM_COMPUTE_UNUSED(policy);
ARM_COMPUTE_RETURN_ERROR_ON(src == dst);
#ifdef __aarch64__
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::U8,
- DataType::S16, DataType::U16, DataType::BFLOAT16, DataType::F16,
+ DataType::S16, DataType::U16, DataType::F16,
DataType::F32, DataType::S32, DataType::S64, DataType::U64);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::U8,
- DataType::S16, DataType::U16, DataType::BFLOAT16, DataType::F16,
+ DataType::S16, DataType::U16, DataType::F16,
DataType::U32, DataType::S32, DataType::F32, DataType::S64);
#else // __aarch64__
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::U8,
- DataType::S16, DataType::U16, DataType::BFLOAT16, DataType::F16,
+ DataType::S16, DataType::U16, DataType::F16,
DataType::F32, DataType::S32);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::U8,
- DataType::S16, DataType::U16, DataType::BFLOAT16, DataType::F16,
+ DataType::S16, DataType::U16, DataType::F16,
DataType::U32, DataType::S32, DataType::F32);
#endif // __aarch64__
@@ -136,18 +124,15 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, Conver
ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_type() == DataType::S16 && (dst->data_type() != DataType::QASYMM8_SIGNED && dst->data_type() != DataType::U8 && dst->data_type() != DataType::S32),
"Only data_types supported [in] S16 -> [out] U8, S32");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_type() == DataType::BFLOAT16 && dst->data_type() != DataType::F32,
- "Only data_types supported [in] BFLOAT16 -> [out] F32");
-
ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_type() == DataType::F16 && (dst->data_type() != DataType::QASYMM8_SIGNED && dst->data_type() != DataType::QASYMM8
&& dst->data_type() != DataType::U8
&& dst->data_type() != DataType::F32 && dst->data_type() != DataType::S32),
"Only data_types supported [in] F16 -> [out] QASYMM8, F32, S32, U8");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_type() == DataType::F32 && (dst->data_type() != DataType::QASYMM8_SIGNED && dst->data_type() != DataType::QASYMM8
- && dst->data_type() != DataType::F16 && dst->data_type() != DataType::BFLOAT16
+ && dst->data_type() != DataType::F16
&& dst->data_type() != DataType::S32 && dst->data_type() != DataType::U8),
- "Only data_types supported [in] F32 -> [out] QASYMM8, BFLOAT16, F16, S32, U8");
+ "Only data_types supported [in] F32 -> [out] QASYMM8, F16, S32, U8");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_type() == DataType::S32 && (dst->data_type() != DataType::QASYMM8_SIGNED && dst->data_type() != DataType::QASYMM8
&& dst->data_type() != DataType::F16
@@ -346,7 +331,7 @@ void CpuCastKernel::run_op(ITensorPack &tensors, const Window &window, const Thr
Iterator src(_src, win);
Iterator dst(_dst, win);
- /*ukernel runs only when using fp16/bfloat16, so we validate it isn't a nullptr only before using it */
+ /*ukernel runs only when using fp16, so we validate it isn't a nullptr only before using it */
const auto *uk = CpuCastKernel::get_implementation(CastDataTypeISASelectorData{ _src->info()->data_type(), _dst->info()->data_type(), CPUInfo::get().get_isa() });
switch(_src->info()->data_type())
@@ -948,13 +933,6 @@ void CpuCastKernel::run_op(ITensorPack &tensors, const Window &window, const Thr
}
break;
}
- case DataType::BFLOAT16:
- {
- /* Up-conversion BFLOAT16 -> F32 */
- ARM_COMPUTE_ERROR_ON(uk->ukernel == nullptr);
- uk->ukernel(_src, _dst, info, _policy, window);
- break;
- }
case DataType::F16:
{
/* conversion F16 -> any data type */
@@ -972,13 +950,6 @@ void CpuCastKernel::run_op(ITensorPack &tensors, const Window &window, const Thr
uk->ukernel(_src, _dst, info, _policy, window);
break;
}
- case DataType::BFLOAT16:
- {
- /* Down-conversion F32 -> BFLOAT16 */
- ARM_COMPUTE_ERROR_ON(uk->ukernel == nullptr);
- uk->ukernel(_src, _dst, info, _policy, window);
- break;
- }
case DataType::S32:
{
/* Conversion F32 -> S32 */
diff --git a/src/cpu/kernels/CpuCastKernel.h b/src/cpu/kernels/CpuCastKernel.h
index d8e61e6011..a7e6417ff2 100644
--- a/src/cpu/kernels/CpuCastKernel.h
+++ b/src/cpu/kernels/CpuCastKernel.h
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_CPU_CAST_KERNEL_H
-#define ARM_COMPUTE_CPU_CAST_KERNEL_H
+#ifndef ACL_SRC_CPU_KERNELS_CPUCASTKERNEL_H
+#define ACL_SRC_CPU_KERNELS_CPUCASTKERNEL_H
#include "src/core/common/Macros.h"
#include "src/cpu/ICpuKernel.h"
@@ -54,19 +54,17 @@ public:
* - U8 -> U16, S16, S32, F32, F16
* - U16 -> U8, U32
* - S16 -> QASYMM8_SIGNED, U8, S32
- * - BFLOAT16 -> F32
* - F16 -> QASYMM8_SIGNED, QASYMM8, F32, S32, U8
* - S32 -> QASYMM8_SIGNED, QASYMM8, F16, F32, U8
* - S64 -> F32
- * - F32 -> QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8
+ * - F32 -> QASYMM8_SIGNED, QASYMM8, F16, S32, U8
*
- * @param[in] src The src tensor to convert. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/S32/S64/BFLOAT16/F16/F32.
- * @param[out] dst The dst tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/U32/S32/S64/BFLOAT16/F16/F32.
+ * @param[in] src The src tensor to convert. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/S32/S64/F16/F32.
+ * @param[out] dst The dst tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/U32/S32/S64/F16/F32.
* @param[in] policy Conversion policy.
*
* @note S64 is only supported in aarch64
*
- * @deprecated Support for BFLOAT16 will be removed in 23.05 release
*/
void configure(const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy policy);
/** Static function to check if given info will lead to a valid configuration
@@ -96,4 +94,4 @@ private:
} // namespace kernels
} // namespace cpu
} // namespace arm_compute
-#endif /* ARM_COMPUTE_CPU_CAST_KERNEL_H */
+#endif // ACL_SRC_CPU_KERNELS_CPUCASTKERNEL_H
diff --git a/src/cpu/kernels/cast/generic/neon/bfloat16.cpp b/src/cpu/kernels/cast/generic/neon/bfloat16.cpp
deleted file mode 100644
index d8e2756192..0000000000
--- a/src/cpu/kernels/cast/generic/neon/bfloat16.cpp
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Copyright (c) 2016-2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#if defined(ARM_COMPUTE_ENABLE_BF16)
-
-#include "arm_compute/core/CPP/CPPTypes.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "src/core/NEON/wrapper/wrapper.h"
-#include "src/cpu/kernels/CpuCastKernel.h"
-#include "src/cpu/kernels/cast/list.h"
-#include "support/SaturateCast.h"
-
-namespace arm_compute
-{
-namespace cpu
-{
-void neon_fp32_to_bfloat16_cast(const ITensor *_src, ITensor *_dst, const ThreadInfo &info, ConvertPolicy _policy, const Window &window)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_UNUSED(_policy);
-
- const auto window_start_x = static_cast<int>(window.x().start());
- const auto window_end_x = static_cast<int>(window.x().end());
- const int window_step_x = 16;
-
- ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
- ARM_COMPUTE_ERROR_ON(_src == _dst);
-
- ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
-
- Window win{ window };
- win.set(Window::DimX, Window::Dimension(0, 1, 1));
-
- Iterator src(_src, win);
- Iterator dst(_dst, win);
-
- /* Down-conversion F32 -> BFLOAT16 */
- execute_window_loop(win, [&](const Coordinates &)
- {
- const auto src_ptr = reinterpret_cast<const float *>(src.ptr());
- const auto dst_ptr = reinterpret_cast<bfloat16 *>(dst.ptr());
-
- int x = window_start_x;
- for(; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- wrapper::vcvt_bf16_f32(reinterpret_cast<float *>(src.ptr()),
- reinterpret_cast<uint16_t *>(dst.ptr()));
- wrapper::vcvt_bf16_f32(reinterpret_cast<float *>(src.ptr()) + 8,
- reinterpret_cast<uint16_t *>(dst.ptr()) + 8);
- }
-
- for(; x < window_end_x; ++x)
- {
- *(dst_ptr + x) = *(src_ptr + x);
- }
- },
- src, dst);
-}
-
-void neon_bfloat16_to_fp32_cast(const ITensor *_src, ITensor *_dst, const ThreadInfo &info, ConvertPolicy _policy, const Window &window)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_UNUSED(_policy);
-
- const auto window_start_x = static_cast<int>(window.x().start());
- const auto window_end_x = static_cast<int>(window.x().end());
- const int window_step_x = 16;
-
- ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
- ARM_COMPUTE_ERROR_ON(_src == _dst);
-
- ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
-
- Window win{ window };
- win.set(Window::DimX, Window::Dimension(0, 1, 1));
-
- Iterator src(_src, win);
- Iterator dst(_dst, win);
- switch(_dst->info()->data_type())
- {
- case DataType::F32:
- {
- /* Up-conversion BFLOAT16 -> F32 */
- execute_window_loop(win, [&](const Coordinates &)
- {
- const auto src_ptr = reinterpret_cast<const bfloat16 *>(src.ptr());
- const auto dst_ptr = reinterpret_cast<float *>(dst.ptr());
-
- int x = window_start_x;
- for(; x <= (window_end_x - window_step_x); x += window_step_x)
- {
- const uint16x8x2_t texels =
- {
- {
- vld1q_u16(reinterpret_cast<uint16_t *>(src.ptr())),
- vld1q_u16(reinterpret_cast<uint16_t *>(src.ptr()) + 8)
- }
- };
-
- vst1q_f32(reinterpret_cast<float *>(dst.ptr()),
- vreinterpretq_f32_u32(vshlq_n_u32(vmovl_u16(vget_low_u16(texels.val[0])), 16)));
- vst1q_f32(reinterpret_cast<float *>(dst.ptr()) + 4,
- vreinterpretq_f32_u32(vshlq_n_u32(vmovl_u16(vget_high_u16(texels.val[0])), 16)));
- vst1q_f32(reinterpret_cast<float *>(dst.ptr()) + 8,
- vreinterpretq_f32_u32(vshlq_n_u32(vmovl_u16(vget_low_u16(texels.val[1])), 16)));
- vst1q_f32(reinterpret_cast<float *>(dst.ptr()) + 12,
- vreinterpretq_f32_u32(vshlq_n_u32(vmovl_u16(vget_high_u16(texels.val[1])), 16)));
- }
-
- for(; x < window_end_x; ++x)
- {
- *(dst_ptr + x) = float(*(src_ptr + x));
- }
- },
- src, dst);
- break;
- }
- default:
- ARM_COMPUTE_ERROR("dst data type unsupported");
- }
-}
-
-} // namespace cpu
-} // namespace arm_compute
-
-#endif /* defined(ARM_COMPUTE_ENABLE_BF16) */
diff --git a/src/cpu/operators/CpuCast.h b/src/cpu/operators/CpuCast.h
index 356b033dbd..1f4da6e2a0 100644
--- a/src/cpu/operators/CpuCast.h
+++ b/src/cpu/operators/CpuCast.h
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_CPU_CAST_H
-#define ARM_COMPUTE_CPU_CAST_H
+#ifndef ACL_SRC_CPU_OPERATORS_CPUCAST_H
+#define ACL_SRC_CPU_OPERATORS_CPUCAST_H
#include "src/cpu/ICpuOperator.h"
@@ -51,14 +51,13 @@ public:
* |S16 | QASYMM8_SIGNED, U8, S32 |
* |F16 | QASYMM8_SIGNED, QASYMM8, F32, S32, U8 |
* |S32 | QASYMM8_SIGNED, QASYMM8, F16, F32, U8 |
- * |F32 | QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8|
+ * |F32 | QASYMM8_SIGNED, QASYMM8, F16, S32, U8|
* |S64 | F32 |
*
* @param[in] src The source tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/S64/F16/F32.
* @param[out] dst The destination tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
* @param[in] policy Conversion policy.
*
- * @deprecated Support for BFLOAT16 will be removed in 23.05 release
*
*/
void configure(const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy policy);
@@ -72,4 +71,4 @@ public:
};
} // namespace cpu
} // namespace arm_compute
-#endif /* ARM_COMPUTE_CPU_ACTIVATION_H */
+#endif // ACL_SRC_CPU_OPERATORS_CPUCAST_H
diff --git a/tests/validation/NEON/Cast.cpp b/tests/validation/NEON/Cast.cpp
index a1ddcc9cad..b56594546b 100644
--- a/tests/validation/NEON/Cast.cpp
+++ b/tests/validation/NEON/Cast.cpp
@@ -217,7 +217,6 @@ DATA_TEST_CASE(KernelSelectionDstFP16, framework::DatasetMode::ALL,
DataType::S32,
DataType::QASYMM8,
DataType::QASYMM8_SIGNED,
- DataType::BFLOAT16,
})),
cpu_ext, data_type)
{
@@ -226,21 +225,9 @@ cpu_ext, data_type)
cpuinfo::CpuIsaInfo cpu_isa{};
cpu_isa.neon = (cpu_ext == "NEON");
+ cpu_isa.fp16 = true;
- cpu_isa.bf16 = (data_type == DataType::BFLOAT16);
-
- /* bf16 cast is different from all the others being converted to fp32 and not to fp16 */
- if(cpu_isa.bf16)
- {
- cpu_isa.fp16 = false;
- selected_impl = CpuCastKernel::get_implementation(CastDataTypeISASelectorData{ data_type, DataType::F32, cpu_isa }, cpu::KernelSelectionType::Preferred);
- }
- else
- {
- cpu_isa.fp16 = true;
- selected_impl = CpuCastKernel::get_implementation(CastDataTypeISASelectorData{ data_type, DataType::F16, cpu_isa }, cpu::KernelSelectionType::Preferred);
- }
-
+ selected_impl = CpuCastKernel::get_implementation(CastDataTypeISASelectorData{ data_type, DataType::F16, cpu_isa }, cpu::KernelSelectionType::Preferred);
ARM_COMPUTE_ERROR_ON_NULLPTR(selected_impl);
std::string expected = lower_string(cpu_ext) + "_" + cpu_impl_dt(data_type) + "_cast";
@@ -254,7 +241,6 @@ DATA_TEST_CASE(KernelSelectionSrcFP32, framework::DatasetMode::ALL,
framework::dataset::make("DataType",
{
DataType::F16,
- DataType::BFLOAT16,
})),
cpu_ext, data_type)
{
@@ -263,7 +249,6 @@ cpu_ext, data_type)
cpuinfo::CpuIsaInfo cpu_isa{};
cpu_isa.neon = (cpu_ext == "NEON");
cpu_isa.fp16 = (data_type == DataType::F16);
- cpu_isa.bf16 = (data_type == DataType::BFLOAT16);
const auto *selected_impl = CpuCastKernel::get_implementation(CastDataTypeISASelectorData{ DataType::F32, data_type, cpu_isa }, cpu::KernelSelectionType::Preferred);