author     Pablo Marquez Tello <pablo.tello@arm.com>  2022-07-19 12:19:46 +0100
committer  Pablo Marquez Tello <pablo.tello@arm.com>  2022-07-25 15:56:00 +0000
commit     d208f4f1c2da9e407f86022959c32e8ab9a4aa6e (patch)
tree       f1dc538edb68fa6e266ad8890582f179b961bfae
parent     962531886eee29527bf027107124a27ce94047eb (diff)
download   ComputeLibrary-d208f4f1c2da9e407f86022959c32e8ab9a4aa6e.tar.gz
Enable march=armv8.6-a in non-multi-isa builds
* scons arch=armv8.6-a translates to -march=armv8.6-a
* scons arch=armv8.6-a-sve translates to -march=armv8.6-a+sve
* scons arch=armv8.6-a-sve2 translates to -march=armv8.6-a+sve2
* Resolves COMPMID-5408

Change-Id: I0901e1de864d00109759509af7cc2b5c9ae1cd75
Signed-off-by: Pablo Marquez Tello <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7943
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
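
Below is a minimal Python sketch of the flag selection this change introduces for non-multi-isa armv8.6-a builds, distilled from the SConstruct hunk further down; the function name and option handling are illustrative, not the actual build-script code.

    # Sketch: map the SCons 'arch' value to compiler flags (non-multi-isa).
    MARCH_FOR_ARCH = {
        'armv8.6-a':      '-march=armv8.6-a',
        'armv8.6-a-sve':  '-march=armv8.6-a+sve',
        'armv8.6-a-sve2': '-march=armv8.6-a+sve2',
    }

    def flags_for(arch, custom_options=()):
        """Return (CXXFLAGS, CPPDEFINES) for an armv8.6-a variant."""
        cxxflags   = [MARCH_FOR_ARCH[arch]]
        cppdefines = ['ARM_COMPUTE_ENABLE_I8MM', 'ARM_COMPUTE_ENABLE_BF16',
                      'ARM_COMPUTE_ENABLE_FP16']
        if 'disable_mmla_fp' not in custom_options:
            cppdefines.append('ARM_COMPUTE_ENABLE_SVEF32MM')
        return cxxflags, cppdefines

    # e.g. flags_for('armv8.6-a-sve2') -> (['-march=armv8.6-a+sve2'], [...])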
-rw-r--r--  SConstruct                                               15
-rw-r--r--  filedefs.json                                             5
-rw-r--r--  src/core/NEON/wrapper/intrinsics/cvt.h                    6
-rw-r--r--  src/core/common/Registrars.h                              6
-rw-r--r--  src/cpu/kernels/CpuIm2ColKernel.cpp                      10
-rw-r--r--  src/cpu/kernels/cast/generic/neon/bfloat16.cpp            4
-rw-r--r--  src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp   26
-rw-r--r--  support/Bfloat16.h                                        8
-rw-r--r--  tests/validation/NEON/ConvolutionLayer.cpp               50
-rw-r--r--  tests/validation/NEON/DepthConvertLayer.cpp               6
-rw-r--r--  utils/TypePrinter.h                                      10
11 files changed, 93 insertions, 53 deletions
diff --git a/SConstruct b/SConstruct
index 7bdbb113ef..7f0ebc1a0b 100644
--- a/SConstruct
+++ b/SConstruct
@@ -306,6 +306,17 @@ else: # NONE "multi_isa" builds
env.Append(CXXFLAGS = ['-mfloat-abi=softfp'])
else:
env.Append(CXXFLAGS = ['-mfloat-abi=hard'])
+ elif 'v8.6-a' in env['arch']:
+ if 'armv8.6-a-sve2' == env['arch']:
+ env.Append(CXXFLAGS = ['-march=armv8.6-a+sve2'])
+ elif 'armv8.6-a-sve' == env['arch']:
+ env.Append(CXXFLAGS = ['-march=armv8.6-a+sve'])
+ elif 'armv8.6-a' == env['arch']:
+ env.Append(CXXFLAGS = ['-march=armv8.6-a'])
+
+ env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_I8MM', 'ARM_COMPUTE_ENABLE_BF16','ARM_COMPUTE_ENABLE_FP16'])
+ if "disable_mmla_fp" not in env['custom_options']:
+ env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVEF32MM'])
elif 'v8' in env['arch']:
# Preserve the V8 archs for non-multi-ISA variants
if 'sve2' in env['arch']:
@@ -319,10 +330,6 @@ else: # NONE "multi_isa" builds
else:
env.Append(CXXFLAGS = ['-march=armv8-a'])
- if 'v8.6-a' in env['arch']:
- env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_I8MM', 'ARM_COMPUTE_ENABLE_BF16'])
- if "disable_mmla_fp" not in env['custom_options']:
- env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_SVEF32MM'])
if 'v8.' in env['arch']:
env.Append(CPPDEFINES = ['ARM_COMPUTE_ENABLE_FP16'])
diff --git a/filedefs.json b/filedefs.json
index 76dccfffee..3422eeb252 100644
--- a/filedefs.json
+++ b/filedefs.json
@@ -23,7 +23,8 @@
},
"armv8.6-a": {
"cxxflags": ["-march=armv8.6-a+fp16"],
- "cppdefines": ["ARM_COMPUTE_ENABLE_FP16"]
+ "cppdefines": ["ARM_COMPUTE_ENABLE_FP16", "ARM_COMPUTE_ENABLE_BF16",
+ "ARM_COMPUTE_ENABLE_I8MM"]
},
"armv8.6-a-sve": {
"cxxflags": ["-march=armv8.6-a+sve+fp16+dotprod"],
@@ -37,4 +38,4 @@
}
}
}
-} \ No newline at end of file
+}
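
filedefs.json maps architecture names to per-file compiler settings for multi_isa builds. The enclosing JSON structure is elided by this hunk, so the sketch below (an assumption, not the real SCons consumption logic) searches recursively for an arch entry rather than hard-coding parent keys.

    import json

    def arch_flags(node, arch):
        """Recursively locate 'arch' in filedefs.json and return its
        (cxxflags, cppdefines); the levels above this fragment are unknown."""
        if isinstance(node, dict):
            entry = node.get(arch)
            if isinstance(entry, dict) and 'cxxflags' in entry:
                return entry.get('cxxflags', []), entry.get('cppdefines', [])
            for child in node.values():
                found = arch_flags(child, arch)
                if found is not None:
                    return found
        return None

    with open('filedefs.json') as f:
        print(arch_flags(json.load(f), 'armv8.6-a'))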
diff --git a/src/core/NEON/wrapper/intrinsics/cvt.h b/src/core/NEON/wrapper/intrinsics/cvt.h
index 6e79a92bc2..e52e3dd0c4 100644
--- a/src/core/NEON/wrapper/intrinsics/cvt.h
+++ b/src/core/NEON/wrapper/intrinsics/cvt.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020, 2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -72,7 +72,7 @@ vcvt(const float32x4_t &a)
return vcvtq_s32_f32(a);
}
-#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)
+#if defined(ARM_COMPUTE_ENABLE_BF16)
/** Convert 2x128-bit floating point vectors into 1x128-bit bfloat16 vector
*
* @param[in] inptr Pointer to the input memory to load values from
@@ -89,7 +89,7 @@ inline void vcvt_bf16_f32(const float *inptr, uint16_t *outptr)
: [outptr] "r"(outptr)
: "v0", "v1", "memory");
}
-#endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */
+#endif /* defined(ARM_COMPUTE_ENABLE_BF16) */
} // namespace wrapper
} // namespace arm_compute
diff --git a/src/core/common/Registrars.h b/src/core/common/Registrars.h
index cc76de2be5..42c1aaa9fa 100644
--- a/src/core/common/Registrars.h
+++ b/src/core/common/Registrars.h
@@ -167,10 +167,10 @@
#define REGISTER_INTEGER_SVE2(func_name) nullptr
#endif /* defined(ENABLE_INTEGER_KERNELS) */
-#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)
+#if defined(ARM_COMPUTE_ENABLE_BF16)
#define REGISTER_BF16_NEON(func_name) &(func_name)
-#else /* !(defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16))*/
+#else /* !(defined(ARM_COMPUTE_ENABLE_BF16))*/
#define REGISTER_BF16_NEON(func_name) nullptr
-#endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)*/
+#endif /* defined(ARM_COMPUTE_ENABLE_BF16)*/
#endif /* SRC_CORE_COMMON_REGISTRARS_H */
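
REGISTER_BF16_NEON above resolves at compile time to either a function address or nullptr, so kernel tables can be populated unconditionally and checked at dispatch. A rough Python analogue of that pattern (illustrative only; the real mechanism is the C preprocessor):

    # A table entry is either a callable or None, set by a build-time flag.
    ARM_COMPUTE_ENABLE_BF16 = True  # stands in for the preprocessor define

    def neon_bf16_kernel(src, dst):
        pass  # kernel body elided

    REGISTER_BF16_NEON = neon_bf16_kernel if ARM_COMPUTE_ENABLE_BF16 else None

    def dispatch(kernel, *args):
        if kernel is None:
            raise RuntimeError('kernel not compiled in')
        return kernel(*args)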
diff --git a/src/cpu/kernels/CpuIm2ColKernel.cpp b/src/cpu/kernels/CpuIm2ColKernel.cpp
index 875d66594f..25ff6c291c 100644
--- a/src/cpu/kernels/CpuIm2ColKernel.cpp
+++ b/src/cpu/kernels/CpuIm2ColKernel.cpp
@@ -359,11 +359,11 @@ void CpuIm2ColKernel::configure(const ITensorInfo *src, ITensorInfo *dst, const
case DataType::F32:
_func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<float, false, true> : &CpuIm2ColKernel::run_im2col<float, true, true>;
break;
-#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)
+#if defined(ARM_COMPUTE_ENABLE_BF16)
case DataType::BFLOAT16:
_func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<bfloat16, false, true> : &CpuIm2ColKernel::run_im2col<bfloat16, true, true>;
break;
-#endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */
+#endif /* defined(ARM_COMPUTE_ENABLE_BF16) */
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
_func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<float16_t, false, true> : &CpuIm2ColKernel::run_im2col<float16_t, true, true>;
@@ -385,11 +385,11 @@ void CpuIm2ColKernel::configure(const ITensorInfo *src, ITensorInfo *dst, const
case DataType::F32:
_func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<float, false, false> : &CpuIm2ColKernel::run_im2col<float, true, false>;
break;
-#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)
+#if defined(ARM_COMPUTE_ENABLE_BF16)
case DataType::BFLOAT16:
_func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<bfloat16, false, false> : &CpuIm2ColKernel::run_im2col<bfloat16, true, false>;
break;
-#endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */
+#endif /* defined(ARM_COMPUTE_ENABLE_BF16) */
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
_func = (!conv_info.has_padding()) ? &CpuIm2ColKernel::run_im2col<float16_t, false, false> : &CpuIm2ColKernel::run_im2col<float16_t, true, false>;
@@ -453,4 +453,4 @@ size_t CpuIm2ColKernel::get_mws(const CPUInfo &platform, size_t thread_count) co
}
} // namespace kernels
} // namespace cpu
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/cpu/kernels/cast/generic/neon/bfloat16.cpp b/src/cpu/kernels/cast/generic/neon/bfloat16.cpp
index aac4ef4ca0..eed537039f 100644
--- a/src/cpu/kernels/cast/generic/neon/bfloat16.cpp
+++ b/src/cpu/kernels/cast/generic/neon/bfloat16.cpp
@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)
+#if defined(ARM_COMPUTE_ENABLE_BF16)
#include "arm_compute/core/TensorInfo.h"
#include "src/core/NEON/wrapper/wrapper.h"
@@ -142,4 +142,4 @@ void neon_bfloat16_to_fp32_cast(const ITensor *_src, ITensor *_dst, const Thread
} // namespace cpu
} // namespace arm_compute
-#endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */
+#endif /* defined(ARM_COMPUTE_ENABLE_BF16) */
diff --git a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp
index 5694a3d9ee..558ff41a5c 100644
--- a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp
+++ b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp
@@ -156,8 +156,8 @@ public:
const std::vector<int32_t> &multipliers);
// Inherited methods overridden:
- void run(ITensorPack &tensors) override;
- void prepare(ITensorPack &tensors) override;
+ void run(ITensorPack &tensors) override;
+ void prepare(ITensorPack &tensors) override;
bool is_configured() const override;
experimental::MemoryRequirements workspace() const override;
bool isVarWeightsKernel() const override
@@ -210,12 +210,12 @@ private:
/** Indirect buffer */
std::unique_ptr<const TypeInput *const *, free_delete> _indirect_arg{};
std::unique_ptr<const TypeInput *, free_delete> _indirect_buf{};
- std::vector<TypeInput> _indirect_pad{};
- arm_gemm::ConvolutionParameters _cp{};
- experimental::MemoryRequirements _aux_mem{ Count };
- bool _B_pretranspose_required{ false };
- bool _is_b_constant{ true };
- bool _is_c_constant{ true };
+ std::vector<TypeInput> _indirect_pad{};
+ arm_gemm::ConvolutionParameters _cp{};
+ experimental::MemoryRequirements _aux_mem{ Count };
+ bool _B_pretranspose_required{ false };
+ bool _is_b_constant{ true };
+ bool _is_c_constant{ true };
};
template <typename TypeInput, typename TypeOutput, class OutputStage>
@@ -712,14 +712,14 @@ Status CpuGemmAssemblyDispatch::has_opt_impl(arm_gemm::WeightFormat &expected_we
}
break;
#endif /* __aarch64__ */
-#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)
+#if defined(ARM_COMPUTE_ENABLE_BF16)
case DataType::BFLOAT16:
{
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(arm_gemm::has_opt_gemm<bfloat16, float, arm_gemm::Nothing>(args, {})),
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(arm_gemm::has_opt_gemm<bfloat16, float, arm_gemm::Nothing>(expected_weight_format, args, {})),
"We could not find an optimized kernel for BFLOAT16 input and F32 output");
break;
}
-#endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */
+#endif /* defined(ARM_COMPUTE_ENABLE_BF16) */
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(arm_gemm::has_opt_gemm<float16_t, float16_t, arm_gemm::Nothing>(expected_weight_format, args, {})),
@@ -821,11 +821,11 @@ void CpuGemmAssemblyDispatch::configure(const ITensorInfo *a, const ITensorInfo
}
break;
#endif /* __aarch64__ */
-#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)
+#if defined(ARM_COMPUTE_ENABLE_BF16)
case DataType::BFLOAT16:
create_arm_gemm<bfloat16, float>(_arm_gemm, a, b, c, d, act, info);
break;
-#endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */
+#endif /* defined(ARM_COMPUTE_ENABLE_BF16) */
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
create_arm_gemm<float16_t, float16_t>(_arm_gemm, a, b, c, d, act, info);
diff --git a/support/Bfloat16.h b/support/Bfloat16.h
index 173f2d16e2..5fd45cf209 100644
--- a/support/Bfloat16.h
+++ b/support/Bfloat16.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -40,7 +40,7 @@ namespace
inline uint16_t float_to_bf16(const float v)
{
const uint32_t *fromptr = reinterpret_cast<const uint32_t *>(&v);
-#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)
+#if defined(ARM_COMPUTE_ENABLE_BF16)
uint16_t res;
__asm __volatile(
@@ -50,7 +50,7 @@ inline uint16_t float_to_bf16(const float v)
:
: [fromptr] "r"(fromptr), [toptr] "r"(&res)
: "v0", "memory");
-#else /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */
+#else /* defined(ARM_COMPUTE_ENABLE_BF16) */
uint16_t res = (*fromptr >> 16);
const uint16_t error = (*fromptr & 0x0000ffff);
uint16_t bf_l = res & 0x0001;
@@ -58,7 +58,7 @@ inline uint16_t float_to_bf16(const float v)
{
res += 1;
}
-#endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */
+#endif /* defined(ARM_COMPUTE_ENABLE_BF16) */
return res;
}
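
The fallback branch above converts float32 to bfloat16 in software; the rounding condition itself is elided by the hunk context, but the visible locals (error, bf_l) match standard round-to-nearest-even. A self-contained sketch under that assumption:

    import struct

    def float_to_bf16(v):
        """float32 -> bfloat16 bits, round-to-nearest-even (sketch).

        Keep the top 16 bits; round up when the discarded half exceeds
        0x8000, or equals 0x8000 with an odd retained LSB (the tie-break
        assumed from the 'bf_l' local above)."""
        bits, = struct.unpack('<I', struct.pack('<f', v))
        res   = bits >> 16
        error = bits & 0x0000FFFF
        bf_l  = res & 0x0001
        if error > 0x8000 or (error == 0x8000 and bf_l == 1):
            res += 1
        return res & 0xFFFF

    assert float_to_bf16(1.0) == 0x3F80  # 1.0f keeps its top half exactly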
diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp
index 3b385d4724..940983f42b 100644
--- a/tests/validation/NEON/ConvolutionLayer.cpp
+++ b/tests/validation/NEON/ConvolutionLayer.cpp
@@ -820,22 +820,27 @@ TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
}
TEST_SUITE(Float)
-#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)
+#if defined(ARM_COMPUTE_ENABLE_BF16)
TEST_SUITE(BFLOAT16)
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::BFLOAT16)), framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+ framework::dataset::make("ReshapeWeights", { true })),
+ framework::dataset::make("DataType", DataType::BFLOAT16)),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
ActivationFunctionsDataset))
{
// Validate output
validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
}
TEST_SUITE_END() // BFLOAT16
-#endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */
+#endif /* defined(ARM_COMPUTE_ENABLE_BF16) */
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW })), ActivationFunctionsDataset))
+ framework::dataset::make("ReshapeWeights", { true })),
+ framework::dataset::make("DataType", DataType::F16)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW })),
+ ActivationFunctionsDataset))
{
// Validate output
validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16);
@@ -845,7 +850,9 @@ TEST_SUITE_END() // FP16
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ framework::dataset::make("ReshapeWeights", { true })),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
ActivationFunctionsDataset))
{
// Validate output
@@ -887,8 +894,11 @@ const auto QuantizedActivationFunctionsDataset = framework::dataset::make("Activ
TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), QuantizedActivationFunctionsDataset))
+ framework::dataset::make("ReshapeWeights", { true })),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+ QuantizedActivationFunctionsDataset))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
@@ -914,8 +924,11 @@ TEST_SUITE_END() // QASYMM8
TEST_SUITE(QASYMM8_SIGNED)
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })), QuantizedActivationFunctionsDataset))
+ framework::dataset::make("ReshapeWeights", { true })),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })),
+ QuantizedActivationFunctionsDataset))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
@@ -1069,7 +1082,10 @@ TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
TEST_SUITE(Float)
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NHWC })), ActivationFunctionsDataset))
+ framework::dataset::make("ReshapeWeights", { true })),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+ ActivationFunctionsDataset))
{
// Validate output
validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
@@ -1093,8 +1109,11 @@ const auto QuantizedActivationFunctionsDataset = framework::dataset::make("Activ
TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("DataLayout", { DataLayout::NHWC })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), QuantizedActivationFunctionsDataset))
+ framework::dataset::make("ReshapeWeights", { true })),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+ QuantizedActivationFunctionsDataset))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
@@ -1103,8 +1122,11 @@ TEST_SUITE_END() // QASYMM8
TEST_SUITE(QASYMM8_SIGNED)
FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("DataLayout", { DataLayout::NHWC })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })), QuantizedActivationFunctionsDataset))
+ framework::dataset::make("ReshapeWeights", { true })),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })),
+ QuantizedActivationFunctionsDataset))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
diff --git a/tests/validation/NEON/DepthConvertLayer.cpp b/tests/validation/NEON/DepthConvertLayer.cpp
index 5649e5a556..378652c24f 100644
--- a/tests/validation/NEON/DepthConvertLayer.cpp
+++ b/tests/validation/NEON/DepthConvertLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -342,7 +342,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS32Fixture<int16_t>, frame
}
TEST_SUITE_END() // S16_to_S32
-#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)
+#if defined(ARM_COMPUTE_ENABLE_BF16)
TEST_SUITE(BFLOAT16_to_F32)
FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToF32Fixture<bfloat16>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerBF16toF32Dataset),
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
@@ -362,7 +362,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToBF16Fixture<float>, framew
validate(Accessor(_target), _reference);
}
TEST_SUITE_END() // F32_to_BFLOAT16
-#endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */
+#endif /* defined(ARM_COMPUTE_ENABLE_BF16) */
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(F16_to_QASYMM8)
diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h
index a41b3cc9ae..23e73f6a9e 100644
--- a/utils/TypePrinter.h
+++ b/utils/TypePrinter.h
@@ -472,6 +472,16 @@ inline ::std::ostream &operator<<(::std::ostream &os, const BoundingBoxTransform
return os;
}
+#if defined(ARM_COMPUTE_ENABLE_BF16)
+inline ::std::ostream &operator<<(::std::ostream &os, const bfloat16& v)
+{
+ std::stringstream str;
+ str << v;
+ os << str.str();
+ return os;
+}
+#endif /* defined(ARM_COMPUTE_ENABLE_BF16) */
+
/** Formatted output of the BoundingBoxTransformInfo type.
*
* @param[in] bbox_info Type to output.