diff options
author | Pablo Marquez Tello <pablo.tello@arm.com> | 2022-07-19 12:19:46 +0100 |
---|---|---|
committer | Pablo Marquez Tello <pablo.tello@arm.com> | 2022-07-25 15:56:00 +0000 |
commit | d208f4f1c2da9e407f86022959c32e8ab9a4aa6e (patch) | |
tree | f1dc538edb68fa6e266ad8890582f179b961bfae /src/cpu/operators/internal | |
parent | 962531886eee29527bf027107124a27ce94047eb (diff) | |
download | ComputeLibrary-d208f4f1c2da9e407f86022959c32e8ab9a4aa6e.tar.gz |
Enable march=armv8.6-a in non multi-isa builds
* scons arch=armv8.6-a translates to -march=armv8.6-a
* scons arch=armv8.6-a-sve translates to -march=armv8.6-a+sve
* scons arch=armv8.6-a-sve2 translates to -march=armv8.6-a+sve2
* Resolves COMPMID-5408
Change-Id: I0901e1de864d00109759509af7cc2b5c9ae1cd75
Signed-off-by: Pablo Marquez Tello <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7943
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/operators/internal')
-rw-r--r-- | src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp | 26 |
1 file changed, 13 insertions, 13 deletions
diff --git a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp index 5694a3d9ee..558ff41a5c 100644 --- a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp +++ b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp @@ -156,8 +156,8 @@ public: const std::vector<int32_t> &multipliers); // Inherited methods overridden: - void run(ITensorPack &tensors) override; - void prepare(ITensorPack &tensors) override; + void run(ITensorPack &tensors) override; + void prepare(ITensorPack &tensors) override; bool is_configured() const override; experimental::MemoryRequirements workspace() const override; bool isVarWeightsKernel() const override @@ -210,12 +210,12 @@ private: /** Indirect buffer */ std::unique_ptr<const TypeInput *const *, free_delete> _indirect_arg{}; std::unique_ptr<const TypeInput *, free_delete> _indirect_buf{}; - std::vector<TypeInput> _indirect_pad{}; - arm_gemm::ConvolutionParameters _cp{}; - experimental::MemoryRequirements _aux_mem{ Count }; - bool _B_pretranspose_required{ false }; - bool _is_b_constant{ true }; - bool _is_c_constant{ true }; + std::vector<TypeInput> _indirect_pad{}; + arm_gemm::ConvolutionParameters _cp{}; + experimental::MemoryRequirements _aux_mem{ Count }; + bool _B_pretranspose_required{ false }; + bool _is_b_constant{ true }; + bool _is_c_constant{ true }; }; template <typename TypeInput, typename TypeOutput, class OutputStage> @@ -712,14 +712,14 @@ Status CpuGemmAssemblyDispatch::has_opt_impl(arm_gemm::WeightFormat &expected_we } break; #endif /* __aarch64__ */ -#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) +#if defined(ARM_COMPUTE_ENABLE_BF16) case DataType::BFLOAT16: { - ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(arm_gemm::has_opt_gemm<bfloat16, float, arm_gemm::Nothing>(args, {})), + ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(arm_gemm::has_opt_gemm<bfloat16, float, arm_gemm::Nothing>(expected_weight_format, args, {})), "We could not find an optimized kernel for BFLOAT16 input and F32 output"); break; } -#endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */ +#endif /* defined(ARM_COMPUTE_ENABLE_BF16) */ #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(arm_gemm::has_opt_gemm<float16_t, float16_t, arm_gemm::Nothing>(expected_weight_format, args, {})), @@ -821,11 +821,11 @@ void CpuGemmAssemblyDispatch::configure(const ITensorInfo *a, const ITensorInfo } break; #endif /* __aarch64__ */ -#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) +#if defined(ARM_COMPUTE_ENABLE_BF16) case DataType::BFLOAT16: create_arm_gemm<bfloat16, float>(_arm_gemm, a, b, c, d, act, info); break; -#endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */ +#endif /* defined(ARM_COMPUTE_ENABLE_BF16) */ #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: create_arm_gemm<float16_t, float16_t>(_arm_gemm, a, b, c, d, act, info); |