diff options
author | Pablo Tello <pablo.tello@arm.com> | 2018-03-20 16:46:55 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:49:16 +0000 |
commit | 99ef8407cd5b27fdec6f8dfaf8b55f820b6dea71 (patch) | |
tree | 7d7448ebc71d20c15611076375eb0cbb22f83f5a /src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp | |
parent | 2d9de0a3fa6ad858e70040124f362799a962bb6a (diff) | |
download | ComputeLibrary-99ef8407cd5b27fdec6f8dfaf8b55f820b6dea71.tar.gz |
COMPMID-881: Updated arm_gemm to the lastest
Change-Id: Iba2664f33320e79bd15ca9c1399e65e4cc165be6
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/125265
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp')
-rw-r--r-- | src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp | 30 |
1 files changed, 22 insertions, 8 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp b/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp index b9729d4c5c..484892dc81 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp @@ -21,7 +21,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifdef __ARM_FEATURE_FP16_SCALAR_ARITHMETIC + +// This can only be built if the target/compiler supports FP16 arguments. +#ifdef __ARM_FP16_ARGS #include "arm_gemm.hpp" @@ -40,26 +42,38 @@ UniqueGemmCommon<__fp16, __fp16> gemm(const CPUInfo &ci, const unsigned int M, c const int maxthreads, const bool pretransposed_hint) { #ifdef __aarch64__ - /* If FP16 is supported, use it */ - if(ci.has_fp16()) + + // Only consider the native FP16 kernel if it will get built. +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) || defined(FP16_KERNELS) +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + // If the compiler is configured to enable this feature always, then assume it is available at runtime too. + const bool use_fp16 = true; +#else + // Otherwise, detect at runtime via CPUInfo. + const bool use_fp16 = ci.has_fp16(); +#endif + + // If FP16 is supported, use it. + if(use_fp16) { return UniqueGemmCommon<__fp16, __fp16>(new GemmInterleaved<hgemm_24x8, __fp16, __fp16>(&ci, M, N, K, trA, trB, alpha, beta, maxthreads, pretransposed_hint)); } +#endif - /* Fallback to using the blocked SGEMM kernel. */ + // Fallback to using the blocked SGEMM kernel. return UniqueGemmCommon<__fp16, __fp16>(new GemmInterleaved<sgemm_12x8, __fp16, __fp16>(&ci, M, N, K, trA, trB, alpha, beta, maxthreads, pretransposed_hint)); #else - /* For AArch32, only support the SGEMM route. */ + // For AArch32, only support the SGEMM route for now. return UniqueGemmCommon<__fp16, __fp16>(new GemmInterleaved<sgemm_8x6, __fp16, __fp16>(&ci, M, N, K, trA, trB, alpha, beta, maxthreads, pretransposed_hint)); #endif } -// Instantiate static class members -#ifdef __aarch64__ +// Instantiate static class members if necessary. +#if defined(__aarch64__) && (defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) || defined(FP16_KERNELS)) const int hgemm_24x8::out_width; const int hgemm_24x8::out_height; #endif } // namespace arm_gemm -#endif // __ARM_FEATURE_FP16_SCALAR_ARITHMETIC +#endif // __ARM_FP16_ARGS |