From 74921eee924625426429044decefe3673561b174 Mon Sep 17 00:00:00 2001 From: Michael Tyler Date: Wed, 12 Apr 2023 17:43:17 +0100 Subject: Update CPU kernel implementations and guard directives Resolves COMPMID-6023 Change-Id: I868975d14c4f98af6716726feda22405a6a4c891 Signed-off-by: Michael Tyler Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9686 Tested-by: Arm Jenkins Reviewed-by: Viet-Hoa Do Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- .../sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp') diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp index f5d4189a47..abdfac5a3f 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Arm Limited. + * Copyright (c) 2021-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,19 +22,19 @@ * SOFTWARE. */ -#include "src/core/NEON/kernels/arm_gemm/utils.hpp" +#include "utils.hpp" #include #pragma once -#if defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) +#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) namespace arm_conv { namespace depthwise { -void sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst_indirect_impl(const __fp16 *const *const, __fp16 *const *const, const void *, unsigned int, const __fp16, const __fp16); -void sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst_direct_impl(const unsigned int, const unsigned int, const __fp16 *, int64_t, int64_t, __fp16 *, int64_t, int64_t, const void *, unsigned int, const __fp16, const __fp16); +void sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst_indirect_impl(const __fp16 *const *const input_ptrs, __fp16 *const *const outptrs, const void *params, unsigned int n_channels, const __fp16 activation_min, const __fp16 activation_max); +void sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst_direct_impl(const unsigned int n_tile_rows, const unsigned int n_tile_cols, const __fp16 *inptr, int64_t ld_input_row, int64_t ld_input_col, __fp16 *outptr, int64_t ld_output_row, int64_t ld_output_col, const void *params, unsigned int n_channels, const __fp16 activation_min, const __fp16 activation_max); class sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst : public DepthwiseDepthfirstStrategy<__fp16, __fp16, __fp16, __fp16> { @@ -57,7 +57,7 @@ class sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst : public DepthwiseDepthfirst constexpr static unsigned int output_cols = 2; sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst(const CPUInfo *) - : DepthwiseDepthfirstStrategy<__fp16, __fp16, __fp16, __fp16>(2, 5, 1) {} + : Parent(output_rows, output_cols, kernel_rows, kernel_cols, stride_rows, stride_cols) {} arm_gemm::VLType get_vl_type(void) const override { return vl_type; } @@ -68,4 +68,4 @@ class sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst : public DepthwiseDepthfirst } // namespace depthwise } // namespace arm_conv -#endif // defined(__aarch64__) && defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) +#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(__ARM_FP16_ARGS) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -- cgit v1.2.1