From 74921eee924625426429044decefe3673561b174 Mon Sep 17 00:00:00 2001 From: Michael Tyler Date: Wed, 12 Apr 2023 17:43:17 +0100 Subject: Update CPU kernel implementations and guard directives Resolves COMPMID-6023 Change-Id: I868975d14c4f98af6716726feda22405a6a4c891 Signed-off-by: Michael Tyler Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9686 Tested-by: Arm Jenkins Reviewed-by: Viet-Hoa Do Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- .../kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp') diff --git a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp index 4f0de6b61c..04fb532937 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Arm Limited. + * Copyright (c) 2021-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,7 +22,7 @@ * SOFTWARE. */ -#include "src/core/NEON/kernels/arm_gemm/utils.hpp" +#include "utils.hpp" #include @@ -33,8 +33,8 @@ namespace arm_conv { namespace depthwise { -void a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst_indirect_impl(const __fp16 *const *const, __fp16 *const *const, const void *, unsigned int, const __fp16, const __fp16); -void a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst_direct_impl(const unsigned int, const unsigned int, const __fp16 *, int64_t, int64_t, __fp16 *, int64_t, int64_t, const void *, unsigned int, const __fp16, const __fp16); +void a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst_indirect_impl(const __fp16 *const *const input_ptrs, __fp16 *const *const outptrs, const void *params, unsigned int n_channels, const __fp16 activation_min, const __fp16 activation_max); +void a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst_direct_impl(const unsigned int n_tile_rows, const unsigned int n_tile_cols, const __fp16 *inptr, int64_t ld_input_row, int64_t ld_input_col, __fp16 *outptr, int64_t ld_output_row, int64_t ld_output_col, const void *params, unsigned int n_channels, const __fp16 activation_min, const __fp16 activation_max); class a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst : public DepthwiseDepthfirstStrategy<__fp16, __fp16, __fp16, __fp16> { @@ -57,7 +57,7 @@ class a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst : public DepthwiseDepthfirst constexpr static unsigned int output_cols = 4; a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst(const CPUInfo *) - : DepthwiseDepthfirstStrategy<__fp16, __fp16, __fp16, __fp16>(4, 3, 1) {} + : Parent(output_rows, output_cols, kernel_rows, kernel_cols, stride_rows, stride_cols) {} arm_gemm::VLType get_vl_type(void) const override { return vl_type; } -- cgit v1.2.1