diff options
Diffstat (limited to 'Android.bp')
-rw-r--r-- | Android.bp | 1275 |
1 files changed, 895 insertions, 380 deletions
diff --git a/Android.bp b/Android.bp index a4ca5c79e8..ab554a8ca2 100644 --- a/Android.bp +++ b/Android.bp @@ -1,8 +1,136 @@ // -// Copyright © 2020 Arm Ltd. All rights reserved. +// Copyright © 2020-2024 Arm Ltd. All rights reserved. // SPDX-License-Identifier: MIT // +// OpenCL sources are NOT required by ArmNN or its Android NNAPI driver and are used for CI purposes only. +opencl_srcs = [ + "src/core/CL/cl_kernels/activation_float_helpers.h", + "src/core/CL/cl_kernels/activation_quant_helpers.h", + "src/core/CL/cl_kernels/common/activation_layer.cl", + "src/core/CL/cl_kernels/common/activation_layer_quant.cl", + "src/core/CL/cl_kernels/common/arg_min_max.cl", + "src/core/CL/cl_kernels/common/batchnormalization_layer.cl", + "src/core/CL/cl_kernels/common/bitwise_op.cl", + "src/core/CL/cl_kernels/common/bounding_box_transform.cl", + "src/core/CL/cl_kernels/common/bounding_box_transform_quantized.cl", + "src/core/CL/cl_kernels/common/cast.cl", + "src/core/CL/cl_kernels/common/col2im.cl", + "src/core/CL/cl_kernels/common/comparisons.cl", + "src/core/CL/cl_kernels/common/concatenate.cl", + "src/core/CL/cl_kernels/common/convert_fc_weights.cl", + "src/core/CL/cl_kernels/common/convolution_layer.cl", + "src/core/CL/cl_kernels/common/copy_tensor.cl", + "src/core/CL/cl_kernels/common/crop_tensor.cl", + "src/core/CL/cl_kernels/common/deconvolution_layer.cl", + "src/core/CL/cl_kernels/common/dequantization_layer.cl", + "src/core/CL/cl_kernels/common/elementwise_operation.cl", + "src/core/CL/cl_kernels/common/elementwise_operation_quantized.cl", + "src/core/CL/cl_kernels/common/elementwise_unary.cl", + "src/core/CL/cl_kernels/common/elementwise_unary_quantized.cl", + "src/core/CL/cl_kernels/common/fft.cl", + "src/core/CL/cl_kernels/common/fft_digit_reverse.cl", + "src/core/CL/cl_kernels/common/fft_scale.cl", + "src/core/CL/cl_kernels/common/fill_border.cl", + "src/core/CL/cl_kernels/common/floor.cl", + "src/core/CL/cl_kernels/common/gather.cl", + "src/core/CL/cl_kernels/common/gemm.cl", + "src/core/CL/cl_kernels/common/gemm_reshaped_only_rhs_mmul.cl", + "src/core/CL/cl_kernels/common/gemm_utils.cl", + "src/core/CL/cl_kernels/common/gemmlowp.cl", + "src/core/CL/cl_kernels/common/gemmlowp_reshaped_only_rhs_mmul.cl", + "src/core/CL/cl_kernels/common/gemv.cl", + "src/core/CL/cl_kernels/common/generate_proposals.cl", + "src/core/CL/cl_kernels/common/generate_proposals_quantized.cl", + "src/core/CL/cl_kernels/common/instance_normalization.cl", + "src/core/CL/cl_kernels/common/l2_normalize.cl", + "src/core/CL/cl_kernels/common/mat_mul.cl", + "src/core/CL/cl_kernels/common/mat_mul_mmul.cl", + "src/core/CL/cl_kernels/common/mat_mul_quantized.cl", + "src/core/CL/cl_kernels/common/mat_mul_quantized_mmul.cl", + "src/core/CL/cl_kernels/common/mean_stddev_normalization.cl", + "src/core/CL/cl_kernels/common/memset.cl", + "src/core/CL/cl_kernels/common/minmax_layer.cl", + "src/core/CL/cl_kernels/common/nonmax.cl", + "src/core/CL/cl_kernels/common/pad_layer.cl", + "src/core/CL/cl_kernels/common/permute.cl", + "src/core/CL/cl_kernels/common/pixelwise_mul_float.cl", + "src/core/CL/cl_kernels/common/pixelwise_mul_int.cl", + "src/core/CL/cl_kernels/common/qlstm_layer_normalization.cl", + "src/core/CL/cl_kernels/common/quantization_layer.cl", + "src/core/CL/cl_kernels/common/range.cl", + "src/core/CL/cl_kernels/common/reduction_operation.cl", + "src/core/CL/cl_kernels/common/reshape_layer.cl", + "src/core/CL/cl_kernels/common/reverse.cl", + "src/core/CL/cl_kernels/common/roi_align_layer.cl", + "src/core/CL/cl_kernels/common/roi_align_layer_quantized.cl", + "src/core/CL/cl_kernels/common/roi_pooling_layer.cl", + "src/core/CL/cl_kernels/common/scatter.cl", + "src/core/CL/cl_kernels/common/select.cl", + "src/core/CL/cl_kernels/common/slice_ops.cl", + "src/core/CL/cl_kernels/common/softmax_layer.cl", + "src/core/CL/cl_kernels/common/stack_layer.cl", + "src/core/CL/cl_kernels/common/tile.cl", + "src/core/CL/cl_kernels/common/transpose.cl", + "src/core/CL/cl_kernels/common/unpooling_layer.cl", + "src/core/CL/cl_kernels/gemm_helpers.h", + "src/core/CL/cl_kernels/helpers.h", + "src/core/CL/cl_kernels/helpers_asymm.h", + "src/core/CL/cl_kernels/load_store_utility.h", + "src/core/CL/cl_kernels/nchw/batch_to_space.cl", + "src/core/CL/cl_kernels/nchw/batchnormalization_layer.cl", + "src/core/CL/cl_kernels/nchw/channel_shuffle.cl", + "src/core/CL/cl_kernels/nchw/depth_to_space.cl", + "src/core/CL/cl_kernels/nchw/dequantization_layer.cl", + "src/core/CL/cl_kernels/nchw/direct_convolution.cl", + "src/core/CL/cl_kernels/nchw/im2col.cl", + "src/core/CL/cl_kernels/nchw/normalization_layer.cl", + "src/core/CL/cl_kernels/nchw/normalize_planar_yuv_layer.cl", + "src/core/CL/cl_kernels/nchw/normalize_planar_yuv_layer_quantized.cl", + "src/core/CL/cl_kernels/nchw/pooling_layer.cl", + "src/core/CL/cl_kernels/nchw/prior_box_layer.cl", + "src/core/CL/cl_kernels/nchw/reorg_layer.cl", + "src/core/CL/cl_kernels/nchw/scale.cl", + "src/core/CL/cl_kernels/nchw/space_to_batch.cl", + "src/core/CL/cl_kernels/nchw/space_to_depth.cl", + "src/core/CL/cl_kernels/nchw/upsample_layer.cl", + "src/core/CL/cl_kernels/nchw/winograd_filter_transform.cl", + "src/core/CL/cl_kernels/nchw/winograd_input_transform.cl", + "src/core/CL/cl_kernels/nchw/winograd_output_transform.cl", + "src/core/CL/cl_kernels/nhwc/batch_to_space.cl", + "src/core/CL/cl_kernels/nhwc/batchnormalization_layer.cl", + "src/core/CL/cl_kernels/nhwc/channel_shuffle.cl", + "src/core/CL/cl_kernels/nhwc/depth_to_space.cl", + "src/core/CL/cl_kernels/nhwc/dequantization_layer.cl", + "src/core/CL/cl_kernels/nhwc/direct_convolution.cl", + "src/core/CL/cl_kernels/nhwc/direct_convolution3d.cl", + "src/core/CL/cl_kernels/nhwc/dwc_native_fp_nhwc.cl", + "src/core/CL/cl_kernels/nhwc/dwc_native_quantized_nhwc.cl", + "src/core/CL/cl_kernels/nhwc/im2col.cl", + "src/core/CL/cl_kernels/nhwc/indirect_convolution.cl", + "src/core/CL/cl_kernels/nhwc/normalization_layer.cl", + "src/core/CL/cl_kernels/nhwc/normalize_planar_yuv_layer.cl", + "src/core/CL/cl_kernels/nhwc/normalize_planar_yuv_layer_quantized.cl", + "src/core/CL/cl_kernels/nhwc/pooling_3d_layer.cl", + "src/core/CL/cl_kernels/nhwc/pooling_3d_layer_quantized.cl", + "src/core/CL/cl_kernels/nhwc/pooling_layer.cl", + "src/core/CL/cl_kernels/nhwc/pooling_layer_quantized.cl", + "src/core/CL/cl_kernels/nhwc/reorg_layer.cl", + "src/core/CL/cl_kernels/nhwc/scale.cl", + "src/core/CL/cl_kernels/nhwc/space_to_batch.cl", + "src/core/CL/cl_kernels/nhwc/space_to_depth.cl", + "src/core/CL/cl_kernels/nhwc/transposed_convolution.cl", + "src/core/CL/cl_kernels/nhwc/upsample_layer.cl", + "src/core/CL/cl_kernels/nhwc/winograd_filter_transform.cl", + "src/core/CL/cl_kernels/nhwc/winograd_input_transform.cl", + "src/core/CL/cl_kernels/nhwc/winograd_output_transform.cl", + "src/core/CL/cl_kernels/repeat.h", + "src/core/CL/cl_kernels/tile_helpers.h", + "src/core/CL/cl_kernels/types.h", + "src/core/CL/cl_kernels/warp_helpers.h", + +] + bootstrap_go_package { name: "arm_compute_library_nn_driver", pkgPath: "arm_compute_library_nn_driver", @@ -29,10 +157,12 @@ arm_compute_library_defaults { "-DEMBEDDED_KERNELS", "-DARM_COMPUTE_ASSERTS_ENABLED", "-DARM_COMPUTE_CPP_SCHEDULER", + "-DENABLE_NEON", + "-DARM_COMPUTE_ENABLE_NEON", "-Wno-unused-parameter", "-DNO_DOT_IN_TOOLCHAIN", - "-no-integrated-as", - "-Wno-implicit-fallthrough" + "-Wno-implicit-fallthrough", + "-fPIC" ], rtti: true, } @@ -43,201 +173,101 @@ cc_library_static { proprietary: true, local_include_dirs: ["build/android-arm64v8a/src/core", "build/android-arm64v8a/src/core/CL", + "compute_kernel_writer/include", "src/core/common", "src/core/helpers", + "src/core/NEON/kernels/arm_gemm", "src/core/NEON/kernels/assembly", "src/core/NEON/kernels/convolution/common", - "src/core/NEON/kernels/convolution/depthwise", - "src/core/NEON/kernels/convolution/winograd"], + "src/core/NEON/kernels/convolution/winograd", + "src/cpu/kernels/assembly"], export_include_dirs: [".", "./include"], srcs: [ + "src/c/AclContext.cpp", + "src/c/AclOperator.cpp", + "src/c/AclQueue.cpp", + "src/c/AclTensor.cpp", + "src/c/AclTensorPack.cpp", + "src/c/AclVersion.cpp", + "src/c/cl/AclOpenClExt.cpp", + "src/c/operators/AclActivation.cpp", + "src/common/AllocatorWrapper.cpp", + "src/common/IOperator.cpp", + "src/common/ITensorV2.cpp", + "src/common/TensorPack.cpp", + "src/common/cpuinfo/CpuInfo.cpp", + "src/common/cpuinfo/CpuIsaInfo.cpp", + "src/common/cpuinfo/CpuModel.cpp", + "src/common/utils/LegacySupport.cpp", "src/core/AccessWindowAutoPadding.cpp", "src/core/AccessWindowStatic.cpp", "src/core/AccessWindowTranspose.cpp", + "src/core/CL/CLCommandBuffer.cpp", + "src/core/CL/CLCompatCommandBuffer.cpp", "src/core/CL/CLCompileContext.cpp", - "src/core/CL/CLCoreRuntimeContext.cpp", "src/core/CL/CLHelpers.cpp", "src/core/CL/CLKernelLibrary.cpp", + "src/core/CL/CLMutableCommandBuffer.cpp", "src/core/CL/CLUtils.cpp", - "src/core/CL/ICLDistribution1D.cpp", - "src/core/CL/ICLHOG.cpp", + "src/core/CL/DefaultLWSHeuristics.cpp", "src/core/CL/ICLKernel.cpp", - "src/core/CL/ICLLut.cpp", - "src/core/CL/ICLMultiHOG.cpp", - "src/core/CL/ICLMultiImage.cpp", "src/core/CL/ICLSimple2DKernel.cpp", "src/core/CL/ICLSimple3DKernel.cpp", "src/core/CL/ICLSimpleKernel.cpp", "src/core/CL/ICLTensor.cpp", "src/core/CL/OpenCL.cpp", - "src/core/CL/gemm/CLGEMMHelpers.cpp", - "src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationBifrost.cpp", - "src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationMidgard.cpp", - "src/core/CL/gemm/native/CLGEMMNativeKernelConfigurationValhall.cpp", - "src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationBifrost.cpp", - "src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfigurationValhall.cpp", - "src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp", - "src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationValhall.cpp", - "src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp", - "src/core/CL/kernels/CLAccumulateKernel.cpp", - "src/core/CL/kernels/CLActivationLayerKernel.cpp", "src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp", - "src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp", "src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp", "src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp", - "src/core/CL/kernels/CLBitwiseAndKernel.cpp", - "src/core/CL/kernels/CLBitwiseNotKernel.cpp", - "src/core/CL/kernels/CLBitwiseOrKernel.cpp", - "src/core/CL/kernels/CLBitwiseXorKernel.cpp", + "src/core/CL/kernels/CLBitwiseKernel.cpp", "src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp", - "src/core/CL/kernels/CLBox3x3Kernel.cpp", - "src/core/CL/kernels/CLCannyEdgeKernel.cpp", - "src/core/CL/kernels/CLChannelCombineKernel.cpp", - "src/core/CL/kernels/CLChannelExtractKernel.cpp", "src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp", - "src/core/CL/kernels/CLCol2ImKernel.cpp", - "src/core/CL/kernels/CLColorConvertKernel.cpp", "src/core/CL/kernels/CLComparisonKernel.cpp", - "src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp", - "src/core/CL/kernels/CLConvolutionKernel.cpp", - "src/core/CL/kernels/CLCopyKernel.cpp", - "src/core/CL/kernels/CLCropKernel.cpp", "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp", "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp", - "src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp", - "src/core/CL/kernels/CLDepthConvertLayerKernel.cpp", "src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp", - "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp", - "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp", "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp", - "src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.cpp", - "src/core/CL/kernels/CLDequantizationLayerKernel.cpp", - "src/core/CL/kernels/CLDerivativeKernel.cpp", - "src/core/CL/kernels/CLDilateKernel.cpp", - "src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp", - "src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp", - "src/core/CL/kernels/CLElementwiseOperationKernel.cpp", - "src/core/CL/kernels/CLErodeKernel.cpp", "src/core/CL/kernels/CLFFTDigitReverseKernel.cpp", "src/core/CL/kernels/CLFFTRadixStageKernel.cpp", "src/core/CL/kernels/CLFFTScaleKernel.cpp", - "src/core/CL/kernels/CLFastCornersKernel.cpp", "src/core/CL/kernels/CLFillBorderKernel.cpp", - "src/core/CL/kernels/CLFlattenLayerKernel.cpp", - "src/core/CL/kernels/CLFloorKernel.cpp", "src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp", - "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.cpp", - "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.cpp", - "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp", - "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp", - "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.cpp", - "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp", - "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.cpp", - "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.cpp", - "src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp", - "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp", - "src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.cpp", - "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.cpp", - "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.cpp", - "src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp", - "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.cpp", - "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp", "src/core/CL/kernels/CLGatherKernel.cpp", - "src/core/CL/kernels/CLGaussian3x3Kernel.cpp", - "src/core/CL/kernels/CLGaussian5x5Kernel.cpp", - "src/core/CL/kernels/CLGaussianPyramidKernel.cpp", "src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp", - "src/core/CL/kernels/CLHOGDescriptorKernel.cpp", - "src/core/CL/kernels/CLHOGDetectorKernel.cpp", - "src/core/CL/kernels/CLHarrisCornersKernel.cpp", - "src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp", - "src/core/CL/kernels/CLHistogramKernel.cpp", - "src/core/CL/kernels/CLIm2ColKernel.cpp", "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp", - "src/core/CL/kernels/CLIntegralImageKernel.cpp", "src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp", - "src/core/CL/kernels/CLLKTrackerKernel.cpp", - "src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp", - "src/core/CL/kernels/CLMagnitudePhaseKernel.cpp", "src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp", - "src/core/CL/kernels/CLMeanStdDevKernel.cpp", "src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp", - "src/core/CL/kernels/CLMedian3x3Kernel.cpp", - "src/core/CL/kernels/CLMemsetKernel.cpp", - "src/core/CL/kernels/CLMinMaxLayerKernel.cpp", - "src/core/CL/kernels/CLMinMaxLocationKernel.cpp", - "src/core/CL/kernels/CLNonLinearFilterKernel.cpp", - "src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp", "src/core/CL/kernels/CLNormalizationLayerKernel.cpp", "src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp", "src/core/CL/kernels/CLPadLayerKernel.cpp", - "src/core/CL/kernels/CLPermuteKernel.cpp", - "src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp", - "src/core/CL/kernels/CLPoolingLayerKernel.cpp", "src/core/CL/kernels/CLPriorBoxLayerKernel.cpp", "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp", - "src/core/CL/kernels/CLQuantizationLayerKernel.cpp", "src/core/CL/kernels/CLROIAlignLayerKernel.cpp", "src/core/CL/kernels/CLROIPoolingLayerKernel.cpp", "src/core/CL/kernels/CLRangeKernel.cpp", "src/core/CL/kernels/CLReductionOperationKernel.cpp", - "src/core/CL/kernels/CLRemapKernel.cpp", "src/core/CL/kernels/CLReorgLayerKernel.cpp", - "src/core/CL/kernels/CLReshapeLayerKernel.cpp", "src/core/CL/kernels/CLReverseKernel.cpp", - "src/core/CL/kernels/CLScaleKernel.cpp", - "src/core/CL/kernels/CLScharr3x3Kernel.cpp", "src/core/CL/kernels/CLSelectKernel.cpp", - "src/core/CL/kernels/CLSobel3x3Kernel.cpp", - "src/core/CL/kernels/CLSobel5x5Kernel.cpp", - "src/core/CL/kernels/CLSobel7x7Kernel.cpp", - "src/core/CL/kernels/CLSoftmaxLayerKernel.cpp", "src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp", "src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp", "src/core/CL/kernels/CLStackLayerKernel.cpp", "src/core/CL/kernels/CLStridedSliceKernel.cpp", - "src/core/CL/kernels/CLTableLookupKernel.cpp", - "src/core/CL/kernels/CLThresholdKernel.cpp", "src/core/CL/kernels/CLTileKernel.cpp", - "src/core/CL/kernels/CLTransposeKernel.cpp", - "src/core/CL/kernels/CLUpsampleLayerKernel.cpp", - "src/core/CL/kernels/CLWarpAffineKernel.cpp", - "src/core/CL/kernels/CLWarpPerspectiveKernel.cpp", - "src/core/CL/kernels/CLWeightsReshapeKernel.cpp", - "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp", - "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp", - "src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp", - "src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp", - "src/core/CL/kernels/CLWinogradInputTransformKernel.cpp", - "src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp", - "src/core/CL/kernels/CLYOLOLayerKernel.cpp", "src/core/CPP/CPPTypes.cpp", - "src/core/CPP/ICPPSimpleKernel.cpp", "src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp", - "src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp", - "src/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.cpp", "src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp", "src/core/CPP/kernels/CPPPermuteKernel.cpp", - "src/core/CPP/kernels/CPPSortEuclideanDistanceKernel.cpp", "src/core/CPP/kernels/CPPTopKVKernel.cpp", "src/core/CPP/kernels/CPPUpsampleKernel.cpp", "src/core/Error.cpp", "src/core/GPUTarget.cpp", - "src/core/HOGInfo.cpp", "src/core/Helpers.cpp", "src/core/IAccessWindow.cpp", - "src/core/IDistribution.cpp", - "src/core/IDistribution1D.cpp", "src/core/IKernel.cpp", "src/core/ITensor.cpp", "src/core/ITensorPack.cpp", - "src/core/MultiImageInfo.cpp", - "src/core/NEON/kernels/NEAbsoluteDifferenceKernel.cpp", - "src/core/NEON/kernels/NEAccumulateKernel.cpp", - "src/core/NEON/kernels/NEActivationLayerKernel.cpp", - "src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp", - "src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp", - "src/core/NEON/kernels/NEBatchConcatenateLayerKernel.cpp", "src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp", "src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp", "src/core/NEON/kernels/NEBitwiseAndKernel.cpp", @@ -245,186 +275,129 @@ cc_library_static { "src/core/NEON/kernels/NEBitwiseOrKernel.cpp", "src/core/NEON/kernels/NEBitwiseXorKernel.cpp", "src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp", - "src/core/NEON/kernels/NEBox3x3Kernel.cpp", - "src/core/NEON/kernels/NECannyEdgeKernel.cpp", - "src/core/NEON/kernels/NEChannelCombineKernel.cpp", - "src/core/NEON/kernels/NEChannelExtractKernel.cpp", "src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp", - "src/core/NEON/kernels/NECol2ImKernel.cpp", - "src/core/NEON/kernels/NEColorConvertKernel.cpp", - "src/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.cpp", - "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.cpp", - "src/core/NEON/kernels/NEConvolutionKernel.cpp", - "src/core/NEON/kernels/NECopyKernel.cpp", "src/core/NEON/kernels/NECropKernel.cpp", - "src/core/NEON/kernels/NECumulativeDistributionKernel.cpp", - "src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp", - "src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp", "src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp", - "src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp", - "src/core/NEON/kernels/NEDequantizationLayerKernel.cpp", - "src/core/NEON/kernels/NEDerivativeKernel.cpp", - "src/core/NEON/kernels/NEDilateKernel.cpp", - "src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp", - "src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp", - "src/core/NEON/kernels/NEElementwiseOperationKernel.cpp", - "src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp", - "src/core/NEON/kernels/NEErodeKernel.cpp", "src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp", "src/core/NEON/kernels/NEFFTRadixStageKernel.cpp", "src/core/NEON/kernels/NEFFTScaleKernel.cpp", - "src/core/NEON/kernels/NEFastCornersKernel.cpp", - "src/core/NEON/kernels/NEFillArrayKernel.cpp", "src/core/NEON/kernels/NEFillBorderKernel.cpp", - "src/core/NEON/kernels/NEFlattenLayerKernel.cpp", - "src/core/NEON/kernels/NEFloorKernel.cpp", "src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp", - "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp", - "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.cpp", - "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp", - "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.cpp", - "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.cpp", - "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp", - "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp", - "src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp", - "src/core/NEON/kernels/NEGEMMLowpReductionKernel.cpp", - "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp", - "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp", - "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.cpp", "src/core/NEON/kernels/NEGatherKernel.cpp", - "src/core/NEON/kernels/NEGaussian3x3Kernel.cpp", - "src/core/NEON/kernels/NEGaussian5x5Kernel.cpp", - "src/core/NEON/kernels/NEGaussianPyramidKernel.cpp", "src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp", - "src/core/NEON/kernels/NEHOGDescriptorKernel.cpp", - "src/core/NEON/kernels/NEHOGDetectorKernel.cpp", - "src/core/NEON/kernels/NEHarrisCornersKernel.cpp", - "src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp", - "src/core/NEON/kernels/NEHistogramKernel.cpp", - "src/core/NEON/kernels/NEIm2ColKernel.cpp", "src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp", - "src/core/NEON/kernels/NEIntegralImageKernel.cpp", "src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp", - "src/core/NEON/kernels/NELKTrackerKernel.cpp", - "src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp", "src/core/NEON/kernels/NELogicalKernel.cpp", - "src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp", - "src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp", - "src/core/NEON/kernels/NEMeanStdDevKernel.cpp", "src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp", - "src/core/NEON/kernels/NEMedian3x3Kernel.cpp", - "src/core/NEON/kernels/NEMemsetKernel.cpp", - "src/core/NEON/kernels/NEMinMaxLayerKernel.cpp", - "src/core/NEON/kernels/NEMinMaxLocationKernel.cpp", - "src/core/NEON/kernels/NENonLinearFilterKernel.cpp", - "src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp", "src/core/NEON/kernels/NENormalizationLayerKernel.cpp", "src/core/NEON/kernels/NEPadLayerKernel.cpp", - "src/core/NEON/kernels/NEPermuteKernel.cpp", - "src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp", - "src/core/NEON/kernels/NEPoolingLayerKernel.cpp", "src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp", "src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp", - "src/core/NEON/kernels/NEQuantizationLayerKernel.cpp", "src/core/NEON/kernels/NEROIAlignLayerKernel.cpp", "src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp", "src/core/NEON/kernels/NERangeKernel.cpp", "src/core/NEON/kernels/NEReductionOperationKernel.cpp", - "src/core/NEON/kernels/NERemapKernel.cpp", + "src/core/NEON/kernels/NEReorderKernel.cpp", "src/core/NEON/kernels/NEReorgLayerKernel.cpp", - "src/core/NEON/kernels/NEReshapeLayerKernel.cpp", "src/core/NEON/kernels/NEReverseKernel.cpp", - "src/core/NEON/kernels/NEScaleKernel.cpp", - "src/core/NEON/kernels/NEScharr3x3Kernel.cpp", "src/core/NEON/kernels/NESelectKernel.cpp", - "src/core/NEON/kernels/NESobel3x3Kernel.cpp", - "src/core/NEON/kernels/NESobel5x5Kernel.cpp", - "src/core/NEON/kernels/NESobel7x7Kernel.cpp", - "src/core/NEON/kernels/NESoftmaxLayerKernel.cpp", "src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp", "src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp", "src/core/NEON/kernels/NEStackLayerKernel.cpp", "src/core/NEON/kernels/NEStridedSliceKernel.cpp", - "src/core/NEON/kernels/NETableLookupKernel.cpp", - "src/core/NEON/kernels/NEThresholdKernel.cpp", "src/core/NEON/kernels/NETileKernel.cpp", - "src/core/NEON/kernels/NETransposeKernel.cpp", - "src/core/NEON/kernels/NEUpsampleLayerKernel.cpp", - "src/core/NEON/kernels/NEWarpKernel.cpp", - "src/core/NEON/kernels/NEWeightsReshapeKernel.cpp", - "src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp", - "src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.cpp", - "src/core/NEON/kernels/NEYOLOLayerKernel.cpp", - "src/core/NEON/kernels/activation/impl/fp16_neon_activation.cpp", - "src/core/NEON/kernels/activation/impl/fp32_neon_activation.cpp", - "src/core/NEON/kernels/activation/impl/qasymm8_neon_activation.cpp", - "src/core/NEON/kernels/activation/impl/qasymm8_signed_neon_activation.cpp", - "src/core/NEON/kernels/activation/impl/qsymm16_neon_activation.cpp", + "src/core/NEON/kernels/arm_conv/addressing.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/depthwise_common.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp16.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/depthwise_strategies_common.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8q.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8s8u8q.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/interleaves/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/interleaves/generic_quantized_dot_product.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/premultiply.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/cpp_nhwc_1x1_stride_any_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/pooling_fp16.cpp", + "src/core/NEON/kernels/arm_conv/pooling/pooling_fp32.cpp", + "src/core/NEON/kernels/arm_conv/pooling/pooling_s8.cpp", + "src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp", + "src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp", + "src/core/NEON/kernels/arm_conv/pooling/pooling_u8q.cpp", "src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp", + "src/core/NEON/kernels/arm_gemm/gemm_bf16bf16.cpp", "src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp", "src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp", "src/core/NEON/kernels/arm_gemm/gemm_int16.cpp", "src/core/NEON/kernels/arm_gemm/gemm_int8.cpp", "src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp", "src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp", + "src/core/NEON/kernels/arm_gemm/gemm_s8fp32.cpp", "src/core/NEON/kernels/arm_gemm/gemm_uint16.cpp", "src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp", + "src/core/NEON/kernels/arm_gemm/interleave-8way.cpp", + "src/core/NEON/kernels/arm_gemm/interleave_indirect-sve.cpp", "src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp", + "src/core/NEON/kernels/arm_gemm/mergeresults-fp16.cpp", + "src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp", "src/core/NEON/kernels/arm_gemm/mergeresults.cpp", + "src/core/NEON/kernels/arm_gemm/misc-sve.cpp", "src/core/NEON/kernels/arm_gemm/misc.cpp", "src/core/NEON/kernels/arm_gemm/quantized.cpp", "src/core/NEON/kernels/arm_gemm/rowsum_indirect_s8.cpp", "src/core/NEON/kernels/arm_gemm/rowsum_indirect_u8.cpp", + "src/core/NEON/kernels/arm_gemm/transform-sve.cpp", + "src/core/NEON/kernels/arm_gemm/transform.cpp", + "src/core/NEON/kernels/batchnormalization/impl/NEON/fp16.cpp", + "src/core/NEON/kernels/batchnormalization/impl/NEON/fp32.cpp", "src/core/NEON/kernels/convolution/common/padding.cpp", "src/core/NEON/kernels/convolution/common/qasymm8.cpp", "src/core/NEON/kernels/convolution/common/qsymm8.cpp", "src/core/NEON/kernels/convolution/common/utils.cpp", - "src/core/NEON/kernels/convolution/depthwise/depthwise_2x2_3x3_1x1_fp32_fp32.cpp", - "src/core/NEON/kernels/convolution/depthwise/depthwise_2x2_3x3_2x2_fp32_fp32.cpp", - "src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_1x1_fp32_fp32.cpp", - "src/core/NEON/kernels/convolution/depthwise/depthwise_3x3_3x3_2x2_fp32_fp32.cpp", - "src/core/NEON/kernels/convolution/depthwise/depthwise_4x4_3x3_1x1_fp32_fp32.cpp", - "src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.cpp", - "src/core/NEON/kernels/convolution/depthwise/depthwise_dilated_qa8_qa8.cpp", - "src/core/NEON/kernels/convolution/depthwise/depthwise_fp16.cpp", - "src/core/NEON/kernels/convolution/depthwise/depthwise_fp32.cpp", - "src/core/NEON/kernels/convolution/depthwise/depthwise_pack_parameters.cpp", - "src/core/NEON/kernels/convolution/depthwise/depthwise_qa8_qa8.cpp", - "src/core/NEON/kernels/convolution/depthwise/depthwise_qs8_qs8.cpp", - "src/core/NEON/kernels/convolution/winograd/padding.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_1x8_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_4x4_fp16_fp16_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_4x4_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp16_fp16_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2_7_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2x2_3x3_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2x2_5x5_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4_5_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4x4_3x3_fp16_fp16_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4x4_3x3_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_6_3_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2_7_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2x2_3x3_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2x2_5x5_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4_5_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4x4_3x3_fp16_fp16_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4x4_3x3_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_6_3_fp32_fp32_integers.cpp", - "src/core/NEON/kernels/floor/impl/fp16_neon_floor.cpp", - "src/core/NEON/kernels/floor/impl/fp32_neon_floor.cpp", - "src/core/PyramidInfo.cpp", + "src/core/NEON/kernels/convolution/winograd/input_transforms/arm_fp32_1x8.cpp", + "src/core/NEON/kernels/convolution/winograd/input_transforms/arm_fp32_4x4.cpp", + "src/core/NEON/kernels/convolution/winograd/input_transforms/arm_fp32_6x6.cpp", + "src/core/NEON/kernels/convolution/winograd/input_transforms_fp16.cpp", + "src/core/NEON/kernels/convolution/winograd/input_transforms_fp32.cpp", + "src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_1x2_1x7.cpp", + "src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_1x4_1x5.cpp", + "src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_1x6_1x3.cpp", + "src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_2x2_3x3.cpp", + "src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_2x2_5x5.cpp", + "src/core/NEON/kernels/convolution/winograd/output_transforms/arm_fp32_4x4_3x3.cpp", + "src/core/NEON/kernels/convolution/winograd/output_transforms_fp16.cpp", + "src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp", + "src/core/NEON/kernels/convolution/winograd/weight_transforms/arm_fp32_2x2_3x3.cpp", + "src/core/NEON/kernels/convolution/winograd/weight_transforms/arm_fp32_2x2_5x5.cpp", + "src/core/NEON/kernels/convolution/winograd/weight_transforms/arm_fp32_4x4_3x3.cpp", + "src/core/NEON/kernels/convolution/winograd/weight_transforms/cpp_fp32_1x2_1x7.cpp", + "src/core/NEON/kernels/convolution/winograd/weight_transforms/cpp_fp32_1x4_1x5.cpp", + "src/core/NEON/kernels/convolution/winograd/weight_transforms/cpp_fp32_1x6_1x3.cpp", + "src/core/NEON/kernels/convolution/winograd/weight_transforms_fp16.cpp", + "src/core/NEON/kernels/convolution/winograd/weight_transforms_fp32.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_fp16.cpp", + "src/core/NEON/kernels/convolution/winograd/winograd_fp32.cpp", "src/core/Rounding.cpp", "src/core/Size2D.cpp", + "src/core/Size3D.cpp", "src/core/SubTensorInfo.cpp", "src/core/TensorInfo.cpp", "src/core/Utils.cpp", "src/core/Validate.cpp", "src/core/Version.cpp", + "src/core/helpers/LUTManager.cpp", "src/core/helpers/SoftmaxHelpers.cpp", + "src/core/helpers/Utils.cpp", "src/core/helpers/WindowHelpers.cpp", + "src/core/utils/ActivationFunctionUtils.cpp", + "src/core/utils/AssemblyUtils.cpp", + "src/core/utils/DataLayoutUtils.cpp", + "src/core/utils/DataTypeUtils.cpp", + "src/core/utils/FormatUtils.cpp", + "src/core/utils/InterpolationPolicyUtils.cpp", + "src/core/utils/Math.cpp", "src/core/utils/ScaleUtils.cpp", + "src/core/utils/StringUtils.cpp", "src/core/utils/helpers/fft.cpp", "src/core/utils/helpers/tensor_transform.cpp", "src/core/utils/io/FileHandler.cpp", @@ -434,21 +407,375 @@ cc_library_static { "src/core/utils/logging/LoggerRegistry.cpp", "src/core/utils/misc/MMappedFile.cpp", "src/core/utils/quantization/AsymmHelpers.cpp", + "src/cpu/CpuContext.cpp", + "src/cpu/CpuQueue.cpp", + "src/cpu/CpuTensor.cpp", + "src/cpu/kernels/CpuActivationKernel.cpp", + "src/cpu/kernels/CpuAddKernel.cpp", + "src/cpu/kernels/CpuAddMulAddKernel.cpp", + "src/cpu/kernels/CpuCastKernel.cpp", + "src/cpu/kernels/CpuCol2ImKernel.cpp", + "src/cpu/kernels/CpuConcatenateBatchKernel.cpp", + "src/cpu/kernels/CpuConcatenateDepthKernel.cpp", + "src/cpu/kernels/CpuConcatenateHeightKernel.cpp", + "src/cpu/kernels/CpuConcatenateWidthKernel.cpp", + "src/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.cpp", + "src/cpu/kernels/CpuConvertQuantizedSignednessKernel.cpp", + "src/cpu/kernels/CpuCopyKernel.cpp", + "src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp", + "src/cpu/kernels/CpuDequantizeKernel.cpp", + "src/cpu/kernels/CpuDirectConv2dKernel.cpp", + "src/cpu/kernels/CpuDirectConv2dOutputStageKernel.cpp", + "src/cpu/kernels/CpuDirectConv3dKernel.cpp", + "src/cpu/kernels/CpuElementwiseKernel.cpp", + "src/cpu/kernels/CpuElementwiseUnaryKernel.cpp", + "src/cpu/kernels/CpuFillKernel.cpp", + "src/cpu/kernels/CpuFloorKernel.cpp", + "src/cpu/kernels/CpuGemmInterleave4x4Kernel.cpp", + "src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.cpp", + "src/cpu/kernels/CpuGemmLowpMatrixReductionKernel.cpp", + "src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.cpp", + "src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.cpp", + "src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.cpp", + "src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp", + "src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp", + "src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp", + "src/cpu/kernels/CpuGemmMatrixAdditionKernel.cpp", + "src/cpu/kernels/CpuGemmMatrixMultiplyKernel.cpp", + "src/cpu/kernels/CpuGemmTranspose1xWKernel.cpp", + "src/cpu/kernels/CpuIm2ColKernel.cpp", + "src/cpu/kernels/CpuMaxUnpoolingLayerKernel.cpp", + "src/cpu/kernels/CpuMulKernel.cpp", + "src/cpu/kernels/CpuPermuteKernel.cpp", + "src/cpu/kernels/CpuPool2dKernel.cpp", + "src/cpu/kernels/CpuPool3dKernel.cpp", + "src/cpu/kernels/CpuQuantizeKernel.cpp", + "src/cpu/kernels/CpuReshapeKernel.cpp", + "src/cpu/kernels/CpuScaleKernel.cpp", + "src/cpu/kernels/CpuSoftmaxKernel.cpp", + "src/cpu/kernels/CpuSubKernel.cpp", + "src/cpu/kernels/CpuTransposeKernel.cpp", + "src/cpu/kernels/CpuWeightsReshapeKernel.cpp", + "src/cpu/kernels/CpuWinogradConv2dKernel.cpp", + "src/cpu/kernels/activation/generic/neon/fp16.cpp", + "src/cpu/kernels/activation/generic/neon/fp32.cpp", + "src/cpu/kernels/activation/generic/neon/lut.cpp", + "src/cpu/kernels/activation/generic/neon/qasymm8.cpp", + "src/cpu/kernels/activation/generic/neon/qasymm8_signed.cpp", + "src/cpu/kernels/activation/generic/neon/qsymm16.cpp", + "src/cpu/kernels/add/generic/neon/fp16.cpp", + "src/cpu/kernels/add/generic/neon/fp32.cpp", + "src/cpu/kernels/add/generic/neon/impl.cpp", + "src/cpu/kernels/add/generic/neon/integer.cpp", + "src/cpu/kernels/add/generic/neon/qasymm8.cpp", + "src/cpu/kernels/add/generic/neon/qasymm8_signed.cpp", + "src/cpu/kernels/add/generic/neon/qsymm16.cpp", + "src/cpu/kernels/addmuladd/generic/neon/fp16.cpp", + "src/cpu/kernels/addmuladd/generic/neon/fp32.cpp", + "src/cpu/kernels/addmuladd/generic/neon/qasymm8.cpp", + "src/cpu/kernels/addmuladd/generic/neon/qasymm8_signed.cpp", + "src/cpu/kernels/boundingboxtransform/generic/neon/fp16.cpp", + "src/cpu/kernels/boundingboxtransform/generic/neon/fp32.cpp", + "src/cpu/kernels/boundingboxtransform/generic/neon/impl.cpp", + "src/cpu/kernels/boundingboxtransform/generic/neon/qsymm16.cpp", + "src/cpu/kernels/cast/generic/neon/fp16.cpp", + "src/cpu/kernels/crop/generic/neon/fp16.cpp", + "src/cpu/kernels/crop/generic/neon/fp32.cpp", + "src/cpu/kernels/crop/generic/neon/integer.cpp", + "src/cpu/kernels/depth_to_space/nchw/any/impl.cpp", + "src/cpu/kernels/depth_to_space/nhwc/any/impl.cpp", + "src/cpu/kernels/depthwiseconv2d/generic/neon/fp16.cpp", + "src/cpu/kernels/depthwiseconv2d/generic/neon/fp32.cpp", + "src/cpu/kernels/depthwiseconv2d/generic/neon/impl.cpp", + "src/cpu/kernels/depthwiseconv2d/generic/neon/qasymm8.cpp", + "src/cpu/kernels/depthwiseconv2d/generic/neon/qasymm8_signed.cpp", + "src/cpu/kernels/directconv2d/nchw/all.cpp", + "src/cpu/kernels/directconv2d/nchw/fp16.cpp", + "src/cpu/kernels/directconv2d/nhwc/neon/fp16.cpp", + "src/cpu/kernels/directconv2d/nhwc/neon/fp32.cpp", + "src/cpu/kernels/directconv2d/nhwc/neon/impl.cpp", + "src/cpu/kernels/directconv2d/nhwc/neon/qasymm8.cpp", + "src/cpu/kernels/elementwise_binary/generic/neon/fp16.cpp", + "src/cpu/kernels/elementwise_binary/generic/neon/fp32.cpp", + "src/cpu/kernels/elementwise_binary/generic/neon/integer.cpp", + "src/cpu/kernels/elementwise_binary/generic/neon/qasymm8.cpp", + "src/cpu/kernels/elementwise_binary/generic/neon/qasymm8_signed.cpp", + "src/cpu/kernels/elementwise_unary/generic/neon/fp16.cpp", + "src/cpu/kernels/elementwise_unary/generic/neon/fp32.cpp", + "src/cpu/kernels/elementwise_unary/generic/neon/integer.cpp", + "src/cpu/kernels/elementwise_unary/generic/neon/q8.cpp", + "src/cpu/kernels/elementwise_unary/generic/neon/qasymm8.cpp", + "src/cpu/kernels/elementwise_unary/generic/neon/qasymm8_signed.cpp", + "src/cpu/kernels/floor/neon/fp16.cpp", + "src/cpu/kernels/floor/neon/fp32.cpp", + "src/cpu/kernels/fuse_batch_normalization/generic/fp16.cpp", + "src/cpu/kernels/fuse_batch_normalization/generic/fp32.cpp", + "src/cpu/kernels/fuse_batch_normalization/nchw/all.cpp", + "src/cpu/kernels/fuse_batch_normalization/nchw/neon/fp16.cpp", + "src/cpu/kernels/fuse_batch_normalization/nchw/neon/fp32.cpp", + "src/cpu/kernels/fuse_batch_normalization/nhwc/neon/fp16.cpp", + "src/cpu/kernels/fuse_batch_normalization/nhwc/neon/fp32.cpp", + "src/cpu/kernels/gemm_matrix_add/generic/neon/fp16.cpp", + "src/cpu/kernels/gemm_matrix_add/generic/neon/fp32.cpp", + "src/cpu/kernels/gemm_matrix_add/generic/neon/impl.cpp", + "src/cpu/kernels/gemm_matrix_mul/generic/neon/fp16.cpp", + "src/cpu/kernels/gemm_matrix_mul/generic/neon/fp32.cpp", + "src/cpu/kernels/gemm_matrix_mul/generic/neon/impl.cpp", + "src/cpu/kernels/genproposals/generic/neon/fp16.cpp", + "src/cpu/kernels/genproposals/generic/neon/fp32.cpp", + "src/cpu/kernels/genproposals/generic/neon/impl.cpp", + "src/cpu/kernels/genproposals/generic/neon/qsymm16.cpp", + "src/cpu/kernels/instancenorm/generic/neon/fp16.cpp", + "src/cpu/kernels/instancenorm/generic/neon/fp32.cpp", + "src/cpu/kernels/instancenorm/generic/neon/impl.cpp", + "src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.cpp", + "src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp", + "src/cpu/kernels/l2normlayer/generic/neon/fp16.cpp", + "src/cpu/kernels/l2normlayer/generic/neon/fp32.cpp", + "src/cpu/kernels/lut/generic/neon/u8.cpp", + "src/cpu/kernels/maxunpool/generic/neon/fp16.cpp", + "src/cpu/kernels/maxunpool/generic/neon/fp32.cpp", + "src/cpu/kernels/maxunpool/generic/neon/qasymm8.cpp", + "src/cpu/kernels/maxunpool/generic/neon/qasymm8_signed.cpp", + "src/cpu/kernels/meanstddevnorm/generic/neon/fp16.cpp", + "src/cpu/kernels/meanstddevnorm/generic/neon/fp32.cpp", + "src/cpu/kernels/meanstddevnorm/generic/neon/impl.cpp", + "src/cpu/kernels/meanstddevnorm/generic/neon/qasymm8.cpp", + "src/cpu/kernels/mul/generic/neon/fp16.cpp", + "src/cpu/kernels/mul/generic/neon/fp32.cpp", + "src/cpu/kernels/norm_layer/generic/neon/fp16.cpp", + "src/cpu/kernels/norm_layer/generic/neon/fp32.cpp", + "src/cpu/kernels/pool2d/neon/fp16.cpp", + "src/cpu/kernels/pool2d/neon/fp32.cpp", + "src/cpu/kernels/pool2d/neon/nchw/all.cpp", + "src/cpu/kernels/pool2d/neon/qasymm8.cpp", + "src/cpu/kernels/pool2d/neon/qasymm8_signed.cpp", + "src/cpu/kernels/pool3d/neon/fp16.cpp", + "src/cpu/kernels/pool3d/neon/fp32.cpp", + "src/cpu/kernels/pool3d/neon/qasymm8.cpp", + "src/cpu/kernels/pool3d/neon/qasymm8_signed.cpp", + "src/cpu/kernels/range/generic/neon/fp16.cpp", + "src/cpu/kernels/range/generic/neon/fp32.cpp", + "src/cpu/kernels/range/generic/neon/integer.cpp", + "src/cpu/kernels/roialign/generic/neon/fp16.cpp", + "src/cpu/kernels/roialign/generic/neon/fp32.cpp", + "src/cpu/kernels/roialign/generic/neon/qasymm8.cpp", + "src/cpu/kernels/roialign/generic/neon/qasymm8_signed.cpp", + "src/cpu/kernels/scale/neon/fp16.cpp", + "src/cpu/kernels/scale/neon/integer.cpp", + "src/cpu/kernels/scale/neon/qasymm8.cpp", + "src/cpu/kernels/scale/neon/qasymm8_signed.cpp", + "src/cpu/kernels/select/generic/neon/fp16.cpp", + "src/cpu/kernels/select/generic/neon/fp32.cpp", + "src/cpu/kernels/select/generic/neon/integer.cpp", + "src/cpu/kernels/softmax/generic/neon/fp16.cpp", + "src/cpu/kernels/softmax/generic/neon/fp32.cpp", + "src/cpu/kernels/softmax/generic/neon/impl.cpp", + "src/cpu/kernels/softmax/generic/neon/qasymm8.cpp", + "src/cpu/kernels/softmax/generic/neon/qasymm8_signed.cpp", + "src/cpu/kernels/sub/neon/fp16.cpp", + "src/cpu/kernels/sub/neon/qasymm8.cpp", + "src/cpu/kernels/sub/neon/qasymm8_signed.cpp", + "src/cpu/kernels/sub/neon/qsymm16.cpp", + "src/cpu/operators/CpuActivation.cpp", + "src/cpu/operators/CpuAdd.cpp", + "src/cpu/operators/CpuAddMulAdd.cpp", + "src/cpu/operators/CpuCast.cpp", + "src/cpu/operators/CpuConcatenate.cpp", + "src/cpu/operators/CpuConv2d.cpp", + "src/cpu/operators/CpuConvertFullyConnectedWeights.cpp", + "src/cpu/operators/CpuCopy.cpp", + "src/cpu/operators/CpuDepthwiseConv2d.cpp", + "src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp", + "src/cpu/operators/CpuDequantize.cpp", + "src/cpu/operators/CpuDirectConv2d.cpp", + "src/cpu/operators/CpuDirectConv3d.cpp", + "src/cpu/operators/CpuElementwise.cpp", + "src/cpu/operators/CpuElementwiseUnary.cpp", + "src/cpu/operators/CpuFill.cpp", + "src/cpu/operators/CpuFlatten.cpp", + "src/cpu/operators/CpuFloor.cpp", + "src/cpu/operators/CpuFullyConnected.cpp", + "src/cpu/operators/CpuGemm.cpp", + "src/cpu/operators/CpuGemmConv2d.cpp", + "src/cpu/operators/CpuGemmDirectConv2d.cpp", + "src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp", + "src/cpu/operators/CpuGemmLowpOutputStage.cpp", + "src/cpu/operators/CpuMatMul.cpp", + "src/cpu/operators/CpuMaxUnpooling.cpp", + "src/cpu/operators/CpuMul.cpp", + "src/cpu/operators/CpuPermute.cpp", + "src/cpu/operators/CpuPool2d.cpp", + "src/cpu/operators/CpuPool3d.cpp", + "src/cpu/operators/CpuQuantize.cpp", + "src/cpu/operators/CpuReshape.cpp", + "src/cpu/operators/CpuScale.cpp", + "src/cpu/operators/CpuSoftmax.cpp", + "src/cpu/operators/CpuSub.cpp", + "src/cpu/operators/CpuTranspose.cpp", + "src/cpu/operators/CpuWinogradConv2d.cpp", + "src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp", + "src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp", + "src/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.cpp", + "src/dynamic_fusion/sketch/attributes/CastAttributes.cpp", + "src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp", + "src/dynamic_fusion/sketch/attributes/Conv2dAttributes.cpp", + "src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp", + "src/dynamic_fusion/sketch/attributes/MatMulAttributes.cpp", + "src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp", + "src/dynamic_fusion/sketch/attributes/ReshapeAttributes.cpp", + "src/dynamic_fusion/sketch/attributes/ResizeAttributes.cpp", + "src/dynamic_fusion/sketch/attributes/SoftmaxAttributes.cpp", + "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp", + "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp", + "src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp", + "src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp", + "src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp", + "src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp", + "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp", + "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp", + "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp", + "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp", + "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp", + "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp", + "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp", + "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp", + "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp", + "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp", + "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp", + "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp", + "src/gpu/cl/ClContext.cpp", + "src/gpu/cl/ClKernelLibrary.cpp", + "src/gpu/cl/ClQueue.cpp", + "src/gpu/cl/ClTensor.cpp", + "src/gpu/cl/kernels/ClActivationKernel.cpp", + "src/gpu/cl/kernels/ClBatchConcatenateKernel.cpp", + "src/gpu/cl/kernels/ClCastKernel.cpp", + "src/gpu/cl/kernels/ClCol2ImKernel.cpp", + "src/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.cpp", + "src/gpu/cl/kernels/ClCopyKernel.cpp", + "src/gpu/cl/kernels/ClCropKernel.cpp", + "src/gpu/cl/kernels/ClDepthConcatenateKernel.cpp", + "src/gpu/cl/kernels/ClDequantizeKernel.cpp", + "src/gpu/cl/kernels/ClDirectConv2dKernel.cpp", + "src/gpu/cl/kernels/ClDirectConv3dKernel.cpp", + "src/gpu/cl/kernels/ClElementwiseKernel.cpp", + "src/gpu/cl/kernels/ClElementwiseUnaryKernel.cpp", + "src/gpu/cl/kernels/ClFillKernel.cpp", + "src/gpu/cl/kernels/ClFloorKernel.cpp", + "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyNativeKernel.cpp", + "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedKernel.cpp", + "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel.cpp", + "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedOnlyRhsMMULKernel.cpp", + "src/gpu/cl/kernels/ClGemmLowpOffsetContributionKernel.cpp", + "src/gpu/cl/kernels/ClGemmLowpOffsetContributionOutputStageKernel.cpp", + "src/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp", + "src/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleByFloatKernel.cpp", + "src/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleKernel.cpp", + "src/gpu/cl/kernels/ClGemmLowpReductionKernel.cpp", + "src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp", + "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp", + "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp", + "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel.cpp", + "src/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.cpp", + "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.cpp", + "src/gpu/cl/kernels/ClHeightConcatenateKernel.cpp", + "src/gpu/cl/kernels/ClIm2ColKernel.cpp", + "src/gpu/cl/kernels/ClIndirectConv2dAddressPrecalculationKernel.cpp", + "src/gpu/cl/kernels/ClIndirectConv2dKernel.cpp", + "src/gpu/cl/kernels/ClMatMulLowpNativeKernel.cpp", + "src/gpu/cl/kernels/ClMatMulLowpNativeMMULKernel.cpp", + "src/gpu/cl/kernels/ClMatMulNativeKernel.cpp", + "src/gpu/cl/kernels/ClMatMulNativeMMULKernel.cpp", + "src/gpu/cl/kernels/ClMulKernel.cpp", + "src/gpu/cl/kernels/ClPermuteKernel.cpp", + "src/gpu/cl/kernels/ClPool2dKernel.cpp", + "src/gpu/cl/kernels/ClPool3dKernel.cpp", + "src/gpu/cl/kernels/ClQuantizeKernel.cpp", + "src/gpu/cl/kernels/ClReshapeKernel.cpp", + "src/gpu/cl/kernels/ClScaleKernel.cpp", + "src/gpu/cl/kernels/ClScatterKernel.cpp", + "src/gpu/cl/kernels/ClSoftmaxKernel.cpp", + "src/gpu/cl/kernels/ClTransposeKernel.cpp", + "src/gpu/cl/kernels/ClTransposedConvolutionKernel.cpp", + "src/gpu/cl/kernels/ClWeightsReshapeKernel.cpp", + "src/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp", + "src/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp", + "src/gpu/cl/kernels/ClWidthConcatenateKernel.cpp", + "src/gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp", + "src/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp", + "src/gpu/cl/kernels/ClWinogradOutputTransformKernel.cpp", + "src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp", + "src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp", + "src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.cpp", + "src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.cpp", + "src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.cpp", + "src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.cpp", + "src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.cpp", + "src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp", + "src/gpu/cl/kernels/helpers/MatMulKernelHelpers.cpp", + "src/gpu/cl/operators/ClActivation.cpp", + "src/gpu/cl/operators/ClAdd.cpp", + "src/gpu/cl/operators/ClCast.cpp", + "src/gpu/cl/operators/ClConcatenate.cpp", + "src/gpu/cl/operators/ClConv2d.cpp", + "src/gpu/cl/operators/ClConvertFullyConnectedWeights.cpp", + "src/gpu/cl/operators/ClCopy.cpp", + "src/gpu/cl/operators/ClCrop.cpp", + "src/gpu/cl/operators/ClDequantize.cpp", + "src/gpu/cl/operators/ClDirectConv2d.cpp", + "src/gpu/cl/operators/ClDirectConv3d.cpp", + "src/gpu/cl/operators/ClElementwiseOperations.cpp", + "src/gpu/cl/operators/ClElementwiseUnary.cpp", + "src/gpu/cl/operators/ClFill.cpp", + "src/gpu/cl/operators/ClFlatten.cpp", + "src/gpu/cl/operators/ClFloor.cpp", + "src/gpu/cl/operators/ClFullyConnected.cpp", + "src/gpu/cl/operators/ClGemm.cpp", + "src/gpu/cl/operators/ClGemmConv2d.cpp", + "src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.cpp", + "src/gpu/cl/operators/ClGemmLowpOutputStage.cpp", + "src/gpu/cl/operators/ClIndirectConv2d.cpp", + "src/gpu/cl/operators/ClLogicalNot.cpp", + "src/gpu/cl/operators/ClMatMul.cpp", + "src/gpu/cl/operators/ClMul.cpp", + "src/gpu/cl/operators/ClPRelu.cpp", + "src/gpu/cl/operators/ClPermute.cpp", + "src/gpu/cl/operators/ClPool2d.cpp", + "src/gpu/cl/operators/ClPool3d.cpp", + "src/gpu/cl/operators/ClQuantize.cpp", + "src/gpu/cl/operators/ClReshape.cpp", + "src/gpu/cl/operators/ClScale.cpp", + "src/gpu/cl/operators/ClScatter.cpp", + "src/gpu/cl/operators/ClSoftmax.cpp", + "src/gpu/cl/operators/ClSub.cpp", + "src/gpu/cl/operators/ClTranspose.cpp", + "src/gpu/cl/operators/ClTransposedConvolution.cpp", + "src/gpu/cl/operators/ClWinogradConv2d.cpp", "src/runtime/Allocator.cpp", "src/runtime/BlobLifetimeManager.cpp", "src/runtime/BlobMemoryPool.cpp", "src/runtime/CL/CLBufferAllocator.cpp", - "src/runtime/CL/CLDistribution1D.cpp", - "src/runtime/CL/CLHOG.cpp", + "src/runtime/CL/CLGEMMHeuristicsHandle.cpp", "src/runtime/CL/CLHelpers.cpp", - "src/runtime/CL/CLLut.cpp", - "src/runtime/CL/CLLutAllocator.cpp", "src/runtime/CL/CLMemory.cpp", "src/runtime/CL/CLMemoryRegion.cpp", - "src/runtime/CL/CLMultiHOG.cpp", - "src/runtime/CL/CLMultiImage.cpp", "src/runtime/CL/CLOperator.cpp", - "src/runtime/CL/CLPyramid.cpp", "src/runtime/CL/CLRuntimeContext.cpp", "src/runtime/CL/CLScheduler.cpp", "src/runtime/CL/CLSubTensor.cpp", @@ -457,8 +784,6 @@ cc_library_static { "src/runtime/CL/CLTuner.cpp", "src/runtime/CL/ICLSimpleFunction.cpp", "src/runtime/CL/Utils.cpp", - "src/runtime/CL/functions/CLAbsoluteDifference.cpp", - "src/runtime/CL/functions/CLAccumulate.cpp", "src/runtime/CL/functions/CLActivationLayer.cpp", "src/runtime/CL/functions/CLArgMinMaxLayer.cpp", "src/runtime/CL/functions/CLBatchNormalizationLayer.cpp", @@ -468,20 +793,15 @@ cc_library_static { "src/runtime/CL/functions/CLBitwiseOr.cpp", "src/runtime/CL/functions/CLBitwiseXor.cpp", "src/runtime/CL/functions/CLBoundingBoxTransform.cpp", - "src/runtime/CL/functions/CLBox3x3.cpp", - "src/runtime/CL/functions/CLCannyEdge.cpp", "src/runtime/CL/functions/CLCast.cpp", - "src/runtime/CL/functions/CLChannelCombine.cpp", - "src/runtime/CL/functions/CLChannelExtract.cpp", "src/runtime/CL/functions/CLChannelShuffleLayer.cpp", - "src/runtime/CL/functions/CLColorConvert.cpp", "src/runtime/CL/functions/CLComparison.cpp", - "src/runtime/CL/functions/CLComputeAllAnchors.cpp", "src/runtime/CL/functions/CLConcatenateLayer.cpp", + "src/runtime/CL/functions/CLConv3D.cpp", "src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp", - "src/runtime/CL/functions/CLConvolution.cpp", "src/runtime/CL/functions/CLConvolutionLayer.cpp", "src/runtime/CL/functions/CLCopy.cpp", + "src/runtime/CL/functions/CLCrop.cpp", "src/runtime/CL/functions/CLCropResize.cpp", "src/runtime/CL/functions/CLDeconvolutionLayer.cpp", "src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp", @@ -489,20 +809,14 @@ cc_library_static { "src/runtime/CL/functions/CLDepthToSpaceLayer.cpp", "src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp", "src/runtime/CL/functions/CLDequantizationLayer.cpp", - "src/runtime/CL/functions/CLDerivative.cpp", - "src/runtime/CL/functions/CLDilate.cpp", "src/runtime/CL/functions/CLDirectConvolutionLayer.cpp", "src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp", - "src/runtime/CL/functions/CLElementWiseUnaryLayer.cpp", "src/runtime/CL/functions/CLElementwiseOperations.cpp", - "src/runtime/CL/functions/CLEqualizeHistogram.cpp", - "src/runtime/CL/functions/CLErode.cpp", + "src/runtime/CL/functions/CLElementwiseUnaryLayer.cpp", "src/runtime/CL/functions/CLFFT1D.cpp", "src/runtime/CL/functions/CLFFT2D.cpp", "src/runtime/CL/functions/CLFFTConvolutionLayer.cpp", - "src/runtime/CL/functions/CLFastCorners.cpp", "src/runtime/CL/functions/CLFill.cpp", - "src/runtime/CL/functions/CLFillBorder.cpp", "src/runtime/CL/functions/CLFlattenLayer.cpp", "src/runtime/CL/functions/CLFloor.cpp", "src/runtime/CL/functions/CLFullyConnectedLayer.cpp", @@ -513,43 +827,25 @@ cc_library_static { "src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp", "src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp", "src/runtime/CL/functions/CLGather.cpp", - "src/runtime/CL/functions/CLGaussian3x3.cpp", - "src/runtime/CL/functions/CLGaussian5x5.cpp", - "src/runtime/CL/functions/CLGaussianPyramid.cpp", "src/runtime/CL/functions/CLGenerateProposalsLayer.cpp", - "src/runtime/CL/functions/CLHOGDescriptor.cpp", - "src/runtime/CL/functions/CLHOGDetector.cpp", - "src/runtime/CL/functions/CLHOGGradient.cpp", - "src/runtime/CL/functions/CLHOGMultiDetection.cpp", - "src/runtime/CL/functions/CLHarrisCorners.cpp", - "src/runtime/CL/functions/CLHistogram.cpp", + "src/runtime/CL/functions/CLIndirectConvolutionLayer.cpp", "src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp", - "src/runtime/CL/functions/CLIntegralImage.cpp", "src/runtime/CL/functions/CLL2NormalizeLayer.cpp", "src/runtime/CL/functions/CLLSTMLayer.cpp", "src/runtime/CL/functions/CLLSTMLayerQuantized.cpp", - "src/runtime/CL/functions/CLLaplacianPyramid.cpp", - "src/runtime/CL/functions/CLLaplacianReconstruct.cpp", - "src/runtime/CL/functions/CLLocallyConnectedLayer.cpp", "src/runtime/CL/functions/CLLogicalAnd.cpp", "src/runtime/CL/functions/CLLogicalNot.cpp", "src/runtime/CL/functions/CLLogicalOr.cpp", - "src/runtime/CL/functions/CLMagnitude.cpp", + "src/runtime/CL/functions/CLMatMul.cpp", "src/runtime/CL/functions/CLMaxUnpoolingLayer.cpp", - "src/runtime/CL/functions/CLMeanStdDev.cpp", "src/runtime/CL/functions/CLMeanStdDevNormalizationLayer.cpp", - "src/runtime/CL/functions/CLMedian3x3.cpp", - "src/runtime/CL/functions/CLMinMaxLocation.cpp", - "src/runtime/CL/functions/CLNonLinearFilter.cpp", - "src/runtime/CL/functions/CLNonMaximaSuppression3x3.cpp", "src/runtime/CL/functions/CLNormalizationLayer.cpp", "src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp", - "src/runtime/CL/functions/CLOpticalFlow.cpp", "src/runtime/CL/functions/CLPReluLayer.cpp", "src/runtime/CL/functions/CLPadLayer.cpp", "src/runtime/CL/functions/CLPermute.cpp", - "src/runtime/CL/functions/CLPhase.cpp", "src/runtime/CL/functions/CLPixelWiseMultiplication.cpp", + "src/runtime/CL/functions/CLPooling3dLayer.cpp", "src/runtime/CL/functions/CLPoolingLayer.cpp", "src/runtime/CL/functions/CLPriorBoxLayer.cpp", "src/runtime/CL/functions/CLQLSTMLayer.cpp", @@ -560,40 +856,32 @@ cc_library_static { "src/runtime/CL/functions/CLRange.cpp", "src/runtime/CL/functions/CLReduceMean.cpp", "src/runtime/CL/functions/CLReductionOperation.cpp", - "src/runtime/CL/functions/CLRemap.cpp", "src/runtime/CL/functions/CLReorgLayer.cpp", "src/runtime/CL/functions/CLReshapeLayer.cpp", "src/runtime/CL/functions/CLReverse.cpp", "src/runtime/CL/functions/CLScale.cpp", - "src/runtime/CL/functions/CLScharr3x3.cpp", + "src/runtime/CL/functions/CLScatter.cpp", "src/runtime/CL/functions/CLSelect.cpp", "src/runtime/CL/functions/CLSlice.cpp", - "src/runtime/CL/functions/CLSobel3x3.cpp", - "src/runtime/CL/functions/CLSobel5x5.cpp", - "src/runtime/CL/functions/CLSobel7x7.cpp", "src/runtime/CL/functions/CLSoftmaxLayer.cpp", "src/runtime/CL/functions/CLSpaceToBatchLayer.cpp", "src/runtime/CL/functions/CLSpaceToDepthLayer.cpp", "src/runtime/CL/functions/CLSplit.cpp", "src/runtime/CL/functions/CLStackLayer.cpp", "src/runtime/CL/functions/CLStridedSlice.cpp", - "src/runtime/CL/functions/CLTableLookup.cpp", - "src/runtime/CL/functions/CLThreshold.cpp", "src/runtime/CL/functions/CLTile.cpp", "src/runtime/CL/functions/CLTranspose.cpp", "src/runtime/CL/functions/CLUnstack.cpp", - "src/runtime/CL/functions/CLUpsampleLayer.cpp", - "src/runtime/CL/functions/CLWarpAffine.cpp", - "src/runtime/CL/functions/CLWarpPerspective.cpp", "src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp", - "src/runtime/CL/functions/CLWinogradInputTransform.cpp", - "src/runtime/CL/functions/CLYOLOLayer.cpp", - "src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp", - "src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.cpp", - "src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.cpp", - "src/runtime/CL/tuners/BifrostTuner.cpp", - "src/runtime/CL/tuners/CLLWSList.cpp", - "src/runtime/CL/tuners/MidgardTuner.cpp", + "src/runtime/CL/gemm/CLGEMMDefaultTypeBifrost.cpp", + "src/runtime/CL/gemm/CLGEMMDefaultTypeMidgard.cpp", + "src/runtime/CL/gemm/CLGEMMDefaultTypeValhall.cpp", + "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.cpp", + "src/runtime/CL/mlgo/HeuristicTree.cpp", + "src/runtime/CL/mlgo/MLGOHeuristics.cpp", + "src/runtime/CL/mlgo/MLGOParser.cpp", + "src/runtime/CL/mlgo/Utils.cpp", + "src/runtime/CL/tuners/CLTuningParametersList.cpp", "src/runtime/CPP/CPPScheduler.cpp", "src/runtime/CPP/ICPPSimpleFunction.cpp", "src/runtime/CPP/SingleThreadScheduler.cpp", @@ -604,28 +892,17 @@ cc_library_static { "src/runtime/CPP/functions/CPPPermute.cpp", "src/runtime/CPP/functions/CPPTopKV.cpp", "src/runtime/CPP/functions/CPPUpsample.cpp", - "src/runtime/CPUUtils.cpp", - "src/runtime/DeviceProperties.cpp", - "src/runtime/Distribution1D.cpp", - "src/runtime/HOG.cpp", - "src/runtime/ILutAllocator.cpp", "src/runtime/IScheduler.cpp", "src/runtime/ISimpleLifetimeManager.cpp", "src/runtime/ITensorAllocator.cpp", "src/runtime/IWeightsManager.cpp", - "src/runtime/Lut.cpp", - "src/runtime/LutAllocator.cpp", - "src/runtime/MEMUtils.cpp", "src/runtime/Memory.cpp", "src/runtime/MemoryManagerOnDemand.cpp", - "src/runtime/MultiHOG.cpp", - "src/runtime/MultiImage.cpp", "src/runtime/NEON/INEOperator.cpp", "src/runtime/NEON/INESimpleFunction.cpp", "src/runtime/NEON/INESimpleFunctionNoBorder.cpp", - "src/runtime/NEON/functions/NEAbsoluteDifference.cpp", - "src/runtime/NEON/functions/NEAccumulate.cpp", "src/runtime/NEON/functions/NEActivationLayer.cpp", + "src/runtime/NEON/functions/NEAddMulAdd.cpp", "src/runtime/NEON/functions/NEArgMinMaxLayer.cpp", "src/runtime/NEON/functions/NEArithmeticAddition.cpp", "src/runtime/NEON/functions/NEArithmeticSubtraction.cpp", @@ -636,18 +913,11 @@ cc_library_static { "src/runtime/NEON/functions/NEBitwiseOr.cpp", "src/runtime/NEON/functions/NEBitwiseXor.cpp", "src/runtime/NEON/functions/NEBoundingBoxTransform.cpp", - "src/runtime/NEON/functions/NEBox3x3.cpp", - "src/runtime/NEON/functions/NECannyEdge.cpp", "src/runtime/NEON/functions/NECast.cpp", - "src/runtime/NEON/functions/NEChannelCombine.cpp", - "src/runtime/NEON/functions/NEChannelExtract.cpp", "src/runtime/NEON/functions/NEChannelShuffleLayer.cpp", - "src/runtime/NEON/functions/NECol2Im.cpp", - "src/runtime/NEON/functions/NEColorConvert.cpp", - "src/runtime/NEON/functions/NEComputeAllAnchors.cpp", "src/runtime/NEON/functions/NEConcatenateLayer.cpp", + "src/runtime/NEON/functions/NEConv3D.cpp", "src/runtime/NEON/functions/NEConvertFullyConnectedWeights.cpp", - "src/runtime/NEON/functions/NEConvolution.cpp", "src/runtime/NEON/functions/NEConvolutionLayer.cpp", "src/runtime/NEON/functions/NECopy.cpp", "src/runtime/NEON/functions/NECropResize.cpp", @@ -656,18 +926,13 @@ cc_library_static { "src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp", "src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp", "src/runtime/NEON/functions/NEDequantizationLayer.cpp", - "src/runtime/NEON/functions/NEDerivative.cpp", "src/runtime/NEON/functions/NEDetectionPostProcessLayer.cpp", - "src/runtime/NEON/functions/NEDilate.cpp", "src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp", - "src/runtime/NEON/functions/NEElementwiseOperators.cpp", + "src/runtime/NEON/functions/NEElementwiseOperations.cpp", "src/runtime/NEON/functions/NEElementwiseUnaryLayer.cpp", - "src/runtime/NEON/functions/NEEqualizeHistogram.cpp", - "src/runtime/NEON/functions/NEErode.cpp", "src/runtime/NEON/functions/NEFFT1D.cpp", "src/runtime/NEON/functions/NEFFT2D.cpp", "src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp", - "src/runtime/NEON/functions/NEFastCorners.cpp", "src/runtime/NEON/functions/NEFill.cpp", "src/runtime/NEON/functions/NEFillBorder.cpp", "src/runtime/NEON/functions/NEFlattenLayer.cpp", @@ -675,49 +940,26 @@ cc_library_static { "src/runtime/NEON/functions/NEFullyConnectedLayer.cpp", "src/runtime/NEON/functions/NEFuseBatchNormalization.cpp", "src/runtime/NEON/functions/NEGEMM.cpp", - "src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp", "src/runtime/NEON/functions/NEGEMMConv2d.cpp", "src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp", - "src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp", "src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp", "src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp", - "src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp", "src/runtime/NEON/functions/NEGather.cpp", - "src/runtime/NEON/functions/NEGaussian3x3.cpp", - "src/runtime/NEON/functions/NEGaussian5x5.cpp", - "src/runtime/NEON/functions/NEGaussianPyramid.cpp", "src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp", - "src/runtime/NEON/functions/NEHOGDescriptor.cpp", - "src/runtime/NEON/functions/NEHOGDetector.cpp", - "src/runtime/NEON/functions/NEHOGGradient.cpp", - "src/runtime/NEON/functions/NEHOGMultiDetection.cpp", - "src/runtime/NEON/functions/NEHarrisCorners.cpp", - "src/runtime/NEON/functions/NEHistogram.cpp", - "src/runtime/NEON/functions/NEIm2Col.cpp", "src/runtime/NEON/functions/NEInstanceNormalizationLayer.cpp", - "src/runtime/NEON/functions/NEIntegralImage.cpp", "src/runtime/NEON/functions/NEL2NormalizeLayer.cpp", "src/runtime/NEON/functions/NELSTMLayer.cpp", "src/runtime/NEON/functions/NELSTMLayerQuantized.cpp", - "src/runtime/NEON/functions/NELaplacianPyramid.cpp", - "src/runtime/NEON/functions/NELaplacianReconstruct.cpp", - "src/runtime/NEON/functions/NELocallyConnectedLayer.cpp", "src/runtime/NEON/functions/NELogical.cpp", - "src/runtime/NEON/functions/NEMagnitude.cpp", + "src/runtime/NEON/functions/NEMatMul.cpp", "src/runtime/NEON/functions/NEMaxUnpoolingLayer.cpp", - "src/runtime/NEON/functions/NEMeanStdDev.cpp", "src/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.cpp", - "src/runtime/NEON/functions/NEMedian3x3.cpp", - "src/runtime/NEON/functions/NEMinMaxLocation.cpp", - "src/runtime/NEON/functions/NENonLinearFilter.cpp", - "src/runtime/NEON/functions/NENonMaximaSuppression3x3.cpp", "src/runtime/NEON/functions/NENormalizationLayer.cpp", - "src/runtime/NEON/functions/NEOpticalFlow.cpp", "src/runtime/NEON/functions/NEPReluLayer.cpp", "src/runtime/NEON/functions/NEPadLayer.cpp", "src/runtime/NEON/functions/NEPermute.cpp", - "src/runtime/NEON/functions/NEPhase.cpp", "src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp", + "src/runtime/NEON/functions/NEPooling3dLayer.cpp", "src/runtime/NEON/functions/NEPoolingLayer.cpp", "src/runtime/NEON/functions/NEPriorBoxLayer.cpp", "src/runtime/NEON/functions/NEQLSTMLayer.cpp", @@ -728,40 +970,28 @@ cc_library_static { "src/runtime/NEON/functions/NERange.cpp", "src/runtime/NEON/functions/NEReduceMean.cpp", "src/runtime/NEON/functions/NEReductionOperation.cpp", - "src/runtime/NEON/functions/NERemap.cpp", + "src/runtime/NEON/functions/NEReorderLayer.cpp", "src/runtime/NEON/functions/NEReorgLayer.cpp", "src/runtime/NEON/functions/NEReshapeLayer.cpp", "src/runtime/NEON/functions/NEReverse.cpp", "src/runtime/NEON/functions/NEScale.cpp", - "src/runtime/NEON/functions/NEScharr3x3.cpp", "src/runtime/NEON/functions/NESelect.cpp", "src/runtime/NEON/functions/NESlice.cpp", - "src/runtime/NEON/functions/NESobel3x3.cpp", - "src/runtime/NEON/functions/NESobel5x5.cpp", - "src/runtime/NEON/functions/NESobel7x7.cpp", "src/runtime/NEON/functions/NESoftmaxLayer.cpp", "src/runtime/NEON/functions/NESpaceToBatchLayer.cpp", "src/runtime/NEON/functions/NESpaceToDepthLayer.cpp", "src/runtime/NEON/functions/NESplit.cpp", "src/runtime/NEON/functions/NEStackLayer.cpp", "src/runtime/NEON/functions/NEStridedSlice.cpp", - "src/runtime/NEON/functions/NETableLookup.cpp", - "src/runtime/NEON/functions/NEThreshold.cpp", "src/runtime/NEON/functions/NETile.cpp", "src/runtime/NEON/functions/NETranspose.cpp", "src/runtime/NEON/functions/NEUnstack.cpp", - "src/runtime/NEON/functions/NEUpsampleLayer.cpp", - "src/runtime/NEON/functions/NEWarpAffine.cpp", - "src/runtime/NEON/functions/NEWarpPerspective.cpp", "src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp", - "src/runtime/NEON/functions/NEYOLOLayer.cpp", - "src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp", "src/runtime/OMP/OMPScheduler.cpp", "src/runtime/OffsetLifetimeManager.cpp", "src/runtime/OffsetMemoryPool.cpp", "src/runtime/OperatorTensor.cpp", "src/runtime/PoolManager.cpp", - "src/runtime/Pyramid.cpp", "src/runtime/RuntimeContext.cpp", "src/runtime/Scheduler.cpp", "src/runtime/SchedulerFactory.cpp", @@ -770,6 +1000,15 @@ cc_library_static { "src/runtime/Tensor.cpp", "src/runtime/TensorAllocator.cpp", "src/runtime/Utils.cpp", + "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigBifrost.cpp", + "src/runtime/heuristics/direct_conv/ClDirectConvDefaultConfigValhall.cpp", + "src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigBifrost.cpp", + "src/runtime/heuristics/dwc_native/ClDWCNativeDefaultConfigValhall.cpp", + "src/runtime/heuristics/dwc_native/ClDWCNativeHeuristicsHelpers.cpp", + "src/runtime/heuristics/indirect_conv/ClIndirectConvDefaultConfigValhall.cpp", + "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.cpp", + "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultVariantValhall.cpp", + "src/runtime/heuristics/matmul_native/ClMatMulNativeHelpers.cpp", "utils/CommonGraphOptions.cpp", "utils/GraphUtils.cpp", "utils/Utils.cpp", @@ -786,6 +1025,202 @@ cc_library_static { }, arm64: { srcs: [ + "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_s8q_3x3_dot.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_u8q_3x3_dot.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_s8q_3x3_dot.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_u8q_3x3_dot.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_generic_output9_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8qa_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8qa_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8qa_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_planar_3x3_s1_4rows_mla_za/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_planar_3x3_s2_4rows_mla_za/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_planar_5x5_s1_4rows_mla_za/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_planar_5x5_s2_4rows_mla_za/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32bf16fp32_planar_3x3_s1_4rows_dot_za/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32bf16fp32_planar_3x3_s2_4rows_dot_za/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32bf16fp32_planar_5x5_s1_4rows_dot_za/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32bf16fp32_planar_5x5_s2_4rows_dot_za/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_s8q_planar_3x3_s1_4rows_dot_za/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_s8q_planar_3x3_s2_4rows_dot_za/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_s8q_planar_5x5_s1_4rows_dot_za/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_s8q_planar_5x5_s2_4rows_dot_za/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8q_planar_3x3_s1_4rows_dot_za/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8q_planar_3x3_s2_4rows_dot_za/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8q_planar_5x5_s1_4rows_dot_za/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8q_planar_5x5_s2_4rows_dot_za/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8s8u8q_planar_3x3_s1_4rows_dot_za/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8s8u8q_planar_3x3_s2_4rows_dot_za/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8s8u8q_planar_5x5_s1_4rows_dot_za/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sme2_u8s8u8q_planar_5x5_s2_4rows_dot_za/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp16_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp16_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp32_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sme_fp32_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sme_s8_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sme_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sme_s8_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sme_s8q_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sme_s8q_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sme_u8_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sme_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sme_u8_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sme_u8q_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sme_u8q_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8q_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_bf16fp32_mmla_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp16_mla_6x32/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp32_mla_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp32bf16fp32_mmla_4x24/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_ffhybrid_fp32bf16fp32_mmla_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_bf16fp32_dot_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_bf16fp32_mmla_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_fp16_mla_8x24/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_ffinterleaved_fp32_mla_8x12/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_8x12/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/a55r1.cpp", @@ -796,28 +1231,50 @@ cc_library_static { "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/a55r1.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/x1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemv_fp32_mla_32/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/a55r1.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/x1.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_dot_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_mmla_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/a55.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/a55.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/a55.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_4x24/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/a55.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_mmla_4x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/a55.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_mmla_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/a55.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_mmla_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/a55.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_mmla_4x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/a55.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_mmla_6x16/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/x1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/a510.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/a510.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/a510.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a53.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55r1.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/x1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x6/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemv_pretransposed/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_6x4/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_8x4/generic.cpp", @@ -829,28 +1286,86 @@ cc_library_static { "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/a55.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_gemv_fp32_mla_8VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_bf16fp32_dot_16VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp16fp32fp16_dot_16VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp32_mla_16VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_fp32bf16fp32_dot_16VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_s8qa_dot_16VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_gemv_u8qa_dot_16VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_1VLx4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_2VLx2VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_bf16fp32_mopa_4VLx1VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_1VLx4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_2VLx2VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp16fp32fp16_mopa_4VLx1VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_1VLx4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_2VLx2VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_fp32_mopa_4VLx1VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_1VLx4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_2VLx2VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8q_mopa_4VLx1VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8qfp32_mopa_1VLx4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8qfp32_mopa_2VLx2VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8qfp32_mopa_4VLx1VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_1VLx4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_2VLx2VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_s8s32_mopa_4VLx1VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_1VLx4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_2VLx2VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_4VLx1VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_bf16fp32_mmla_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16_mla_6x4VL/a64fx.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16_mla_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp32_mla_6x4VL/a64fx.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp32_mla_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp32bf16fp32_mmla_4x6VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_bf16fp32_mmla_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp16_mla_8x3VL/a64fx.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp16_mla_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp32_mla_8x3VL/a64fx.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_ffinterleaved_fp32_mla_8x3VL/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_mmla_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/a64fx.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/a64fx.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/a64fx.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_4x6VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_6x4VL/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_mmla_4x4VL/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_mmla_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/a64fx.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_mmla_6x4VL/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_mmla_4x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/a64fx.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_mmla_6x4VL/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/a64fx.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/a64fx.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/a64fx.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/a64fx.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp", + "src/core/NEON/kernels/convolution/winograd/input_transforms/a64_fp16_6x6.cpp", + "src/core/NEON/kernels/convolution/winograd/input_transforms/a64_fp32_6x6.cpp", + "src/core/NEON/kernels/convolution/winograd/input_transforms/sme_fp32_mla_6x6.cpp", + "src/core/NEON/kernels/convolution/winograd/input_transforms/sve_fp32_6x6.cpp", + "src/core/NEON/kernels/convolution/winograd/output_transforms/a64_fp16_4x4_3x3.cpp", + "src/core/NEON/kernels/convolution/winograd/output_transforms/sme_fp32_mopa_4x4_3x3.cpp", + "src/core/NEON/kernels/convolution/winograd/weight_transforms/a64_fp16_4x4_3x3.cpp", ], }, |