From 65c8db87a2f442cc17ef90d05406e705ca7a9c1e Mon Sep 17 00:00:00 2001 From: Viet-Hoa Do Date: Wed, 3 Aug 2022 16:39:23 +0100 Subject: Fix for AI benchmark ResNet regression * For 3x3 kernel, only choose the implementation with larger tile size if the input tensor is larger than the tile. Resolves: COMPMID-5467 Signed-off-by: Viet-Hoa Do Change-Id: I2cf95ddb25f477cb05da3b3501e0afe9548fc33a Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8022 Tested-by: Arm Jenkins Reviewed-by: Gian Marco Iodice Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- src/cpu/operators/CpuWinogradConv2d.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'src/cpu') diff --git a/src/cpu/operators/CpuWinogradConv2d.cpp b/src/cpu/operators/CpuWinogradConv2d.cpp index 7be2d6d230..81cf651b76 100644 --- a/src/cpu/operators/CpuWinogradConv2d.cpp +++ b/src/cpu/operators/CpuWinogradConv2d.cpp @@ -252,9 +252,15 @@ void CpuWinogradConv2d::configure(const ITensorInfo *src, const ITensorInfo *wei _permute_output->configure(&_output_nhwc, dst, PermutationVector(1U, 2U, 0U)); } + // Configure input transform kernel + _transform_input_kernel = std::make_unique<CpuWinogradConv2dTransformInputKernel>(_winograd_impl, *_conv_args, nthreads); + // Configure GEMM function _gemm_function->configure(&_winograd_transformed_input, &_winograd_transformed_weights, nullptr, &_winograd_transformed_output, 1.0f, 0.f); + // Configure output transform kernel + _transform_output_kernel = std::make_unique<CpuWinogradConv2dTransformOutputKernel>(_winograd_impl, *_conv_args, nthreads); + //Configure Activation Layer _run_activation = act_info.enabled() && !fuse_function_supported(act_info); if(_run_activation) @@ -331,8 +337,6 @@ void CpuWinogradConv2d::run(ITensorPack &tensors) CpuAuxTensorHandler output_nhwc(offset_int_vec(PermutedOutput), _output_nhwc, tensors, true); ITensorPack transform_input_pack{ { ACL_SRC, is_nchw ? 
input_nhwc.get() : src }, { ACL_DST, winograd_input_transformed.get() }, { ACL_INT, input_workspace.get() } }; - _transform_input_kernel = std::make_unique<CpuWinogradConv2dTransformInputKernel>(_winograd_impl, *_conv_args, nthreads); - NEScheduler::get().schedule_op(_transform_input_kernel.get(), Window::DimX, win, transform_input_pack); CpuAuxTensorHandler winograd_weights_transformed(offset_int_vec(TransformedWeights), _winograd_transformed_weights, tensors, true); @@ -346,7 +350,6 @@ void CpuWinogradConv2d::run(ITensorPack &tensors) _gemm_function->run(gemm_pack); // Output transform - _transform_output_kernel = std::make_unique<CpuWinogradConv2dTransformOutputKernel>(_winograd_impl, *_conv_args, nthreads); ITensorPack transform_output_pack{ { ACL_SRC_0, winograd_output_transformed.get() }, { ACL_DST, is_nchw ? output_nhwc.get() : output }, { ACL_SRC_1, biases }, { ACL_INT, output_workspace.get() } }; NEScheduler::get().schedule_op(_transform_output_kernel.get(), Window::DimX, win, transform_output_pack); if(is_nchw) -- cgit v1.2.1