From d02d5edfa15ba6c04a9986a8a362a945cb38ac31 Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Fri, 22 Jan 2021 09:47:04 +0000 Subject: Integrate improved CPU depthwise convolution kernels * Replace assembly kernels for depthwise convolution with more optimized ones. * Add int8 assembly kernels. * Fix implicit padding on optimized kernels Resolves: COMPMID-3867, COMPMID-4361 Change-Id: I0b0867e05f61be4f368f62190d55e14d0ab3ebf2 Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5622 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas --- src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp') diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp index a561b88058..daa5fd5ab9 100644 --- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp @@ -137,9 +137,10 @@ void NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayerOptimizedInternal:: // Allocate memory based on the internal memory requirements experimental::MemoryRequirements mem_req = dwc_optimized_func->workspace(); - _impl->workspace.allocator()->init(TensorInfo(TensorShape{ mem_req[0].size }, 1, DataType::S8), mem_req[0].alignment); - _impl->packed_weights.allocator()->init(TensorInfo(TensorShape{ mem_req[1].size }, 1, DataType::S8), mem_req[1].alignment); - + _impl->workspace.allocator()->init(TensorInfo(TensorShape{ mem_req[0].size + mem_req[0].alignment }, 1, DataType::S8), mem_req[0].alignment); + _impl->packed_weights.allocator()->init(TensorInfo(TensorShape{ mem_req[1].size + mem_req[1].alignment }, 1, DataType::S8), mem_req[1].alignment); + _memory_group.manage(&_impl->workspace); + _memory_group.manage(&_impl->packed_weights); _impl->workspace.allocator()->allocate(); _impl->packed_weights.allocator()->allocate(); } -- cgit v1.2.1