From eb84d6b82ba744295f5249d2171ee6537e09751a Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 27 Jul 2018 18:28:10 +0100 Subject: COMPMID-1434: Fix NEWinograd for NHWC and sub-tensors Apply offsets and strides to winograd transform functions in NEON. Change-Id: Ia4f44d22244203a5f9d93d2fed73570396b0d28c Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/141803 Tested-by: Jenkins Reviewed-by: Anthony Barbier --- src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp') diff --git a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp index e60fe80e0f..e4a7214c10 100644 --- a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp +++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp @@ -174,7 +174,7 @@ void Fallback::prepare() if(_gemm_kernel_asm->B_pretranspose_required()) { const int ldb = _b->info()->strides_in_bytes().y() / sizeof(TypeInput); - const auto in1_ptr = reinterpret_cast<const TypeInput *>(_b->buffer()); + const auto in1_ptr = reinterpret_cast<const TypeInput *>(_b->buffer() + _b->info()->offset_first_element_in_bytes()); const int multi_stride_b = _b->info()->strides_in_bytes().z() / sizeof(TypeInput); ARM_COMPUTE_ERROR_ON(_pretranspose.buffer() == nullptr); @@ -223,9 +223,9 @@ void Fallback::run() const int multi_stride_b = _b->info()->strides_in_bytes().z() / sizeof(TypeInput); const int multi_stride_d = _d->info()->strides_in_bytes()[3] / sizeof(TypeOutput); - const auto in0_ptr = reinterpret_cast<const TypeInput *>(_a->buffer()); - const auto in1_ptr = reinterpret_cast<const TypeInput *>(_b->buffer()); - auto out_ptr = reinterpret_cast<TypeOutput *>(_d->buffer()); + const auto in0_ptr = reinterpret_cast<const TypeInput *>(_a->buffer() + _a->info()->offset_first_element_in_bytes()); + const auto in1_ptr = reinterpret_cast<const TypeInput *>(_b->buffer() + _b->info()->offset_first_element_in_bytes()); + auto out_ptr = reinterpret_cast<TypeOutput *>(_d->buffer() +
_d->info()->offset_first_element_in_bytes()); // Set workspace if needed and reset number of threads as buffer manager gets re-created with max_threads if(_workspace.buffer() != nullptr) -- cgit v1.2.1