about | summary | refs | log | tree | commit | diff
path: root/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
diff options
context:
space:
mode:
author: Georgios Pinitas <georgios.pinitas@arm.com> 2018-07-27 18:28:10 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:54:54 +0000
commit: eb84d6b82ba744295f5249d2171ee6537e09751a (patch)
tree: dd736285c3d1c6174fbf5650e55b3f6199e91473 /src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
parent: c50da386dc26a6e0a1690a47e72d5fa766e7dba2 (diff)
download: ComputeLibrary-eb84d6b82ba744295f5249d2171ee6537e09751a.tar.gz
COMPMID-1434: Fix NEWinograd for NHWC and sub-tensors
Apply offsets and strides to winograd transform functions in NEON.

Change-Id: Ia4f44d22244203a5f9d93d2fed73570396b0d28c
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/141803
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp')
-rw-r--r-- src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp 8
1 files changed, 4 insertions, 4 deletions
diff --git a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
index e60fe80e0f..e4a7214c10 100644
--- a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
+++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
@@ -174,7 +174,7 @@ void Fallback<TypeInput, TypeOutput>::prepare()
if(_gemm_kernel_asm->B_pretranspose_required())
{
const int ldb = _b->info()->strides_in_bytes().y() / sizeof(TypeInput);
- const auto in1_ptr = reinterpret_cast<const TypeInput *>(_b->buffer());
+ const auto in1_ptr = reinterpret_cast<const TypeInput *>(_b->buffer() + _b->info()->offset_first_element_in_bytes());
const int multi_stride_b = _b->info()->strides_in_bytes().z() / sizeof(TypeInput);
ARM_COMPUTE_ERROR_ON(_pretranspose.buffer() == nullptr);
@@ -223,9 +223,9 @@ void Fallback<TypeInput, TypeOutput>::run()
const int multi_stride_b = _b->info()->strides_in_bytes().z() / sizeof(TypeInput);
const int multi_stride_d = _d->info()->strides_in_bytes()[3] / sizeof(TypeOutput);
- const auto in0_ptr = reinterpret_cast<const TypeInput *>(_a->buffer());
- const auto in1_ptr = reinterpret_cast<const TypeInput *>(_b->buffer());
- auto out_ptr = reinterpret_cast<TypeOutput *>(_d->buffer());
+ const auto in0_ptr = reinterpret_cast<const TypeInput *>(_a->buffer() + _a->info()->offset_first_element_in_bytes());
+ const auto in1_ptr = reinterpret_cast<const TypeInput *>(_b->buffer() + _b->info()->offset_first_element_in_bytes());
+ auto out_ptr = reinterpret_cast<TypeOutput *>(_d->buffer() + _d->info()->offset_first_element_in_bytes());
// Set workspace if needed and reset number of threads as buffer manager gets re-created with max_threads
if(_workspace.buffer() != nullptr)