diff options
Diffstat (limited to 'src/cpu/kernels/maxunpool/generic/neon/impl.h')
-rw-r--r-- | src/cpu/kernels/maxunpool/generic/neon/impl.h | 25 |
1 files changed, 18 insertions, 7 deletions
diff --git a/src/cpu/kernels/maxunpool/generic/neon/impl.h b/src/cpu/kernels/maxunpool/generic/neon/impl.h
index 3fea9cfcf3..5fe19c4707 100644
--- a/src/cpu/kernels/maxunpool/generic/neon/impl.h
+++ b/src/cpu/kernels/maxunpool/generic/neon/impl.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,19 +21,30 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef SRC_CORE_SVE_KERNELS_MAXUNPOOLING_IMPL_H
-#define SRC_CORE_SVE_KERNELS_MAXUNPOOLING_IMPL_H
+#ifndef ACL_SRC_CPU_KERNELS_MAXUNPOOL_GENERIC_NEON_IMPL_H
+#define ACL_SRC_CPU_KERNELS_MAXUNPOOL_GENERIC_NEON_IMPL_H
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/Window.h"
 #include "src/core/NEON/wrapper/wrapper.h"
 namespace arm_compute
 {
-class ITensor;
-class Window;
 namespace cpu
 {
 template <typename T>
-void max_unpooling(const ITensor *input, const ITensor *indices, ITensor *output, const Window &window);
+void max_unpooling(const ITensor *input, const ITensor *indices, ITensor *output, const Window &window)
+{
+    Iterator input_itr(input, window);
+    Iterator indices_itr(indices, window);
+    auto out_ptr = reinterpret_cast<T *>(output->buffer());
+    const int out_stride_w = static_cast<int>(output->info()->strides_in_bytes()[3]);
+    execute_window_loop(window, [&](const Coordinates & id)
+    {
+        auto vindices = reinterpret_cast<uint32_t *>(indices_itr.ptr());
+        auto vinput = reinterpret_cast<T *>(input_itr.ptr());
+        out_ptr[id[3] * out_stride_w / sizeof(T) + *vindices] = *vinput;
+    },
+    input_itr, indices_itr);
+}
 } // namespace cpu
 } // namespace arm_compute
-#endif //define SRC_CORE_SVE_KERNELS_MAXUNPOOLING_IMPL_H
+#endif // ACL_SRC_CPU_KERNELS_MAXUNPOOL_GENERIC_NEON_IMPL_H