diff options
Diffstat (limited to 'src/cpu/kernels/scale/sve')
-rw-r--r-- | src/cpu/kernels/scale/sve/fp16.cpp | 75 | ||||
-rw-r--r-- | src/cpu/kernels/scale/sve/fp32.cpp | 76 | ||||
-rw-r--r-- | src/cpu/kernels/scale/sve/integer.cpp | 145 | ||||
-rw-r--r-- | src/cpu/kernels/scale/sve/list.h | 8 | ||||
-rw-r--r-- | src/cpu/kernels/scale/sve/qasymm8.cpp | 74 | ||||
-rw-r--r-- | src/cpu/kernels/scale/sve/qasymm8_signed.cpp | 74 |
6 files changed, 275 insertions, 177 deletions
diff --git a/src/cpu/kernels/scale/sve/fp16.cpp b/src/cpu/kernels/scale/sve/fp16.cpp index ceda19f366..cb28f4cb1c 100644 --- a/src/cpu/kernels/scale/sve/fp16.cpp +++ b/src/cpu/kernels/scale/sve/fp16.cpp @@ -27,9 +27,10 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensorPack.h" #include "arm_compute/core/Window.h" + +#include "src/core/helpers/ScaleHelpers.h" #include "src/core/NEON/NEMath.h" #include "src/core/NEON/wrapper/wrapper.h" -#include "src/core/helpers/ScaleHelpers.h" #include "src/core/utils/ScaleUtils.h" #include "support/Rounding.h" @@ -41,8 +42,12 @@ namespace arm_compute { namespace { -void fp16_sve_scale_nearest(const ITensor *src, ITensor *dst, const ITensor *offsets, - float sampling_offset, bool align_corners, const Window &window) +void fp16_sve_scale_nearest(const ITensor *src, + ITensor *dst, + const ITensor *offsets, + float sampling_offset, + bool align_corners, + const Window &window) { const size_t in_stride_c = src->info()->dimension(0) + src->info()->padding().left + src->info()->padding().right; const size_t in_stride_w = src->info()->dimension(1) + src->info()->padding().top + src->info()->padding().bottom; @@ -61,38 +66,50 @@ void fp16_sve_scale_nearest(const ITensor *src, ITensor *dst, const ITensor *off const uint8_t *in_ptr_start = src->buffer() + src->info()->offset_first_element_in_bytes(); const unsigned int in_stride_bytes_hwc = src->info()->strides_in_bytes()[3]; - execute_window_loop(win, [&](const Coordinates & id) - { - const int32_t offset = *reinterpret_cast<const int32_t *>(offsets->ptr_to_element(Coordinates(id.y(), id.z()))) * in_stride_c; - const auto in_hi = static_cast<int>(align_corners ? utils::rounding::round_half_away_from_zero((id.z() + sampling_offset) * hr) : std::floor((id.z() + sampling_offset) * hr)); - const int offset_row = in_hi * in_stride_wc; - const auto in_ptr = reinterpret_cast<const float16_t *>(in_ptr_start + in_stride_bytes_hwc * id[3]); - const auto out_ptr = reinterpret_cast<float16_t *>(out.ptr()); - - // Compute S elements per iteration - int x = window_start_x; - svbool_t pg = svwhilelt_b16(x, window_end_x); - do + execute_window_loop( + win, + [&](const Coordinates &id) { - // Store results - svst1_f16(pg, out_ptr + x, svld1_f16(pg, in_ptr + offset + offset_row + x)); + const int32_t offset = + *reinterpret_cast<const int32_t *>(offsets->ptr_to_element(Coordinates(id.y(), id.z()))) * in_stride_c; + const auto in_hi = static_cast<int>( + align_corners ? utils::rounding::round_half_away_from_zero((id.z() + sampling_offset) * hr) + : std::floor((id.z() + sampling_offset) * hr)); + const int offset_row = in_hi * in_stride_wc; + const auto in_ptr = reinterpret_cast<const float16_t *>(in_ptr_start + in_stride_bytes_hwc * id[3]); + const auto out_ptr = reinterpret_cast<float16_t *>(out.ptr()); - x += svcntw(); - pg = svwhilelt_b16(x, window_end_x); - } - while(svptest_any(svptrue_b16(), pg)); - }, - out); -} + // Compute S elements per iteration + int x = window_start_x; + svbool_t pg = svwhilelt_b16(x, window_end_x); + do + { + // Store results + svst1_f16(pg, out_ptr + x, svld1_f16(pg, in_ptr + offset + offset_row + x)); + + x += svcntw(); + pg = svwhilelt_b16(x, window_end_x); + } while (svptest_any(svptrue_b16(), pg)); + }, + out); } +} // namespace namespace cpu { -void fp16_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, - InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, - bool align_corners, const Window &window) +void fp16_sve_scale(const ITensor *src, + ITensor *dst, + const ITensor *offsets, + const ITensor *dx, + const ITensor *dy, + InterpolationPolicy policy, + BorderMode border_mode, + PixelValue constant_border_value, + float sampling_offset, + bool align_corners, + const Window &window) { ARM_COMPUTE_UNUSED(dx, dy, border_mode, constant_border_value); - if(policy == InterpolationPolicy::NEAREST_NEIGHBOR) + if (policy == InterpolationPolicy::NEAREST_NEIGHBOR) { fp16_sve_scale_nearest(src, dst, offsets, sampling_offset, align_corners, window); } @@ -103,4 +120,4 @@ void fp16_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, co } } // namespace cpu } // namespace arm_compute -#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
\ No newline at end of file +#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */ diff --git a/src/cpu/kernels/scale/sve/fp32.cpp b/src/cpu/kernels/scale/sve/fp32.cpp index f3472f1efd..cbb345edbb 100644 --- a/src/cpu/kernels/scale/sve/fp32.cpp +++ b/src/cpu/kernels/scale/sve/fp32.cpp @@ -25,23 +25,27 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensorPack.h" #include "arm_compute/core/Window.h" + +#include "src/core/helpers/ScaleHelpers.h" #include "src/core/NEON/NEMath.h" #include "src/core/NEON/wrapper/wrapper.h" -#include "src/core/helpers/ScaleHelpers.h" #include "src/core/utils/ScaleUtils.h" #include "support/Rounding.h" +#include <arm_sve.h> #include <cmath> #include <cstddef> -#include <arm_sve.h> - namespace arm_compute { namespace { -void fp32_sve_scale_nearest(const ITensor *src, ITensor *dst, const ITensor *offsets, - float sampling_offset, bool align_corners, const Window &window) +void fp32_sve_scale_nearest(const ITensor *src, + ITensor *dst, + const ITensor *offsets, + float sampling_offset, + bool align_corners, + const Window &window) { const size_t in_stride_c = src->info()->dimension(0) + src->info()->padding().left + src->info()->padding().right; const size_t in_stride_w = src->info()->dimension(1) + src->info()->padding().top + src->info()->padding().bottom; @@ -60,38 +64,50 @@ void fp32_sve_scale_nearest(const ITensor *src, ITensor *dst, const ITensor *off const uint8_t *in_ptr_start = src->buffer() + src->info()->offset_first_element_in_bytes(); const unsigned int in_stride_bytes_hwc = src->info()->strides_in_bytes()[3]; - execute_window_loop(win, [&](const Coordinates & id) - { - const int32_t offset = *reinterpret_cast<const int32_t *>(offsets->ptr_to_element(Coordinates(id.y(), id.z()))) * in_stride_c; - const auto in_hi = static_cast<int>(align_corners ? utils::rounding::round_half_away_from_zero((id.z() + sampling_offset) * hr) : std::floor((id.z() + sampling_offset) * hr)); - const int offset_row = in_hi * in_stride_wc; - const auto in_ptr = reinterpret_cast<const float *>(in_ptr_start + in_stride_bytes_hwc * id[3]); - const auto out_ptr = reinterpret_cast<float *>(out.ptr()); - - // Compute S elements per iteration - int x = window_start_x; - svbool_t pg = svwhilelt_b32(x, window_end_x); - do + execute_window_loop( + win, + [&](const Coordinates &id) { - // Store results - svst1_f32(pg, out_ptr + x, svld1_f32(pg, in_ptr + offset + offset_row + x)); + const int32_t offset = + *reinterpret_cast<const int32_t *>(offsets->ptr_to_element(Coordinates(id.y(), id.z()))) * in_stride_c; + const auto in_hi = static_cast<int>( + align_corners ? utils::rounding::round_half_away_from_zero((id.z() + sampling_offset) * hr) + : std::floor((id.z() + sampling_offset) * hr)); + const int offset_row = in_hi * in_stride_wc; + const auto in_ptr = reinterpret_cast<const float *>(in_ptr_start + in_stride_bytes_hwc * id[3]); + const auto out_ptr = reinterpret_cast<float *>(out.ptr()); - x += svcntw(); - pg = svwhilelt_b32(x, window_end_x); - } - while(svptest_any(svptrue_b32(), pg)); - }, - out); -} + // Compute S elements per iteration + int x = window_start_x; + svbool_t pg = svwhilelt_b32(x, window_end_x); + do + { + // Store results + svst1_f32(pg, out_ptr + x, svld1_f32(pg, in_ptr + offset + offset_row + x)); + + x += svcntw(); + pg = svwhilelt_b32(x, window_end_x); + } while (svptest_any(svptrue_b32(), pg)); + }, + out); } +} // namespace namespace cpu { -void fp32_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, - InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, - bool align_corners, const Window &window) +void fp32_sve_scale(const ITensor *src, + ITensor *dst, + const ITensor *offsets, + const ITensor *dx, + const ITensor *dy, + InterpolationPolicy policy, + BorderMode border_mode, + PixelValue constant_border_value, + float sampling_offset, + bool align_corners, + const Window &window) { ARM_COMPUTE_UNUSED(dx, dy, border_mode, constant_border_value); - if(policy == InterpolationPolicy::NEAREST_NEIGHBOR) + if (policy == InterpolationPolicy::NEAREST_NEIGHBOR) { fp32_sve_scale_nearest(src, dst, offsets, sampling_offset, align_corners, window); } diff --git a/src/cpu/kernels/scale/sve/integer.cpp b/src/cpu/kernels/scale/sve/integer.cpp index 82c70ee360..df950b1789 100644 --- a/src/cpu/kernels/scale/sve/integer.cpp +++ b/src/cpu/kernels/scale/sve/integer.cpp @@ -25,9 +25,10 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensorPack.h" #include "arm_compute/core/Window.h" + +#include "src/core/helpers/ScaleHelpers.h" #include "src/core/NEON/NEMath.h" #include "src/core/NEON/wrapper/wrapper.h" -#include "src/core/helpers/ScaleHelpers.h" #include "src/core/utils/ScaleUtils.h" #include "support/Rounding.h" @@ -39,8 +40,12 @@ namespace arm_compute { namespace { -void u8_sve_scale_nearest(const ITensor *src, ITensor *dst, const ITensor *offsets, - float sampling_offset, bool align_corners, const Window &window) +void u8_sve_scale_nearest(const ITensor *src, + ITensor *dst, + const ITensor *offsets, + float sampling_offset, + bool align_corners, + const Window &window) { const size_t in_stride_c = src->info()->dimension(0) + src->info()->padding().left + src->info()->padding().right; const size_t in_stride_w = src->info()->dimension(1) + src->info()->padding().top + src->info()->padding().bottom; @@ -59,32 +64,40 @@ void u8_sve_scale_nearest(const ITensor *src, ITensor *dst, const ITensor *offse const uint8_t *in_ptr_start = src->buffer() + src->info()->offset_first_element_in_bytes(); const unsigned int in_stride_bytes_hwc = src->info()->strides_in_bytes()[3]; - execute_window_loop(win, [&](const Coordinates & id) - { - const int32_t offset = *reinterpret_cast<const int32_t *>(offsets->ptr_to_element(Coordinates(id.y(), id.z()))) * in_stride_c; - const auto in_hi = static_cast<int>(align_corners ? utils::rounding::round_half_away_from_zero((id.z() + sampling_offset) * hr) : std::floor((id.z() + sampling_offset) * hr)); - const int offset_row = in_hi * in_stride_wc; - const auto in_ptr = reinterpret_cast<const uint8_t *>(in_ptr_start + in_stride_bytes_hwc * id[3]); - const auto out_ptr = reinterpret_cast<uint8_t *>(out.ptr()); - - // Compute S elements per iteration - int x = window_start_x; - svbool_t pg = svwhilelt_b8(x, window_end_x); - do + execute_window_loop( + win, + [&](const Coordinates &id) { - // Store results - svst1_u8(pg, out_ptr + x, svld1_u8(pg, in_ptr + offset + offset_row + x)); - - x += svcntw(); - pg = svwhilelt_b8(x, window_end_x); - } - while(svptest_any(svptrue_b8(), pg)); - }, - out); + const int32_t offset = + *reinterpret_cast<const int32_t *>(offsets->ptr_to_element(Coordinates(id.y(), id.z()))) * in_stride_c; + const auto in_hi = static_cast<int>( + align_corners ? utils::rounding::round_half_away_from_zero((id.z() + sampling_offset) * hr) + : std::floor((id.z() + sampling_offset) * hr)); + const int offset_row = in_hi * in_stride_wc; + const auto in_ptr = reinterpret_cast<const uint8_t *>(in_ptr_start + in_stride_bytes_hwc * id[3]); + const auto out_ptr = reinterpret_cast<uint8_t *>(out.ptr()); + + // Compute S elements per iteration + int x = window_start_x; + svbool_t pg = svwhilelt_b8(x, window_end_x); + do + { + // Store results + svst1_u8(pg, out_ptr + x, svld1_u8(pg, in_ptr + offset + offset_row + x)); + + x += svcntw(); + pg = svwhilelt_b8(x, window_end_x); + } while (svptest_any(svptrue_b8(), pg)); + }, + out); } -void s16_sve_scale_nearest(const ITensor *src, ITensor *dst, const ITensor *offsets, - float sampling_offset, bool align_corners, const Window &window) +void s16_sve_scale_nearest(const ITensor *src, + ITensor *dst, + const ITensor *offsets, + float sampling_offset, + bool align_corners, + const Window &window) { const size_t in_stride_c = src->info()->dimension(0) + src->info()->padding().left + src->info()->padding().right; const size_t in_stride_w = src->info()->dimension(1) + src->info()->padding().top + src->info()->padding().bottom; @@ -103,38 +116,50 @@ void s16_sve_scale_nearest(const ITensor *src, ITensor *dst, const ITensor *offs const uint8_t *in_ptr_start = src->buffer() + src->info()->offset_first_element_in_bytes(); const unsigned int in_stride_bytes_hwc = src->info()->strides_in_bytes()[3]; - execute_window_loop(win, [&](const Coordinates & id) - { - const int32_t offset = *reinterpret_cast<const int32_t *>(offsets->ptr_to_element(Coordinates(id.y(), id.z()))) * in_stride_c; - const auto in_hi = static_cast<int>(align_corners ? utils::rounding::round_half_away_from_zero((id.z() + sampling_offset) * hr) : std::floor((id.z() + sampling_offset) * hr)); - const int offset_row = in_hi * in_stride_wc; - const auto in_ptr = reinterpret_cast<const int16_t *>(in_ptr_start + in_stride_bytes_hwc * id[3]); - const auto out_ptr = reinterpret_cast<int16_t *>(out.ptr()); - - // Compute S elements per iteration - int x = window_start_x; - svbool_t pg = svwhilelt_b16(x, window_end_x); - do + execute_window_loop( + win, + [&](const Coordinates &id) { - // Store results - svst1_s16(pg, out_ptr + x, svld1_s16(pg, in_ptr + offset + offset_row + x)); - - x += svcntw(); - pg = svwhilelt_b16(x, window_end_x); - } - while(svptest_any(svptrue_b16(), pg)); - }, - out); -} + const int32_t offset = + *reinterpret_cast<const int32_t *>(offsets->ptr_to_element(Coordinates(id.y(), id.z()))) * in_stride_c; + const auto in_hi = static_cast<int>( + align_corners ? utils::rounding::round_half_away_from_zero((id.z() + sampling_offset) * hr) + : std::floor((id.z() + sampling_offset) * hr)); + const int offset_row = in_hi * in_stride_wc; + const auto in_ptr = reinterpret_cast<const int16_t *>(in_ptr_start + in_stride_bytes_hwc * id[3]); + const auto out_ptr = reinterpret_cast<int16_t *>(out.ptr()); + + // Compute S elements per iteration + int x = window_start_x; + svbool_t pg = svwhilelt_b16(x, window_end_x); + do + { + // Store results + svst1_s16(pg, out_ptr + x, svld1_s16(pg, in_ptr + offset + offset_row + x)); + + x += svcntw(); + pg = svwhilelt_b16(x, window_end_x); + } while (svptest_any(svptrue_b16(), pg)); + }, + out); } +} // namespace namespace cpu { -void u8_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, - InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, - bool align_corners, const Window &window) +void u8_sve_scale(const ITensor *src, + ITensor *dst, + const ITensor *offsets, + const ITensor *dx, + const ITensor *dy, + InterpolationPolicy policy, + BorderMode border_mode, + PixelValue constant_border_value, + float sampling_offset, + bool align_corners, + const Window &window) { ARM_COMPUTE_UNUSED(dx, dy, border_mode, constant_border_value); - if(policy == InterpolationPolicy::NEAREST_NEIGHBOR) + if (policy == InterpolationPolicy::NEAREST_NEIGHBOR) { u8_sve_scale_nearest(src, dst, offsets, sampling_offset, align_corners, window); } @@ -144,12 +169,20 @@ void u8_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, cons } } -void s16_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, - InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, - bool align_corners, const Window &window) +void s16_sve_scale(const ITensor *src, + ITensor *dst, + const ITensor *offsets, + const ITensor *dx, + const ITensor *dy, + InterpolationPolicy policy, + BorderMode border_mode, + PixelValue constant_border_value, + float sampling_offset, + bool align_corners, + const Window &window) { ARM_COMPUTE_UNUSED(dx, dy, border_mode, constant_border_value); - if(policy == InterpolationPolicy::NEAREST_NEIGHBOR) + if (policy == InterpolationPolicy::NEAREST_NEIGHBOR) { s16_sve_scale_nearest(src, dst, offsets, sampling_offset, align_corners, window); } diff --git a/src/cpu/kernels/scale/sve/list.h b/src/cpu/kernels/scale/sve/list.h index b9c3a10a78..aff741a4a7 100644 --- a/src/cpu/kernels/scale/sve/list.h +++ b/src/cpu/kernels/scale/sve/list.h @@ -28,10 +28,10 @@ namespace arm_compute { namespace cpu { -#define DECLARE_SCALE_KERNEL(func_name) \ - void func_name(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, \ - InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, \ - bool align_corners, const Window &window) +#define DECLARE_SCALE_KERNEL(func_name) \ + void func_name(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, \ + InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, \ + float sampling_offset, bool align_corners, const Window &window) DECLARE_SCALE_KERNEL(fp16_sve_scale); DECLARE_SCALE_KERNEL(fp32_sve_scale); diff --git a/src/cpu/kernels/scale/sve/qasymm8.cpp b/src/cpu/kernels/scale/sve/qasymm8.cpp index d45a69e43b..0fc794c6c2 100644 --- a/src/cpu/kernels/scale/sve/qasymm8.cpp +++ b/src/cpu/kernels/scale/sve/qasymm8.cpp @@ -25,10 +25,10 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensorPack.h" #include "arm_compute/core/Window.h" + +#include "src/core/helpers/ScaleHelpers.h" #include "src/core/NEON/NEMath.h" #include "src/core/NEON/wrapper/wrapper.h" -#include "src/core/helpers/ScaleHelpers.h" -#include "src/core/helpers/ScaleHelpers.h" #include "src/core/utils/ScaleUtils.h" #include "support/Rounding.h" @@ -40,8 +40,12 @@ namespace arm_compute { namespace { -void qasymm8_sve_scale_nearest(const ITensor *src, ITensor *dst, const ITensor *offsets, - float sampling_offset, bool align_corners, const Window &window) +void qasymm8_sve_scale_nearest(const ITensor *src, + ITensor *dst, + const ITensor *offsets, + float sampling_offset, + bool align_corners, + const Window &window) { const size_t in_stride_c = src->info()->dimension(0) + src->info()->padding().left + src->info()->padding().right; const size_t in_stride_w = src->info()->dimension(1) + src->info()->padding().top + src->info()->padding().bottom; @@ -60,38 +64,50 @@ void qasymm8_sve_scale_nearest(const ITensor *src, ITensor *dst, const ITensor * const uint8_t *in_ptr_start = src->buffer() + src->info()->offset_first_element_in_bytes(); const unsigned int in_stride_bytes_hwc = src->info()->strides_in_bytes()[3]; - execute_window_loop(win, [&](const Coordinates & id) - { - const int32_t offset = *reinterpret_cast<const int32_t *>(offsets->ptr_to_element(Coordinates(id.y(), id.z()))) * in_stride_c; - const auto in_hi = static_cast<int>(align_corners ? utils::rounding::round_half_away_from_zero((id.z() + sampling_offset) * hr) : std::floor((id.z() + sampling_offset) * hr)); - const int offset_row = in_hi * in_stride_wc; - const auto in_ptr = reinterpret_cast<const uint8_t *>(in_ptr_start + in_stride_bytes_hwc * id[3]); - const auto out_ptr = reinterpret_cast<uint8_t *>(out.ptr()); - - // Compute S elements per iteration - int x = window_start_x; - svbool_t pg = svwhilelt_b8(x, window_end_x); - do + execute_window_loop( + win, + [&](const Coordinates &id) { - // Store results - svst1_u8(pg, out_ptr + x, svld1_u8(pg, in_ptr + offset + offset_row + x)); + const int32_t offset = + *reinterpret_cast<const int32_t *>(offsets->ptr_to_element(Coordinates(id.y(), id.z()))) * in_stride_c; + const auto in_hi = static_cast<int>( + align_corners ? utils::rounding::round_half_away_from_zero((id.z() + sampling_offset) * hr) + : std::floor((id.z() + sampling_offset) * hr)); + const int offset_row = in_hi * in_stride_wc; + const auto in_ptr = reinterpret_cast<const uint8_t *>(in_ptr_start + in_stride_bytes_hwc * id[3]); + const auto out_ptr = reinterpret_cast<uint8_t *>(out.ptr()); - x += svcntw(); - pg = svwhilelt_b8(x, window_end_x); - } - while(svptest_any(svptrue_b8(), pg)); - }, - out); -} + // Compute S elements per iteration + int x = window_start_x; + svbool_t pg = svwhilelt_b8(x, window_end_x); + do + { + // Store results + svst1_u8(pg, out_ptr + x, svld1_u8(pg, in_ptr + offset + offset_row + x)); + + x += svcntw(); + pg = svwhilelt_b8(x, window_end_x); + } while (svptest_any(svptrue_b8(), pg)); + }, + out); } +} // namespace namespace cpu { -void qasymm8_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, - InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, - bool align_corners, const Window &window) +void qasymm8_sve_scale(const ITensor *src, + ITensor *dst, + const ITensor *offsets, + const ITensor *dx, + const ITensor *dy, + InterpolationPolicy policy, + BorderMode border_mode, + PixelValue constant_border_value, + float sampling_offset, + bool align_corners, + const Window &window) { ARM_COMPUTE_UNUSED(dx, dy, border_mode, constant_border_value); - if(policy == InterpolationPolicy::NEAREST_NEIGHBOR) + if (policy == InterpolationPolicy::NEAREST_NEIGHBOR) { qasymm8_sve_scale_nearest(src, dst, offsets, sampling_offset, align_corners, window); } diff --git a/src/cpu/kernels/scale/sve/qasymm8_signed.cpp b/src/cpu/kernels/scale/sve/qasymm8_signed.cpp index 67bca65f58..68ea01e29e 100644 --- a/src/cpu/kernels/scale/sve/qasymm8_signed.cpp +++ b/src/cpu/kernels/scale/sve/qasymm8_signed.cpp @@ -25,10 +25,10 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensorPack.h" #include "arm_compute/core/Window.h" + +#include "src/core/helpers/ScaleHelpers.h" #include "src/core/NEON/NEMath.h" #include "src/core/NEON/wrapper/wrapper.h" -#include "src/core/helpers/ScaleHelpers.h" -#include "src/core/helpers/ScaleHelpers.h" #include "src/core/utils/ScaleUtils.h" #include "support/Rounding.h" @@ -40,8 +40,12 @@ namespace arm_compute { namespace { -void qasymm8_signed_sve_scale_nearest(const ITensor *src, ITensor *dst, const ITensor *offsets, - float sampling_offset, bool align_corners, const Window &window) +void qasymm8_signed_sve_scale_nearest(const ITensor *src, + ITensor *dst, + const ITensor *offsets, + float sampling_offset, + bool align_corners, + const Window &window) { const size_t in_stride_c = src->info()->dimension(0) + src->info()->padding().left + src->info()->padding().right; const size_t in_stride_w = src->info()->dimension(1) + src->info()->padding().top + src->info()->padding().bottom; @@ -60,38 +64,50 @@ void qasymm8_signed_sve_scale_nearest(const ITensor *src, ITensor *dst, const IT const uint8_t *in_ptr_start = src->buffer() + src->info()->offset_first_element_in_bytes(); const unsigned int in_stride_bytes_hwc = src->info()->strides_in_bytes()[3]; - execute_window_loop(win, [&](const Coordinates & id) - { - const int32_t offset = *reinterpret_cast<const int32_t *>(offsets->ptr_to_element(Coordinates(id.y(), id.z()))) * in_stride_c; - const auto in_hi = static_cast<int>(align_corners ? utils::rounding::round_half_away_from_zero((id.z() + sampling_offset) * hr) : std::floor((id.z() + sampling_offset) * hr)); - const int offset_row = in_hi * in_stride_wc; - const auto in_ptr = reinterpret_cast<const int8_t *>(in_ptr_start + in_stride_bytes_hwc * id[3]); - const auto out_ptr = reinterpret_cast<int8_t *>(out.ptr()); - - // Compute S elements per iteration - int x = window_start_x; - svbool_t pg = svwhilelt_b8(x, window_end_x); - do + execute_window_loop( + win, + [&](const Coordinates &id) { - // Store results - svst1_s8(pg, out_ptr + x, svld1_s8(pg, in_ptr + offset + offset_row + x)); + const int32_t offset = + *reinterpret_cast<const int32_t *>(offsets->ptr_to_element(Coordinates(id.y(), id.z()))) * in_stride_c; + const auto in_hi = static_cast<int>( + align_corners ? utils::rounding::round_half_away_from_zero((id.z() + sampling_offset) * hr) + : std::floor((id.z() + sampling_offset) * hr)); + const int offset_row = in_hi * in_stride_wc; + const auto in_ptr = reinterpret_cast<const int8_t *>(in_ptr_start + in_stride_bytes_hwc * id[3]); + const auto out_ptr = reinterpret_cast<int8_t *>(out.ptr()); - x += svcntw(); - pg = svwhilelt_b8(x, window_end_x); - } - while(svptest_any(svptrue_b8(), pg)); - }, - out); -} + // Compute S elements per iteration + int x = window_start_x; + svbool_t pg = svwhilelt_b8(x, window_end_x); + do + { + // Store results + svst1_s8(pg, out_ptr + x, svld1_s8(pg, in_ptr + offset + offset_row + x)); + + x += svcntw(); + pg = svwhilelt_b8(x, window_end_x); + } while (svptest_any(svptrue_b8(), pg)); + }, + out); } +} // namespace namespace cpu { -void qasymm8_signed_sve_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, - InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, - bool align_corners, const Window &window) +void qasymm8_signed_sve_scale(const ITensor *src, + ITensor *dst, + const ITensor *offsets, + const ITensor *dx, + const ITensor *dy, + InterpolationPolicy policy, + BorderMode border_mode, + PixelValue constant_border_value, + float sampling_offset, + bool align_corners, + const Window &window) { ARM_COMPUTE_UNUSED(dx, dy, border_mode, constant_border_value); - if(policy == InterpolationPolicy::NEAREST_NEIGHBOR) + if (policy == InterpolationPolicy::NEAREST_NEIGHBOR) { qasymm8_signed_sve_scale_nearest(src, dst, offsets, sampling_offset, align_corners, window); } |