aboutsummaryrefslogtreecommitdiff
path: root/src/cpu/kernels/CpuScaleKernel.cpp
diff options
context:
space:
mode:
authorGunes Bayir <gunes.bayir@arm.com>2022-08-11 12:15:39 +0100
committerGunes Bayir <gunes.bayir@arm.com>2022-08-18 16:31:44 +0000
commit53929b1fd4dd3c27f5afb5b8626e27605ebe62cf (patch)
tree39d12e25aa5b9d7552f14b5e4838da0e8a4230c8 /src/cpu/kernels/CpuScaleKernel.cpp
parent9ee8a3e542a8f4fa05816f1a4b82543c0deffbba (diff)
downloadComputeLibrary-53929b1fd4dd3c27f5afb5b8626e27605ebe62cf.tar.gz
Use Neon™ kernels for FP Bilinear Resize for SVE
Removes FP Bilinear SVE kernels and uses Neon™ kernels instead Resolves: COMPMID-5449 Signed-off-by: Gunes Bayir <gunes.bayir@arm.com> Change-Id: I8e01de44bd884cb6578ca0b9358509b69bc31ca2 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8100 Benchmark: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com> Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/kernels/CpuScaleKernel.cpp')
-rw-r--r--src/cpu/kernels/CpuScaleKernel.cpp34
1 files changed, 20 insertions, 14 deletions
diff --git a/src/cpu/kernels/CpuScaleKernel.cpp b/src/cpu/kernels/CpuScaleKernel.cpp
index e230dfa938..c9e858fc02 100644
--- a/src/cpu/kernels/CpuScaleKernel.cpp
+++ b/src/cpu/kernels/CpuScaleKernel.cpp
@@ -52,62 +52,68 @@ static const std::vector<CpuScaleKernel::ScaleKernel> available_kernels =
{
{
"sve_fp16_scale",
- [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.sve && data.isa.fp16; },
+ [](const ScaleKernelDataTypeISASelectorData & data)
+ {
+ return data.dt == DataType::F16 && data.isa.sve && data.isa.fp16 && data.interpolation_policy != InterpolationPolicy::BILINEAR;
+ },
REGISTER_FP16_SVE(arm_compute::cpu::fp16_sve_scale)
},
{
"sve_fp32_scale",
- [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32 && data.isa.sve; },
+ [](const ScaleKernelDataTypeISASelectorData & data)
+ {
+ return data.dt == DataType::F32 && data.isa.sve && data.interpolation_policy != InterpolationPolicy::BILINEAR;
+ },
REGISTER_FP32_SVE(arm_compute::cpu::fp32_sve_scale)
},
{
"sve_qu8_scale",
- [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8 && data.isa.sve; },
+ [](const ScaleKernelDataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8 && data.isa.sve; },
REGISTER_QASYMM8_SVE(arm_compute::cpu::qasymm8_sve_scale)
},
{
"sve_qs8_scale",
- [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.isa.sve; },
+ [](const ScaleKernelDataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED && data.isa.sve; },
REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::qasymm8_signed_sve_scale)
},
{
"sve_u8_scale",
- [](const DataTypeISASelectorData & data) { return data.dt == DataType::U8 && data.isa.sve; },
+ [](const ScaleKernelDataTypeISASelectorData & data) { return data.dt == DataType::U8 && data.isa.sve; },
REGISTER_INTEGER_SVE(arm_compute::cpu::u8_sve_scale)
},
{
"sve_s16_scale",
- [](const DataTypeISASelectorData & data) { return data.dt == DataType::S16 && data.isa.sve; },
+ [](const ScaleKernelDataTypeISASelectorData & data) { return data.dt == DataType::S16 && data.isa.sve; },
REGISTER_INTEGER_SVE(arm_compute::cpu::s16_sve_scale)
},
{
"neon_fp16_scale",
- [](const DataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.fp16; },
+ [](const ScaleKernelDataTypeISASelectorData & data) { return data.dt == DataType::F16 && data.isa.fp16; },
REGISTER_FP16_NEON(arm_compute::cpu::common_neon_scale<float16_t>)
},
{
"neon_fp32_scale",
- [](const DataTypeISASelectorData & data) { return data.dt == DataType::F32; },
+ [](const ScaleKernelDataTypeISASelectorData & data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::common_neon_scale<float>)
},
{
"neon_qu8_scale",
- [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8; },
+ [](const ScaleKernelDataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8; },
REGISTER_QASYMM8_NEON(arm_compute::cpu::qasymm8_neon_scale)
},
{
"neon_qs8_scale",
- [](const DataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; },
+ [](const ScaleKernelDataTypeISASelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; },
REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::qasymm8_signed_neon_scale)
},
{
"neon_u8_scale",
- [](const DataTypeISASelectorData & data) { return data.dt == DataType::U8; },
+ [](const ScaleKernelDataTypeISASelectorData & data) { return data.dt == DataType::U8; },
REGISTER_INTEGER_NEON(arm_compute::cpu::u8_neon_scale)
},
{
"neon_s16_scale",
- [](const DataTypeISASelectorData & data) { return data.dt == DataType::S16; },
+ [](const ScaleKernelDataTypeISASelectorData & data) { return data.dt == DataType::S16; },
REGISTER_INTEGER_NEON(arm_compute::cpu::s16_neon_scale)
},
};
@@ -115,7 +121,7 @@ static const std::vector<CpuScaleKernel::ScaleKernel> available_kernels =
Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dx, const ITensorInfo *dy,
const ITensorInfo *offsets, ITensorInfo *dst, const ScaleKernelInfo &info)
{
- const auto *uk = CpuScaleKernel::get_implementation(DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() });
+ const auto *uk = CpuScaleKernel::get_implementation(ScaleKernelDataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa(), info.interpolation_policy });
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
@@ -174,7 +180,7 @@ void CpuScaleKernel::configure(const ITensorInfo *src, const ITensorInfo *dx, co
dst,
info));
- const auto *uk = CpuScaleKernel::get_implementation(DataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa() });
+ const auto *uk = CpuScaleKernel::get_implementation(ScaleKernelDataTypeISASelectorData{ src->data_type(), CPUInfo::get().get_isa(), info.interpolation_policy });
ARM_COMPUTE_ERROR_ON_NULLPTR(uk);
_run_method = uk->ukernel;