aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/core/CL/cl_kernels/nhwc/scale.cl29
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp71
-rw-r--r--src/gpu/cl/kernels/ClScaleKernel.cpp16
3 files changed, 27 insertions, 89 deletions
diff --git a/src/core/CL/cl_kernels/nhwc/scale.cl b/src/core/CL/cl_kernels/nhwc/scale.cl
index bccfd6543a..f6a3e0971b 100644
--- a/src/core/CL/cl_kernels/nhwc/scale.cl
+++ b/src/core/CL/cl_kernels/nhwc/scale.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -218,34 +218,17 @@ __kernel void scale_bilinear_nhwc(
// Calculate the output
out[0].v = ((in00[0].v * b * b1) + (in01[0].v * a * b1) + (in10[0].v * b * a1) + (in11[0].v * a * a1));
#else // defined(IS_FLOATING_POINT)
- TILE(float, 1, N0, out_f);
- TILE(float, 1, N0, in00_f);
- TILE(float, 1, N0, in01_f);
- TILE(float, 1, N0, in10_f);
- TILE(float, 1, N0, in11_f);
const float a = (xi_f - (float)xi);
const float b = (1.f - a);
const float a1 = (yi_f - (float)yi);
const float b1 = (1.f - a1);
- // Dequantize
- LOOP_UNROLLING(int, n0, 0, 1, N0,
- {
- in00_f[0].s[n0] = ((float)in00[0].s[n0] - (float)OFFSET) * (float)SCALE;
- in01_f[0].s[n0] = ((float)in01[0].s[n0] - (float)OFFSET) * (float)SCALE;
- in10_f[0].s[n0] = ((float)in10[0].s[n0] - (float)OFFSET) * (float)SCALE;
- in11_f[0].s[n0] = ((float)in11[0].s[n0] - (float)OFFSET) * (float)SCALE;
- })
-
- // Calculate the output in the floating-point domain
- out_f[0].v = ((in00_f[0].v * b * b1) + (in01_f[0].v * a * b1) + (in10_f[0].v * b * a1) + (in11_f[0].v * a * a1));
-
- // Quantize
- LOOP_UNROLLING(int, n0, 0, 1, N0,
- {
- out[0].s[n0] = CONVERT_SAT(out_f[0].s[n0] / (float)SCALE + (float)OFFSET, DST_DATA_TYPE);
- })
+ out[0].v = CONVERT_SAT((CONVERT(in00[0].v, VEC_DATA_TYPE(float, N0)) * b * b1) +
+ (CONVERT(in01[0].v, VEC_DATA_TYPE(float, N0)) * a * b1) +
+ (CONVERT(in10[0].v, VEC_DATA_TYPE(float, N0)) * b * a1) +
+ (CONVERT(in11[0].v, VEC_DATA_TYPE(float, N0)) * a * a1),
+ VEC_DATA_TYPE(DST_DATA_TYPE, N0));
#endif // defined(IS_FLOATING_POINT)
TILE(uint, 1, 1, dst_indirect_y);
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp
index 7ee79e82af..a2c04d94e5 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp
@@ -66,15 +66,15 @@ TILE(uint, 1, 1, g_dst_indirect_y);
if(_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT)
{
code += R"_(
- float xi_f = (g_ind_1 * SCALE_X);
- float yi_f = (yo * SCALE_Y);
+ float xi_f = (g_ind_1 * {{SCALE_X}});
+ float yi_f = (yo * {{SCALE_Y}});
)_";
}
else
{
code += R"_(
- float xi_f = ((g_ind_1 + 0.5f) * SCALE_X);
- float yi_f = ((yo + 0.5f) * SCALE_Y);
+ float xi_f = ((g_ind_1 + 0.5f) * {{SCALE_X}});
+ float yi_f = ((yo + 0.5f) * {{SCALE_Y}});
)_";
}
@@ -98,15 +98,15 @@ TILE(uint, 1, 1, g_dst_indirect_y);
if(_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT)
{
code += R"_(
- float xi_f = (g_ind_1 * SCALE_X);
- float yi_f = (yo * SCALE_Y);
+ float xi_f = (g_ind_1 * {{SCALE_X}});
+ float yi_f = (yo * {{SCALE_Y}});
)_";
}
else
{
code += R"_(
- float xi_f = ((g_ind_1 + 0.5f) * SCALE_X - 0.5f);
- float yi_f = ((yo + 0.5f) * SCALE_Y - 0.5f);
+ float xi_f = ((g_ind_1 + 0.5f) * {{SCALE_X}} - 0.5f);
+ float yi_f = ((yo + 0.5f) * {{SCALE_Y}} - 0.5f);
)_";
}
@@ -150,37 +150,16 @@ TILE(uint, 1, 1, g_dst_indirect_y);
else
{
code += R"_(
- TILE(float, 1, N0, out_f);
- TILE(float, 1, N0, in00_f);
- TILE(float, 1, N0, in01_f);
- TILE(float, 1, N0, in10_f);
- TILE(float, 1, N0, in11_f);
-
const float a = (xi_f - (float)xi);
const float b = (1.f - a);
const float a1 = (yi_f - (float)yi);
const float b1 = (1.f - a1);
-)_"
- // Dequantize
- R"_(
- LOOP_UNROLLING(int, n0, 0, 1, N0,
- {
- in00_f[0].s[n0] = ((float)in00[0].s[n0] - (float){{OFFSET}}) * (float){{SCALE}};
- in01_f[0].s[n0] = ((float)in01[0].s[n0] - (float){{OFFSET}}) * (float){{SCALE}};
- in10_f[0].s[n0] = ((float)in10[0].s[n0] - (float){{OFFSET}}) * (float){{SCALE}};
- in11_f[0].s[n0] = ((float)in11[0].s[n0] - (float){{OFFSET}}) * (float){{SCALE}};
- })
-)_"
- // Calculate the output in the floating-point domain
- R"_(
- out_f[0].v = ((in00_f[0].v * b * b1) + (in01_f[0].v * a * b1) + (in10_f[0].v * b * a1) + (in11_f[0].v * a * a1));
-)_"
- // Quantize
- R"_(
- LOOP_UNROLLING(int, n0, 0, 1, N0,
- {
- {{dst}}[0].s[n0] = CONVERT_SAT(out_f[0].s[n0] / (float){{SCALE}} + (float){{OFFSET}}, {{DST_DATA_TYPE}});
- })
+
+ {{dst}}[0].v = CONVERT_SAT(
+ (CONVERT(in00[0].v, VEC_DATA_TYPE(float, N0)) * b * b1) +
+ (CONVERT(in01[0].v, VEC_DATA_TYPE(float, N0)) * a * b1) +
+ (CONVERT(in10[0].v, VEC_DATA_TYPE(float, N0)) * b * a1) +
+ (CONVERT(in11[0].v, VEC_DATA_TYPE(float, N0)) * a * a1), VEC_DATA_TYPE({{DST_DATA_TYPE}}, N0));
)_";
}
}
@@ -231,20 +210,11 @@ TagLUT ClTemplateResize::get_tag_lut(const GpuKernelVariableTable &vtable, const
lut["DST_DATA_TYPE"] = get_cl_type_from_data_type(_dst->data_type());
lut["CONSTANT_VALUE"] = string_from_pixel_value(0, _src->data_type());
- const bool is_qasymm_bilinear = is_data_type_quantized_asymmetric(_src->data_type())
- && _attributes.interpolation_policy() == InterpolationPolicy::BILINEAR;
+ const float scale_x = scale_utils::calculate_resize_ratio(_src->dimension(1), _dst->dimension(1), _attributes.align_corners());
+ const float scale_y = scale_utils::calculate_resize_ratio(_src->dimension(2), _dst->dimension(2), _attributes.align_corners());
- if(is_qasymm_bilinear)
- {
- const UniformQuantizationInfo qinfo = _src->quantization_info().uniform();
- lut["SCALE"] = support::cpp11::to_string(qinfo.scale);
- lut["OFFSET"] = support::cpp11::to_string(qinfo.offset);
- }
- else
- {
- lut["SCALE"] = support::cpp11::to_string(1);
- lut["OFFSET"] = support::cpp11::to_string(0);
- }
+ lut["SCALE_X"] = float_to_string_with_full_precision(scale_x);
+ lut["SCALE_Y"] = float_to_string_with_full_precision(scale_y);
return lut;
}
@@ -256,16 +226,11 @@ CLBuildOptions ClTemplateResize::get_build_options(const IGpuTemplateComponentWr
const unsigned int m0 = root_window.y().step();
const unsigned int partial_n0 = _dst->dimension(0) % n0;
- const float scale_x = scale_utils::calculate_resize_ratio(_src->dimension(1), _dst->dimension(1), _attributes.align_corners());
- const float scale_y = scale_utils::calculate_resize_ratio(_src->dimension(2), _dst->dimension(2), _attributes.align_corners());
-
CLBuildOptions build_opts;
build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_n0));
- build_opts.add_option("-DSCALE_X=" + float_to_string_with_full_precision(scale_x));
- build_opts.add_option("-DSCALE_Y=" + float_to_string_with_full_precision(scale_y));
return build_opts;
}
diff --git a/src/gpu/cl/kernels/ClScaleKernel.cpp b/src/gpu/cl/kernels/ClScaleKernel.cpp
index 6f16adc657..7f24aa2eb6 100644
--- a/src/gpu/cl/kernels/ClScaleKernel.cpp
+++ b/src/gpu/cl/kernels/ClScaleKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -102,7 +102,6 @@ void ClScaleKernel::configure(const CLCompileContext &compile_context, ITensorIn
float scale_x = 0.f;
float scale_y = 0.f;
std::tie(scale_x, scale_y) = calculate_scale_factors(src, dst, _data_layout, info.align_corners);
- const bool is_qasymm_bilinear = is_data_type_quantized_asymmetric(src->data_type()) && info.interpolation_policy == InterpolationPolicy::BILINEAR;
// Area interpolation behaves as Nearest Neighbour in case of up-sampling
auto interpolation_policy_to_use = info.interpolation_policy;
@@ -141,17 +140,6 @@ void ClScaleKernel::configure(const CLCompileContext &compile_context, ITensorIn
build_opts.add_option_if(info.align_corners, "-DALIGN_CORNERS");
build_opts.add_option_if(is_data_type_float(src->data_type()), "-DIS_FLOATING_POINT");
build_opts.add_option_if_else(info.sampling_policy == SamplingPolicy::CENTER, "-DSAMPLING_POLICY_CENTER", "-DSAMPLING_POLICY_TOP_LEFT");
- if(is_qasymm_bilinear)
- {
- const UniformQuantizationInfo qinfo = src->quantization_info().uniform();
- build_opts.add_option("-DSCALE=" + support::cpp11::to_string(qinfo.scale));
- build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(qinfo.offset));
- }
- else
- {
- build_opts.add_option("-DSCALE=" + support::cpp11::to_string(1));
- build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(0));
- }
}
else if(_data_layout == DataLayout::NCHW)
{
@@ -169,6 +157,8 @@ void ClScaleKernel::configure(const CLCompileContext &compile_context, ITensorIn
build_opts.add_option_if(info.border_mode == BorderMode::CONSTANT, "-DBORDER_MODE_CONSTANT");
build_opts.add_option_if(info.align_corners, "-DALIGN_CORNERS");
build_opts.add_option_if_else(info.sampling_policy == SamplingPolicy::CENTER, "-DSAMPLING_POLICY_CENTER", "-DSAMPLING_POLICY_TOP_LEFT");
+
+ const bool is_qasymm_bilinear = is_data_type_quantized_asymmetric(src->data_type()) && info.interpolation_policy == InterpolationPolicy::BILINEAR;
if(is_qasymm_bilinear)
{
const UniformQuantizationInfo qinfo = src->quantization_info().uniform();