3 files changed, 27 insertions, 89 deletions
diff --git a/src/core/CL/cl_kernels/nhwc/scale.cl b/src/core/CL/cl_kernels/nhwc/scale.cl
index bccfd6543a..f6a3e0971b 100644
--- a/src/core/CL/cl_kernels/nhwc/scale.cl
+++ b/src/core/CL/cl_kernels/nhwc/scale.cl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -218,34 +218,17 @@ __kernel void scale_bilinear_nhwc(
     // Calculate the output
     out[0].v = ((in00[0].v * b * b1) + (in01[0].v * a * b1) + (in10[0].v * b * a1) + (in11[0].v * a * a1));
 #else  // defined(IS_FLOATING_POINT)
-    TILE(float, 1, N0, out_f);
-    TILE(float, 1, N0, in00_f);
-    TILE(float, 1, N0, in01_f);
-    TILE(float, 1, N0, in10_f);
-    TILE(float, 1, N0, in11_f);
 
     const float a  = (xi_f - (float)xi);
     const float b  = (1.f - a);
     const float a1 = (yi_f - (float)yi);
     const float b1 = (1.f - a1);
 
-    // Dequantize
-    LOOP_UNROLLING(int, n0, 0, 1, N0,
-    {
-        in00_f[0].s[n0] = ((float)in00[0].s[n0] - (float)OFFSET) * (float)SCALE;
-        in01_f[0].s[n0] = ((float)in01[0].s[n0] - (float)OFFSET) * (float)SCALE;
-        in10_f[0].s[n0] = ((float)in10[0].s[n0] - (float)OFFSET) * (float)SCALE;
-        in11_f[0].s[n0] = ((float)in11[0].s[n0] - (float)OFFSET) * (float)SCALE;
-    })
-
-    // Calculate the output in the floating-point domain
-    out_f[0].v = ((in00_f[0].v * b * b1) + (in01_f[0].v * a * b1) + (in10_f[0].v * b * a1) + (in11_f[0].v * a * a1));
-
-    // Quantize
-    LOOP_UNROLLING(int, n0, 0, 1, N0,
-    {
-        out[0].s[n0] = CONVERT_SAT(out_f[0].s[n0] / (float)SCALE + (float)OFFSET, DST_DATA_TYPE);
-    })
+    out[0].v = CONVERT_SAT((CONVERT(in00[0].v, VEC_DATA_TYPE(float, N0)) * b * b1) + 
+                   (CONVERT(in01[0].v, VEC_DATA_TYPE(float, N0)) * a * b1) + 
+                   (CONVERT(in10[0].v, VEC_DATA_TYPE(float, N0)) * b * a1) + 
+                   (CONVERT(in11[0].v, VEC_DATA_TYPE(float, N0)) * a * a1), 
+                VEC_DATA_TYPE(DST_DATA_TYPE, N0));
 #endif // defined(IS_FLOATING_POINT)
 
     TILE(uint, 1, 1, dst_indirect_y);
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp
index 7ee79e82af..a2c04d94e5 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp
@@ -66,15 +66,15 @@ TILE(uint, 1, 1, g_dst_indirect_y);
         if(_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT)
         {
             code += R"_(
-    float xi_f = (g_ind_1 * SCALE_X);
-    float yi_f = (yo * SCALE_Y);
+    float xi_f = (g_ind_1 * {{SCALE_X}});
+    float yi_f = (yo * {{SCALE_Y}});
 )_";
         }
         else
         {
             code += R"_(
-    float xi_f = ((g_ind_1 + 0.5f) * SCALE_X);
-    float yi_f = ((yo + 0.5f) * SCALE_Y);
+    float xi_f = ((g_ind_1 + 0.5f) * {{SCALE_X}});
+    float yi_f = ((yo + 0.5f) * {{SCALE_Y}});
 )_";
         }
 
@@ -98,15 +98,15 @@ TILE(uint, 1, 1, g_dst_indirect_y);
         if(_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT)
         {
             code += R"_(
-    float xi_f = (g_ind_1 * SCALE_X);
-    float yi_f = (yo * SCALE_Y);
+    float xi_f = (g_ind_1 * {{SCALE_X}});
+    float yi_f = (yo * {{SCALE_Y}});
 )_";
         }
         else
         {
             code += R"_(
-    float xi_f = ((g_ind_1 + 0.5f) * SCALE_X - 0.5f);
-    float yi_f = ((yo + 0.5f) * SCALE_Y - 0.5f);
+    float xi_f = ((g_ind_1 + 0.5f) * {{SCALE_X}} - 0.5f);
+    float yi_f = ((yo + 0.5f) * {{SCALE_Y}} - 0.5f);
 )_";
         }
 
@@ -150,37 +150,16 @@ TILE(uint, 1, 1, g_dst_indirect_y);
         else
         {
             code += R"_(
-    TILE(float, 1, N0, out_f);
-    TILE(float, 1, N0, in00_f);
-    TILE(float, 1, N0, in01_f);
-    TILE(float, 1, N0, in10_f);
-    TILE(float, 1, N0, in11_f);
-
     const float a  = (xi_f - (float)xi);
     const float b  = (1.f - a);
     const float a1 = (yi_f - (float)yi);
     const float b1 = (1.f - a1);
-)_"
-                    // Dequantize
-                    R"_(
-    LOOP_UNROLLING(int, n0, 0, 1, N0,
-    {
-        in00_f[0].s[n0] = ((float)in00[0].s[n0] - (float){{OFFSET}}) * (float){{SCALE}};
-        in01_f[0].s[n0] = ((float)in01[0].s[n0] - (float){{OFFSET}}) * (float){{SCALE}};
-        in10_f[0].s[n0] = ((float)in10[0].s[n0] - (float){{OFFSET}}) * (float){{SCALE}};
-        in11_f[0].s[n0] = ((float)in11[0].s[n0] - (float){{OFFSET}}) * (float){{SCALE}};
-    })
-)_"
-                    // Calculate the output in the floating-point domain
-                    R"_(
-    out_f[0].v = ((in00_f[0].v * b * b1) + (in01_f[0].v * a * b1) + (in10_f[0].v * b * a1) + (in11_f[0].v * a * a1));
-)_"
-                    // Quantize
-                    R"_(
-    LOOP_UNROLLING(int, n0, 0, 1, N0,
-    {
-        {{dst}}[0].s[n0] = CONVERT_SAT(out_f[0].s[n0] / (float){{SCALE}} + (float){{OFFSET}}, {{DST_DATA_TYPE}});
-    })
+
+    {{dst}}[0].v = CONVERT_SAT(
+        (CONVERT(in00[0].v, VEC_DATA_TYPE(float, N0)) * b * b1) + 
+        (CONVERT(in01[0].v, VEC_DATA_TYPE(float, N0)) * a * b1) +
+        (CONVERT(in10[0].v, VEC_DATA_TYPE(float, N0)) * b * a1) + 
+        (CONVERT(in11[0].v, VEC_DATA_TYPE(float, N0)) * a * a1), VEC_DATA_TYPE({{DST_DATA_TYPE}}, N0));
 )_";
         }
     }
@@ -231,20 +210,11 @@ TagLUT ClTemplateResize::get_tag_lut(const GpuKernelVariableTable &vtable, const
     lut["DST_DATA_TYPE"]   = get_cl_type_from_data_type(_dst->data_type());
     lut["CONSTANT_VALUE"]  = string_from_pixel_value(0, _src->data_type());
 
-    const bool is_qasymm_bilinear = is_data_type_quantized_asymmetric(_src->data_type())
-                                    && _attributes.interpolation_policy() == InterpolationPolicy::BILINEAR;
+    const float scale_x = scale_utils::calculate_resize_ratio(_src->dimension(1), _dst->dimension(1), _attributes.align_corners());
+    const float scale_y = scale_utils::calculate_resize_ratio(_src->dimension(2), _dst->dimension(2), _attributes.align_corners());
 
-    if(is_qasymm_bilinear)
-    {
-        const UniformQuantizationInfo qinfo = _src->quantization_info().uniform();
-        lut["SCALE"]                        = support::cpp11::to_string(qinfo.scale);
-        lut["OFFSET"]                       = support::cpp11::to_string(qinfo.offset);
-    }
-    else
-    {
-        lut["SCALE"]  = support::cpp11::to_string(1);
-        lut["OFFSET"] = support::cpp11::to_string(0);
-    }
+    lut["SCALE_X"] = float_to_string_with_full_precision(scale_x);
+    lut["SCALE_Y"] = float_to_string_with_full_precision(scale_y);
 
     return lut;
 }
@@ -256,16 +226,11 @@ CLBuildOptions ClTemplateResize::get_build_options(const IGpuTemplateComponentWr
     const unsigned int m0          = root_window.y().step();
     const unsigned int partial_n0  = _dst->dimension(0) % n0;
 
-    const float scale_x = scale_utils::calculate_resize_ratio(_src->dimension(1), _dst->dimension(1), _attributes.align_corners());
-    const float scale_y = scale_utils::calculate_resize_ratio(_src->dimension(2), _dst->dimension(2), _attributes.align_corners());
-
     CLBuildOptions build_opts;
 
     build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
     build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
     build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_n0));
-    build_opts.add_option("-DSCALE_X=" + float_to_string_with_full_precision(scale_x));
-    build_opts.add_option("-DSCALE_Y=" + float_to_string_with_full_precision(scale_y));
 
     return build_opts;
 }
diff --git a/src/gpu/cl/kernels/ClScaleKernel.cpp b/src/gpu/cl/kernels/ClScaleKernel.cpp
index 6f16adc657..7f24aa2eb6 100644
--- a/src/gpu/cl/kernels/ClScaleKernel.cpp
+++ b/src/gpu/cl/kernels/ClScaleKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -102,7 +102,6 @@ void ClScaleKernel::configure(const CLCompileContext &compile_context, ITensorIn
     float scale_x = 0.f;
     float scale_y = 0.f;
     std::tie(scale_x, scale_y) = calculate_scale_factors(src, dst, _data_layout, info.align_corners);
-    const bool is_qasymm_bilinear = is_data_type_quantized_asymmetric(src->data_type()) && info.interpolation_policy == InterpolationPolicy::BILINEAR;
 
     // Area interpolation behaves as Nearest Neighbour in case of up-sampling
     auto interpolation_policy_to_use = info.interpolation_policy;
@@ -141,17 +140,6 @@ void ClScaleKernel::configure(const CLCompileContext &compile_context, ITensorIn
         build_opts.add_option_if(info.align_corners, "-DALIGN_CORNERS");
         build_opts.add_option_if(is_data_type_float(src->data_type()), "-DIS_FLOATING_POINT");
         build_opts.add_option_if_else(info.sampling_policy == SamplingPolicy::CENTER, "-DSAMPLING_POLICY_CENTER", "-DSAMPLING_POLICY_TOP_LEFT");
-        if(is_qasymm_bilinear)
-        {
-            const UniformQuantizationInfo qinfo = src->quantization_info().uniform();
-            build_opts.add_option("-DSCALE=" + support::cpp11::to_string(qinfo.scale));
-            build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(qinfo.offset));
-        }
-        else
-        {
-            build_opts.add_option("-DSCALE=" + support::cpp11::to_string(1));
-            build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(0));
-        }
     }
     else if(_data_layout == DataLayout::NCHW)
     {
@@ -169,6 +157,8 @@ void ClScaleKernel::configure(const CLCompileContext &compile_context, ITensorIn
         build_opts.add_option_if(info.border_mode == BorderMode::CONSTANT, "-DBORDER_MODE_CONSTANT");
         build_opts.add_option_if(info.align_corners, "-DALIGN_CORNERS");
         build_opts.add_option_if_else(info.sampling_policy == SamplingPolicy::CENTER, "-DSAMPLING_POLICY_CENTER", "-DSAMPLING_POLICY_TOP_LEFT");
+
+        const bool is_qasymm_bilinear = is_data_type_quantized_asymmetric(src->data_type()) && info.interpolation_policy == InterpolationPolicy::BILINEAR;
         if(is_qasymm_bilinear)
         {
             const UniformQuantizationInfo qinfo = src->quantization_info().uniform();