From b7e8626717b2ef81b0d03284c8f6ffdbe9cd2245 Mon Sep 17 00:00:00 2001 From: Gunes Bayir Date: Mon, 26 Dec 2022 16:24:04 +0000 Subject: Optimize CL Scale/Resize Quantized by removing (de)quant. code This patch removes the quant/dequant code in CLScale and the Resize operator in dynamic fusion. We don't support different quantization information for input and output and in this case the quantization and dequantization is not necessary. The very same optimization was delivered for cpu. It also moves the SCALE_X and SCALE_Y arguments to look-up table from build options in the template writer of Resize. Change-Id: Icd043c8671220c8feea935dd4b24a5b17c6c4ea4 Signed-off-by: Gunes Bayir Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8888 Benchmark: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Viet-Hoa Do Reviewed-by: Gian Marco Iodice Comments-Addressed: Arm Jenkins --- .../gpu/template_writer/cl/ClTemplateResize.cpp | 71 ++++++---------------- 1 file changed, 18 insertions(+), 53 deletions(-) (limited to 'src/dynamic_fusion/sketch/gpu/template_writer') diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp index 7ee79e82af..a2c04d94e5 100644 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp +++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp @@ -66,15 +66,15 @@ TILE(uint, 1, 1, g_dst_indirect_y); if(_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT) { code += R"_( - float xi_f = (g_ind_1 * SCALE_X); - float yi_f = (yo * SCALE_Y); + float xi_f = (g_ind_1 * {{SCALE_X}}); + float yi_f = (yo * {{SCALE_Y}}); )_"; } else { code += R"_( - float xi_f = ((g_ind_1 + 0.5f) * SCALE_X); - float yi_f = ((yo + 0.5f) * SCALE_Y); + float xi_f = ((g_ind_1 + 0.5f) * {{SCALE_X}}); + float yi_f = ((yo + 0.5f) * {{SCALE_Y}}); )_"; } @@ -98,15 +98,15 @@ TILE(uint, 1, 1, g_dst_indirect_y); if(_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT) { code += R"_( - float xi_f = (g_ind_1 * SCALE_X); - float yi_f = (yo * SCALE_Y); + float xi_f = (g_ind_1 * {{SCALE_X}}); + float yi_f = (yo * {{SCALE_Y}}); )_"; } else { code += R"_( - float xi_f = ((g_ind_1 + 0.5f) * SCALE_X - 0.5f); - float yi_f = ((yo + 0.5f) * SCALE_Y - 0.5f); + float xi_f = ((g_ind_1 + 0.5f) * {{SCALE_X}} - 0.5f); + float yi_f = ((yo + 0.5f) * {{SCALE_Y}} - 0.5f); )_"; } @@ -150,37 +150,16 @@ TILE(uint, 1, 1, g_dst_indirect_y); else { code += R"_( - TILE(float, 1, N0, out_f); - TILE(float, 1, N0, in00_f); - TILE(float, 1, N0, in01_f); - TILE(float, 1, N0, in10_f); - TILE(float, 1, N0, in11_f); - const float a = (xi_f - (float)xi); const float b = (1.f - a); const float a1 = (yi_f - (float)yi); const float b1 = (1.f - a1); -)_" - // Dequantize - R"_( - LOOP_UNROLLING(int, n0, 0, 1, N0, - { - in00_f[0].s[n0] = ((float)in00[0].s[n0] - (float){{OFFSET}}) * (float){{SCALE}}; - in01_f[0].s[n0] = ((float)in01[0].s[n0] - (float){{OFFSET}}) * (float){{SCALE}}; - in10_f[0].s[n0] = ((float)in10[0].s[n0] - (float){{OFFSET}}) * (float){{SCALE}}; - in11_f[0].s[n0] = ((float)in11[0].s[n0] - (float){{OFFSET}}) * (float){{SCALE}}; - }) -)_" - // Calculate the output in the floating-point domain - R"_( - out_f[0].v = ((in00_f[0].v * b * b1) + (in01_f[0].v * a * b1) + (in10_f[0].v * b * a1) + (in11_f[0].v * a * a1)); -)_" - // Quantize - R"_( - LOOP_UNROLLING(int, n0, 0, 1, N0, - { - {{dst}}[0].s[n0] = CONVERT_SAT(out_f[0].s[n0] / (float){{SCALE}} + (float){{OFFSET}}, {{DST_DATA_TYPE}}); - }) + + {{dst}}[0].v = CONVERT_SAT( + (CONVERT(in00[0].v, VEC_DATA_TYPE(float, N0)) * b * b1) + + (CONVERT(in01[0].v, VEC_DATA_TYPE(float, N0)) * a * b1) + + (CONVERT(in10[0].v, VEC_DATA_TYPE(float, N0)) * b * a1) + + (CONVERT(in11[0].v, VEC_DATA_TYPE(float, N0)) * a * a1), VEC_DATA_TYPE({{DST_DATA_TYPE}}, N0)); )_"; } } @@ -231,20 +210,11 @@ TagLUT ClTemplateResize::get_tag_lut(const GpuKernelVariableTable &vtable, const lut["DST_DATA_TYPE"] = get_cl_type_from_data_type(_dst->data_type()); lut["CONSTANT_VALUE"] = string_from_pixel_value(0, _src->data_type()); - const bool is_qasymm_bilinear = is_data_type_quantized_asymmetric(_src->data_type()) - && _attributes.interpolation_policy() == InterpolationPolicy::BILINEAR; + const float scale_x = scale_utils::calculate_resize_ratio(_src->dimension(1), _dst->dimension(1), _attributes.align_corners()); + const float scale_y = scale_utils::calculate_resize_ratio(_src->dimension(2), _dst->dimension(2), _attributes.align_corners()); - if(is_qasymm_bilinear) - { - const UniformQuantizationInfo qinfo = _src->quantization_info().uniform(); - lut["SCALE"] = support::cpp11::to_string(qinfo.scale); - lut["OFFSET"] = support::cpp11::to_string(qinfo.offset); - } - else - { - lut["SCALE"] = support::cpp11::to_string(1); - lut["OFFSET"] = support::cpp11::to_string(0); - } + lut["SCALE_X"] = float_to_string_with_full_precision(scale_x); + lut["SCALE_Y"] = float_to_string_with_full_precision(scale_y); return lut; } @@ -256,16 +226,11 @@ CLBuildOptions ClTemplateResize::get_build_options(const IGpuTemplateComponentWr const unsigned int m0 = root_window.y().step(); const unsigned int partial_n0 = _dst->dimension(0) % n0; - const float scale_x = scale_utils::calculate_resize_ratio(_src->dimension(1), _dst->dimension(1), _attributes.align_corners()); - const float scale_y = scale_utils::calculate_resize_ratio(_src->dimension(2), _dst->dimension(2), _attributes.align_corners()); - CLBuildOptions build_opts; build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_n0)); - build_opts.add_option("-DSCALE_X=" + float_to_string_with_full_precision(scale_x)); - build_opts.add_option("-DSCALE_Y=" + float_to_string_with_full_precision(scale_y)); return build_opts; } -- cgit v1.2.1