Setup automatic kernel window for dynamic fusion

The window of the fused kernels is equal to the root kernel's window. For this reason, the following kernels does not change the window and intermediate tensors have the same shape and info as the destination tensor. Resolves: COMPMID-5152 Change-Id: I25fe2fab8304ecaabfc2e4ade9bbf31a600a5033 Signed-off-by: Gunes Bayir <gunes.bayir@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7316 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: SiCong Li <sicong.li@arm.com>
author: Gunes Bayir <gunes.bayir@arm.com> 2022-03-10 21:21:01 +0000
committer: Gunes Bayir <gunes.bayir@arm.com> 2022-03-17 17:41:21 +0000
commit: 8a87983c90299dfc7d6fbda3dba312e7603d7074 (patch)
tree: ad1299cd902e6b2e5662f3f6e1b8fd12835b8469 /tests/validation/CL/UNIT
parent: 193cad36d8ff70792562390b554304cc19284f61 (diff)
download: ComputeLibrary-8a87983c90299dfc7d6fbda3dba312e7603d7074.tar.gz
1 files changed, 1 insertions, 93 deletions
diff --git a/tests/validation/CL/UNIT/dynamic_fusion/ClCompositeKernel.cpp b/tests/validation/CL/UNIT/dynamic_fusion/ClCompositeKernel.cpp
index c4e7033914..753e0a4625 100644
--- a/tests/validation/CL/UNIT/dynamic_fusion/ClCompositeKernel.cpp
+++ b/tests/validation/CL/UNIT/dynamic_fusion/ClCompositeKernel.cpp
@@ -21,7 +21,6 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-
 #if defined(ENABLE_EXPERIMENTAL_DYNAMIC_FUSION)
 
 #include "src/gpu/cl/kernels/experimental/dynamic_fusion/ClCompositeKernel.h"
@@ -77,85 +76,6 @@ void fill(U &&tensor, int seed)
     library->fill_borders_with_garbage(tensor, distribution_inf, seed);
 }
 
-using ElementsProcessed = Steps;
-std::pair<Status, Window> mock_gemm_native_validate_and_configure_window(ITensorInfo *src0, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const GEMMLHSMatrixInfo &lhs_info,
-                                                                         const GEMMRHSMatrixInfo &rhs_info,
-                                                                         const GEMMKernelInfo &gemm_info, ElementsProcessed &num_elements_processed)
-{
-    unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0];
-    unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1];
-    bool          reinterpret_input_as_3d             = gemm_info.reinterpret_input_as_3d;
-    bool          reinterpret_output_as_3d            = gemm_info.depth_output_gemm3d != 0;
-
-    Window win{};
-    Window win_out{};
-    bool   window_changed = false;
-
-    // In case both input and dst have to be reinterpreted as 3D tensors,
-    // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false.
-    if(reinterpret_input_as_3d == reinterpret_output_as_3d)
-    {
-        reinterpret_output_as_3d = false;
-    }
-
-    // dst tensor auto initialization if not yet initialized
-    auto_init_if_empty(*dst, src0->clone()->set_tensor_shape(misc::shape_calculator::compute_mm_shape(*src0, *src1, gemm_info)));
-
-    TensorInfo tmp_info(*dst);
-
-    if(reinterpret_output_as_3d)
-    {
-        // Since the dst tensor has to be reinterpreted as 3D and the execute window is based on a 2D GEMM,
-        // the window needs to be constructed on the 2D collapsed version of the tensor
-        TensorShape tmp_shape(dst->tensor_shape());
-        tmp_shape.collapse(2U, 1U);
-        tmp_info.set_tensor_shape(tmp_shape);
-    }
-
-    // Configure kernel window
-    num_elems_processed_per_iteration_x = rhs_info.n0;
-    num_elems_processed_per_iteration_y = lhs_info.m0;
-
-    win     = calculate_max_window(tmp_info, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
-    win_out = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
-
-    AccessWindowStatic src0_access(src0, 0, 0,
-                                   src0->dimension(0),
-                                   src0->dimension(1));
-    AccessWindowStatic src1_access(src1, 0, 0,
-                                   ceil_to_multiple(src1->dimension(0), num_elems_processed_per_iteration_x),
-                                   src1->dimension(1));
-    AccessWindowStatic dst_access(dst, 0, 0,
-                                  dst->dimension(0),
-                                  dst->dimension(1));
-
-    if(src2 != nullptr)
-    {
-        const int bias_processed_per_iteration_x = num_elems_processed_per_iteration_x;
-
-        AccessWindowStatic src2_access(src2, 0, 0,
-                                       ceil_to_multiple(src2->dimension(0), bias_processed_per_iteration_x),
-                                       src2->dimension(1));
-
-        window_changed = update_window_and_padding(win, src0_access, src1_access, src2_access) || // window used by the execute_window_loop
-                         update_window_and_padding(win_out, dst_access);                          // window used to update the padding requirements of dst tensor
-    }
-    else
-    {
-        window_changed = update_window_and_padding(win, src0_access, src1_access) || // window used by the execute_window_loop
-                         update_window_and_padding(win_out, dst_access);             // window used to update the padding requirements of dst tensor
-    }
-
-    // Collapse along the Z direction
-    // This collapse needs to be here in order to tune the Z dimension of LWS
-    Window             collapsed             = win;
-    const unsigned int dimension_to_collapse = std::min(static_cast<unsigned int>(dst->num_dimensions()), 2u);
-    collapsed                                = win.collapse(win, dimension_to_collapse);
-
-    Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
-    return std::make_pair(err, collapsed);
-}
-
 void set_build_options(ClKernelCode &cl_code, GemmNativeDescriptor gemm_native_desc,
                        const TensorInfo &t_lhs_info,
                        const TensorInfo &t_rhs_info,
@@ -241,7 +161,7 @@ TEST_CASE(MoveNet_SubGraph_1, framework::DatasetMode::ALL)
     const auto t_dst_shape = TensorShape(n, m);
     auto       t_lhs_info  = TensorInfo(t_lhs_shape, 1, data_type);
     auto       t_rhs_info  = TensorInfo(t_rhs_shape, 1, data_type);
-    const auto t_bias_info = TensorInfo(TensorShape(), 1, DataType::F32);
+    auto       t_bias_info = TensorInfo(TensorShape(), 1, DataType::F32);
     auto       t_dst_info  = TensorInfo(t_dst_shape, 1, data_type);
 
     const ClTensorDescriptor t_lhs_desc{ &t_lhs_info, 2 };
@@ -270,7 +190,6 @@ TEST_CASE(MoveNet_SubGraph_1, framework::DatasetMode::ALL)
     ArgumentID tid_acc;
     st = add_tensor_intermed(bp, tid_acc);
     st = add_kcomp_gemm_native(bp, common_kernel_desc, gemm_native_desc, tid_lhs, tid_rhs, tid_l0_bias, tid_acc);
-
     st = add_kcomp_eltwise_add(bp, common_kernel_desc, EltwiseAddDescriptor{}, tid_l1_addend, tid_acc, tid_acc);
     st = add_kcomp_store(bp, common_kernel_desc, tid_acc, tid_dst, StoreType::StoreBlockBoundaryAware);
 
@@ -278,13 +197,7 @@ TEST_CASE(MoveNet_SubGraph_1, framework::DatasetMode::ALL)
 
     st = set_tile_info(bp, store_tile_info);
     st = build(cl_code, ClCodeBuilderContext{ GpuInfo{ GPUTarget::G71 } }, bp);
-
     set_build_options(cl_code, gemm_native_desc, t_lhs_info, t_rhs_info, nullptr, t_dst_info);
-    ElementsProcessed num_elements_processed{};
-    auto              win_config = mock_gemm_native_validate_and_configure_window(&t_lhs_info, &t_rhs_info, nullptr, &t_dst_info, gemm_native_desc.lhs_info, gemm_native_desc.rhs_info, gemm_info,
-                                                                                  num_elements_processed);
-    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-    cl_code.window = win_config.second;
 
     ClExecutionDescriptor exec_desc;
     st = tune_static(exec_desc, cl_code);
@@ -432,11 +345,6 @@ TEST_CASE(MoveNet_SubGraph_1, framework::DatasetMode::ALL)
         st = set_tile_info(bp, store_tile_info);
         st = build(cl_code, ClCodeBuilderContext{ GpuInfo{ GPUTarget::G71 } }, bp);
         set_build_options(cl_code, gemm_native_desc, t_lhs_info, t_rhs_info, nullptr, t_dst_info);
-        ElementsProcessed num_elements_processed{};
-        auto              win_config = mock_gemm_native_validate_and_configure_window(&t_lhs_info, &t_rhs_info, nullptr, &t_dst_info, gemm_native_desc.lhs_info, gemm_native_desc.rhs_info, gemm_info,
-                                                                                      num_elements_processed);
-        ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-        cl_code.window = win_config.second;
         TOCK(cond0_build_time, measurements)
 
         TICK(cond0_tune_time)
author	Gunes Bayir <gunes.bayir@arm.com>	2022-03-10 21:21:01 +0000
committer	Gunes Bayir <gunes.bayir@arm.com>	2022-03-17 17:41:21 +0000
commit	8a87983c90299dfc7d6fbda3dba312e7603d7074 (patch)
tree	ad1299cd902e6b2e5662f3f6e1b8fd12835b8469 /tests/validation/CL/UNIT
parent	193cad36d8ff70792562390b554304cc19284f61 (diff)
download	ComputeLibrary-8a87983c90299dfc7d6fbda3dba312e7603d7074.tar.gz