path: root/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp
Diffstat (limited to 'src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp')
-rw-r--r--  src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp  409
1 file changed, 0 insertions(+), 409 deletions(-)
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp
deleted file mode 100644
index 811cd79811..0000000000
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp
+++ /dev/null
@@ -1,409 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
-
-#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h"
-
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "src/core/CL/ICLKernel.h"
-#include "src/core/helpers/AutoConfiguration.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
-
-#include "arm_compute/runtime/CL/CLScheduler.h"
-namespace arm_compute
-{
-namespace experimental
-{
-namespace dynamic_fusion
-{
-ComponentType ClDirectConvolutionKernelComponent::get_component_type() const
-{
- return ComponentType::Complex;
-}
-
-std::set<std::string> ClDirectConvolutionKernelComponent::get_headers_list() const
-{
- return std::set<std::string> { "helpers.h", "tile_helpers.h" };
-}
-
-Window ClDirectConvolutionKernelComponent::get_window() const
-{
- const auto src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id);
- const auto weight_info = _blueprint->impl().get_kernel_argument_info(_weight.arg_id);
- auto dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
-
- // Get dst shape
- PadStrideInfo pad_stride_info
- {
- static_cast<unsigned int>(_desc.conv2d.stride.x()),
- static_cast<unsigned int>(_desc.conv2d.stride.y()),
- static_cast<unsigned int>(_desc.conv2d.pad.left),
- static_cast<unsigned int>(_desc.conv2d.pad.right),
- static_cast<unsigned int>(_desc.conv2d.pad.top),
- static_cast<unsigned int>(_desc.conv2d.pad.bottom),
- DimensionRoundingType::FLOOR /*default rounding type*/
- };
- TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src_info, *weight_info, pad_stride_info);
-
- // Output auto initialization if not yet initialized
- auto_init_if_empty(*dst_info, output_shape,
- 1,
- src_info->data_type(),
- src_info->quantization_info());
-
- const unsigned int vec_size = std::min(static_cast<unsigned int>(dst_info->tensor_shape()[0]), 4u);
- const unsigned int num_rows = (dst_info->tensor_shape()[0] > 16) ? ((src_info->data_type() == DataType::F32) ? 2U : 4U) : 1U;
- // const unsigned int num_rows = 1;
- // const unsigned int vec_size = tile_info.tile_dims.x();
- // const unsigned int num_rows = tile_info.tile_dims.y();
-
- // Create and configure kernel window
- Window win = calculate_max_window(output_shape, Steps(vec_size, num_rows));
-
- const size_t dim_y_collapsed = ceil_to_multiple(output_shape[1] * output_shape[2], num_rows);
- win.set(Window::DimY, Window::Dimension(0, dim_y_collapsed, num_rows));
- win.set(Window::DimZ, Window::Dimension(0, output_shape.total_size_upper(3), 1));
-
- return win;
-}
-
-std::string ClDirectConvolutionKernelComponent::get_additional_macros() const
-{
- return R"_()_"; // no macros
-}
-
-std::string ClDirectConvolutionKernelComponent::get_component_code() const
-{
- const auto src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id);
- const auto bias_info = _blueprint->impl().get_kernel_argument_info(_bias.arg_id);
-
- ARM_COMPUTE_ERROR_ON_MSG(src_info->data_layout() != DataLayout::NHWC, "Only NHWC data layout is supported by this component.");
-
- const auto channel_idx = get_data_layout_dimension_index(src_info->data_layout(), DataLayoutDimension::CHANNEL);
- const auto k0 = adjust_vec_size(is_data_type_quantized(src_info->data_type()) ? 16u : 8u, src_info->dimension(channel_idx));
- const bool leftover_loop = (src_info->dimension(channel_idx) % k0) != 0;
-
- std::string code = R"_(
- //------------------ START KERNEL {{meta_kernel_id}} ---------------------
- // IN_0(src) {{src}}
- // IN_1(wei) {{weight}}
- )_";
- if(bias_info != nullptr)
- {
- code += R"_(
- // IN_2(bia) {{bias}}
- )_";
- }
- code += R"_(
- // OUT(dst, accum) {{dst}}
-
- // Initialize the accumulators
- TILE({{ACC_DATA_TYPE}}, M0, N0, {{dst}});
- {
- // All the tensor dimensions are passed at compile time.
- // In case of dynamic tensor support, the following dimensions should be passed as function argument.
- #define _IWEI_WIDTH {{WEI_WIDTH}}
- #define _IWEI_HEIGHT {{WEI_HEIGHT}}
- #define _ISRC_WIDTH {{src}}_w
- #define _ISRC_HEIGHT {{src}}_h
- #define _ISRC_CHANNELS {{src}}_c
- #define _IDST_WIDTH {{arg_dst}}_w
- #define _IDST_HEIGHT {{arg_dst}}_h
- #define _IDST_CHANNELS {{arg_dst}}_c
- #define _IY_MULTIPLIER (_IWEI_WIDTH * _IWEI_HEIGHT)
-
- // .v = access the whole vector (OpenCL vector)
- // .s[x] = access the vector element at position x (scalar access)
- TILE(int, M0, 1, xi);
- TILE(int, M0, 1, yi);
-
- // Convert the linear index to coordinate
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- xi[i].v = ((mout + i) % _IDST_WIDTH) * {{STRIDE_X}};
- yi[i].v = ((mout + i) / _IDST_WIDTH) * {{STRIDE_Y}};
- xi[i].v -= {{PAD_LEFT}};
- yi[i].v -= {{PAD_TOP}};
- })
-
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- {{dst}}[i].v = 0;
- })
-
- for(int i = 0; i < (_IWEI_WIDTH * _IWEI_HEIGHT); ++i)
- {
- int ck = 0;
- int xk = i % _IWEI_WIDTH;
- int yk = i / _IWEI_WIDTH;
-
- int k = 0;
- for(; k <= (_ISRC_CHANNELS - K0); k += K0)
- {
- TILE({{SRC_DATA_TYPE}}, M0, K0, a);
- TILE({{WEI_DATA_TYPE}}, N0, K0, b);
-
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- a[i].v = {{ZERO_VALUE}};
- })
-
- // Load tile from the src tensor
- T_LOAD_NHWC_INDIRECT({{SRC_DATA_TYPE}}, M0, K0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, {{src}}_stride_y, xi, yi, a);
-
- // Load tile from the weights tensor
- T_LOAD({{WEI_DATA_TYPE}}, N0, K0, {{WEI_TENSOR_TYPE}}, {{weight}}, ck, cout * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b);
-
- // Compute the matrix multiplication between two tiles
- T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, K0, NT, T, a, b, {{dst}});
-
- ck += K0;
- }
-
- // The channel count (_ISRC_CHANNELS) is known at code-generation time here.
- // This generation-time left-over handling should be revisited in case of dynamic tensor support
- )_";
-
- if(leftover_loop)
- {
- code += R"_(
- // Left-over accumulations
- for(; k < _ISRC_CHANNELS; ++k)
- {
- TILE({{SRC_DATA_TYPE}}, M0, 1, a);
- TILE({{WEI_DATA_TYPE}}, N0, 1, b);
-
- LOOP_UNROLLING(int, i, 0, 1, M0,
- {
- a[i].v = {{ZERO_VALUE}};
- })
-
- // Load tile from the src tensor
- T_LOAD_NHWC_INDIRECT({{SRC_DATA_TYPE}}, M0, 1, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, {{src}}_stride_y, xi, yi, a);
-
- // Load tile from the weights tensor
- // The T_LOAD for the left-over elements can only use BUFFER because we load one element per iteration
- T_LOAD({{WEI_DATA_TYPE}}, N0, 1, BUFFER, {{weight}}, ck, cout * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b);
-
- // Compute the matrix multiplication between two tiles
- T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, 1, NT, T, a, b, {{dst}});
-
- ++ck;
- }
- )_";
- }
-
- code += R"_(
- #undef _IWEI_WIDTH
- #undef _IWEI_HEIGHT
- #undef _ISRC_WIDTH
- #undef _ISRC_HEIGHT
- #undef _ISRC_CHANNELS
- #undef _IDST_WIDTH
- #undef _IDST_HEIGHT
- #undef _IDST_CHANNELS
- #undef _IY_MULTIPLIER
-
- }
- )_";
-
- if(bias_info != nullptr)
- {
- code += R"_(
- TILE({{BIA_DATA_TYPE}}, 1, N0, bias0);
-
- T_LOAD({{BIA_DATA_TYPE}}, 1, N0, BUFFER, {{bias}}, cout, 0, 1, 0, bias0);
-
- // c = c + bias[broadcasted]
- T_ELTWISE_BROADCAST_ADD_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, bias0, {{dst}});
- )_";
- }
-
- code += R"_(
- }
-//------------------ END KERNEL {{meta_kernel_id}} ---------------------
- )_";
- return code;
-}
-
-bool export_to_cl_image_support(const ITensorInfo *tensor, GPUTarget gpu_target, DataLayout data_layout)
-{
- if(tensor->tensor_shape()[0] % 4 || (data_layout != DataLayout::NHWC))
- {
- return false;
- }
-
- // If not floating point
- if(!is_data_type_float(tensor->data_type()))
- {
- return false;
- }
-
- if(gpu_target == GPUTarget::G71 || get_arch_from_target(gpu_target) == GPUTarget::MIDGARD)
- {
- return false;
- }
-
- // Check if the cl_khr_image2d_from_buffer extension is supported on the target platform
- if(!image2d_from_buffer_supported(CLKernelLibrary::get().get_device()))
- {
- return false;
- }
-
- // Check cl image pitch alignment
- if(get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) == 0)
- {
- return false;
- }
-
- const size_t image_w = tensor->tensor_shape()[0] / 4;
- const size_t image_h = tensor->tensor_shape()[1] * tensor->tensor_shape()[2] * tensor->tensor_shape()[3];
- const size_t max_image_w = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>();
- const size_t max_image_h = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>();
-
- if(image_w > max_image_w || image_h > max_image_h)
- {
- return false;
- }
-
- return true;
-}
-
-CLBuildOptions ClDirectConvolutionKernelComponent::generate_build_options() const
-{
- const auto src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id);
- auto weight_info = _blueprint->impl().get_kernel_argument_info(_weight.arg_id);
- const auto dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
- // const auto tile_info = _blueprint->impl().get_tile_info();
-
- const unsigned int channel_idx = get_data_layout_dimension_index(src_info->data_layout(), DataLayoutDimension::CHANNEL);
- const DataType data_type = src_info->data_type();
- const GPUTarget gpu_target = CLScheduler::get().target();
-
- const unsigned int n0 = _blueprint->impl().get_execution_window().x().step();
- const unsigned int m0 = _blueprint->impl().get_execution_window().y().step();
- const unsigned int k0 = adjust_vec_size(is_data_type_quantized(data_type) ? 16u : 8u, src_info->dimension(channel_idx));
- const unsigned int partial_store_n0 = dst_info->dimension(0) % n0;
- const bool export_to_cl_image = export_to_cl_image_support(weight_info, gpu_target, src_info->data_layout());
-
- // Update the padding for the weights tensor if we can export to cl_image
- if(export_to_cl_image)
- {
- arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(weight_info);
- }
-
- CLBuildOptions build_opts{};
- build_opts.add_option("-cl-fast-relaxed-math");
- build_opts.add_option("-DIS_TILED");
- build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
- build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
- build_opts.add_option("-DK0=" + support::cpp11::to_string(k0));
- build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
-
- return build_opts;
-}
-
-void ClDirectConvolutionKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const
-{
- const auto src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id);
- const auto weight_info = _blueprint->impl().get_kernel_argument_info(_weight.arg_id);
-
- vtable.add(_src, _blueprint->impl().group(_src.arg_id), ClKernelArgDescriptor(_src.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "src");
-
- const GPUTarget gpu_target = CLScheduler::get().target();
- const bool export_to_cl_image = export_to_cl_image_support(weight_info, gpu_target, src_info->data_layout());
- const ClKernelTensorArgType weight_type = export_to_cl_image ? ClKernelTensorArgType::Tensor_4D_t_Image : ClKernelTensorArgType::Tensor_4D_t_Buffer;
- vtable.add(_weight, _blueprint->impl().group(_weight.arg_id), ClKernelArgDescriptor(_weight.arg_id, weight_type), "weight");
-
- if(!_bias.is_empty()) // optional bias
- {
- vtable.add(_bias, _blueprint->impl().group(_bias.arg_id), ClKernelArgDescriptor(_bias.arg_id, ClKernelTensorArgType::Vector), "bias");
- }
- vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst");
-}
-
-ClDirectConvolutionKernelComponent::TagLUT ClDirectConvolutionKernelComponent::get_tag_lut(const SharedVarTable &vtable) const
-{
- TagLUT lut{};
-
- const auto src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id);
- const auto weight_info = _blueprint->impl().get_kernel_argument_info(_weight.arg_id);
- const auto bias_info = _blueprint->impl().get_kernel_argument_info(_bias.arg_id);
-
- // Arguments and global shared variables
- lut["src"] = vtable.get(_src);
- lut["weight"] = vtable.get(_weight);
-
- if(!_bias.is_empty()) // optional bias
- {
- lut["bias"] = vtable.get(_bias);
- lut["BIA_DATA_TYPE"] = get_cl_type_from_data_type(bias_info->data_type());
- }
- lut["dst"] = vtable.get(_dst);
-
- const auto dst_argument = _blueprint->impl().get_argument_shared_vars().get_dst_var();
- lut["arg_dst"] = dst_argument.uniq_name;
-
- // Local build options
- lut["meta_kernel_id"] = id();
- lut["ACC_DATA_TYPE"] = src_info->data_type();
- lut["SRC_DATA_TYPE"] = src_info->data_type();
- lut["WEI_DATA_TYPE"] = weight_info->data_type();
-
- lut["SRC_TENSOR_TYPE"] = "BUFFER";
- switch(vtable.get(_weight).desc.tensor_arg_type)
- {
- case ClKernelTensorArgType::Image_Export_To_ClImage2D:
- case ClKernelTensorArgType::Image_3D_Export_To_ClImage2D:
- case ClKernelTensorArgType::Tensor_4D_t_Image:
- {
- lut["WEI_TENSOR_TYPE"] = "IMAGE";
- break;
- }
- default:
- {
- lut["WEI_TENSOR_TYPE"] = "BUFFER";
- break;
- }
- }
- const auto width_idx = get_data_layout_dimension_index(src_info->data_layout(), DataLayoutDimension::WIDTH);
- const auto height_idx = get_data_layout_dimension_index(src_info->data_layout(), DataLayoutDimension::HEIGHT);
- lut["WEI_WIDTH"] = weight_info->dimension(width_idx);
- lut["WEI_HEIGHT"] = weight_info->dimension(height_idx);
-
- lut["STRIDE_X"] = _desc.conv2d.stride.x();
- lut["STRIDE_Y"] = _desc.conv2d.stride.y();
-
- lut["PAD_LEFT"] = _desc.conv2d.pad.left;
- lut["PAD_TOP"] = _desc.conv2d.pad.top;
-
- lut["ZERO_VALUE"] = 0;
-
- return lut;
-}
-} // namespace dynamic_fusion
-} // namespace experimental
-} // namespace arm_compute
-#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
\ No newline at end of file
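
Note: the {{...}} placeholders embedded in the code string returned by get_component_code() are resolved against the TagLUT produced by get_tag_lut() before the final OpenCL source is assembled. The real substitution machinery lives elsewhere in the ClKernelBuildingImpl code; the sketch below is only a minimal, self-contained illustration (a hypothetical substitute_tags helper and a plain std::map standing in for the real TagLUT/SharedVarTable types) of how such a tag lookup can be applied to a template string.

    #include <cstddef>
    #include <iostream>
    #include <map>
    #include <string>

    // Hypothetical stand-in for the real TagLUT: tag name -> replacement text.
    using SimpleTagLut = std::map<std::string, std::string>;

    // Replace every "{{tag}}" occurrence in 'tmpl' with its value from 'lut'.
    // Unknown tags are left untouched; replacements are not expanded recursively.
    std::string substitute_tags(std::string tmpl, const SimpleTagLut &lut)
    {
        for(std::size_t pos = tmpl.find("{{"); pos != std::string::npos; pos = tmpl.find("{{", pos))
        {
            const std::size_t end = tmpl.find("}}", pos);
            if(end == std::string::npos)
            {
                break; // malformed tag, stop scanning
            }
            const std::string tag = tmpl.substr(pos + 2, end - pos - 2);
            const auto        it  = lut.find(tag);
            if(it != lut.end())
            {
                tmpl.replace(pos, end - pos + 2, it->second);
                pos += it->second.size(); // skip past the inserted text
            }
            else
            {
                pos = end + 2; // leave unknown tag as-is
            }
        }
        return tmpl;
    }

    int main()
    {
        const SimpleTagLut lut{ { "src", "src_tensor" }, { "STRIDE_X", "1" }, { "PAD_LEFT", "0" } };
        std::cout << substitute_tags("xi[i].v = ((mout + i) % _IDST_WIDTH) * {{STRIDE_X}} - {{PAD_LEFT}}; // from {{src}}\n", lut);
        return 0;
    }

In the component above the same idea is driven by the blueprint: the shared tensor variables ("src", "weight", "bias", "dst") are obtained from the SharedVarTable populated in allocate_shared_vars(), while per-kernel values such as strides, pads and data types are written into the LUT directly by get_tag_lut().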