diff options
Diffstat (limited to 'src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components')
9 files changed, 0 insertions, 1387 deletions
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp deleted file mode 100644 index 811cd79811..0000000000 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.cpp +++ /dev/null @@ -1,409 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION - -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h" - -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "src/core/CL/ICLKernel.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" -#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h" - -#include "arm_compute/runtime/CL/CLScheduler.h" -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ComponentType ClDirectConvolutionKernelComponent::get_component_type() const -{ - return ComponentType::Complex; -} - -std::set<std::string> ClDirectConvolutionKernelComponent::get_headers_list() const -{ - return std::set<std::string> { "helpers.h", "tile_helpers.h" }; -} - -Window ClDirectConvolutionKernelComponent::get_window() const -{ - const auto src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id); - const auto weight_info = _blueprint->impl().get_kernel_argument_info(_weight.arg_id); - auto dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - - // Get dst shape - PadStrideInfo pad_stride_info - { - static_cast<unsigned int>(_desc.conv2d.stride.x()), - static_cast<unsigned int>(_desc.conv2d.stride.y()), - static_cast<unsigned int>(_desc.conv2d.pad.left), - static_cast<unsigned int>(_desc.conv2d.pad.right), - static_cast<unsigned int>(_desc.conv2d.pad.top), - static_cast<unsigned int>(_desc.conv2d.pad.bottom), - DimensionRoundingType::FLOOR /*default rounding type*/ - }; - TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src_info, *weight_info, pad_stride_info); - - // Output auto initialization if not yet initialized - auto_init_if_empty(*dst_info, output_shape, - 1, - src_info->data_type(), - src_info->quantization_info()); - - const unsigned int vec_size = std::min(static_cast<unsigned int>(dst_info->tensor_shape()[0]), 4u); - const unsigned int num_rows = (dst_info->tensor_shape()[0] > 16) ? ((src_info->data_type() == DataType::F32) ? 2U : 4U) : 1U; - // const unsigned int num_rows = 1; - // const unsigned int vec_size = tile_info.tile_dims.x(); - // const unsigned int num_rows = tile_info.tile_dims.y(); - - // Create and configure kernel window - Window win = calculate_max_window(output_shape, Steps(vec_size, num_rows)); - - const size_t dim_y_collapsed = ceil_to_multiple(output_shape[1] * output_shape[2], num_rows); - win.set(Window::DimY, Window::Dimension(0, dim_y_collapsed, num_rows)); - win.set(Window::DimZ, Window::Dimension(0, output_shape.total_size_upper(3), 1)); - - return win; -} - -std::string ClDirectConvolutionKernelComponent::get_additional_macros() const -{ - return R"_()_"; // no macros -} - -std::string ClDirectConvolutionKernelComponent::get_component_code() const -{ - const auto src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id); - const auto bias_info = _blueprint->impl().get_kernel_argument_info(_bias.arg_id); - - ARM_COMPUTE_ERROR_ON_MSG(src_info->data_layout() != DataLayout::NHWC, "Only NHWC data layout is supported by this component."); - - const auto channel_idx = get_data_layout_dimension_index(src_info->data_layout(), DataLayoutDimension::CHANNEL); - const auto k0 = adjust_vec_size(is_data_type_quantized(src_info->data_type()) ? 16u : 8u, src_info->dimension(channel_idx)); - const bool leftover_loop = (src_info->dimension(channel_idx) % k0) != 0; - - std::string code = R"_( - //------------------ START KERNEL {{meta_kernel_id}} --------------------- - // IN_0(src) {{src}} - // IN_1(wei) {{weight}} - )_"; - if(bias_info != nullptr) - { - code += R"_( - // IN_1(bia) {{bias}} - )_"; - } - code += R"_( - // OUT(dst, accum) {{dst}} - - // Initialize the accumulators - TILE({{ACC_DATA_TYPE}}, M0, N0, {{dst}}); - { - // All the tensor dimensions are passed at compile time. - // In case of dynamic tensor support, the following dimensions should be passed as function argument. - #define _IWEI_WIDTH {{WEI_WIDTH}} - #define _IWEI_HEIGHT {{WEI_HEIGHT}} - #define _ISRC_WIDTH {{src}}_w - #define _ISRC_HEIGHT {{src}}_h - #define _ISRC_CHANNELS {{src}}_c - #define _IDST_WIDTH {{arg_dst}}_w - #define _IDST_HEIGHT {{arg_dst}}_h - #define _IDST_CHANNELS {{arg_dst}}_c - #define _IY_MULTIPLIER (_IWEI_WIDTH * _IWEI_HEIGHT) - - // .v = access the whole vector (OpenCL vector) - // .s[x] = access the vector element at position x (scalar access) - TILE(int, M0, 1, xi); - TILE(int, M0, 1, yi); - - // Convert the linear index to coordinate - LOOP_UNROLLING(int, i, 0, 1, M0, - { - xi[i].v = ((mout + i) % _IDST_WIDTH) * {{STRIDE_X}}; - yi[i].v = ((mout + i) / _IDST_WIDTH) * {{STRIDE_Y}}; - xi[i].v -= {{PAD_LEFT}}; - yi[i].v -= {{PAD_TOP}}; - }) - - LOOP_UNROLLING(int, i, 0, 1, M0, - { - {{dst}}[i].v = 0; - }) - - for(int i = 0; i < (_IWEI_WIDTH * _IWEI_HEIGHT); ++i) - { - int ck = 0; - int xk = i % _IWEI_WIDTH; - int yk = i / _IWEI_HEIGHT; - - int k = 0; - for(; k <= (_ISRC_CHANNELS - K0); k += K0) - { - TILE({{SRC_DATA_TYPE}}, M0, K0, a); - TILE({{WEI_DATA_TYPE}}, N0, K0, b); - - LOOP_UNROLLING(int, i, 0, 1, M0, - { - a[i].v = {{ZERO_VALUE}}; - }) - - // Load tile from the src tensor - T_LOAD_NHWC_INDIRECT({{SRC_DATA_TYPE}}, M0, K0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, {{src}}_stride_y, xi, yi, a); - - // Load tile from the weights tensor - T_LOAD({{WEI_DATA_TYPE}}, N0, K0, {{WEI_TENSOR_TYPE}}, {{weight}}, ck, cout * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b); - - // Compute the matrix multiplication between two tiles - T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, K0, NT, T, a, b, {{dst}}); - - ck += K0; - } - - // We voluntarily use SRC_CHANNELS rather than _DSRC_CHANNELS - // This #if directive should be removed in case of dynamic tensor support - )_"; - - if(leftover_loop) - { - code += R"_( - // Left-over accumulations - for(; k < _ISRC_CHANNELS; ++k) - { - TILE({{SRC_DATA_TYPE}}, M0, 1, a); - TILE({{WEI_DATA_TYPE}}, N0, 1, b); - - LOOP_UNROLLING(int, i, 0, 1, M0, - { - a[i].v = {{ZERO_VALUE}}; - }) - - // Load tile from the src tensor - T_LOAD_NHWC_INDIRECT({{SRC_DATA_TYPE}}, M0, 1, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yk, xk, ck, _ISRC_WIDTH, _ISRC_HEIGHT, {{src}}_stride_y, xi, yi, a); - - // Load tile from the weights tensor - // The T_LOAD for the left-over elements can only use BUFFER because we load one element per iteration - T_LOAD({{WEI_DATA_TYPE}}, N0, 1, BUFFER, {{weight}}, ck, cout * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b); - - // Compute the matrix multiplication between two tiles - T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, 1, NT, T, a, b, {{dst}}); - - ++ck; - } - )_"; - } - - code += R"_( - #undef _I_WEI_WIDTH - #undef _I_WEI_HEIGHT - #undef _ISRC_WIDTH - #undef _ISRC_HEIGHT - #undef _ISRC_CHANNELS - #undef _IDST_WIDTH - #undef _IDST_HEIGHT - #undef _IDST_CHANNELS - #undef _IY_MULTIPLIER - - } - )_"; - - if(bias_info != nullptr) - { - code += R"_( - TILE({{BIA_DATA_TYPE}}, 1, N0, bias0); - - T_LOAD({{BIA_DATA_TYPE}}, 1, N0, BUFFER, {{bias}}, cout, 0, 1, 0, bias0); - - // c = c + bias[broadcasted] - T_ELTWISE_BROADCAST_ADD_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, bias0, {{dst}}); - )_"; - } - - code += R"_( - } -//------------------ END KERNEL {{meta_kernel_id}} --------------------- - )_"; - return code.c_str(); -} - -bool export_to_cl_image_support(const ITensorInfo *tensor, GPUTarget gpu_target, DataLayout data_layout) -{ - if(tensor->tensor_shape()[0] % 4 || (data_layout != DataLayout::NHWC)) - { - return false; - } - - // If not floating point - if(!is_data_type_float(tensor->data_type())) - { - return false; - } - - if(gpu_target == GPUTarget::G71 || get_arch_from_target(gpu_target) == GPUTarget::MIDGARD) - { - return false; - } - - // Check if the cl_khr_image2d_from_buffer extension is supported on the target platform - if(!image2d_from_buffer_supported(CLKernelLibrary::get().get_device())) - { - return false; - } - - // Check cl image pitch alignment - if(get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) == 0) - { - return false; - } - - const size_t image_w = tensor->tensor_shape()[0] / 4; - const size_t image_h = tensor->tensor_shape()[1] * tensor->tensor_shape()[2] * tensor->tensor_shape()[3]; - const size_t max_image_w = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>(); - const size_t max_image_h = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>(); - - if(image_w > max_image_w || image_h > max_image_h) - { - return false; - } - - return true; -} - -CLBuildOptions ClDirectConvolutionKernelComponent::generate_build_options() const -{ - const auto src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id); - auto weight_info = _blueprint->impl().get_kernel_argument_info(_weight.arg_id); - const auto dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - // const auto tile_info = _blueprint->impl().get_tile_info(); - - const unsigned int channel_idx = get_data_layout_dimension_index(src_info->data_layout(), DataLayoutDimension::CHANNEL); - const DataType data_type = src_info->data_type(); - const GPUTarget gpu_target = CLScheduler::get().target(); - - const unsigned int n0 = _blueprint->impl().get_execution_window().x().step(); - const unsigned int m0 = _blueprint->impl().get_execution_window().y().step(); - const unsigned int k0 = adjust_vec_size(is_data_type_quantized(data_type) ? 16u : 8u, src_info->dimension(channel_idx)); - const unsigned int partial_store_n0 = dst_info->dimension(0) % n0; - const bool export_to_cl_image = export_to_cl_image_support(weight_info, gpu_target, src_info->data_layout()); - - // Update the padding for the weights tensor if we can export to cl_image - if(export_to_cl_image) - { - arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(weight_info); - } - - CLBuildOptions build_opts{}; - build_opts.add_option("-cl-fast-relaxed-math"); - build_opts.add_option("-DIS_TILED"); - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DK0=" + support::cpp11::to_string(k0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - - return build_opts; -} - -void ClDirectConvolutionKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const -{ - const auto src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id); - const auto weight_info = _blueprint->impl().get_kernel_argument_info(_weight.arg_id); - - vtable.add(_src, _blueprint->impl().group(_src.arg_id), ClKernelArgDescriptor(_src.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "src"); - - const GPUTarget gpu_target = CLScheduler::get().target(); - const bool export_to_cl_image = export_to_cl_image_support(weight_info, gpu_target, src_info->data_layout()); - const ClKernelTensorArgType weight_type = export_to_cl_image ? ClKernelTensorArgType::Tensor_4D_t_Image : ClKernelTensorArgType::Tensor_4D_t_Buffer; - vtable.add(_weight, _blueprint->impl().group(_weight.arg_id), ClKernelArgDescriptor(_weight.arg_id, weight_type), "weight"); - - if(!_bias.is_empty()) // optional bias - { - vtable.add(_bias, _blueprint->impl().group(_bias.arg_id), ClKernelArgDescriptor(_bias.arg_id, ClKernelTensorArgType::Vector), "bias"); - } - vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst"); -} - -ClDirectConvolutionKernelComponent::TagLUT ClDirectConvolutionKernelComponent::get_tag_lut(const SharedVarTable &vtable) const -{ - TagLUT lut{}; - - const auto src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id); - const auto weight_info = _blueprint->impl().get_kernel_argument_info(_weight.arg_id); - const auto bias_info = _blueprint->impl().get_kernel_argument_info(_bias.arg_id); - - // Arguments and global shared variables - lut["src"] = vtable.get(_src); - lut["weight"] = vtable.get(_weight); - - if(!_bias.is_empty()) // optional bias - { - lut["bias"] = vtable.get(_bias); - lut["BIA_DATA_TYPE"] = get_cl_type_from_data_type(bias_info->data_type()); - } - lut["dst"] = vtable.get(_dst); - - const auto dst_argument = _blueprint->impl().get_argument_shared_vars().get_dst_var(); - lut["arg_dst"] = dst_argument.uniq_name; - - // Local build options - lut["meta_kernel_id"] = id(); - lut["ACC_DATA_TYPE"] = src_info->data_type(); - lut["SRC_DATA_TYPE"] = src_info->data_type(); - lut["WEI_DATA_TYPE"] = weight_info->data_type(); - - lut["SRC_TENSOR_TYPE"] = "BUFFER"; - switch(vtable.get(_weight).desc.tensor_arg_type) - { - case ClKernelTensorArgType::Image_Export_To_ClImage2D: - case ClKernelTensorArgType::Image_3D_Export_To_ClImage2D: - case ClKernelTensorArgType::Tensor_4D_t_Image: - { - lut["WEI_TENSOR_TYPE"] = "IMAGE"; - break; - } - default: - { - lut["WEI_TENSOR_TYPE"] = "BUFFER"; - break; - } - } - const auto width_idx = get_data_layout_dimension_index(src_info->data_layout(), DataLayoutDimension::WIDTH); - const auto height_idx = get_data_layout_dimension_index(src_info->data_layout(), DataLayoutDimension::HEIGHT); - lut["WEI_WIDTH"] = weight_info->dimension(width_idx); - lut["WEI_HEIGHT"] = weight_info->dimension(height_idx); - - lut["STRIDE_X"] = _desc.conv2d.stride.x(); - lut["STRIDE_Y"] = _desc.conv2d.stride.y(); - - lut["PAD_LEFT"] = _desc.conv2d.pad.left; - lut["PAD_TOP"] = _desc.conv2d.pad.top; - - lut["ZERO_VALUE"] = 0; - - return lut; -} -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
\ No newline at end of file diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h deleted file mode 100644 index 5babdbab51..0000000000 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION - -#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLDIRECTCONVOLUTIONKERNELCOMPONENT_H -#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLDIRECTCONVOLUTIONKERNELCOMPONENT_H - -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h" - -#include "utils/TypePrinter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClDirectConvolutionKernelComponent : public IClKernelComponent -{ -public: - ClDirectConvolutionKernelComponent(ClKernelBlueprint *blueprint, const ClDirectConv2dKernelDescriptor &desc, - const Link &src, const Link &weight, const Link &dst, const Link &bias = Link{}) - : IClKernelComponent(blueprint), _desc{ desc }, _src{ src }, _weight{ weight }, _bias{ bias }, _dst{ dst } - { - } - - ComponentType get_component_type() const override; - std::set<std::string> get_headers_list() const override; - std::string get_additional_macros() const override; - std::string get_component_code() const override; - Window get_window() const override; - ClKernelArgList get_args(); - CLBuildOptions generate_build_options() const override; - - virtual std::vector<Link> get_links() const override - { - return { _src, _weight, _bias, _dst }; - } - - virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const override; - virtual void allocate_shared_vars(SharedVarTable &vtable) const override; - - virtual std::string name() const override - { - return "direct_convolution_" + to_string(_blueprint->impl().get_kernel_argument_info(_src.arg_id)->data_layout()) + "_" + std::to_string(id()); - } - -private: - ClDirectConv2dKernelDescriptor _desc{}; - Link _src{}; - Link _weight{}; - Link _bias{}; - Link _dst{}; -}; - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif // ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLDIRECTCONVOLUTIONKERNELCOMPONENT_H -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
\ No newline at end of file diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp deleted file mode 100644 index e2eba68a63..0000000000 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.cpp +++ /dev/null @@ -1,266 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION - -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ComponentType ClElementwiseKernelComponent::get_component_type() const -{ - return ComponentType::Simple; -} - -std::set<std::string> ClElementwiseKernelComponent::get_headers_list() const -{ - return std::set<std::string> { "common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h", "tile_helpers.h" }; -} - -Window ClElementwiseKernelComponent::get_window() const -{ - const ITensorInfo *lhs_info = _blueprint->impl().get_kernel_argument_info(_lhs.arg_id); - const ITensorInfo *rhs_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id); - ITensorInfo *dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - - ARM_COMPUTE_ERROR_ON_NULLPTR(lhs_info, rhs_info, dst_info); - - const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*lhs_info, *rhs_info); - const TensorShape &out_shape = broadcast_pair.first; - - auto_init_if_empty(*dst_info, out_shape, 1, lhs_info->data_type()); - - TensorShape output_shape = dst_info->tensor_shape(); - // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) and upper dimensions unchanged - // This is in line with the collapsing convention used by Conv2d - output_shape.collapse(2U, 1U); - const unsigned int vector_size_byte_opencl = 16; - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / dst_info->element_size(), dst_info->dimension(0)); - Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); - - return win; -} - -std::string ClElementwiseKernelComponent::get_component_code() const -{ - std::string code; - const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument; - - if(is_root) - { - return R"_( - //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_OP --------------------- - // IN_0(LHS) {{lhs}} - // IN_1(RHS) {{rhs}} - // OUT(dst, accum) {{dst}} - - // dst = lhs + rhs (mix-precision, broadcast, boundary aware) - TILE({{DATA_TYPE}}, M0, N0, {{dst}}); - { - TILE({{DATA_TYPE}}, M0, N0, lhs_tile); - TILE({{DATA_TYPE}}, M0, N0, rhs_tile); - - // Since mout maps to dimensions 1 (y) and dimension 2 (z) of the input tensor because of the collapsed window, bout maps to dimension 3 (w) - {{lhs}}_offset_first_element_in_bytes += bout * {{lhs}}_stride_w; - {{rhs}}_offset_first_element_in_bytes += bout * {{rhs}}_stride_w; - - T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{lhs}}, cout, mout, 1, {{lhs}}_stride_y, lhs_tile); - T_LOAD({{DATA_TYPE}}, {{rhs_m0}}, {{rhs_n0}}, BUFFER, {{rhs}}, {{rhs_start_x}}, {{rhs_start_y}}, 1, {{rhs}}_stride_y, rhs_tile); - -#if defined(IS_BROADCAST) - T_ELTWISE_BROADCAST_{{ELTWISE_OP}}_X({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}}); -#else // !defined(IS_BROADCAST) - T_ELTWISE_{{ELTWISE_OP}}({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}}); -#endif // defined(IS_BROADCAST) - - } - //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_OP --------------------- -)_"; - } - else - { - return R"_( - //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_OP --------------------- - // IN_0/Out(Accumulator) {{acc}} - // IN_1(Addend) {{addend}} - - // acc = addend + acc (mix-precision, broadcast, boundary aware) - { - TILE({{DATA_TYPE}}, M0, N0, addend_tile); - - T_LOAD({{DATA_TYPE}}, {{rhs_m0}}, {{rhs_n0}}, BUFFER, {{addend}}, {{rhs_start_x}}, {{rhs_start_y}}, 1, {{addend}}_stride_y, addend_tile); - -#if defined(IS_BROADCAST) - T_ELTWISE_BROADCAST_{{ELTWISE_OP}}_X({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}}); -#else // !defined(IS_BROADCAST) - T_ELTWISE_{{ELTWISE_OP}}({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}}); -#endif // defined(IS_BROADCAST) - } - //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_OP --------------------- -)_"; - } -} - -CLBuildOptions ClElementwiseKernelComponent::generate_build_options() const -{ - const auto t_rhs_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id); - const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - - CLBuildOptions build_opts{}; - const auto n0 = _blueprint->impl().get_execution_window().x().step(); - const auto m0 = _blueprint->impl().get_execution_window().y().step(); - const unsigned int partial_store_n0 = t_dst_info->dimension(0) % n0; - const bool is_broadcast = t_rhs_info->tensor_shape() != t_dst_info->tensor_shape(); - - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - build_opts.add_option_if(is_broadcast, "-DIS_BROADCAST"); - - return build_opts; -} - -std::string ClElementwiseKernelComponent::generate_config_id() const -{ - auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - std::string config_id{}; - config_id += lower_string(string_from_data_type(t_dst_info->data_type())); - config_id += "_"; - config_id += support::cpp11::to_string(t_dst_info->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(t_dst_info->dimension(1)); - config_id += "_"; - config_id += lower_string(string_from_data_layout(t_dst_info->data_layout())); - return config_id; -} - -void ClElementwiseKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const -{ - const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument; - vtable.add(_lhs, _blueprint->impl().group(_lhs.arg_id), ClKernelArgDescriptor(_lhs.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "lhs"); - vtable.add(_rhs, _blueprint->impl().group(_rhs.arg_id), ClKernelArgDescriptor(_rhs.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "rhs"); - if(is_root) - { - vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst"); - } -} - -ClElementwiseKernelComponent::TagLUT ClElementwiseKernelComponent::get_tag_lut(const SharedVarTable &vtable) const -{ - TagLUT lut{}; - const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - ITensorInfo *t_addend_info = nullptr; - // Arguments and global shared variables - const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument; - if(is_root) - { - lut["lhs"] = vtable.get(_lhs); - lut["rhs"] = vtable.get(_rhs); - lut["dst"] = vtable.get(_dst); - t_addend_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id); - } - else - { - // Determine which link is the accumulator - Link accumulator; - Link addend; - if(_blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Automatic) - { - accumulator = _lhs; - addend = _rhs; - } - else if(_blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Automatic) - { - accumulator = _rhs; - addend = _lhs; - } - else - { - ARM_COMPUTE_ERROR("Invalid elementwise component linking"); - } - lut["acc"] = vtable.get(accumulator); - lut["addend"] = vtable.get(addend); - t_addend_info = _blueprint->impl().get_kernel_argument_info(addend.arg_id); - } - // Local build options - lut["meta_kernel_id"] = id(); - lut["DATA_TYPE"] = get_cl_type_from_data_type(t_dst_info->data_type()); - - switch(_desc.eltwise.op) - { - case ArithmeticOperation::DIV: - lut["ELTWISE_OP"] = "DIV"; - break; - case ArithmeticOperation::ADD: - lut["ELTWISE_OP"] = "ADD"; - break; - default: - ARM_COMPUTE_ERROR("Arithmetic Operation not supported"); - } - - // Set broadcast parameters - // PRE: All tensors are broadcast-compatible - const bool is_broadcast = t_addend_info->tensor_shape() != t_dst_info->tensor_shape(); - if(is_broadcast) - { - // Note that n0 maps to input tensor dimension 0, m0 maps to input dimensions 1 and 2 because of our collapse strategy - if(t_addend_info->dimension(0) == 1U && t_addend_info->dimension(1) == 1U && t_addend_info->dimension(2) == 1U) // Broadcast in X, Y, Z: collapsed rhs win [M0xN0] = [1x1] - { - lut["rhs_m0"] = "1"; - lut["rhs_n0"] = "1"; - lut["rhs_start_y"] = "0"; - lut["rhs_start_x"] = "0"; - } - else if(t_addend_info->dimension(1) == 1U && t_addend_info->dimension(2) == 1U) // Broadcast in Y and Z: collapsed rhs win [M0xN0] = [1xN] - { - lut["rhs_m0"] = "1"; - lut["rhs_n0"] = "N0"; - lut["rhs_start_y"] = "0"; - lut["rhs_start_x"] = "cout"; - } - else - { - ARM_COMPUTE_ERROR("Only support rhs broadcasting in all X, Y, Z dimensions, or just in Y and Z dimensions"); - } - } - else - { - lut["rhs_m0"] = "M0"; - lut["rhs_n0"] = "N0"; - lut["rhs_start_y"] = "mout"; - lut["rhs_start_x"] = "cout"; - } - return lut; -} -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
\ No newline at end of file diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h deleted file mode 100644 index f8377457d3..0000000000 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION - -#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLELEMENTWISEADDKERNELCOMPONENT_H -#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLELEMENTWISEADDKERNELCOMPONENT_H - -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClElementwiseKernelComponent : public IClKernelComponent -{ -public: - /** Construct a new Cl Elementwise Kernel Component object - * - * @param[in] blueprint Blueprint to which this component is added - * @param[in] desc Component descriptor - * @param[in] lhs Link to LHS tensor - * @param[in] rhs Link to RHS tensor - * @param[out] dst Link to DST tensor - * - * Support Level - * Data Type: F16, F32 - * Tensor Shape: Any shape of arbitrary dimension >= 1 and <= 4 - * Value Range: All - * Broadcasting: Only RHS tensor can be broadcasted into LHS. Only support broadcasting in dimension 1 and dimension 2 or all dimension 0, 1 and 2 - */ - ClElementwiseKernelComponent(ClKernelBlueprint *blueprint, const ClElementwiseKernelDescriptor &desc, const Link &lhs, const Link &rhs, const Link &dst) - : IClKernelComponent(blueprint), _desc{ desc }, _lhs{ lhs }, _rhs{ rhs }, _dst{ dst } - { - } - - ComponentType get_component_type() const override; - std::set<std::string> get_headers_list() const override; - std::string get_component_code() const override; - Window get_window() const override; - CLBuildOptions generate_build_options() const override; - std::string generate_config_id() const override; - - virtual std::vector<Link> get_links() const override - { - return { _lhs, _rhs, _dst }; - } - - virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const override; - virtual void allocate_shared_vars(SharedVarTable &vtable) const override; - - virtual std::string name() const override - { - return "eltwise_add_" + std::to_string(id()); - } - -private: - ClElementwiseKernelDescriptor _desc{}; - Link _lhs{}; - Link _rhs{}; - Link _dst{}; -}; - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif // ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLELEMENTWISEADDKERNELCOMPONENT_H -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
\ No newline at end of file diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp deleted file mode 100644 index 0a20a8f600..0000000000 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.cpp +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ComponentType ClFloorKernelComponent::get_component_type() const -{ - return ComponentType::Simple; -} -std::set<std::string> ClFloorKernelComponent::get_headers_list() const -{ - return std::set<std::string> { "common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h", "tile_helpers.h" }; -} -Window ClFloorKernelComponent::get_window() const -{ - const ITensorInfo *src_info = _blueprint->impl().get_kernel_argument_info(_src.arg_id); - ITensorInfo *dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - - ARM_COMPUTE_ERROR_ON_NULLPTR(src_info, dst_info); - auto_init_if_empty(*dst_info, src_info->tensor_shape(), 1, src_info->data_type()); - - TensorShape output_shape = dst_info->tensor_shape(); - // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) and upper dimensions unchanged - // This is in line with the collapsing convention used by Conv2d - output_shape.collapse(2U, 1U); - const unsigned int vector_size_byte_opencl = 16; - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / dst_info->element_size(), dst_info->dimension(0)); - Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); - - return win; -} -std::string ClFloorKernelComponent::get_component_code() const -{ - bool is_root = _blueprint->impl().group(_src.arg_id) == SharedVarGroup::Argument; - if(is_root) - { - return R"_( - //------------------ START KERNEL {{meta_kernel_id}} FLOOR --------------------- - // IN_0(src) {{src}} - // OUT(dst, accum) {{dst}} - TILE({{DATA_TYPE}}, M0, N0, {{dst}}); - { - TILE({{DATA_TYPE}}, M0, N0, src_tile); - - // Since mout maps to dimensions 1 (y) and dimension 2 (z) of the input tensor because of the collapsed window, bout maps to dimension 3 (w) - {{src}}_offset_first_element_in_bytes += bout * {{src}}_stride_w; - T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{src}}, cout, mout, 1, {{src}}_stride_y, src_tile); - - T_FLOOR({{DATA_TYPE}}, M0, N0, src_tile, {{dst}}); - } - //------------------ END KERNEL {{meta_kernel_id}} FLOOR --------------------- -)_"; - } - else - { - return R"_( - //------------------ START KERNEL {{meta_kernel_id}} FLOOR --------------------- - // IN_0/Out(Accumulator) {{acc}} - // output = floor(input) - { - T_FLOOR({{DATA_TYPE}}, M0, N0, {{acc}}, {{acc}}); - } - //------------------ END KERNEL {{meta_kernel_id}} FLOOR --------------------- -)_"; - } -} -CLBuildOptions ClFloorKernelComponent::generate_build_options() const -{ - CLBuildOptions build_opts{}; - const auto n0 = _blueprint->impl().get_execution_window().x().step(); - const auto m0 = _blueprint->impl().get_execution_window().y().step(); - const auto dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - const unsigned int partial_store_n0 = dst_info->dimension(0) % n0; - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - return build_opts; -} -std::string ClFloorKernelComponent::generate_config_id() const -{ - auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - std::string config_id{}; - config_id += lower_string(string_from_data_type(t_dst_info->data_type())); - config_id += "_"; - config_id += support::cpp11::to_string(t_dst_info->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(t_dst_info->dimension(1)); - config_id += "_"; - config_id += lower_string(string_from_data_layout(t_dst_info->data_layout())); - return config_id; -} -void ClFloorKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const -{ - vtable.add(_src, _blueprint->impl().group(_src.arg_id), ClKernelArgDescriptor(_src.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "src"); - vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst"); -} -ClFloorKernelComponent::TagLUT ClFloorKernelComponent::get_tag_lut(const SharedVarTable &vtable) const -{ - TagLUT lut{}; - const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - // Arguments and global shared variables - const bool is_root = _blueprint->impl().group(_src.arg_id) == SharedVarGroup::Argument; - - if(is_root) - { - lut["src"] = vtable.get(_src); - lut["dst"] = vtable.get(_dst); - } - else - { - lut["acc"] = vtable.get(_src); - } - - lut["meta_kernel_id"] = id(); - lut["DATA_TYPE"] = get_cl_type_from_data_type(t_dst_info->data_type()); - return lut; -} -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
\ No newline at end of file diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h deleted file mode 100644 index e791b36382..0000000000 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION - -#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLFLOORKERNELCOMPONENT_H -#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLFLOORKERNELCOMPONENT_H - -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClFloorKernelComponent : public IClKernelComponent -{ -public: - /** Construct a new Cl Floor Kernel Component object - * - * @param blueprint Blueprint to which this component is added - * @param src Link to SRC tensor - * @param dst Link to DST tensor - * - * Support Level - * Data Type: F16, F32 - * Tensor Shape: Any shape of arbitrary dimension >= 1 and <= 4 - * Value Range: All - */ - ClFloorKernelComponent(ClKernelBlueprint *blueprint, const Link &src, const Link &dst) - : IClKernelComponent(blueprint), _src{ src }, _dst{ dst } - { - } - - ComponentType get_component_type() const override; - std::set<std::string> get_headers_list() const override; - std::string get_component_code() const override; - Window get_window() const override; - CLBuildOptions generate_build_options() const override; - std::string generate_config_id() const override; - - virtual std::vector<Link> get_links() const override - { - return { _src, _dst }; - } - - virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const override; - virtual void allocate_shared_vars(SharedVarTable &vtable) const override; - - virtual std::string name() const override - { - return "floor_" + std::to_string(id()); - } - -private: - Link _src{}; - Link _dst{}; -}; - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif // ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLFLOORKERNELCOMPONENT_H -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */ diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h deleted file mode 100644 index 3f99dd5553..0000000000 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClKernelComponents.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION - -#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_CLKERNELCOMPONENTS_H -#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_CLKERNELCOMPONENTS_H - -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClDirectConvolutionKernelComponent.h" -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseKernelComponent.h" -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClFloorKernelComponent.h" -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h" - -#endif //ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_CLKERNELCOMPONENTS_H -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
\ No newline at end of file diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.cpp deleted file mode 100644 index 7c805d5368..0000000000 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.cpp +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION - -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ComponentType ClStoreBlockBoundaryAwareKernelComponent::get_component_type() const -{ - return ComponentType::Store; -} - -std::string ClStoreBlockBoundaryAwareKernelComponent::get_component_code() const -{ - return R"_( - //------------------ START KERNEL {{meta_kernel_id}} STORE --------------------- - - __global uchar *dst_addr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + (g_x * (uint)N0 * sizeof(DATA_TYPE)) + (COMPUTE_M0_START_ROW(g_y, M0, PARTIAL_STORE_M0) * {{dst}}_stride_y); - -#if defined(REINTERPRET_OUTPUT_AS_3D) - // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we - // multiply dst_stride_z by DEPTH_GEMM3D - dst_addr += g_z * {{dst}}_stride_z * DEPTH_GEMM3D; - -#else // defined(REINTERPRET_OUTPUT_AS_3D) - - // Add offset for batched GEMM - dst_addr += g_z * {{dst}}_stride_z; - -#endif // defined(REINTERPRET_OUTPUT_AS_3D) - - STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, {{src}}, dst_addr, {{dst}}_stride_y, g_zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, g_cond_y, g_cond_x); - - //------------------ END KERNEL {{meta_kernel_id}} STORE --------------------- - -)_"; -} - -CLBuildOptions ClStoreBlockBoundaryAwareKernelComponent::generate_build_options() const -{ - auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - // auto tile_info = _blueprint->impl().get_tile_info(); - - CLBuildOptions build_opts{}; - - const auto n0 = _blueprint->impl().get_execution_window().x().step(); - const auto m0 = _blueprint->impl().get_execution_window().y().step(); - const auto partial_m0 = t_dst_info->dimension(0) % m0; - const auto partial_n0 = t_dst_info->dimension(1) % n0; - - build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(t_dst_info->data_type())); - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DPARTIAL_STORE_M0=" + support::cpp11::to_string(partial_m0)); - build_opts.add_option("-DPARTIAL_STORE_N0=" + support::cpp11::to_string(partial_n0)); - - return build_opts; -} - -void ClStoreBlockBoundaryAwareKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const -{ - vtable.add(_src, _blueprint->impl().group(_src.arg_id), ClKernelArgDescriptor(_src.arg_id, ClKernelTensorArgType::Image_3D), "src"); - vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Image_3D), "dst"); -} - -ClStoreBlockBoundaryAwareKernelComponent::TagLUT ClStoreBlockBoundaryAwareKernelComponent::get_tag_lut(const SharedVarTable &vtable) const -{ - return { - { "meta_kernel_id", id() }, - { "src", vtable.get(_src) }, - { "dst", vtable.get(_dst) }, - }; -} - -ComponentType ClStoreIndirectWidthSelectKernelComponent::get_component_type() const -{ - return ComponentType::Store; -} - -std::string ClStoreIndirectWidthSelectKernelComponent::get_component_code() const -{ - return R"_( - //------------------ START KERNEL {{meta_kernel_id}} STORE --------------------- - { - // This also follows NHWC layout - // cout maps to global_id(0) maps to Channel - // mout maps to global_id(1) maps to Height and Weight (Collapsed Window) - // bout maps to global_id(3) maps to N / Batch - #define _IDST_WIDTH {{dst}}_w - #define _IDST_HEIGHT {{dst}}_h - TILE(uint, M0, 1, dst_indirect_y); - - // Calculate the destination indirect Y - LOOP_UNROLLING(int, i, 0, 1, M0, - { - dst_indirect_y[i].v = (uint)min(mout + i, (int)(_IDST_WIDTH * _IDST_HEIGHT) - 1); - dst_indirect_y[i].v += bout * (int)(_IDST_WIDTH * _IDST_HEIGHT); - }) - - bool x_cond = PARTIAL_N0 != 0 && get_global_id(0) == 0; - - T_STORE_INDIRECT_WIDTH_SELECT({{DST_DATA_TYPE}}, M0, N0, PARTIAL_N0, {{DST_TENSOR_TYPE}}, {{dst}}, cout, {{dst}}_stride_y, x_cond, {{src}}, dst_indirect_y); - - #undef _IDST_WIDTH - #undef _IDST_HEIGHT - //------------------ END KERNEL {{meta_kernel_id}} STORE --------------------- - } - -)_"; -} - -CLBuildOptions ClStoreIndirectWidthSelectKernelComponent::generate_build_options() const -{ - CLBuildOptions build_opts{}; - - return build_opts; -} - -void ClStoreIndirectWidthSelectKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const -{ - vtable.add(_src, _blueprint->impl().group(_src.arg_id), ClKernelArgDescriptor(_src.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "src"); - vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst"); -} - -ClStoreIndirectWidthSelectKernelComponent::TagLUT ClStoreIndirectWidthSelectKernelComponent::get_tag_lut(const SharedVarTable &vtable) const -{ - TagLUT lut{}; - - // Arguments and global shared variables - lut["src"] = vtable.get(_src); - lut["dst"] = vtable.get(_dst); - - // Local build options - lut["meta_kernel_id"] = id(); - lut["DST_TENSOR_TYPE"] = "BUFFER"; - const auto dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id()); - lut["DST_DATA_TYPE"] = dst_info->data_type(); - - return lut; -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
\ No newline at end of file diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h deleted file mode 100644 index e0b188dc8d..0000000000 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION - -#ifndef ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLSTOREKERNELCOMPONENTS_H -#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLSTOREKERNELCOMPONENTS_H - -#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClStoreBlockBoundaryAwareKernelComponent : public IClKernelComponent -{ -public: - ClStoreBlockBoundaryAwareKernelComponent(ClKernelBlueprint *blueprint, const Link &src, const Link &dst) - : IClKernelComponent(blueprint), _src{ src }, _dst{ dst } - { - } - ComponentType get_component_type() const override; - std::string get_component_code() const override; - CLBuildOptions generate_build_options() const override; - TagLUT get_tag_lut(const SharedVarTable &vtable) const override; - void allocate_shared_vars(SharedVarTable &vtable) const override; - - virtual std::vector<Link> get_links() const override - { - return { _src, _dst }; - } - - virtual std::string name() const override - { - return ""; - } - -private: - Link _src{}; - Link _dst{}; -}; - -class ClStoreIndirectWidthSelectKernelComponent : public IClKernelComponent -{ -public: - ClStoreIndirectWidthSelectKernelComponent(ClKernelBlueprint *blueprint, const Link &src, const Link &dst) - : IClKernelComponent(blueprint), _src{ src }, _dst{ dst } - { - } - ComponentType get_component_type() const override; - std::string get_component_code() const override; - CLBuildOptions generate_build_options() const override; - virtual TagLUT get_tag_lut(const SharedVarTable &vtable) const override; - void allocate_shared_vars(SharedVarTable &vtable) const override; - - virtual std::vector<Link> get_links() const override - { - return { _src, _dst }; - } - - virtual std::string name() const override - { - return ""; - } - -private: - Link _src{}; - Link _dst{}; -}; - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif // ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMPONENTS_CLSTOREKERNELCOMPONENTS_H -#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
\ No newline at end of file |