diff options
Diffstat (limited to 'src/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp')
-rw-r--r-- | src/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp | 65 |
1 files changed, 43 insertions, 22 deletions
diff --git a/src/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp b/src/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp index 54f7ad344a..c4f84e3e45 100644 --- a/src/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp +++ b/src/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp @@ -30,11 +30,11 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/helpers/AdjustVecSize.h" #include "arm_compute/core/utils/StringUtils.h" + #include "src/core/CL/CLValidate.h" #include "src/core/helpers/WindowHelpers.h" #include "src/core/utils/helpers/tensor_info.h" #include "support/Cast.h" - #include "support/StringSupport.h" namespace arm_compute @@ -45,15 +45,20 @@ namespace kernels { namespace { -Status validate_arguments(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *src3, const ITensorInfo *src4, const ITensorInfo *dst) +Status validate_arguments(const ITensorInfo *src1, + const ITensorInfo *src2, + const ITensorInfo *src3, + const ITensorInfo *src4, + const ITensorInfo *dst) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src1, src2, src3, src4, dst); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src1); ARM_COMPUTE_RETURN_ERROR_ON(src1->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src1, src2, src3, src4, dst); - ARM_COMPUTE_RETURN_ERROR_ON(src1->dimension(0) + src2->dimension(0) + src3->dimension(0) + src4->dimension(0) > dst->dimension(0)); + ARM_COMPUTE_RETURN_ERROR_ON(src1->dimension(0) + src2->dimension(0) + src3->dimension(0) + src4->dimension(0) > + dst->dimension(0)); - for(size_t i = 1; i < Coordinates::num_max_dimensions; ++i) + for (size_t i = 1; i < Coordinates::num_max_dimensions; ++i) { ARM_COMPUTE_RETURN_ERROR_ON(src1->dimension(i) != dst->dimension(i)); ARM_COMPUTE_RETURN_ERROR_ON(src2->dimension(i) != dst->dimension(i)); @@ -71,22 +76,29 @@ ClWidthConcatenate4TensorsKernel::ClWidthConcatenate4TensorsKernel() _type = CLKernelType::ELEMENTWISE; } -Status ClWidthConcatenate4TensorsKernel::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *src3, const ITensorInfo *src4, const ITensorInfo *dst) +Status ClWidthConcatenate4TensorsKernel::validate(const ITensorInfo *src1, + const ITensorInfo *src2, + const ITensorInfo *src3, + const ITensorInfo *src4, + const ITensorInfo *dst) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src1, src2, src3, src4, dst)); return Status{}; } void ClWidthConcatenate4TensorsKernel::configure(const CLCompileContext &compile_context, - ITensorInfo *src1, ITensorInfo *src2, - ITensorInfo *src3, ITensorInfo *src4, - ITensorInfo *dst) + ITensorInfo *src1, + ITensorInfo *src2, + ITensorInfo *src3, + ITensorInfo *src4, + ITensorInfo *dst) { ARM_COMPUTE_ERROR_ON_NULLPTR(src1, src2, src3, src4, dst); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src1, src2, src3, src4, dst)); - auto padding_info = get_padding_info({ src1, src2, src3, src4, dst }); - const unsigned int min_dimension = std::min(std::min(src1->dimension(0), src2->dimension(0)), std::min(src3->dimension(0), src4->dimension(0))); + auto padding_info = get_padding_info({src1, src2, src3, src4, dst}); + const unsigned int min_dimension = + std::min(std::min(src1->dimension(0), src2->dimension(0)), std::min(src3->dimension(0), src4->dimension(0))); const unsigned int num_elems_processed_per_iteration = adjust_vec_size(8, min_dimension); const unsigned int vec_size_leftover = dst->dimension(0) % num_elems_processed_per_iteration; @@ -96,9 +108,14 @@ void ClWidthConcatenate4TensorsKernel::configure(const CLCompileContext &compile build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)); build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(vec_size_leftover)); build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(src1->element_size())); - build_opts.add_option("-DINPUT1_ROTATE_N=" + support::cpp11::to_string((src1->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration)); - build_opts.add_option("-DINPUT2_ROTATE_N=" + support::cpp11::to_string((src1->dimension(0) + src2->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration)); - build_opts.add_option("-DINPUT3_ROTATE_N=" + support::cpp11::to_string((src1->dimension(0) + src2->dimension(0) + src3->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration)); + build_opts.add_option("-DINPUT1_ROTATE_N=" + support::cpp11::to_string((src1->dimension(0) - vec_size_leftover) % + num_elems_processed_per_iteration)); + build_opts.add_option("-DINPUT2_ROTATE_N=" + + support::cpp11::to_string((src1->dimension(0) + src2->dimension(0) - vec_size_leftover) % + num_elems_processed_per_iteration)); + build_opts.add_option("-DINPUT3_ROTATE_N=" + support::cpp11::to_string((src1->dimension(0) + src2->dimension(0) + + src3->dimension(0) - vec_size_leftover) % + num_elems_processed_per_iteration)); _depth = src1->dimension(2); _input1_width = src1->dimension(0); @@ -106,8 +123,9 @@ void ClWidthConcatenate4TensorsKernel::configure(const CLCompileContext &compile _input3_width = src3->dimension(0); // If soources have different quantization info set quantization parameters needed for the re-quantization process - const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(dst, src1, src2, src3, src4); - if(is_data_type_quantized_asymmetric(src1->data_type()) && have_different_qinfo) + const bool have_different_qinfo = + helpers::tensor_info::tensors_have_different_quantization_info(dst, src1, src2, src3, src4); + if (is_data_type_quantized_asymmetric(src1->data_type()) && have_different_qinfo) { const UniformQuantizationInfo iq1_info = src1->quantization_info().uniform(); const UniformQuantizationInfo iq2_info = src2->quantization_info().uniform(); @@ -166,11 +184,15 @@ void ClWidthConcatenate4TensorsKernel::run_op(ITensorPack &tensors, const Window ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - const auto src0 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC)); - const auto src1 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 1)); - const auto src2 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 2)); - const auto src3 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 3)); - auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); + const auto src0 = + utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC)); + const auto src1 = + utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 1)); + const auto src2 = + utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 2)); + const auto src3 = + utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 3)); + auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); Window slice = window.first_slice_window_4D(); @@ -187,8 +209,7 @@ void ClWidthConcatenate4TensorsKernel::run_op(ITensorPack &tensors, const Window _kernel.setArg<cl_int>(idx++, _input2_width); _kernel.setArg<cl_int>(idx++, _input3_width); enqueue(queue, *this, window, lws_hint()); - } - while(window.slide_window_slice_4D(slice)); + } while (window.slide_window_slice_4D(slice)); } } // namespace kernels } // namespace opencl |