From 68dd25fbe6e4d3c3513fa5993863419769aa08fc Mon Sep 17 00:00:00 2001 From: Sang-Hoon Park Date: Mon, 19 Oct 2020 16:00:11 +0100 Subject: COMPMID-3637: Move utility headers from arm_compute to src Signed-off-by: Georgios Pinitas Change-Id: If9d6fa8c900b68c4b6fd373f2fc1f9abb83ea917 Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4145 Tested-by: Arm Jenkins Reviewed-by: Sang-Hoon Park Comments-Addressed: Arm Jenkins --- src/runtime/CL/CLHelpers.cpp | 3 +- src/runtime/CL/CLMemory.cpp | 4 +- src/runtime/CL/CLRuntimeContext.cpp | 2 + src/runtime/CL/CLTensorAllocator.cpp | 4 +- src/runtime/CL/functions/CLArgMinMaxLayer.cpp | 11 ++-- src/runtime/CL/functions/CLConcatenateLayer.cpp | 1 + .../functions/CLConvertFullyConnectedWeights.cpp | 2 + src/runtime/CL/functions/CLConvolutionLayer.cpp | 2 + src/runtime/CL/functions/CLCropResize.cpp | 4 ++ src/runtime/CL/functions/CLDeconvolutionLayer.cpp | 2 + .../CL/functions/CLDirectConvolutionLayer.cpp | 2 +- .../CL/functions/CLDirectDeconvolutionLayer.cpp | 1 + src/runtime/CL/functions/CLFFT1D.cpp | 2 +- src/runtime/CL/functions/CLFFTConvolutionLayer.cpp | 5 +- src/runtime/CL/functions/CLFill.cpp | 2 + src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 2 +- src/runtime/CL/functions/CLGEMM.cpp | 15 +++-- .../CL/functions/CLGEMMConvolutionLayer.cpp | 3 +- .../CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp | 7 ++- .../CL/functions/CLGenerateProposalsLayer.cpp | 1 + .../CL/functions/CLInstanceNormalizationLayer.cpp | 2 + src/runtime/CL/functions/CLLSTMLayerQuantized.cpp | 1 + src/runtime/CL/functions/CLPriorBoxLayer.cpp | 2 + src/runtime/CL/functions/CLQLSTMLayer.cpp | 1 + src/runtime/CL/functions/CLReduceMean.cpp | 3 +- src/runtime/CL/functions/CLReductionOperation.cpp | 8 ++- src/runtime/CL/functions/CLRemap.cpp | 2 +- src/runtime/CL/functions/CLSelect.cpp | 2 + src/runtime/CL/functions/CLSoftmaxLayer.cpp | 7 ++- src/runtime/CL/functions/CLSplit.cpp | 1 + .../CL/functions/CLWinogradConvolutionLayer.cpp | 2 +- src/runtime/CL/gemm/CLGEMMKernelSelection.h | 65 ++++++++++++++++++++++ .../CL/gemm/CLGEMMKernelSelectionBifrost.cpp | 4 +- src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h | 55 ++++++++++++++++++ .../CL/gemm/CLGEMMKernelSelectionMidgard.cpp | 4 +- src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h | 53 ++++++++++++++++++ .../CL/gemm/CLGEMMKernelSelectionValhall.cpp | 4 +- src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h | 53 ++++++++++++++++++ src/runtime/CL/tuners/BifrostTuner.cpp | 2 +- src/runtime/CL/tuners/MidgardTuner.cpp | 2 +- src/runtime/CPP/CPPScheduler.cpp | 3 +- .../CPP/functions/CPPDetectionOutputLayer.cpp | 1 + .../CPP/functions/CPPDetectionPostProcessLayer.cpp | 1 + src/runtime/CPUUtils.cpp | 9 ++- src/runtime/CPUUtils.h | 51 +++++++++++++++++ src/runtime/DeviceProperties.cpp | 6 +- src/runtime/GLES_COMPUTE/GCMemory.cpp | 4 +- .../GLES_COMPUTE/functions/GCConcatenateLayer.cpp | 2 + src/runtime/IScheduler.cpp | 10 ++-- src/runtime/NEON/INESimpleFunctionNoBorder.cpp | 6 +- src/runtime/NEON/functions/NEArgMinMaxLayer.cpp | 4 +- .../NEON/functions/NEBatchNormalizationLayer.cpp | 4 +- src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp | 4 +- src/runtime/NEON/functions/NEConcatenateLayer.cpp | 1 + src/runtime/NEON/functions/NECropResize.cpp | 2 + .../NEON/functions/NEDeconvolutionLayer.cpp | 1 + src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp | 4 +- src/runtime/NEON/functions/NEFFT1D.cpp | 4 +- .../NEON/functions/NEFFTConvolutionLayer.cpp | 7 ++- .../NEON/functions/NEFullyConnectedLayer.cpp | 2 + src/runtime/NEON/functions/NEGEMM.cpp | 3 +- .../NEON/functions/NEGEMMAssemblyDispatch.cpp | 10 ++-- .../functions/NEGEMMLowpMatrixMultiplyCore.cpp | 1 + .../NEON/functions/NEGenerateProposalsLayer.cpp | 1 + .../NEON/functions/NELSTMLayerQuantized.cpp | 3 +- src/runtime/NEON/functions/NEPadLayer.cpp | 1 + src/runtime/NEON/functions/NEPriorBoxLayer.cpp | 4 +- src/runtime/NEON/functions/NEQLSTMLayer.cpp | 1 + src/runtime/NEON/functions/NEReduceMean.cpp | 3 +- .../NEON/functions/NEReductionOperation.cpp | 1 + src/runtime/NEON/functions/NEScale.cpp | 4 +- .../NEON/functions/NESimpleAssemblyFunction.cpp | 4 +- .../NEON/functions/NESimpleAssemblyFunction.h | 56 +++++++++++++++++++ src/runtime/NEON/functions/NESoftmaxLayer.cpp | 7 ++- .../NEON/functions/NEWinogradConvolutionLayer.cpp | 4 +- .../NEDepthwiseConvolutionAssemblyDispatch.cpp | 11 ++-- src/runtime/OMP/OMPScheduler.cpp | 2 +- src/runtime/SchedulerUtils.cpp | 4 ++ src/runtime/SchedulerUtils.h | 45 +++++++++++++++ src/runtime/Utils.cpp | 7 ++- src/runtime/Utils.h | 60 ++++++++++++++++++++ 81 files changed, 618 insertions(+), 88 deletions(-) create mode 100644 src/runtime/CL/gemm/CLGEMMKernelSelection.h create mode 100644 src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h create mode 100644 src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h create mode 100644 src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h create mode 100644 src/runtime/CPUUtils.h create mode 100644 src/runtime/NEON/functions/NESimpleAssemblyFunction.h create mode 100644 src/runtime/SchedulerUtils.h create mode 100644 src/runtime/Utils.h (limited to 'src/runtime') diff --git a/src/runtime/CL/CLHelpers.cpp b/src/runtime/CL/CLHelpers.cpp index adfdc3c917..5f1842f76d 100644 --- a/src/runtime/CL/CLHelpers.cpp +++ b/src/runtime/CL/CLHelpers.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,6 +26,7 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/Error.h" +#include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLRuntimeContext.h" namespace diff --git a/src/runtime/CL/CLMemory.cpp b/src/runtime/CL/CLMemory.cpp index efbc68f50e..a1743c56e6 100644 --- a/src/runtime/CL/CLMemory.cpp +++ b/src/runtime/CL/CLMemory.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,7 @@ #include "arm_compute/runtime/CL/CLMemory.h" #include "arm_compute/core/Error.h" -#include "arm_compute/core/utils/misc/Cast.h" +#include "support/Cast.h" namespace arm_compute { diff --git a/src/runtime/CL/CLRuntimeContext.cpp b/src/runtime/CL/CLRuntimeContext.cpp index 2fc7f93adf..571e30931c 100644 --- a/src/runtime/CL/CLRuntimeContext.cpp +++ b/src/runtime/CL/CLRuntimeContext.cpp @@ -26,6 +26,8 @@ #include "arm_compute/runtime/CL/CLHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "support/MemorySupport.h" + namespace arm_compute { CLRuntimeContext::CLRuntimeContext() diff --git a/src/runtime/CL/CLTensorAllocator.cpp b/src/runtime/CL/CLTensorAllocator.cpp index 90d77883f6..f37fc779fe 100644 --- a/src/runtime/CL/CLTensorAllocator.cpp +++ b/src/runtime/CL/CLTensorAllocator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -28,6 +28,8 @@ #include "arm_compute/runtime/CL/CLRuntimeContext.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "support/MemorySupport.h" + namespace arm_compute { const cl::Buffer CLTensorAllocator::_empty_buffer = cl::Buffer(); diff --git a/src/runtime/CL/functions/CLArgMinMaxLayer.cpp b/src/runtime/CL/functions/CLArgMinMaxLayer.cpp index ad6e7ba97b..57c4f685f6 100644 --- a/src/runtime/CL/functions/CLArgMinMaxLayer.cpp +++ b/src/runtime/CL/functions/CLArgMinMaxLayer.cpp @@ -24,13 +24,14 @@ #include "arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/Utils.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/runtime/Utils.h" namespace arm_compute { @@ -47,7 +48,7 @@ Status CLArgMinMaxLayer::validate(const ITensorInfo *input, int axis, const ITen ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX && op != ReductionOperation::ARG_IDX_MIN, "Invalid reduction operation"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= static_cast(TensorShape::num_max_dimensions), "Reduction axis greater than max number of dimensions"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis"); - const unsigned int num_of_stages = calculate_number_of_stages_only_x_axis(input->dimension(0), axis); + const unsigned int num_of_stages = utils::calculate_number_of_stages_only_x_axis(input->dimension(0), axis); DataType output_data_type = DataType::S32; TensorInfo not_reshaped_output; @@ -115,7 +116,7 @@ void CLArgMinMaxLayer::configure(const ICLTensor *input, int axis, ICLTensor *ou void CLArgMinMaxLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - _num_of_stages = calculate_number_of_stages_only_x_axis(input->info()->dimension(0), axis); + _num_of_stages = utils::calculate_number_of_stages_only_x_axis(input->info()->dimension(0), axis); _reduction_axis = axis; const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(input->info()->tensor_shape(), axis, false); @@ -172,4 +173,4 @@ void CLArgMinMaxLayer::run() } _reshape.run(); } -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/src/runtime/CL/functions/CLConcatenateLayer.cpp b/src/runtime/CL/functions/CLConcatenateLayer.cpp index 4214813446..2eb310b893 100644 --- a/src/runtime/CL/functions/CLConcatenateLayer.cpp +++ b/src/runtime/CL/functions/CLConcatenateLayer.cpp @@ -36,6 +36,7 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" +#include "src/core/helpers/AutoConfiguration.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp b/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp index 4c787673b5..b291ae5b88 100644 --- a/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp +++ b/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp @@ -23,6 +23,8 @@ */ #include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h" +#include "support/MemorySupport.h" + namespace arm_compute { void CLConvertFullyConnectedWeights::configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp index 630352e4e6..85355f0f17 100644 --- a/src/runtime/CL/functions/CLConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp @@ -30,6 +30,8 @@ #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "support/MemorySupport.h" + #include #include #include diff --git a/src/runtime/CL/functions/CLCropResize.cpp b/src/runtime/CL/functions/CLCropResize.cpp index 529f7bfb3e..6167e9de0a 100644 --- a/src/runtime/CL/functions/CLCropResize.cpp +++ b/src/runtime/CL/functions/CLCropResize.cpp @@ -25,6 +25,10 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" + +#include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp index cd55336d9a..e6717b6d01 100644 --- a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp @@ -29,6 +29,8 @@ #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "support/MemorySupport.h" + #include #include #include diff --git a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp index c1055dda36..07e7a18941 100644 --- a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp @@ -43,7 +43,7 @@ void CLDirectConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weig } void CLDirectConvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, - const PadStrideInfo &conv_info, + const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info) { // Set GPU target diff --git a/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp index 3515c25d82..0ffafa0221 100644 --- a/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp @@ -28,6 +28,7 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/helpers/AutoConfiguration.h" #include #include diff --git a/src/runtime/CL/functions/CLFFT1D.cpp b/src/runtime/CL/functions/CLFFT1D.cpp index 7d15d33ab5..1269cba90d 100644 --- a/src/runtime/CL/functions/CLFFT1D.cpp +++ b/src/runtime/CL/functions/CLFFT1D.cpp @@ -25,8 +25,8 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/helpers/fft.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/utils/helpers/fft.h" namespace arm_compute { diff --git a/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp b/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp index 1def674bb6..4d0eab81ee 100644 --- a/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp @@ -26,10 +26,13 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/helpers/fft.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CPP/CPPScheduler.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/utils/helpers/fft.h" + +#include "support/MemorySupport.h" namespace arm_compute { diff --git a/src/runtime/CL/functions/CLFill.cpp b/src/runtime/CL/functions/CLFill.cpp index 6c0f1786f0..a89383ec31 100644 --- a/src/runtime/CL/functions/CLFill.cpp +++ b/src/runtime/CL/functions/CLFill.cpp @@ -26,6 +26,8 @@ #include "arm_compute/core/CL/kernels/CLMemsetKernel.h" #include "arm_compute/core/Types.h" +#include "support/MemorySupport.h" + #include namespace arm_compute diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp index 4f365b6a61..75e87c382b 100644 --- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp +++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp @@ -25,10 +25,10 @@ #include "arm_compute/core/Size2D.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "support/Cast.h" #include "support/MemorySupport.h" #include diff --git a/src/runtime/CL/functions/CLGEMM.cpp b/src/runtime/CL/functions/CLGEMM.cpp index d56b341abf..ccae6713a6 100644 --- a/src/runtime/CL/functions/CLGEMM.cpp +++ b/src/runtime/CL/functions/CLGEMM.cpp @@ -23,10 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLGEMM.h" -#include "arm_compute/core/CL/ICLGEMMKernelConfiguration.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h" -#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/GPUTarget.h" #include "arm_compute/core/Helpers.h" @@ -35,12 +32,18 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/helpers/float_ops.h" -#include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/gemm/CLGEMMKernelSelection.h" #include "arm_compute/runtime/ITensorAllocator.h" +#include "src/core/CL/ICLGEMMKernelConfiguration.h" +#include "src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h" +#include "src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/utils/helpers/float_ops.h" +#include "src/runtime/CL/gemm/CLGEMMKernelSelection.h" +#include "support/Cast.h" + +#include "support/MemorySupport.h" namespace arm_compute { diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp index ee90b39c2b..e871b39805 100644 --- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp @@ -27,10 +27,11 @@ #include "arm_compute/core/Size2D.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "support/Cast.h" #include #include diff --git a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp index 30dce5b8fe..7a8de6c1f5 100644 --- a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp +++ b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp @@ -24,8 +24,6 @@ #include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h" -#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/KernelDescriptors.h" @@ -35,7 +33,10 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/CL/gemm/CLGEMMKernelSelection.h" +#include "src/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h" +#include "src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/runtime/CL/gemm/CLGEMMKernelSelection.h" namespace arm_compute { diff --git a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp index 45dc402449..5291de074a 100644 --- a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp +++ b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp @@ -25,6 +25,7 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Types.h" +#include "src/core/helpers/AutoConfiguration.h" namespace arm_compute { diff --git a/src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp b/src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp index fce1fe43a2..4a60ee9d08 100644 --- a/src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp +++ b/src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp @@ -26,6 +26,8 @@ #include "arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h" #include "arm_compute/core/Types.h" +#include "support/MemorySupport.h" + namespace arm_compute { CLInstanceNormalizationLayer::CLInstanceNormalizationLayer() diff --git a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp index e30b1dbb86..76a531b1c9 100644 --- a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp +++ b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp @@ -27,6 +27,7 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/helpers/AutoConfiguration.h" #include diff --git a/src/runtime/CL/functions/CLPriorBoxLayer.cpp b/src/runtime/CL/functions/CLPriorBoxLayer.cpp index 1907c7cc08..fefbff639d 100644 --- a/src/runtime/CL/functions/CLPriorBoxLayer.cpp +++ b/src/runtime/CL/functions/CLPriorBoxLayer.cpp @@ -31,6 +31,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "support/MemorySupport.h" + using namespace arm_compute; CLPriorBoxLayer::CLPriorBoxLayer() diff --git a/src/runtime/CL/functions/CLQLSTMLayer.cpp b/src/runtime/CL/functions/CLQLSTMLayer.cpp index 15a54c7928..c493471667 100644 --- a/src/runtime/CL/functions/CLQLSTMLayer.cpp +++ b/src/runtime/CL/functions/CLQLSTMLayer.cpp @@ -30,6 +30,7 @@ #include "arm_compute/core/utils/misc/InfoHelpers.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/runtime/CL/functions/CLReduceMean.cpp b/src/runtime/CL/functions/CLReduceMean.cpp index 0e2ede7167..4ea7f7642f 100644 --- a/src/runtime/CL/functions/CLReduceMean.cpp +++ b/src/runtime/CL/functions/CLReduceMean.cpp @@ -23,12 +23,13 @@ */ #include "arm_compute/runtime/CL/functions/CLReduceMean.h" -#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/CL/CLValidate.h" +#include "src/core/helpers/AutoConfiguration.h" namespace arm_compute { diff --git a/src/runtime/CL/functions/CLReductionOperation.cpp b/src/runtime/CL/functions/CLReductionOperation.cpp index 54e91fb8d8..208371c45d 100644 --- a/src/runtime/CL/functions/CLReductionOperation.cpp +++ b/src/runtime/CL/functions/CLReductionOperation.cpp @@ -30,7 +30,9 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/Utils.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/runtime/Utils.h" + #include "support/MemorySupport.h" namespace arm_compute @@ -47,7 +49,7 @@ Status CLReductionOperation::validate(const ITensorInfo *input, const ITensorInf ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis"); - const unsigned int num_of_stages = calculate_number_of_stages_only_x_axis(input->dimension(0), axis); + const unsigned int num_of_stages = utils::calculate_number_of_stages_only_x_axis(input->dimension(0), axis); const bool is_serial = needs_serialized_reduction(op, input->data_type(), axis); const bool is_reshape_required = !keep_dims; @@ -194,7 +196,7 @@ void CLReductionOperation::configure(ICLTensor *input, ICLTensor *output, unsign void CLReductionOperation::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, bool keep_dims) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - _num_of_stages = calculate_number_of_stages_only_x_axis(input->info()->dimension(0), axis); + _num_of_stages = utils::calculate_number_of_stages_only_x_axis(input->info()->dimension(0), axis); _reduction_axis = axis; _is_serial = needs_serialized_reduction(op, input->info()->data_type(), axis); _is_reshape_required = !keep_dims; diff --git a/src/runtime/CL/functions/CLRemap.cpp b/src/runtime/CL/functions/CLRemap.cpp index 60b72c5f87..1e3d614402 100644 --- a/src/runtime/CL/functions/CLRemap.cpp +++ b/src/runtime/CL/functions/CLRemap.cpp @@ -42,7 +42,7 @@ void CLRemap::configure(ICLTensor *input, const ICLTensor *map_x, const ICLTenso void CLRemap::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, - uint8_t constant_border_value) + uint8_t constant_border_value) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); diff --git a/src/runtime/CL/functions/CLSelect.cpp b/src/runtime/CL/functions/CLSelect.cpp index c7d7df75d2..ef8010847b 100644 --- a/src/runtime/CL/functions/CLSelect.cpp +++ b/src/runtime/CL/functions/CLSelect.cpp @@ -27,6 +27,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "support/MemorySupport.h" + using namespace arm_compute; namespace arm_compute diff --git a/src/runtime/CL/functions/CLSoftmaxLayer.cpp b/src/runtime/CL/functions/CLSoftmaxLayer.cpp index 720f9111a5..759c8706a1 100644 --- a/src/runtime/CL/functions/CLSoftmaxLayer.cpp +++ b/src/runtime/CL/functions/CLSoftmaxLayer.cpp @@ -31,6 +31,7 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/helpers/SoftmaxHelpers.h" namespace arm_compute { @@ -63,7 +64,7 @@ void CLSoftmaxLayerGeneric::configure(const CLCompileContext &compile_co { _memory_group.manage(&_input_permuted); _memory_group.manage(&_output_permuted); - _permute_input.configure(compile_context, input, &_input_permuted, get_permutation_vector_from_softmax_axis(actual_axis)); + _permute_input.configure(compile_context, input, &_input_permuted, softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis)); tmp_output = &_output_permuted; } @@ -99,7 +100,7 @@ void CLSoftmaxLayerGeneric::configure(const CLCompileContext &compile_co _sum.allocator()->allocate(); if(_needs_permute) { - _permute_output.configure(compile_context, &_output_permuted, output, get_permutation_vector_from_softmax_axis(actual_axis)); + _permute_output.configure(compile_context, &_output_permuted, output, softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis)); _input_permuted.allocator()->allocate(); _output_permuted.allocator()->allocate(); } @@ -117,7 +118,7 @@ Status CLSoftmaxLayerGeneric::validate(const ITensorInfo *input, const I const bool needs_permute = actual_axis != 0; if(needs_permute) { - const PermutationVector permutation_vector = get_permutation_vector_from_softmax_axis(actual_axis); + const PermutationVector permutation_vector = softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis); const TensorShape permuted_shape = misc::shape_calculator::compute_permutation_output_shape(*input, permutation_vector); TensorInfo input_permuted(input->clone()->set_tensor_shape(permuted_shape)); ARM_COMPUTE_RETURN_ON_ERROR(CLPermute::validate(input, &input_permuted, permutation_vector)); diff --git a/src/runtime/CL/functions/CLSplit.cpp b/src/runtime/CL/functions/CLSplit.cpp index db0b14b9a2..0b27371e3f 100644 --- a/src/runtime/CL/functions/CLSplit.cpp +++ b/src/runtime/CL/functions/CLSplit.cpp @@ -30,6 +30,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/helpers/AutoConfiguration.h" namespace arm_compute { diff --git a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp index 09a35a6f27..7ad017f918 100644 --- a/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp @@ -102,7 +102,7 @@ void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *we } void CLWinogradConvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, - const PadStrideInfo &conv_info, + const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, bool enable_fast_math) { // Get indices for the width and height diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelection.h b/src/runtime/CL/gemm/CLGEMMKernelSelection.h new file mode 100644 index 0000000000..f6fad7e4ff --- /dev/null +++ b/src/runtime/CL/gemm/CLGEMMKernelSelection.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CLGEMMKERNELSELECTION_H +#define SRC_CLGEMMKERNELSELECTION_H + +#include "arm_compute/runtime/CL/ICLGEMMKernelSelection.h" +#include "src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h" +#include "src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h" +#include "src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h" + +#include "support/MemorySupport.h" + +namespace arm_compute +{ +namespace cl_gemm +{ +/** CLGEMMKernelSelection factory class */ +class CLGEMMKernelSelectionFactory final +{ +public: + /** Static method to select the GEMM kernel accordingly with the GPU target and GEMM's dimensionality + * + * @param[in] gpu GPU target + * + * @return CLGEMMKernelSelection class + */ + static std::unique_ptr create(GPUTarget gpu) + { + switch(get_arch_from_target(gpu)) + { + case GPUTarget::MIDGARD: + return support::cpp14::make_unique(gpu); + case GPUTarget::BIFROST: + return support::cpp14::make_unique(gpu); + case GPUTarget::VALHALL: + return support::cpp14::make_unique(gpu); + default: + ARM_COMPUTE_ERROR("Not supported GPU target"); + } + } +}; +} // namespace cl_gemm +} // namespace arm_compute +#endif /* SRC_CLGEMMKERNELSELECTION_H */ diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp b/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp index b1dd690ca5..73b90568f5 100644 --- a/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp +++ b/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.cpp @@ -21,11 +21,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h" +#include "src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h" +#include "src/core/CL/gemm/CLGEMMHelpers.h" #include #include diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h b/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h new file mode 100644 index 0000000000..a495b48301 --- /dev/null +++ b/src/runtime/CL/gemm/CLGEMMKernelSelectionBifrost.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CLGEMMKERNELSELECTIONBIFROST_H +#define SRC_CLGEMMKERNELSELECTIONBIFROST_H + +#include "arm_compute/runtime/CL/ICLGEMMKernelSelection.h" + +namespace arm_compute +{ +namespace cl_gemm +{ +/** Bifrost based OpenCL GEMMKernel selection */ +class CLGEMMKernelSelectionBifrost final : public ICLGEMMKernelSelection +{ +public: + /** Constructor + * + * @param[in] gpu GPU target + */ + CLGEMMKernelSelectionBifrost(GPUTarget gpu); + + // Inherited overridden method + CLGEMMKernelType select_kernel(const CLGEMMKernelSelectionParams ¶ms) override; + +private: + CLGEMMKernelType g76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant); + CLGEMMKernelType g71_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant); + CLGEMMKernelType default_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant); + CLGEMMKernelType default_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant); + CLGEMMKernelType default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant); +}; +} // namespace cl_gemm +} // namespace arm_compute +#endif /* SRC_CLGEMMKERNELSELECTIONBIFROST_H */ diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.cpp b/src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.cpp index 324c2f7dca..d172a827b5 100644 --- a/src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.cpp +++ b/src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.cpp @@ -21,12 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h" +#include "src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h" #include "arm_compute/core/GPUTarget.h" +#include "src/core/CL/gemm/CLGEMMHelpers.h" #include #include diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h b/src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h new file mode 100644 index 0000000000..3f6003f7dc --- /dev/null +++ b/src/runtime/CL/gemm/CLGEMMKernelSelectionMidgard.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CLGEMMKERNELSELECTIONMIDGARD_H +#define SRC_CLGEMMKERNELSELECTIONMIDGARD_H + +#include "arm_compute/runtime/CL/ICLGEMMKernelSelection.h" + +namespace arm_compute +{ +namespace cl_gemm +{ +/** Midgard based OpenCL GEMMKernel selection */ +class CLGEMMKernelSelectionMidgard final : public ICLGEMMKernelSelection +{ +public: + /** Constructor + * + * @param[in] gpu GPU target + */ + CLGEMMKernelSelectionMidgard(GPUTarget gpu); + + // Inherited overridden method + CLGEMMKernelType select_kernel(const CLGEMMKernelSelectionParams ¶ms) override; + +private: + CLGEMMKernelType default_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant); + CLGEMMKernelType default_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant); + CLGEMMKernelType default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant); +}; +} // namespace cl_gemm +} // namespace arm_compute +#endif /* SRC_CLGEMMKERNELSELECTIONMIDGARD_H */ diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.cpp b/src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.cpp index c50c7ae76b..acae0e7565 100644 --- a/src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.cpp +++ b/src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.cpp @@ -21,11 +21,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h" +#include "src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/gemm/CLGEMMHelpers.h" +#include "src/core/CL/gemm/CLGEMMHelpers.h" #include #include diff --git a/src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h b/src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h new file mode 100644 index 0000000000..cbea9ea548 --- /dev/null +++ b/src/runtime/CL/gemm/CLGEMMKernelSelectionValhall.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_CLGEMMKERNELSELECTIONVALHALL_H +#define SRC_CLGEMMKERNELSELECTIONVALHALL_H + +#include "arm_compute/runtime/CL/ICLGEMMKernelSelection.h" + +namespace arm_compute +{ +namespace cl_gemm +{ +/** Valhall based OpenCL GEMMKernel selection */ +class CLGEMMKernelSelectionValhall final : public ICLGEMMKernelSelection +{ +public: + /** Constructor + * + * @param[in] gpu GPU target + */ + CLGEMMKernelSelectionValhall(GPUTarget gpu); + + // Inherited overridden method + CLGEMMKernelType select_kernel(const CLGEMMKernelSelectionParams ¶ms) override; + +private: + CLGEMMKernelType default_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant); + CLGEMMKernelType default_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant); + CLGEMMKernelType default_q8(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant); +}; +} // namespace cl_gemm +} // namespace arm_compute +#endif /* SRC_CLGEMMKERNELSELECTIONVALHALL_H */ diff --git a/src/runtime/CL/tuners/BifrostTuner.cpp b/src/runtime/CL/tuners/BifrostTuner.cpp index 52644bf192..a6474c9835 100644 --- a/src/runtime/CL/tuners/BifrostTuner.cpp +++ b/src/runtime/CL/tuners/BifrostTuner.cpp @@ -25,7 +25,7 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernels.h" -#include "arm_compute/core/utils/misc/Cast.h" +#include "support/Cast.h" namespace arm_compute { diff --git a/src/runtime/CL/tuners/MidgardTuner.cpp b/src/runtime/CL/tuners/MidgardTuner.cpp index e49e15508b..58b0d579d2 100644 --- a/src/runtime/CL/tuners/MidgardTuner.cpp +++ b/src/runtime/CL/tuners/MidgardTuner.cpp @@ -25,7 +25,7 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernels.h" -#include "arm_compute/core/utils/misc/Cast.h" +#include "support/Cast.h" namespace arm_compute { diff --git a/src/runtime/CPP/CPPScheduler.cpp b/src/runtime/CPP/CPPScheduler.cpp index f017006de7..e6b0ec20b8 100644 --- a/src/runtime/CPP/CPPScheduler.cpp +++ b/src/runtime/CPP/CPPScheduler.cpp @@ -27,7 +27,8 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/runtime/CPUUtils.h" +#include "src/runtime/CPUUtils.h" +#include "support/MemorySupport.h" #include "support/Mutex.h" #include diff --git a/src/runtime/CPP/functions/CPPDetectionOutputLayer.cpp b/src/runtime/CPP/functions/CPPDetectionOutputLayer.cpp index 9d62733384..fdb4c9f0f6 100644 --- a/src/runtime/CPP/functions/CPPDetectionOutputLayer.cpp +++ b/src/runtime/CPP/functions/CPPDetectionOutputLayer.cpp @@ -26,6 +26,7 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" #include diff --git a/src/runtime/CPP/functions/CPPDetectionPostProcessLayer.cpp b/src/runtime/CPP/functions/CPPDetectionPostProcessLayer.cpp index 3507a3ac45..31f1fafd69 100644 --- a/src/runtime/CPP/functions/CPPDetectionPostProcessLayer.cpp +++ b/src/runtime/CPP/functions/CPPDetectionPostProcessLayer.cpp @@ -26,6 +26,7 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" #include #include diff --git a/src/runtime/CPUUtils.cpp b/src/runtime/CPUUtils.cpp index 4d6caaee01..a7dd464540 100644 --- a/src/runtime/CPUUtils.cpp +++ b/src/runtime/CPUUtils.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/runtime/CPUUtils.h" +#include "src/runtime/CPUUtils.h" #include "arm_compute/core/CPP/CPPTypes.h" #include "arm_compute/core/Error.h" @@ -352,6 +352,10 @@ int get_max_cpus() namespace arm_compute { +namespace utils +{ +namespace cpu +{ void get_cpu_configuration(CPUInfo &cpuinfo) { #if !defined(BARE_METAL) && (defined(__arm__) || defined(__aarch64__)) @@ -460,5 +464,6 @@ unsigned int get_threads_hint() return num_threads_hint; } - +} // namespace cpu +} // namespace utils } // namespace arm_compute diff --git a/src/runtime/CPUUtils.h b/src/runtime/CPUUtils.h new file mode 100644 index 0000000000..452d3d58ca --- /dev/null +++ b/src/runtime/CPUUtils.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_RUNTIME_CPU_UTILS_H +#define ARM_COMPUTE_RUNTIME_CPU_UTILS_H + +namespace arm_compute +{ +class CPUInfo; + +namespace utils +{ +namespace cpu +{ +/** This function will try to detect the CPU configuration on the system and will fill + * the cpuinfo object accordingly to reflect this. + * + * @param[out] cpuinfo @ref CPUInfo to be used to hold the system's cpu configuration. + */ +void get_cpu_configuration(CPUInfo &cpuinfo); +/** Some systems have both big and small cores, this fuction computes the minimum number of cores + * that are exactly the same on the system. To maximize performance the library attempts to process + * workloads concurrently using as many threads as big cores are available on the system. + * + * @return The minumum number of common cores. + */ +unsigned int get_threads_hint(); +} // namespace cpu +} // namespace utils +} // namespace arm_compute +#endif /* ARM_COMPUTE_RUNTIME_CPU_UTILS_H */ diff --git a/src/runtime/DeviceProperties.cpp b/src/runtime/DeviceProperties.cpp index 5d7ae020d7..ec9f4a16ed 100644 --- a/src/runtime/DeviceProperties.cpp +++ b/src/runtime/DeviceProperties.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,12 +23,12 @@ */ #include "arm_compute/runtime/DeviceProperties.h" -#include "arm_compute/runtime/CPUUtils.h" +#include "src/runtime/CPUUtils.h" namespace arm_compute { DeviceProperties::DeviceProperties() { - get_cpu_configuration(cpu_info); + utils::cpu::get_cpu_configuration(cpu_info); } } // namespace arm_compute diff --git a/src/runtime/GLES_COMPUTE/GCMemory.cpp b/src/runtime/GLES_COMPUTE/GCMemory.cpp index 998f8a5cc4..4d74555f4e 100644 --- a/src/runtime/GLES_COMPUTE/GCMemory.cpp +++ b/src/runtime/GLES_COMPUTE/GCMemory.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -23,8 +23,8 @@ */ #include "arm_compute/runtime/GLES_COMPUTE/GCMemory.h" -#include "arm_compute/core/utils/misc/Cast.h" #include "arm_compute/runtime/GLES_COMPUTE/GCMemoryRegion.h" +#include "support/Cast.h" namespace arm_compute { diff --git a/src/runtime/GLES_COMPUTE/functions/GCConcatenateLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCConcatenateLayer.cpp index 9e23974b8d..807412eb17 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCConcatenateLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCConcatenateLayer.cpp @@ -29,6 +29,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" +#include "src/core/helpers/AutoConfiguration.h" + #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/IScheduler.cpp b/src/runtime/IScheduler.cpp index 53df3699b0..43df3d5e23 100644 --- a/src/runtime/IScheduler.cpp +++ b/src/runtime/IScheduler.cpp @@ -26,17 +26,17 @@ #include "arm_compute/core/CPP/ICPPKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Window.h" -#include "arm_compute/runtime/CPUUtils.h" -#include "arm_compute/runtime/SchedulerUtils.h" +#include "src/runtime/CPUUtils.h" +#include "src/runtime/SchedulerUtils.h" namespace arm_compute { IScheduler::IScheduler() : _cpu_info() { - get_cpu_configuration(_cpu_info); + utils::cpu::get_cpu_configuration(_cpu_info); // Work out the best possible number of execution threads - _num_threads_hint = get_threads_hint(); + _num_threads_hint = utils::cpu::get_threads_hint(); } CPUInfo &IScheduler::cpu_info() @@ -74,7 +74,7 @@ void IScheduler::schedule_common(ICPPKernel *kernel, const Hints &hints, ITensor //in c++17 this can be swapped for auto [ m_threads, n_threads ] = split_2d(... unsigned m_threads, n_threads; - std::tie(m_threads, n_threads) = split_2d(this->num_threads(), m, n); + std::tie(m_threads, n_threads) = scheduler_utils::split_2d(this->num_threads(), m, n); std::vector workloads; for(unsigned int ni = 0; ni != n_threads; ++ni) diff --git a/src/runtime/NEON/INESimpleFunctionNoBorder.cpp b/src/runtime/NEON/INESimpleFunctionNoBorder.cpp index 82316c49c6..f2181e0a74 100644 --- a/src/runtime/NEON/INESimpleFunctionNoBorder.cpp +++ b/src/runtime/NEON/INESimpleFunctionNoBorder.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,7 @@ #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" #include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/Utils.h" +#include "src/runtime/Utils.h" namespace arm_compute { @@ -36,6 +36,6 @@ INESimpleFunctionNoBorder::INESimpleFunctionNoBorder(IRuntimeContext *ctx) void INESimpleFunctionNoBorder::run() { - schedule_kernel_on_ctx(_ctx, _kernel.get(), Window::DimY); + utils::schedule_kernel_on_ctx(_ctx, _kernel.get(), Window::DimY); } } // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp b/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp index 0664d3c9d5..70bbba62ad 100644 --- a/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp +++ b/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "support/MemorySupport.h" + namespace arm_compute { NEArgMinMaxLayer::NEArgMinMaxLayer(std::shared_ptr memory_manager) diff --git a/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp b/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp index 5a593e9c74..eab40ac5be 100644 --- a/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp +++ b/src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,6 +30,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "support/MemorySupport.h" + using namespace arm_compute; NEBatchNormalizationLayer::NEBatchNormalizationLayer() diff --git a/src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp b/src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp index c06a8aa0e0..2705cffe68 100644 --- a/src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp +++ b/src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,6 +29,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "support/MemorySupport.h" + namespace arm_compute { void NEBatchToSpaceLayer::configure(const ITensor *input, const ITensor *block_shape, ITensor *output) diff --git a/src/runtime/NEON/functions/NEConcatenateLayer.cpp b/src/runtime/NEON/functions/NEConcatenateLayer.cpp index 8df4f4cb62..72bd9e6b19 100644 --- a/src/runtime/NEON/functions/NEConcatenateLayer.cpp +++ b/src/runtime/NEON/functions/NEConcatenateLayer.cpp @@ -35,6 +35,7 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" +#include "src/core/helpers/AutoConfiguration.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NECropResize.cpp b/src/runtime/NEON/functions/NECropResize.cpp index f6ed2ec250..f8f99169aa 100644 --- a/src/runtime/NEON/functions/NECropResize.cpp +++ b/src/runtime/NEON/functions/NECropResize.cpp @@ -25,6 +25,8 @@ #include "arm_compute/runtime/NEON/functions/NECropResize.h" +#include "support/MemorySupport.h" + #include namespace arm_compute diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp index dff3070239..cb9ab168a7 100644 --- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp @@ -28,6 +28,7 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/helpers/AutoConfiguration.h" using namespace arm_compute::misc::shape_calculator; diff --git a/src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp b/src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp index e363f89482..0aaa37ec92 100644 --- a/src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp +++ b/src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,6 +29,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "support/MemorySupport.h" + namespace arm_compute { void NEDepthToSpaceLayer::configure(const ITensor *input, ITensor *output, int32_t block_shape) diff --git a/src/runtime/NEON/functions/NEFFT1D.cpp b/src/runtime/NEON/functions/NEFFT1D.cpp index 744a91521f..2c53b185df 100644 --- a/src/runtime/NEON/functions/NEFFT1D.cpp +++ b/src/runtime/NEON/functions/NEFFT1D.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,8 +25,8 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/helpers/fft.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/utils/helpers/fft.h" namespace arm_compute { diff --git a/src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp b/src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp index cd68788145..a46fc9f45f 100644 --- a/src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,8 +26,11 @@ #include "arm_compute/core/ITensor.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/helpers/fft.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/utils/helpers/fft.h" + +#include "support/MemorySupport.h" namespace arm_compute { diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp index 4dcf41e360..d956d16f4d 100644 --- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp +++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp @@ -30,6 +30,8 @@ #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "support/MemorySupport.h" + #include #include diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp index 3b8ca44ed7..4166cff97a 100644 --- a/src/runtime/NEON/functions/NEGEMM.cpp +++ b/src/runtime/NEON/functions/NEGEMM.cpp @@ -23,7 +23,6 @@ */ #include "arm_compute/runtime/NEON/functions/NEGEMM.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -34,6 +33,8 @@ #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" #include diff --git a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp index ad349cb635..5b0848398d 100644 --- a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp +++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp @@ -23,13 +23,13 @@ */ #include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" -#include "src/core/NEON/kernels/assembly/arm_gemm.hpp" - -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" -#include "arm_compute/runtime/NEON/functions/NESimpleAssemblyFunction.h" - +#include "src/core/CPP/Validate.h" +#include "src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h" #include "src/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h" +#include "src/core/NEON/kernels/assembly/arm_gemm.hpp" + +#include "support/MemorySupport.h" #include diff --git a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp index 83db146a8a..36357dde41 100644 --- a/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp +++ b/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp @@ -33,6 +33,7 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/helpers/AutoConfiguration.h" #include "support/MemorySupport.h" namespace arm_compute diff --git a/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp b/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp index 3d5377892a..13210a06cd 100644 --- a/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp +++ b/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp @@ -25,6 +25,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/helpers/AutoConfiguration.h" namespace arm_compute { diff --git a/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp b/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp index 11989d3225..7610d15787 100644 --- a/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp +++ b/src/runtime/NEON/functions/NELSTMLayerQuantized.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,6 +26,7 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/helpers/AutoConfiguration.h" #include #include diff --git a/src/runtime/NEON/functions/NEPadLayer.cpp b/src/runtime/NEON/functions/NEPadLayer.cpp index 21c349ba95..03c597a3bf 100644 --- a/src/runtime/NEON/functions/NEPadLayer.cpp +++ b/src/runtime/NEON/functions/NEPadLayer.cpp @@ -27,6 +27,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "src/core/helpers/AutoConfiguration.h" namespace arm_compute { diff --git a/src/runtime/NEON/functions/NEPriorBoxLayer.cpp b/src/runtime/NEON/functions/NEPriorBoxLayer.cpp index fda130bf69..bcf6bef9c7 100644 --- a/src/runtime/NEON/functions/NEPriorBoxLayer.cpp +++ b/src/runtime/NEON/functions/NEPriorBoxLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -31,6 +31,8 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "support/MemorySupport.h" + namespace arm_compute { void NEPriorBoxLayer::configure(const ITensor *input1, const ITensor *input2, ITensor *output, const PriorBoxLayerInfo &info) diff --git a/src/runtime/NEON/functions/NEQLSTMLayer.cpp b/src/runtime/NEON/functions/NEQLSTMLayer.cpp index 5a6b51337a..95f20ae1a9 100644 --- a/src/runtime/NEON/functions/NEQLSTMLayer.cpp +++ b/src/runtime/NEON/functions/NEQLSTMLayer.cpp @@ -30,6 +30,7 @@ #include "arm_compute/core/utils/misc/InfoHelpers.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/helpers/WindowHelpers.h" namespace arm_compute { diff --git a/src/runtime/NEON/functions/NEReduceMean.cpp b/src/runtime/NEON/functions/NEReduceMean.cpp index 021f7b530a..c3c5529c09 100644 --- a/src/runtime/NEON/functions/NEReduceMean.cpp +++ b/src/runtime/NEON/functions/NEReduceMean.cpp @@ -23,11 +23,12 @@ */ #include "arm_compute/runtime/NEON/functions/NEReduceMean.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" namespace arm_compute { diff --git a/src/runtime/NEON/functions/NEReductionOperation.cpp b/src/runtime/NEON/functions/NEReductionOperation.cpp index 91176bfa45..4938a56b3f 100644 --- a/src/runtime/NEON/functions/NEReductionOperation.cpp +++ b/src/runtime/NEON/functions/NEReductionOperation.cpp @@ -26,6 +26,7 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/helpers/AutoConfiguration.h" namespace arm_compute { diff --git a/src/runtime/NEON/functions/NEScale.cpp b/src/runtime/NEON/functions/NEScale.cpp index 2278f07a1c..bbf8343c2b 100644 --- a/src/runtime/NEON/functions/NEScale.cpp +++ b/src/runtime/NEON/functions/NEScale.cpp @@ -30,12 +30,14 @@ #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Window.h" -#include "arm_compute/core/utils/misc/Rounding.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/TensorAllocator.h" #include "src/core/utils/ScaleUtils.h" +#include "support/MemorySupport.h" +#include "support/Rounding.h" + #include #include #include diff --git a/src/runtime/NEON/functions/NESimpleAssemblyFunction.cpp b/src/runtime/NEON/functions/NESimpleAssemblyFunction.cpp index b0cafae520..d165b2235c 100644 --- a/src/runtime/NEON/functions/NESimpleAssemblyFunction.cpp +++ b/src/runtime/NEON/functions/NESimpleAssemblyFunction.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/runtime/NEON/functions/NESimpleAssemblyFunction.h" +#include "src/runtime/NEON/functions/NESimpleAssemblyFunction.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" diff --git a/src/runtime/NEON/functions/NESimpleAssemblyFunction.h b/src/runtime/NEON/functions/NESimpleAssemblyFunction.h new file mode 100644 index 0000000000..e9be54d35f --- /dev/null +++ b/src/runtime/NEON/functions/NESimpleAssemblyFunction.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2018-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_NESIMPLEASSEMBLYFUNCTION_H +#define ARM_COMPUTE_NESIMPLEASSEMBLYFUNCTION_H + +#include "arm_compute/runtime/IFunction.h" +#include "src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h" + +#include + +namespace arm_compute +{ +/** Basic interface for functions which have a single NEON GEMM wrapper kernel to run */ +class NESimpleAssemblyFunction : public IFunction +{ +public: + /** Constructor */ + NESimpleAssemblyFunction(); + + /** Configure the function with the kernel to run + * + * @param[in] kernel GEMM Wrapper kernel configured and ready to run + * + * @note The kernel is expected to have a 1D window. The function will multi-thread this window across the X dimension. + */ + void configure(std::unique_ptr kernel); + + // Inherited methods overridden: + void run() override final; + +protected: + std::unique_ptr _kernel; /**< Kernel to run */ +}; +} //namespace arm_compute +#endif /*ARM_COMPUTE_NESIMPLEASSEMBLYFUNCTION_H */ diff --git a/src/runtime/NEON/functions/NESoftmaxLayer.cpp b/src/runtime/NEON/functions/NESoftmaxLayer.cpp index e763caa3a3..4f773861d2 100644 --- a/src/runtime/NEON/functions/NESoftmaxLayer.cpp +++ b/src/runtime/NEON/functions/NESoftmaxLayer.cpp @@ -27,6 +27,7 @@ #include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "src/core/helpers/SoftmaxHelpers.h" namespace arm_compute { @@ -53,7 +54,7 @@ void NESoftmaxLayerGeneric::configure(ITensor *input, ITensor *output, f // Add to the memory manager _input_permuted _memory_group.manage(&_input_permuted); - _permute_input.configure(input, &_input_permuted, get_permutation_vector_from_softmax_axis(actual_axis)); + _permute_input.configure(input, &_input_permuted, softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis)); } // We want to deal with a 2D input. Either it is the permuted version of the original input (4D case) @@ -87,7 +88,7 @@ void NESoftmaxLayerGeneric::configure(ITensor *input, ITensor *output, f _input_permuted.allocator()->allocate(); // Re-permute the permuted output into the requested (4D) output - _permute_output.configure(&_output_permuted, output, get_permutation_vector_from_softmax_axis(actual_axis)); + _permute_output.configure(&_output_permuted, output, softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis)); // Allocate the intermediate permuted tensors _output_permuted.allocator()->allocate(); @@ -128,7 +129,7 @@ Status NESoftmaxLayerGeneric::validate(const ITensorInfo *input, const I if(needs_permute) { - const PermutationVector permutation_vector = get_permutation_vector_from_softmax_axis(actual_axis); + const PermutationVector permutation_vector = softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis); const TensorShape permuted_shape = misc::shape_calculator::compute_permutation_output_shape(*input, permutation_vector); TensorInfo input_permuted(input->clone()->set_tensor_shape(permuted_shape)); ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(input, &input_permuted, permutation_vector)); diff --git a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp index 1bad310640..23b9f60c38 100644 --- a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp @@ -23,17 +23,17 @@ */ #include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" +#include "src/core/CPP/Validate.h" #include "src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h" #include "support/MemorySupport.h" -#include "arm_compute/core/NEON/kernels/convolution/common/utils.hpp" +#include "src/core/NEON/kernels/convolution/common/utils.hpp" #include "src/core/NEON/kernels/convolution/winograd/winograd.hpp" namespace arm_compute diff --git a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp index 73a7caac8b..11e89cb23b 100644 --- a/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp +++ b/src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp @@ -24,18 +24,21 @@ #include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h" -#include "arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp" -#include "arm_compute/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp" #include "arm_compute/core/Utils.h" #include "arm_compute/core/utils/misc/InfoHelpers.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "src/core/CPP/Validate.h" +#include "src/core/NEON/kernels/assembly/NEDepthwiseConvolutionAssemblyKernelWrapper.h" +#include "src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp" +#include "src/core/NEON/kernels/convolution/depthwise/depthwise_quantized_dilated.hpp" +#include "src/core/helpers/AutoConfiguration.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "support/MemorySupport.h" + #include namespace arm_compute diff --git a/src/runtime/OMP/OMPScheduler.cpp b/src/runtime/OMP/OMPScheduler.cpp index 4c2f03a53a..bf34b0114b 100644 --- a/src/runtime/OMP/OMPScheduler.cpp +++ b/src/runtime/OMP/OMPScheduler.cpp @@ -27,7 +27,7 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/Utils.h" -#include "arm_compute/runtime/CPUUtils.h" +#include "src/runtime/CPUUtils.h" #include namespace arm_compute diff --git a/src/runtime/SchedulerUtils.cpp b/src/runtime/SchedulerUtils.cpp index 1c12e3ce58..6f9a32c879 100644 --- a/src/runtime/SchedulerUtils.cpp +++ b/src/runtime/SchedulerUtils.cpp @@ -21,6 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#include "src/runtime/SchedulerUtils.h" #include "arm_compute/core/Error.h" @@ -28,6 +29,8 @@ namespace arm_compute { +namespace scheduler_utils +{ #ifndef BARE_METAL std::pair split_2d(unsigned max_threads, std::size_t m, std::size_t n) { @@ -76,4 +79,5 @@ std::pair split_2d(unsigned max_threads, std::size_t m, std: } } #endif /* #ifndef BARE_METAL */ +} // namespace scheduler_utils } // namespace arm_compute diff --git a/src/runtime/SchedulerUtils.h b/src/runtime/SchedulerUtils.h new file mode 100644 index 0000000000..46644a369e --- /dev/null +++ b/src/runtime/SchedulerUtils.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_COMPUTE_SCHEDULER_UTILS_H +#define SRC_COMPUTE_SCHEDULER_UTILS_H + +#include +#include + +namespace arm_compute +{ +namespace scheduler_utils +{ +/** Given two dimensions and a maximum number of threads to utilise, calculate the best + * combination of threads that fit in (multiplied together) max_threads. + * + * This algorithm assumes that work in either of the dimensions is equally difficult + * to compute + * + * @returns [m_nthreads, n_nthreads] A pair of the threads that should be used in each dimension + */ +std::pair split_2d(unsigned max_threads, std::size_t m, std::size_t n); +} // namespace scheduler_utils +} // namespace arm_compute +#endif /* SRC_COMPUTE_SCHEDULER_UTILS_H */ diff --git a/src/runtime/Utils.cpp b/src/runtime/Utils.cpp index 534b421f8a..15e9d43a49 100644 --- a/src/runtime/Utils.cpp +++ b/src/runtime/Utils.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/runtime/Utils.h" +#include "src/runtime/Utils.h" #include "arm_compute/runtime/NEON/NEScheduler.h" @@ -31,6 +31,8 @@ namespace arm_compute { +namespace utils +{ #ifndef DOXYGEN_SKIP_THIS static const std::string information = #include "arm_compute_version.embed" @@ -78,4 +80,5 @@ unsigned int calculate_number_of_stages_only_x_axis(size_t input_x_dimension, un const unsigned int num_of_stages = num_of_wg / 128 + 2; return num_of_stages; } +} // namespace utils } // namespace arm_compute diff --git a/src/runtime/Utils.h b/src/runtime/Utils.h new file mode 100644 index 0000000000..f8775c9612 --- /dev/null +++ b/src/runtime/Utils.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2017-2020 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef SRC_RUNTIME_UTILS_H +#define SRC_RUNTIME_UTILS_H + +#include "arm_compute/runtime/IRuntimeContext.h" +#include "arm_compute/runtime/Scheduler.h" + +#include + +namespace arm_compute +{ +namespace utils +{ +/** Convert a Scheduler::Type into a string. + * + * @param[in] t @ref Scheduler::Type to be translated to string. + * + * @return The string describing the scheduler type. + */ +const std::string &string_from_scheduler_type(Scheduler::Type t); + +/** Schedules a kernel using the context if not nullptr else uses the legacy scheduling flow. + * + * @param[in] ctx Context to use. + * @param[in] kernel Kernel to schedule. + * @param[in] hints Hints to use. + */ +void schedule_kernel_on_ctx(IRuntimeContext *ctx, ICPPKernel *kernel, const IScheduler::Hints &hints); + +/** Calculate number of stages for parallel implementations + * + * @param[in] input_x_dimension input tensor x dimension + * @param[in] axis axis to be used + */ +unsigned int calculate_number_of_stages_only_x_axis(size_t input_x_dimension, unsigned int axis); +} // namespace utils +} // namespace arm_compute +#endif /* SRC_RUNTIME_UTILS_H */ -- cgit v1.2.1