From b6eb35371d222c6b7f61210d97ebd7dd9e197458 Mon Sep 17 00:00:00 2001 From: Anthony Barbier Date: Wed, 8 Aug 2018 13:20:04 +0100 Subject: COMPMID-1478: Stop relying on static default OpenCL objects in cl2.hpp This causes problems when ACL is used as a shared library on Android. Fixes some problems related to creation / destruction order between the Graph's CL backend and core / runtime Change-Id: I716d63fd42f4586df1ffbb6fa97e4db06d3a781b Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/143228 Tested-by: Jenkins Reviewed-by: Michele DiGiorgio Reviewed-by: Gian Marco Iodice --- arm_compute/core/CL/CLHelpers.h | 2 +- arm_compute/core/CL/CLKernelLibrary.h | 8 ++--- arm_compute/core/CL/ICLKernel.h | 14 ++++++-- arm_compute/core/CL/OpenCL.h | 1 + arm_compute/graph/GraphContext.h | 1 + arm_compute/graph/IDeviceBackend.h | 7 +++- arm_compute/graph/Utils.h | 7 +++- arm_compute/graph/backends/CL/CLDeviceBackend.h | 9 ++--- arm_compute/graph/backends/GLES/GCDeviceBackend.h | 1 + arm_compute/graph/backends/NEON/NEDeviceBackend.h | 1 + arm_compute/graph/frontend/Stream.h | 6 ++-- arm_compute/runtime/CL/CLScheduler.h | 4 +-- scripts/clang_tidy_rules.py | 2 +- src/core/CL/CLHelpers.cpp | 2 +- src/core/CL/CLKernelLibrary.cpp | 3 +- src/core/CL/ICLSimple2DKernel.cpp | 4 +-- src/core/CL/ICLSimple3DKernel.cpp | 4 +-- src/core/CL/ICLSimpleKernel.cpp | 4 +-- src/core/CL/OpenCL.cpp | 21 +++++++++++ src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp | 4 +-- src/core/CL/kernels/CLActivationLayerKernel.cpp | 4 +-- src/core/CL/kernels/CLArithmeticAdditionKernel.cpp | 2 +- src/core/CL/kernels/CLArithmeticDivisionKernel.cpp | 2 +- .../CL/kernels/CLArithmeticSubtractionKernel.cpp | 2 +- .../CL/kernels/CLBatchNormalizationLayerKernel.cpp | 4 +-- src/core/CL/kernels/CLBitwiseAndKernel.cpp | 4 +-- src/core/CL/kernels/CLBitwiseOrKernel.cpp | 4 +-- src/core/CL/kernels/CLBitwiseXorKernel.cpp | 4 +-- src/core/CL/kernels/CLBox3x3Kernel.cpp | 4 +-- src/core/CL/kernels/CLCannyEdgeKernel.cpp | 8 ++--- src/core/CL/kernels/CLChannelCombineKernel.cpp | 4 +-- src/core/CL/kernels/CLChannelExtractKernel.cpp | 4 +-- .../CL/kernels/CLChannelShuffleLayerKernel.cpp | 2 +- src/core/CL/kernels/CLCol2ImKernel.cpp | 4 +-- src/core/CL/kernels/CLColorConvertKernel.cpp | 8 ++--- .../CLConvertFullyConnectedWeightsKernel.cpp | 4 +-- src/core/CL/kernels/CLConvolutionKernel.cpp | 8 ++--- src/core/CL/kernels/CLCopyKernel.cpp | 2 +- .../kernels/CLDeconvolutionLayerUpsampleKernel.cpp | 2 +- .../CL/kernels/CLDepthConcatenateLayerKernel.cpp | 2 +- .../CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp | 4 +-- .../CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp | 4 +-- src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp | 4 +-- .../CL/kernels/CLDepthwiseVectorToTensorKernel.cpp | 2 +- .../CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp | 2 +- .../CL/kernels/CLDequantizationLayerKernel.cpp | 2 +- src/core/CL/kernels/CLDerivativeKernel.cpp | 4 +-- src/core/CL/kernels/CLDilateKernel.cpp | 4 +-- .../CL/kernels/CLDirectConvolutionLayerKernel.cpp | 4 +-- .../CLDirectConvolutionOutputStageKernel.cpp | 4 +-- src/core/CL/kernels/CLErodeKernel.cpp | 4 +-- src/core/CL/kernels/CLFastCornersKernel.cpp | 4 +-- src/core/CL/kernels/CLFillBorderKernel.cpp | 2 +- src/core/CL/kernels/CLFlattenLayerKernel.cpp | 6 ++-- src/core/CL/kernels/CLFloorKernel.cpp | 2 +- src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp | 4 +-- .../CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp | 4 +-- .../kernels/CLGEMMLowpOffsetContributionKernel.cpp | 2 +- ...tizeDownInt32ToUint8ScaleByFixedPointKernel.cpp | 6 ++-- ...GEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp | 4 +-- src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp | 6 ++-- .../kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp | 4 +-- src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp | 2 +- src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp | 6 ++-- .../kernels/CLGEMMMatrixVectorMultiplyKernel.cpp | 4 +-- src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp | 4 +-- src/core/CL/kernels/CLGaussian3x3Kernel.cpp | 4 +-- src/core/CL/kernels/CLGaussianPyramidKernel.cpp | 4 +-- src/core/CL/kernels/CLHOGDescriptorKernel.cpp | 4 +-- src/core/CL/kernels/CLHOGDetectorKernel.cpp | 2 +- src/core/CL/kernels/CLHarrisCornersKernel.cpp | 4 +-- src/core/CL/kernels/CLHistogramKernel.cpp | 6 ++-- src/core/CL/kernels/CLIm2ColKernel.cpp | 6 ++-- src/core/CL/kernels/CLIntegralImageKernel.cpp | 6 ++-- src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp | 2 +- src/core/CL/kernels/CLLKTrackerKernel.cpp | 8 ++--- .../CLLocallyConnectedMatrixMultiplyKernel.cpp | 9 ++--- src/core/CL/kernels/CLMagnitudePhaseKernel.cpp | 4 +-- src/core/CL/kernels/CLMeanStdDevKernel.cpp | 2 +- src/core/CL/kernels/CLMedian3x3Kernel.cpp | 4 +-- src/core/CL/kernels/CLMinMaxLayerKernel.cpp | 2 +- src/core/CL/kernels/CLMinMaxLocationKernel.cpp | 6 ++-- src/core/CL/kernels/CLNonLinearFilterKernel.cpp | 4 +-- .../CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp | 4 +-- src/core/CL/kernels/CLNormalizationLayerKernel.cpp | 2 +- src/core/CL/kernels/CLPermuteKernel.cpp | 2 +- .../CL/kernels/CLPixelWiseMultiplicationKernel.cpp | 2 +- src/core/CL/kernels/CLPoolingLayerKernel.cpp | 6 ++-- src/core/CL/kernels/CLQuantizationLayerKernel.cpp | 2 +- src/core/CL/kernels/CLROIPoolingLayerKernel.cpp | 2 +- src/core/CL/kernels/CLReductionOperationKernel.cpp | 10 +++--- src/core/CL/kernels/CLRemapKernel.cpp | 4 +-- src/core/CL/kernels/CLReshapeLayerKernel.cpp | 2 +- src/core/CL/kernels/CLScaleKernel.cpp | 6 ++-- src/core/CL/kernels/CLScharr3x3Kernel.cpp | 4 +-- src/core/CL/kernels/CLSobel3x3Kernel.cpp | 4 +-- src/core/CL/kernels/CLSobel5x5Kernel.cpp | 6 ++-- src/core/CL/kernels/CLSobel7x7Kernel.cpp | 6 ++-- src/core/CL/kernels/CLSoftmaxLayerKernel.cpp | 12 +++---- src/core/CL/kernels/CLTransposeKernel.cpp | 7 ++-- src/core/CL/kernels/CLWarpAffineKernel.cpp | 2 +- src/core/CL/kernels/CLWarpPerspectiveKernel.cpp | 2 +- src/core/CL/kernels/CLWeightsReshapeKernel.cpp | 2 +- .../CL/kernels/CLWidthConcatenateLayerKernel.cpp | 2 +- .../CL/kernels/CLWinogradFilterTransformKernel.cpp | 2 +- .../CL/kernels/CLWinogradInputTransformKernel.cpp | 6 ++-- .../CL/kernels/CLWinogradOutputTransformKernel.cpp | 4 +-- src/graph/GraphContext.cpp | 12 +++++-- src/graph/Utils.cpp | 10 +++++- src/graph/backends/CL/CLDeviceBackend.cpp | 16 +++++++-- src/graph/backends/GLES/GCDeviceBackend.cpp | 8 ++++- src/graph/backends/NEON/NEDeviceBackend.cpp | 9 ++++- src/graph/frontend/Stream.cpp | 4 +-- src/runtime/CL/CLScheduler.cpp | 41 +++++++++++++++------- tests/framework/Framework.cpp | 2 +- tests/validation/CL/UNIT/Tuner.cpp | 7 ++-- 116 files changed, 332 insertions(+), 236 deletions(-) diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h index ca1345d807..18d6bdf49f 100644 --- a/arm_compute/core/CL/CLHelpers.h +++ b/arm_compute/core/CL/CLHelpers.h @@ -69,7 +69,7 @@ std::string get_underlying_cl_type_from_data_type(const DataType &dt); * * @return the GPU target */ -GPUTarget get_target_from_device(cl::Device &device); +GPUTarget get_target_from_device(const cl::Device &device); /** Helper function to get the highest OpenCL version supported * diff --git a/arm_compute/core/CL/CLKernelLibrary.h b/arm_compute/core/CL/CLKernelLibrary.h index 18b6bb4434..c1999b45e1 100644 --- a/arm_compute/core/CL/CLKernelLibrary.h +++ b/arm_compute/core/CL/CLKernelLibrary.h @@ -208,11 +208,11 @@ public: static CLKernelLibrary &get(); /** Initialises the kernel library. * - * @param[in] kernel_path (Optional) Path of the directory from which kernel sources are loaded. - * @param[in] context (Optional) CL context used to create programs. - * @param[in] device (Optional) CL device for which the programs are created. + * @param[in] kernel_path Path of the directory from which kernel sources are loaded. + * @param[in] context CL context used to create programs. + * @param[in] device CL device for which the programs are created. */ - void init(std::string kernel_path = ".", cl::Context context = cl::Context::getDefault(), cl::Device device = cl::Device::getDefault()) + void init(std::string kernel_path, cl::Context context, cl::Device device) { _kernel_path = std::move(kernel_path); _context = std::move(context); diff --git a/arm_compute/core/CL/ICLKernel.h b/arm_compute/core/CL/ICLKernel.h index 9e970641c1..150dd62a89 100644 --- a/arm_compute/core/CL/ICLKernel.h +++ b/arm_compute/core/CL/ICLKernel.h @@ -61,11 +61,17 @@ private: { return 2 + 2 * dimension_size; } - + using IKernel::configure; //Prevent children from calling IKernel::configure() directly public: + void configure_internal(const Window &window, cl::NDRange lws_hint = CLKernelLibrary::get().default_ndrange()) + { + _lws_hint = lws_hint; + IKernel::configure(window); + } + /** Constructor */ ICLKernel() - : _kernel(nullptr), _lws_hint(CLKernelLibrary::get().default_ndrange()), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0) + : _kernel(nullptr), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0), _lws_hint() { } /** Returns a reference to the OpenCL kernel of this object. @@ -196,6 +202,7 @@ public: */ void set_lws_hint(const cl::NDRange &lws_hint) { + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); // lws_hint will be overwritten by configure() _lws_hint = lws_hint; } @@ -282,10 +289,11 @@ private: protected: cl::Kernel _kernel; /**< OpenCL kernel to run */ - cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */ GPUTarget _target; /**< The targeted GPU */ std::string _config_id; /**< Configuration ID */ size_t _max_workgroup_size; /**< The maximum workgroup size for this kernel */ +private: + cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */ }; /** Add the kernel to the command queue with the given window. diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h index 33053308ec..468e1792f0 100644 --- a/arm_compute/core/CL/OpenCL.h +++ b/arm_compute/core/CL/OpenCL.h @@ -82,6 +82,7 @@ public: #define DECLARE_FUNCTION_PTR(func_name) \ std::function func_name##_ptr = nullptr + DECLARE_FUNCTION_PTR(clCreateContext); DECLARE_FUNCTION_PTR(clCreateContextFromType); DECLARE_FUNCTION_PTR(clCreateCommandQueue); DECLARE_FUNCTION_PTR(clGetContextInfo); diff --git a/arm_compute/graph/GraphContext.h b/arm_compute/graph/GraphContext.h index 1831cc2c8b..ce6f86f611 100644 --- a/arm_compute/graph/GraphContext.h +++ b/arm_compute/graph/GraphContext.h @@ -50,6 +50,7 @@ class GraphContext final public: /** Constructor */ GraphContext(); + ~GraphContext(); /** Prevent instances of this class from being copied (As this class contains pointers) */ GraphContext(const GraphContext &) = delete; /** Default move constructor */ diff --git a/arm_compute/graph/IDeviceBackend.h b/arm_compute/graph/IDeviceBackend.h index f28cb1ab42..358d26af81 100644 --- a/arm_compute/graph/IDeviceBackend.h +++ b/arm_compute/graph/IDeviceBackend.h @@ -53,9 +53,14 @@ public: virtual void initialize_backend() = 0; /** Setups the given graph context * - * @param[in] ctx Graph context + * @param[in,out] ctx Graph context */ virtual void setup_backend_context(GraphContext &ctx) = 0; + /** Release the backend specific resources associated to a given graph context + * + * @param[in,out] ctx Graph context + */ + virtual void release_backend_context(GraphContext &ctx) = 0; /** Checks if an instantiated backend is actually supported * * @return True if the backend is supported else false diff --git a/arm_compute/graph/Utils.h b/arm_compute/graph/Utils.h index 582d47e406..3604bad4af 100644 --- a/arm_compute/graph/Utils.h +++ b/arm_compute/graph/Utils.h @@ -91,9 +91,14 @@ void force_target_to_graph(Graph &g, Target target); PassManager create_default_pass_manager(Target target); /** Default setups the graph context if not done manually * - * @param[in] ctx Graph Context + * @param[in,out] ctx Graph Context */ void setup_default_graph_context(GraphContext &ctx); +/** Default releases the graph context if not done manually + * + * @param[in,out] ctx Graph Context + */ +void release_default_graph_context(GraphContext &ctx); /** Get size of a tensor's given dimension depending on its layout * * @param[in] descriptor Descriptor diff --git a/arm_compute/graph/backends/CL/CLDeviceBackend.h b/arm_compute/graph/backends/CL/CLDeviceBackend.h index c1a6a28e6c..cc8d55239e 100644 --- a/arm_compute/graph/backends/CL/CLDeviceBackend.h +++ b/arm_compute/graph/backends/CL/CLDeviceBackend.h @@ -54,6 +54,7 @@ public: // Inherited overridden methods void initialize_backend() override; void setup_backend_context(GraphContext &ctx) override; + void release_backend_context(GraphContext &ctx) override; bool is_backend_supported() override; IAllocator *backend_allocator() override; std::unique_ptr create_tensor(const Tensor &tensor) override; @@ -63,10 +64,10 @@ public: std::shared_ptr create_memory_manager(MemoryManagerAffinity affinity) override; private: - bool _initialized; /**< Flag that specifies if the backend has been default initialized */ - CLTuner _tuner; /**< CL kernel tuner */ - std::unique_ptr _allocator; /**< CL buffer affinity allocator */ - std::string _tuner_file; /** Filename to load/store the tuner's values from */ + int _context_count; /**< Counts how many contexts are currently using the backend */ + CLTuner _tuner; /**< CL kernel tuner */ + std::unique_ptr _allocator; /**< CL buffer affinity allocator */ + std::string _tuner_file; /** Filename to load/store the tuner's values from */ }; } // namespace backends } // namespace graph diff --git a/arm_compute/graph/backends/GLES/GCDeviceBackend.h b/arm_compute/graph/backends/GLES/GCDeviceBackend.h index ba789221e3..ca2d3734eb 100644 --- a/arm_compute/graph/backends/GLES/GCDeviceBackend.h +++ b/arm_compute/graph/backends/GLES/GCDeviceBackend.h @@ -44,6 +44,7 @@ public: // Inherited overridden methods void initialize_backend() override; void setup_backend_context(GraphContext &ctx) override; + void release_backend_context(GraphContext &ctx) override; bool is_backend_supported() override; IAllocator *backend_allocator() override; std::unique_ptr create_tensor(const Tensor &tensor) override; diff --git a/arm_compute/graph/backends/NEON/NEDeviceBackend.h b/arm_compute/graph/backends/NEON/NEDeviceBackend.h index c1e2e0c078..abc17d9e83 100644 --- a/arm_compute/graph/backends/NEON/NEDeviceBackend.h +++ b/arm_compute/graph/backends/NEON/NEDeviceBackend.h @@ -43,6 +43,7 @@ public: // Inherited overridden methods void initialize_backend() override; void setup_backend_context(GraphContext &ctx) override; + void release_backend_context(GraphContext &ctx) override; bool is_backend_supported() override; IAllocator *backend_allocator() override; std::unique_ptr create_tensor(const Tensor &tensor) override; diff --git a/arm_compute/graph/frontend/Stream.h b/arm_compute/graph/frontend/Stream.h index 244d18e753..c8e24eeae2 100644 --- a/arm_compute/graph/frontend/Stream.h +++ b/arm_compute/graph/frontend/Stream.h @@ -74,11 +74,13 @@ public: const Graph &graph() const override; private: - GraphManager _manager; /**< Graph manager */ + //Important: GraphContext must be declared *before* the GraphManager because the GraphManager + //allocates resources from the context and therefore needs to be destroyed before the context during clean up). GraphContext _ctx; /**< Graph context to use */ + GraphManager _manager; /**< Graph manager */ Graph _g; /**< Internal graph representation of the stream */ }; } // namespace frontend } // namespace graph } // namespace arm_compute -#endif /* __ARM_COMPUTE_GRAPH_STREAM_H__ */ \ No newline at end of file +#endif /* __ARM_COMPUTE_GRAPH_STREAM_H__ */ diff --git a/arm_compute/runtime/CL/CLScheduler.h b/arm_compute/runtime/CL/CLScheduler.h index 8eb287c942..807d8bb448 100644 --- a/arm_compute/runtime/CL/CLScheduler.h +++ b/arm_compute/runtime/CL/CLScheduler.h @@ -74,8 +74,8 @@ public: * @param[in] cl_tuner (Optional) Pointer to OpenCL tuner (default=nullptr) * Note: It is caller's responsibility to release the allocated memory for CLTuner */ - void init(cl::Context context = cl::Context::getDefault(), cl::CommandQueue queue = cl::CommandQueue::getDefault(), - cl::Device device = cl::Device::getDefault(), ICLTuner *cl_tuner = nullptr) + void init(cl::Context context, cl::CommandQueue queue, + cl::Device device, ICLTuner *cl_tuner = nullptr) { set_context(context); _queue = std::move(queue); diff --git a/scripts/clang_tidy_rules.py b/scripts/clang_tidy_rules.py index 6c5b8ca26b..0de69de703 100755 --- a/scripts/clang_tidy_rules.py +++ b/scripts/clang_tidy_rules.py @@ -48,10 +48,10 @@ def filter_clang_tidy_lines( lines ): if "error:" in line: if (("Utils.cpp" in line and "'arm_compute_version.embed' file not found" in line) or - ("cl2.hpp" in line and "cast from pointer to smaller type 'cl_context_properties' (aka 'int') loses information" in line) or ("arm_fp16.h" in line) or ("omp.h" in line) or ("cast from pointer to smaller type 'uintptr_t' (aka 'unsigned int') loses information" in line) or + ("cast from pointer to smaller type 'cl_context_properties' (aka 'int') loses information" in line) or ("cast from pointer to smaller type 'std::uintptr_t' (aka 'unsigned int') loses information" in line) or ("NEMath.inl" in line and "statement expression not allowed at file scope" in line) or ("Utils.h" in line and "no member named 'unmap' in 'arm_compute::Tensor'" in line) or diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp index de78786eb4..9703b0fe16 100644 --- a/src/core/CL/CLHelpers.cpp +++ b/src/core/CL/CLHelpers.cpp @@ -94,7 +94,7 @@ std::string get_underlying_cl_type_from_data_type(const DataType &dt) return get_cl_type_from_data_type(dt); } -GPUTarget get_target_from_device(cl::Device &device) +GPUTarget get_target_from_device(const cl::Device &device) { // Query device name size std::string device_name = device.getInfo(); diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp index 29b01e6cea..3c92257642 100644 --- a/src/core/CL/CLKernelLibrary.cpp +++ b/src/core/CL/CLKernelLibrary.cpp @@ -966,8 +966,7 @@ size_t CLKernelLibrary::max_local_workgroup_size(const cl::Kernel &kernel) const cl::NDRange CLKernelLibrary::default_ndrange() const { - cl::Device device = cl::Device::getDefault(); - GPUTarget _target = get_target_from_device(device); + GPUTarget _target = get_target_from_device(_device); cl::NDRange default_range; switch(_target) diff --git a/src/core/CL/ICLSimple2DKernel.cpp b/src/core/CL/ICLSimple2DKernel.cpp index 5dc3e6c8bb..cf6c9c8785 100644 --- a/src/core/CL/ICLSimple2DKernel.cpp +++ b/src/core/CL/ICLSimple2DKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -42,7 +42,7 @@ void ICLSimple2DKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_2D_tensor_argument(idx, _input, slice); add_2D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_2D(slice)); } diff --git a/src/core/CL/ICLSimple3DKernel.cpp b/src/core/CL/ICLSimple3DKernel.cpp index 0bd9d155cf..4197307caf 100644 --- a/src/core/CL/ICLSimple3DKernel.cpp +++ b/src/core/CL/ICLSimple3DKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -41,7 +41,7 @@ void ICLSimple3DKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/ICLSimpleKernel.cpp b/src/core/CL/ICLSimpleKernel.cpp index fec9d923da..48e5a884f0 100644 --- a/src/core/CL/ICLSimpleKernel.cpp +++ b/src/core/CL/ICLSimpleKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -50,5 +50,5 @@ void ICLSimpleKernel::configure(const ICLTensor *input, ICLTensor *output, unsig output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } diff --git a/src/core/CL/OpenCL.cpp b/src/core/CL/OpenCL.cpp index a8ed9733ef..486bb6a1bd 100644 --- a/src/core/CL/OpenCL.cpp +++ b/src/core/CL/OpenCL.cpp @@ -74,6 +74,7 @@ bool CLSymbols::load(const std::string &library) #define LOAD_FUNCTION_PTR(func_name, handle) \ func_name##_ptr = reinterpret_cast(dlsym(handle, #func_name)); + LOAD_FUNCTION_PTR(clCreateContext, handle); LOAD_FUNCTION_PTR(clCreateContextFromType, handle); LOAD_FUNCTION_PTR(clCreateCommandQueue, handle); LOAD_FUNCTION_PTR(clGetContextInfo, handle); @@ -254,6 +255,26 @@ cl_command_queue clCreateCommandQueue(cl_context context, } } +cl_context clCreateContext( + const cl_context_properties *properties, + cl_uint num_devices, + const cl_device_id *devices, + void (*pfn_notify)(const char *, const void *, size_t, void *), + void *user_data, + cl_int *errcode_ret) +{ + arm_compute::CLSymbols::get().load_default(); + auto func = arm_compute::CLSymbols::get().clCreateContext_ptr; + if(func != nullptr) + { + return func(properties, num_devices, devices, pfn_notify, user_data, errcode_ret); + } + else + { + return nullptr; + } +} + cl_context clCreateContextFromType(const cl_context_properties *properties, cl_device_type device_type, void (*pfn_notify)(const char *, const void *, size_t, void *), diff --git a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp index 685b8e234e..0c1206adfb 100644 --- a/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp +++ b/src/core/CL/kernels/CLAbsoluteDifferenceKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -81,7 +81,7 @@ void CLAbsoluteDifferenceKernel::configure(const ICLTensor *input1, const ICLTen output_access.set_valid_region(win, valid_region); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLAbsoluteDifferenceKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp index d8bd2f7ee1..a15e99b8d4 100644 --- a/src/core/CL/kernels/CLActivationLayerKernel.cpp +++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp @@ -179,7 +179,7 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act // Configure kernel window auto win_config = validate_and_configure_window(input->info(), (_run_in_place) ? nullptr : output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Set config_id for enabling LWS tuning _config_id = "activation_layer_"; @@ -215,7 +215,7 @@ void CLActivationLayerKernel::run(const Window &window, cl::CommandQueue &queue) { add_3D_tensor_argument(idx, _output, slice); } - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(collapsed.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp b/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp index 6d6cb6f98c..2372d458cf 100644 --- a/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp +++ b/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp @@ -154,7 +154,7 @@ void CLArithmeticAdditionKernel::configure(const ICLTensor *input1, const ICLTen // Create kernel _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } Status CLArithmeticAdditionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy) diff --git a/src/core/CL/kernels/CLArithmeticDivisionKernel.cpp b/src/core/CL/kernels/CLArithmeticDivisionKernel.cpp index 9bd0da15a3..e995ba1a41 100644 --- a/src/core/CL/kernels/CLArithmeticDivisionKernel.cpp +++ b/src/core/CL/kernels/CLArithmeticDivisionKernel.cpp @@ -121,7 +121,7 @@ void CLArithmeticDivisionKernel::configure(const ICLTensor *input1, const ICLTen // Create kernel _kernel = static_cast(CLKernelLibrary::get().create_kernel("arithmetic_div", build_opts)); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } Status CLArithmeticDivisionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) diff --git a/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp b/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp index aeee6022a7..299ac553e9 100644 --- a/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp +++ b/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp @@ -127,7 +127,7 @@ void CLArithmeticSubtractionKernel::configure(const ICLTensor *input1, const ICL // Configure kernel window auto win_config = validate_and_configure_window(input1->info(), input2->info(), output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } Status CLArithmeticSubtractionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy) diff --git a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp index 4c93fb28bf..d4a72076c1 100644 --- a/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp @@ -189,7 +189,7 @@ void CLBatchNormalizationLayerKernel::configure(ICLTensor *input, ICLTensor *out (beta != nullptr) ? beta->info() : nullptr, (gamma != nullptr) ? gamma->info() : nullptr); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); _config_id = "batch_normalization_layer_"; _config_id += string_from_data_layout(input->info()->data_layout()); @@ -252,7 +252,7 @@ void CLBatchNormalizationLayerKernel::run(const Window &window, cl::CommandQueue { add_3D_tensor_argument(idx, _output, slice); } - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLBitwiseAndKernel.cpp b/src/core/CL/kernels/CLBitwiseAndKernel.cpp index 5ea4a86da5..dd301cd02e 100644 --- a/src/core/CL/kernels/CLBitwiseAndKernel.cpp +++ b/src/core/CL/kernels/CLBitwiseAndKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -66,7 +66,7 @@ void CLBitwiseAndKernel::configure(const ICLTensor *input1, const ICLTensor *inp output_access.set_valid_region(win, valid_region); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLBitwiseAndKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLBitwiseOrKernel.cpp b/src/core/CL/kernels/CLBitwiseOrKernel.cpp index 2eeef0a993..aa84618258 100644 --- a/src/core/CL/kernels/CLBitwiseOrKernel.cpp +++ b/src/core/CL/kernels/CLBitwiseOrKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -67,7 +67,7 @@ void CLBitwiseOrKernel::configure(const ICLTensor *input1, const ICLTensor *inpu output_access.set_valid_region(win, valid_region); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLBitwiseOrKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLBitwiseXorKernel.cpp b/src/core/CL/kernels/CLBitwiseXorKernel.cpp index c19a78e1c4..ad1f923253 100644 --- a/src/core/CL/kernels/CLBitwiseXorKernel.cpp +++ b/src/core/CL/kernels/CLBitwiseXorKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -67,7 +67,7 @@ void CLBitwiseXorKernel::configure(const ICLTensor *input1, const ICLTensor *inp output_access.set_valid_region(win, valid_region); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLBitwiseXorKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLBox3x3Kernel.cpp b/src/core/CL/kernels/CLBox3x3Kernel.cpp index 0299f6233c..b81697f778 100644 --- a/src/core/CL/kernels/CLBox3x3Kernel.cpp +++ b/src/core/CL/kernels/CLBox3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -73,5 +73,5 @@ void CLBox3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, bool b output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } diff --git a/src/core/CL/kernels/CLCannyEdgeKernel.cpp b/src/core/CL/kernels/CLCannyEdgeKernel.cpp index 5d06d34631..94e5e230f9 100644 --- a/src/core/CL/kernels/CLCannyEdgeKernel.cpp +++ b/src/core/CL/kernels/CLCannyEdgeKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -77,7 +77,7 @@ void CLGradientKernel::configure(const ICLTensor *gx, const ICLTensor *gy, ICLTe mag_access.set_valid_region(win, _gx->info()->valid_region()); phase_access.set_valid_region(win, _gx->info()->valid_region()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLGradientKernel::run(const Window &window, cl::CommandQueue &queue) @@ -145,7 +145,7 @@ void CLEdgeNonMaxSuppressionKernel::configure(const ICLTensor *magnitude, const output_access.set_valid_region(win, _magnitude->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLEdgeNonMaxSuppressionKernel::run(const Window &window, cl::CommandQueue &queue) @@ -230,7 +230,7 @@ void CLEdgeTraceKernel::configure(const ICLTensor *input, ICLTensor *output, int l1_stack_access.set_valid_region(win, _input->info()->valid_region()); l1_stack_counter_access.set_valid_region(win, _input->info()->valid_region()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLEdgeTraceKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLChannelCombineKernel.cpp b/src/core/CL/kernels/CLChannelCombineKernel.cpp index 6e55e666ee..c7b1da41dc 100644 --- a/src/core/CL/kernels/CLChannelCombineKernel.cpp +++ b/src/core/CL/kernels/CLChannelCombineKernel.cpp @@ -128,7 +128,7 @@ void CLChannelCombineKernel::configure(const ICLTensor *plane0, const ICLTensor } output_access.set_valid_region(win, ValidRegion(valid_region.anchor, output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLChannelCombineKernel::configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output) @@ -232,7 +232,7 @@ void CLChannelCombineKernel::configure(const ICLImage *plane0, const ICLImage *p output_plane1_access.set_valid_region(win, ValidRegion(output_plane1_region.anchor, output->plane(1)->info()->tensor_shape())); output_plane2_access.set_valid_region(win, ValidRegion(plane2->info()->valid_region().anchor, output->plane(2)->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLChannelCombineKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLChannelExtractKernel.cpp b/src/core/CL/kernels/CLChannelExtractKernel.cpp index 65843b8d5d..8bddba837a 100644 --- a/src/core/CL/kernels/CLChannelExtractKernel.cpp +++ b/src/core/CL/kernels/CLChannelExtractKernel.cpp @@ -101,7 +101,7 @@ void CLChannelExtractKernel::configure(const ICLTensor *input, Channel channel, ValidRegion input_valid_region = input->info()->valid_region(); output_access.set_valid_region(win, ValidRegion(input_valid_region.anchor, output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLChannelExtractKernel::configure(const ICLMultiImage *input, Channel channel, ICLImage *output) @@ -162,7 +162,7 @@ void CLChannelExtractKernel::configure(const ICLMultiImage *input, Channel chann output_access.set_valid_region(win, input_plane->info()->valid_region()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLChannelExtractKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp index 5f0f0aebf8..be4d68770d 100644 --- a/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp +++ b/src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp @@ -125,7 +125,7 @@ void CLChannelShuffleLayerKernel::configure(const ICLTensor *input, ICLTensor *o // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } Status CLChannelShuffleLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups) diff --git a/src/core/CL/kernels/CLCol2ImKernel.cpp b/src/core/CL/kernels/CLCol2ImKernel.cpp index 6274c9082a..6fd3be7f6a 100644 --- a/src/core/CL/kernels/CLCol2ImKernel.cpp +++ b/src/core/CL/kernels/CLCol2ImKernel.cpp @@ -111,7 +111,7 @@ void CLCol2ImKernel::configure(const ICLTensor *input, ICLTensor *output, std::p // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info(), _convolved_dims); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Set config_id for enabling LWS tuning _config_id = "col2im_"; @@ -156,7 +156,7 @@ void CLCol2ImKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_2D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(collapsed_window.slide_window_slice_2D(slice) && out_window.slide_window_slice_3D(slice_out)); } diff --git a/src/core/CL/kernels/CLColorConvertKernel.cpp b/src/core/CL/kernels/CLColorConvertKernel.cpp index 2b894989e1..e79019eab9 100644 --- a/src/core/CL/kernels/CLColorConvertKernel.cpp +++ b/src/core/CL/kernels/CLColorConvertKernel.cpp @@ -120,7 +120,7 @@ void CLColorConvertKernel::configure(const ICLTensor *input, ICLTensor *output) output_access.set_valid_region(win, input->info()->valid_region()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLImage *output) @@ -189,7 +189,7 @@ void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLImage *outpu input->plane(2)->info()->valid_region()); output_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *output) @@ -285,7 +285,7 @@ void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *outpu output_plane1_access.set_valid_region(win, ValidRegion(input_region.anchor, output->plane(1)->info()->tensor_shape())); output_plane2_access.set_valid_region(win, ValidRegion(input_region.anchor, output->plane(2)->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLMultiImage *output) @@ -369,7 +369,7 @@ void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLMultiImage * output_plane1_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->plane(1)->info()->tensor_shape())); output_plane2_access.set_valid_region(win, ValidRegion(intersect_region.anchor, output->plane(2)->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLColorConvertKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp index 69ab590540..ace3fd5840 100644 --- a/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp +++ b/src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp @@ -73,7 +73,7 @@ void CLConvertFullyConnectedWeightsKernel::configure(const ICLTensor *input, ICL // Configure kernel window Window win = calculate_max_window(*input->info(), Steps()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } Status CLConvertFullyConnectedWeightsKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, @@ -109,4 +109,4 @@ void CLConvertFullyConnectedWeightsKernel::run(const Window &window, cl::Command add_2D_tensor_argument(idx, _output, window); enqueue(queue, *this, window); } -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/src/core/CL/kernels/CLConvolutionKernel.cpp b/src/core/CL/kernels/CLConvolutionKernel.cpp index 2b08c8dfba..e6777938a2 100644 --- a/src/core/CL/kernels/CLConvolutionKernel.cpp +++ b/src/core/CL/kernels/CLConvolutionKernel.cpp @@ -105,7 +105,7 @@ void CLConvolutionKernel::configure(const ICLTensor *input, ICLTens output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } /****************************************************************************************\ @@ -167,7 +167,7 @@ void CLSeparableConvolutionHorKernel::configure(const ICLTensor *in output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } template @@ -226,7 +226,7 @@ void CLSeparableConvolutionVertKernel::configure(const ICLTensor *i output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } /****************************************************************************************\ @@ -298,7 +298,7 @@ void CLConvolutionRectangleKernel::configure(const ICLTensor *input, ICLTensor * output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLConvolutionRectangleKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLCopyKernel.cpp b/src/core/CL/kernels/CLCopyKernel.cpp index 1fc8b5bfbe..2da67d2666 100644 --- a/src/core/CL/kernels/CLCopyKernel.cpp +++ b/src/core/CL/kernels/CLCopyKernel.cpp @@ -95,7 +95,7 @@ void CLCopyKernel::configure(const ICLTensor *input, ICLTensor *output) // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } Status CLCopyKernel::validate(const arm_compute::ITensorInfo *input, const arm_compute::ITensorInfo *output) diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp index 1feac7d815..c6a0031f4a 100644 --- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp +++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp @@ -85,7 +85,7 @@ void CLDeconvolutionLayerUpsampleKernel::configure(const ICLTensor *input, ICLTe AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLDeconvolutionLayerUpsampleKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp index 4055d1c7ab..40023948b1 100644 --- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp @@ -125,7 +125,7 @@ void CLDepthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned i auto win_config = validate_and_configure_window(input->info(), depth_offset, output->info()); ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - ICLKernel::configure(std::get<1>(win_config)); + ICLKernel::configure_internal(std::get<1>(win_config)); } Status CLDepthConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *input, diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp index e091e5c2cb..a40aa2856c 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp @@ -280,7 +280,7 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input, auto win_config = validate_and_configure_window(input->info(), weights->info(), output->info(), conv_info, depth_multiplier, gpu_target, kernel_name); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); @@ -345,7 +345,7 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::run(const Window &window, cl::Com add_3D_tensor_argument(idx, _output, slice_out); add_3D_tensor_argument(idx, _weights, slice_weights); - enqueue(queue, *this, slice_out, _lws_hint); + enqueue(queue, *this, slice_out, lws_hint()); } while(window.slide_window_slice_3D(slice_out) && win_in.slide_window_slice_3D(slice_in)); } diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp index 610bfb51dd..63c350d9a5 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp @@ -244,7 +244,7 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input, // Configure kernel window auto win_config = validate_and_configure_window(input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), conv_info); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Set config_id for enabling LWS tuning _config_id = kernel_name; @@ -314,7 +314,7 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::run(const Window &window, cl::Com add_3D_tensor_argument(idx, _output, slice_out); add_3D_tensor_argument(idx, _weights, slice_out); - enqueue(queue, *this, slice_out, _lws_hint); + enqueue(queue, *this, slice_out, lws_hint()); } while(window.slide_window_slice_3D(slice_out) && win_in.slide_window_slice_3D(slice_in)); } diff --git a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp index cab943629a..d5c333a2c1 100644 --- a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp @@ -101,7 +101,7 @@ void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *outpu // CLDepthwiseIm2ColKernel doesn't need padding so update_window_and_padding() can be skipped output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } Status CLDepthwiseIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, unsigned int depth_multiplier) @@ -135,7 +135,7 @@ void CLDepthwiseIm2ColKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice_in); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_in)); } diff --git a/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp b/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp index 67b2cc9f55..cdc27e8ab1 100644 --- a/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseVectorToTensorKernel.cpp @@ -87,7 +87,7 @@ void CLDepthwiseVectorToTensorKernel::configure(const ICLTensor *input, ICLTenso // The CLDepthwisevectorToTensorKernel doesn't need padding so update_window_and_padding() can be skipped output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } Status CLDepthwiseVectorToTensorKernel::validate(const ITensorInfo *input, const ITensorInfo *output, size_t conv_w, size_t conv_h) diff --git a/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp index c28be3fccf..683dda8d67 100644 --- a/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp @@ -95,7 +95,7 @@ void CLDepthwiseWeightsReshapeKernel::configure(const ICLTensor *input, ICLTenso // The CLDepthwiseWeightsReshapeKernel doesn't need padding so update_window_and_padding() can be skipped output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } Status CLDepthwiseWeightsReshapeKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *biases) diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp index fba721f50b..d4c1bec5f4 100644 --- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp @@ -96,7 +96,7 @@ void CLDequantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *o ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - ICLKernel::configure(std::get<1>(win_config)); + ICLKernel::configure_internal(std::get<1>(win_config)); } Status CLDequantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *min_max) diff --git a/src/core/CL/kernels/CLDerivativeKernel.cpp b/src/core/CL/kernels/CLDerivativeKernel.cpp index 5bfe75140b..af7df14359 100644 --- a/src/core/CL/kernels/CLDerivativeKernel.cpp +++ b/src/core/CL/kernels/CLDerivativeKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -117,7 +117,7 @@ void CLDerivativeKernel::configure(const ICLTensor *input, ICLTensor *output_x, output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLDerivativeKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLDilateKernel.cpp b/src/core/CL/kernels/CLDilateKernel.cpp index 3abd747011..89853d7b19 100644 --- a/src/core/CL/kernels/CLDilateKernel.cpp +++ b/src/core/CL/kernels/CLDilateKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -61,5 +61,5 @@ void CLDilateKernel::configure(const ICLTensor *input, ICLTensor *output, bool b output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp index 754f0d8f23..6de97d40af 100644 --- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp +++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp @@ -442,7 +442,7 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL // Configure kernel window auto win_config = validate_and_configure_window(input->info(), weights->info(), output->info(), conv_info, gpu_target); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Set static kernel arguments if(is_data_type_quantized_asymmetric(data_type)) @@ -532,7 +532,7 @@ void CLDirectConvolutionLayerKernel::run(const Window &window, cl::CommandQueue unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice_in); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_3D(slice) && win_in.slide_window_slice_3D(slice_in)); } diff --git a/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp index 4e2352cf6e..5f4dacb269 100644 --- a/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp +++ b/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp @@ -168,7 +168,7 @@ void CLDirectConvolutionLayerOutputStageKernel::configure(ICLTensor *input, cons // Configure kernel window auto win_config = validate_and_configure_window(input->info(), (bias == nullptr) ? nullptr : bias->info(), (output == nullptr) ? nullptr : output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } Status CLDirectConvolutionLayerOutputStageKernel::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output) @@ -202,7 +202,7 @@ void CLDirectConvolutionLayerOutputStageKernel::run(const Window &window, cl::Co unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLErodeKernel.cpp b/src/core/CL/kernels/CLErodeKernel.cpp index a7aa88fc5c..e56b71a75e 100644 --- a/src/core/CL/kernels/CLErodeKernel.cpp +++ b/src/core/CL/kernels/CLErodeKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -61,5 +61,5 @@ void CLErodeKernel::configure(const ICLTensor *input, ICLTensor *output, bool bo output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } diff --git a/src/core/CL/kernels/CLFastCornersKernel.cpp b/src/core/CL/kernels/CLFastCornersKernel.cpp index 616e41b5fc..782ab7a7c0 100644 --- a/src/core/CL/kernels/CLFastCornersKernel.cpp +++ b/src/core/CL/kernels/CLFastCornersKernel.cpp @@ -87,7 +87,7 @@ void CLFastCornersKernel::configure(const ICLImage *input, ICLImage *output, flo output_access.set_valid_region(win, input->info()->valid_region(), border_mode == BorderMode::UNDEFINED, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLFastCornersKernel::run(const Window &window, cl::CommandQueue &queue) @@ -148,7 +148,7 @@ void CLCopyToArrayKernel::configure(const ICLImage *input, bool update_number, I Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration)); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLCopyToArrayKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLFillBorderKernel.cpp b/src/core/CL/kernels/CLFillBorderKernel.cpp index 3b1edaf46c..baf6bb6024 100644 --- a/src/core/CL/kernels/CLFillBorderKernel.cpp +++ b/src/core/CL/kernels/CLFillBorderKernel.cpp @@ -154,7 +154,7 @@ void CLFillBorderKernel::configure(ICLTensor *tensor, BorderSize border_size, Bo win.set(Window::DimX, Window::Dimension(0, total_valid_width + valid_height)); win.set(Window::DimY, Window::Dimension(0, 1, 1)); win.use_tensor_dimensions(tensor->info()->tensor_shape(), Window::DimZ); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLFillBorderKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLFlattenLayerKernel.cpp b/src/core/CL/kernels/CLFlattenLayerKernel.cpp index 0b5feffcc9..17189143ef 100644 --- a/src/core/CL/kernels/CLFlattenLayerKernel.cpp +++ b/src/core/CL/kernels/CLFlattenLayerKernel.cpp @@ -101,7 +101,7 @@ void CLFlattenLayerKernel::configure(const ICLTensor *input, ICLTensor *output) // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Set config_id for enabling LWS tuning _config_id = "flatten"; @@ -144,8 +144,8 @@ void CLFlattenLayerKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_3D_tensor_argument(idx, _input, in_slice); add_1D_tensor_argument(idx, _output, out_slice); - enqueue(queue, *this, in_slice, _lws_hint); + enqueue(queue, *this, in_slice, lws_hint()); } while(window.slide_window_slice_3D(in_slice) && out_window.slide_window_slice_1D(out_slice)); } -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/src/core/CL/kernels/CLFloorKernel.cpp b/src/core/CL/kernels/CLFloorKernel.cpp index f6b0e829a0..20e3a3a66f 100644 --- a/src/core/CL/kernels/CLFloorKernel.cpp +++ b/src/core/CL/kernels/CLFloorKernel.cpp @@ -69,7 +69,7 @@ void CLFloorKernel::configure(const ICLTensor *input, ICLTensor *output) update_window_and_padding(win, input_access, output_access); output_access.set_valid_region(win, input->info()->valid_region()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLFloorKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp b/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp index 6ea1160c69..ae54e77972 100644 --- a/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp +++ b/src/core/CL/kernels/CLGEMMInterleave4x4Kernel.cpp @@ -157,7 +157,7 @@ void CLGEMMInterleave4x4Kernel::configure(const ICLTensor *input, ICLTensor *out // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info(), mult_interleave4x4_height, reinterpret_input_as_3d); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Set config_id for enabling LWS tuning _config_id = "interleave4x4_"; @@ -210,7 +210,7 @@ void CLGEMMInterleave4x4Kernel::run(const Window &window, cl::CommandQueue &queu unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp index 8a4a1b5820..9adf95fa33 100644 --- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp @@ -188,7 +188,7 @@ void CLGEMMLowpMatrixMultiplyKernel::configure(const ICLTensor *input0, const IC // Configure kernel window auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info(), is_interleaved_transposed, num_elements_processed); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); const bool is_dot8_supported = dot8_supported(CLKernelLibrary::get().get_device()); @@ -273,7 +273,7 @@ void CLGEMMLowpMatrixMultiplyKernel::run(const Window &window, cl::CommandQueue add_2D_tensor_argument(idx, _input0, slice); add_2D_tensor_argument(idx, _input1, slice_b); add_2D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_2D(slice)); } diff --git a/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp index 221a1566b9..aa954abde1 100644 --- a/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.cpp @@ -159,7 +159,7 @@ void CLGEMMLowpOffsetContributionKernel::configure(ICLTensor *mm_result, const I vector_sum_row != nullptr ? vector_sum_row->info() : nullptr, a_offset, b_offset); // NOLINT ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Set config_id for enabling LWS tuning _config_id = "gemmlowp_offset_contribution_"; diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp index ff2fc646aa..875e26d6cb 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -146,7 +146,7 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::configure(const // Configure kernel window auto win_config = validate_and_configure_window(input->info(), (bias != nullptr) ? bias->info() : nullptr, output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::run(const Window &window, cl::CommandQueue &queue) @@ -174,4 +174,4 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::run(const Window enqueue(queue, *this, slice); } while(collapsed.slide_window_slice_3D(slice)); -} \ No newline at end of file +} diff --git a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp index 151a6588d5..57891131c7 100644 --- a/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -145,7 +145,7 @@ void CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::configure(const ICLTensor *i // Configure kernel window auto win_config = validate_and_configure_window(input->info(), (bias != nullptr) ? bias->info() : nullptr, output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } void CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp index 6951512167..cd26cd1597 100644 --- a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp +++ b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -121,7 +121,7 @@ void CLGEMMLowpMatrixAReductionKernel::configure(const ICLTensor *mtx_a, ICLTens // Configure kernel window auto win_config = validate_and_configure_window_matrix_a_reduction(_input->info(), _output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } Status CLGEMMLowpMatrixAReductionKernel::validate(const ITensorInfo *mtx_a, const ITensorInfo *vector_sum_row) @@ -175,7 +175,7 @@ void CLGEMMLowpMatrixBReductionKernel::configure(const ICLTensor *mtx_b, ICLTens // Configure kernel window auto win_config = validate_and_configure_window_matrix_b_reduction(_input->info(), _output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } Status CLGEMMLowpMatrixBReductionKernel::validate(const ITensorInfo *mtx_b, const ITensorInfo *vector_sum_col) diff --git a/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp index ebe4013bf0..2f1f1bf865 100644 --- a/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp @@ -88,7 +88,7 @@ void CLGEMMMatrixAccumulateBiasesKernel::configure(ICLTensor *accum, const ICLTe // Configure kernel window auto win_config = validate_and_configure_window(accum->info(), biases->info(), gpu_target, vector_size); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Add build options CLBuildOptions build_opts; @@ -126,7 +126,7 @@ void CLGEMMMatrixAccumulateBiasesKernel::run(const Window &window, cl::CommandQu add_2D_tensor_argument(idx, _accum, accum_slice); add_1D_tensor_argument(idx, _biases, biases_slice); - enqueue(queue, *this, accum_slice, _lws_hint); + enqueue(queue, *this, accum_slice, lws_hint()); } while(window.slide_window_slice_2D(accum_slice)); } diff --git a/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp index bcc3a01296..0c65bb40c0 100644 --- a/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixAdditionKernel.cpp @@ -98,7 +98,7 @@ void CLGEMMMatrixAdditionKernel::configure(const ICLTensor *input, ICLTensor *ou // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } Status CLGEMMMatrixAdditionKernel::validate(const ITensorInfo *input, const ITensorInfo *output, float beta) diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp index 79e2f8b11a..8530ed2fd3 100644 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp @@ -253,7 +253,7 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen // Configure kernel window auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info(), is_interleaved_transposed, reshape_info, gpu_target, num_elements_processed); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Create build options CLBuildOptions build_opts; @@ -316,7 +316,7 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen // The work-group size equal to the Bifrost quad size has been proved to be optimal for these kernels // via exhaustive autotuning over a range of representative layer configurations. - _lws_hint = cl::NDRange(4); + set_lws_hint(cl::NDRange(4)); } else // (MIDGARD and F32) or (F16) { @@ -416,7 +416,7 @@ void CLGEMMMatrixMultiplyKernel::run(const Window &window, cl::CommandQueue &que _kernel.setArg(idx++, static_cast(_input0->info()->strides_in_bytes()[2])); _kernel.setArg(idx++, static_cast(_input1->info()->strides_in_bytes()[2])); _kernel.setArg(idx++, static_cast(_output->info()->strides_in_bytes()[2])); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp index 43a6cf25db..11a4292270 100644 --- a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp @@ -121,7 +121,7 @@ void CLGEMMMatrixVectorMultiplyKernel::configure(const ICLTensor *input0, const auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } Status CLGEMMMatrixVectorMultiplyKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output) @@ -165,7 +165,7 @@ void CLGEMMMatrixVectorMultiplyKernel::run(const Window &window, cl::CommandQueu unsigned int idx_2 = num_arguments_per_3D_tensor() + num_arguments_per_2D_tensor(); add_3D_tensor_argument(idx_0, _input0, slice_in); add_1D_tensor_argument(idx_2, _output, slice_out); - enqueue(queue, *this, slice_in, _lws_hint); + enqueue(queue, *this, slice_in, lws_hint()); } while(window.slide_window_slice_3D(slice_in) && window.slide_window_slice_3D(slice_out)); } diff --git a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp index 7e44fa7118..5b299052d4 100644 --- a/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp +++ b/src/core/CL/kernels/CLGEMMTranspose1xWKernel.cpp @@ -107,7 +107,7 @@ void CLGEMMTranspose1xWKernel::configure(const ICLTensor *input, ICLTensor *outp unsigned int num_elems_processed_per_iteration = 1; auto win_config = validate_and_configure_window(input->info(), output->info(), num_elems_processed_per_iteration, mult_transpose1xW_width); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Create build options CLBuildOptions build_opts; @@ -157,7 +157,7 @@ void CLGEMMTranspose1xWKernel::run(const Window &window, cl::CommandQueue &queue unsigned int idx = 0; add_3D_tensor_argument(idx, _input, in_slice); add_3D_tensor_argument(idx, _output, out_slice); - enqueue(queue, *this, in_slice, _lws_hint); + enqueue(queue, *this, in_slice, lws_hint()); } while(window.slide_window_slice_3D(in_slice) && out_window.slide_window_slice_3D(out_slice)); } diff --git a/src/core/CL/kernels/CLGaussian3x3Kernel.cpp b/src/core/CL/kernels/CLGaussian3x3Kernel.cpp index e5bc3f9656..7e8f3139f2 100644 --- a/src/core/CL/kernels/CLGaussian3x3Kernel.cpp +++ b/src/core/CL/kernels/CLGaussian3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -72,5 +72,5 @@ void CLGaussian3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, b output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } diff --git a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp b/src/core/CL/kernels/CLGaussianPyramidKernel.cpp index a4fda364e3..6b729c8585 100644 --- a/src/core/CL/kernels/CLGaussianPyramidKernel.cpp +++ b/src/core/CL/kernels/CLGaussianPyramidKernel.cpp @@ -95,7 +95,7 @@ void CLGaussianPyramidHorKernel::configure(const ICLTensor *input, ICLTensor *ou output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLGaussianPyramidHorKernel::run(const Window &window, cl::CommandQueue &queue) @@ -177,7 +177,7 @@ void CLGaussianPyramidVertKernel::configure(const ICLTensor *input, ICLTensor *o output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLGaussianPyramidVertKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLHOGDescriptorKernel.cpp b/src/core/CL/kernels/CLHOGDescriptorKernel.cpp index a15aab1f37..26c3b81175 100644 --- a/src/core/CL/kernels/CLHOGDescriptorKernel.cpp +++ b/src/core/CL/kernels/CLHOGDescriptorKernel.cpp @@ -91,7 +91,7 @@ void CLHOGOrientationBinningKernel::configure(const ICLTensor *input_magnitude, output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLHOGOrientationBinningKernel::run(const Window &window, cl::CommandQueue &queue) @@ -174,7 +174,7 @@ void CLHOGBlockNormalizationKernel::configure(const ICLTensor *input, ICLTensor output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLHOGBlockNormalizationKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLHOGDetectorKernel.cpp b/src/core/CL/kernels/CLHOGDetectorKernel.cpp index caca49846f..12bbbaf9f2 100644 --- a/src/core/CL/kernels/CLHOGDetectorKernel.cpp +++ b/src/core/CL/kernels/CLHOGDetectorKernel.cpp @@ -110,7 +110,7 @@ void CLHOGDetectorKernel::configure(const ICLTensor *input, const ICLHOG *hog, I update_window_and_padding(win, AccessWindowRectangle(input->info(), 0, 0, num_elems_read_per_iteration, num_rows_read_per_iteration)); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLHOGDetectorKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLHarrisCornersKernel.cpp b/src/core/CL/kernels/CLHarrisCornersKernel.cpp index 1f757fe34c..5320b6bebc 100644 --- a/src/core/CL/kernels/CLHarrisCornersKernel.cpp +++ b/src/core/CL/kernels/CLHarrisCornersKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -106,7 +106,7 @@ void CLHarrisScoreKernel::configure(const ICLImage *input1, const ICLImage *inpu ValidRegion valid_region = intersect_valid_regions(input1->info()->valid_region(), input2->info()->valid_region()); output_access.set_valid_region(win, valid_region, border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLHarrisScoreKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLHistogramKernel.cpp b/src/core/CL/kernels/CLHistogramKernel.cpp index 7b715abb36..b56ad8d38d 100644 --- a/src/core/CL/kernels/CLHistogramKernel.cpp +++ b/src/core/CL/kernels/CLHistogramKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -107,7 +107,7 @@ void CLHistogramKernel::configure(const ICLImage *input, ICLDistribution1D *outp update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, pixels_per_item)); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLHistogramKernel::run(const Window &window, cl::CommandQueue &queue) @@ -198,7 +198,7 @@ void CLHistogramBorderKernel::configure(const ICLImage *input, ICLDistribution1D win.set(0, Window::Dimension(start_position, _input->info()->dimension(0))); win.set(1, Window::Dimension(0, _input->info()->dimension(1))); update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, 1)); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLHistogramBorderKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index 39654e2190..42bb96c16f 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -308,7 +308,7 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const auto win_config = validate_and_configure_window(input->info(), output->info(), kernel_dims, conv_info, has_bias, dilation, im2col_config.num_elems_processed_per_iteration, im2col_config.is_padding_required_nchw); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Set config_id for enabling LWS tuning _config_id = im2col_config.kernel_name; @@ -386,7 +386,7 @@ void CLIm2ColKernel::run(const Window &window, cl::CommandQueue &queue) add_2D_tensor_argument(idx, _output, slice_out); _kernel.setArg(idx++, static_cast(_input->info()->strides_in_bytes()[3])); _kernel.setArg(idx++, static_cast(_output->info()->strides_in_bytes()[2])); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window_collapsed.slide_window_slice_3D(slice) && window_output.slide_window_slice_2D(slice_out) && window_collapsed.slide_window_slice_3D(slice_in)); -} \ No newline at end of file +} diff --git a/src/core/CL/kernels/CLIntegralImageKernel.cpp b/src/core/CL/kernels/CLIntegralImageKernel.cpp index 69ede457df..6fb39ff0a2 100644 --- a/src/core/CL/kernels/CLIntegralImageKernel.cpp +++ b/src/core/CL/kernels/CLIntegralImageKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -60,7 +60,7 @@ void CLIntegralImageHorKernel::configure(const ICLTensor *input, ICLTensor *outp output_access.set_valid_region(win, input->info()->valid_region()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } CLIntegralImageVertKernel::CLIntegralImageVertKernel() @@ -89,7 +89,7 @@ void CLIntegralImageVertKernel::configure(ICLTensor *in_out) in_out_access.set_valid_region(win, in_out->info()->valid_region()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLIntegralImageVertKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp index 39d9f958d3..54ed51eda2 100644 --- a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp +++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp @@ -120,7 +120,7 @@ void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor auto win_config = validate_and_configure_window(_input->info(), _output->info()); ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - ICLKernel::configure(std::get<1>(win_config)); + ICLKernel::configure_internal(std::get<1>(win_config)); } Status CLL2NormalizeLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, unsigned int axis, float epsilon) diff --git a/src/core/CL/kernels/CLLKTrackerKernel.cpp b/src/core/CL/kernels/CLLKTrackerKernel.cpp index 078d18e61c..40ed630c89 100644 --- a/src/core/CL/kernels/CLLKTrackerKernel.cpp +++ b/src/core/CL/kernels/CLLKTrackerKernel.cpp @@ -75,7 +75,7 @@ void CLLKTrackerInitKernel::configure(const ICLKeyPointArray *old_points, const Window window; window.set(Window::DimX, Window::Dimension(0, old_points->num_values(), 1)); window.set(Window::DimY, Window::Dimension(0, 1, 1)); - ICLKernel::configure(window); + ICLKernel::configure_internal(window); } void CLLKTrackerInitKernel::run(const Window &window, cl::CommandQueue &queue) @@ -104,7 +104,7 @@ void CLLKTrackerFinalizeKernel::configure(ICLLKInternalKeypointArray *new_points Window window; window.set(Window::DimX, Window::Dimension(0, new_points_internal->num_values(), 1)); window.set(Window::DimY, Window::Dimension(0, 1, 1)); - ICLKernel::configure(window); + ICLKernel::configure_internal(window); } void CLLKTrackerFinalizeKernel::run(const Window &window, cl::CommandQueue &queue) @@ -156,7 +156,7 @@ void CLLKTrackerStage0Kernel::configure(const ICLTensor *old_input, const ICLTen AccessWindowStatic(old_scharr_gy->info(), valid_region.start(0), valid_region.start(1), valid_region.end(0), valid_region.end(1))); - ICLKernel::configure(window); + ICLKernel::configure_internal(window); // Initialize required variables const int level0 = (level == 0) ? 1 : 0; @@ -232,7 +232,7 @@ void CLLKTrackerStage1Kernel::configure(const ICLTensor *new_input, ICLLKInterna AccessWindowStatic(new_input->info(), valid_region.start(0), valid_region.start(1), valid_region.end(0), valid_region.end(1))); - ICLKernel::configure(window); + ICLKernel::configure_internal(window); // Initialize required variables const int level0 = (level == 0) ? 1 : 0; diff --git a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp index 1a7d95cc2c..ad2f3a4892 100644 --- a/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.cpp @@ -90,13 +90,14 @@ void CLLocallyConnectedMatrixMultiplyKernel::configure(const ICLTensor *input0, _input1 = input1; _output = output; + cl::NDRange lws_hint; if(output->info()->dimension(1) == 196) { - _lws_hint = cl::NDRange(1, 7); + lws_hint = cl::NDRange(1, 7); } else { - _lws_hint = cl::NDRange(8, 8); + lws_hint = cl::NDRange(8, 8); } std::ostringstream mm_arguments; @@ -114,7 +115,7 @@ void CLLocallyConnectedMatrixMultiplyKernel::configure(const ICLTensor *input0, ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - ICLKernel::configure(std::get<1>(win_config)); + ICLKernel::configure_internal(std::get<1>(win_config), lws_hint); } Status CLLocallyConnectedMatrixMultiplyKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output) @@ -142,7 +143,7 @@ void CLLocallyConnectedMatrixMultiplyKernel::run(const Window &window, cl::Comma add_2D_tensor_argument(idx, _input0, slice); add_3D_tensor_argument(idx, _input1, slice_matrix_b); add_2D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_2D(slice)); } diff --git a/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp b/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp index c504189169..0b34c59a03 100644 --- a/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp +++ b/src/core/CL/kernels/CLMagnitudePhaseKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -137,7 +137,7 @@ void CLMagnitudePhaseKernel::configure(const ICLTensor *gx, const ICLTensor *gy, output_magnitude_access.set_valid_region(win, valid_region); output_phase_access.set_valid_region(win, valid_region); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLMagnitudePhaseKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.cpp b/src/core/CL/kernels/CLMeanStdDevKernel.cpp index bd31131fe5..0cde9c5fe6 100644 --- a/src/core/CL/kernels/CLMeanStdDevKernel.cpp +++ b/src/core/CL/kernels/CLMeanStdDevKernel.cpp @@ -106,7 +106,7 @@ void CLMeanStdDevKernel::configure(const ICLImage *input, float *mean, cl::Buffe AccessWindowRectangle input_access(input->info(), 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y); update_window_and_padding(win, input_access); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLMeanStdDevKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLMedian3x3Kernel.cpp b/src/core/CL/kernels/CLMedian3x3Kernel.cpp index 3b9fb1fe88..b93179d5f4 100644 --- a/src/core/CL/kernels/CLMedian3x3Kernel.cpp +++ b/src/core/CL/kernels/CLMedian3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -62,5 +62,5 @@ void CLMedian3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, boo output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } diff --git a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp index 9493ddc878..fa7b678e86 100644 --- a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp @@ -105,7 +105,7 @@ void CLMinMaxLayerKernel::configure(const ICLTensor *input, ICLTensor *output) ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - ICLKernel::configure(std::get<1>(win_config)); + ICLKernel::configure_internal(std::get<1>(win_config)); } Status CLMinMaxLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output) diff --git a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp index 5636592347..0c7f3bc070 100644 --- a/src/core/CL/kernels/CLMinMaxLocationKernel.cpp +++ b/src/core/CL/kernels/CLMinMaxLocationKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -118,7 +118,7 @@ void CLMinMaxKernel::configure(const ICLImage *input, cl::Buffer *min_max) // Configure kernel window Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, ceil_to_multiple(num_elems_processed_per_iteration, 16))); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLMinMaxKernel::run(const Window &window, cl::CommandQueue &queue) @@ -209,7 +209,7 @@ void CLMinMaxLocationKernel::configure(const ICLImage *input, cl::Buffer *min_ma constexpr unsigned int num_elems_processed_per_iteration = 1; Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration)); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLMinMaxLocationKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLNonLinearFilterKernel.cpp b/src/core/CL/kernels/CLNonLinearFilterKernel.cpp index 6afa5822ba..5e419743d0 100644 --- a/src/core/CL/kernels/CLNonLinearFilterKernel.cpp +++ b/src/core/CL/kernels/CLNonLinearFilterKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -94,5 +94,5 @@ void CLNonLinearFilterKernel::configure(const ICLTensor *input, ICLTensor *outpu output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } diff --git a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp index 6a96b0effd..4e41f0df42 100644 --- a/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp +++ b/src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -68,5 +68,5 @@ void CLNonMaximaSuppression3x3Kernel::configure(const ICLTensor *input, ICLTenso output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp index edc9e9d58c..8a7b7aed22 100644 --- a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp @@ -135,7 +135,7 @@ void CLNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *ou // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info(), norm_info); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Set config_id for enabling LWS tuning _config_id = "normalization_layer_"; diff --git a/src/core/CL/kernels/CLPermuteKernel.cpp b/src/core/CL/kernels/CLPermuteKernel.cpp index 7c0c95be1c..c6f0f4bc55 100644 --- a/src/core/CL/kernels/CLPermuteKernel.cpp +++ b/src/core/CL/kernels/CLPermuteKernel.cpp @@ -120,7 +120,7 @@ void CLPermuteKernel::configure(const ICLTensor *input, ICLTensor *output, const coord.set_num_dimensions(output->info()->num_dimensions()); output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } Status CLPermuteKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm) diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp index 4ea093fe04..4ca2ef8aa3 100644 --- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp +++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp @@ -197,7 +197,7 @@ void CLPixelWiseMultiplicationKernel::configure(const ICLTensor *input1, const I _kernel.setArg(idx++, scale); } - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } Status CLPixelWiseMultiplicationKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp index d5ea092c78..df13068239 100644 --- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp @@ -269,7 +269,7 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output, auto win_config = validate_and_configure_window(input->info(), output->info(), pool_info); ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - ICLKernel::configure(std::get<1>(win_config)); + ICLKernel::configure_internal(std::get<1>(win_config)); if(data_layout == DataLayout::NCHW) { @@ -336,7 +336,7 @@ void CLPoolingLayerKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_3D_tensor_argument(idx, _input, in_slice); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window_collapsed.slide_window_slice_3D(slice)); break; @@ -355,7 +355,7 @@ void CLPoolingLayerKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_3D_tensor_argument(idx, _input, in_slice); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(in_slice)); break; diff --git a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp index af751f4832..9028b0f604 100644 --- a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp @@ -96,7 +96,7 @@ void CLQuantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *out ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - ICLKernel::configure(std::get<1>(win_config)); + ICLKernel::configure_internal(std::get<1>(win_config)); } Status CLQuantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *min_max) diff --git a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp index 4048e927f5..23676942a6 100644 --- a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp @@ -101,7 +101,7 @@ void CLROIPoolingLayerKernel::configure(const ICLTensor *input, const ICLROIArra update_window_and_padding(window, input_access, output_access); output_access.set_valid_region(window, ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(window); + ICLKernel::configure_internal(window); } void CLROIPoolingLayerKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp index 09861630ac..bf36ae2c0f 100644 --- a/src/core/CL/kernels/CLReductionOperationKernel.cpp +++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp @@ -114,8 +114,8 @@ void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *ou // Set the number of WG based on the input size. If input width is < 128 // we can use fewer threads than 8. - _lws_hint = cl::NDRange(std::min(8U, num_of_threads)); - _border_size = BorderSize(0, border_width, 0, 0); + cl::NDRange lws_hint = cl::NDRange(std::min(8U, num_of_threads)); + _border_size = BorderSize(0, border_width, 0, 0); // Set build options std::set build_opts; @@ -142,7 +142,7 @@ void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *ou ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - ICLKernel::configure(std::get<1>(win_config)); + ICLKernel::configure_internal(std::get<1>(win_config), lws_hint); } Status CLReductionOperationKernel::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op) @@ -171,7 +171,7 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que in_slice.set(Window::DimX, Window::Dimension(in_slice.x().start(), in_slice.x().end() + border_width, in_slice.x().step())); // Set local sums buffer - unsigned int local_sum_size = _lws_hint[0] * _input->info()->element_size(); + unsigned int local_sum_size = lws_hint()[0] * _input->info()->element_size(); _kernel.setArg(num_arguments_per_2D_tensor() * 2, local_sum_size, nullptr); do @@ -179,7 +179,7 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que unsigned int idx = 0; add_2D_tensor_argument(idx, _input, in_slice); add_2D_tensor_argument(idx, _output, out_slice); - enqueue(queue, *this, in_slice, _lws_hint); + enqueue(queue, *this, in_slice, lws_hint()); } while(window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(out_slice)); } diff --git a/src/core/CL/kernels/CLRemapKernel.cpp b/src/core/CL/kernels/CLRemapKernel.cpp index b46bb30c59..33c5f2d402 100644 --- a/src/core/CL/kernels/CLRemapKernel.cpp +++ b/src/core/CL/kernels/CLRemapKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -83,7 +83,7 @@ void CLRemapKernel::configure(const ICLTensor *input, const ICLTensor *map_x, co output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); // Set static arguments unsigned int idx = 4 * num_arguments_per_2D_tensor(); //Skip the input and output parameters diff --git a/src/core/CL/kernels/CLReshapeLayerKernel.cpp b/src/core/CL/kernels/CLReshapeLayerKernel.cpp index ce9d7fff67..c7efa9a82d 100644 --- a/src/core/CL/kernels/CLReshapeLayerKernel.cpp +++ b/src/core/CL/kernels/CLReshapeLayerKernel.cpp @@ -92,7 +92,7 @@ void CLReshapeLayerKernel::configure(const ICLTensor *input, ICLTensor *output) output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLReshapeLayerKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLScaleKernel.cpp b/src/core/CL/kernels/CLScaleKernel.cpp index b1655d5cc1..b2cd4b7adf 100644 --- a/src/core/CL/kernels/CLScaleKernel.cpp +++ b/src/core/CL/kernels/CLScaleKernel.cpp @@ -181,7 +181,7 @@ void CLScaleKernel::configure(const ICLTensor *input, ICLTensor *output, Interpo // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info(), policy, border_mode, sampling_policy, border); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Create kernel CLBuildOptions build_opts; @@ -223,7 +223,7 @@ void CLScaleKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_2D_tensor_argument(idx, _input, slice); add_2D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_2D(slice)); break; @@ -237,7 +237,7 @@ void CLScaleKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_3D(slice)); break; diff --git a/src/core/CL/kernels/CLScharr3x3Kernel.cpp b/src/core/CL/kernels/CLScharr3x3Kernel.cpp index 913ef592d4..5182390822 100644 --- a/src/core/CL/kernels/CLScharr3x3Kernel.cpp +++ b/src/core/CL/kernels/CLScharr3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -102,7 +102,7 @@ void CLScharr3x3Kernel::configure(const ICLTensor *input, ICLTensor *output_x, I output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLScharr3x3Kernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLSobel3x3Kernel.cpp b/src/core/CL/kernels/CLSobel3x3Kernel.cpp index 436aaa498a..b4bfe28216 100644 --- a/src/core/CL/kernels/CLSobel3x3Kernel.cpp +++ b/src/core/CL/kernels/CLSobel3x3Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -102,7 +102,7 @@ void CLSobel3x3Kernel::configure(const ICLTensor *input, ICLTensor *output_x, IC output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLSobel3x3Kernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLSobel5x5Kernel.cpp b/src/core/CL/kernels/CLSobel5x5Kernel.cpp index 4c0316f19e..46aa074d61 100644 --- a/src/core/CL/kernels/CLSobel5x5Kernel.cpp +++ b/src/core/CL/kernels/CLSobel5x5Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -102,7 +102,7 @@ void CLSobel5x5HorKernel::configure(const ICLTensor *input, ICLTensor *output_x, output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLSobel5x5HorKernel::run(const Window &window, cl::CommandQueue &queue) @@ -201,7 +201,7 @@ void CLSobel5x5VertKernel::configure(const ICLTensor *input_x, const ICLTensor * output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLSobel5x5VertKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLSobel7x7Kernel.cpp b/src/core/CL/kernels/CLSobel7x7Kernel.cpp index a477953cfb..0c94e88acf 100644 --- a/src/core/CL/kernels/CLSobel7x7Kernel.cpp +++ b/src/core/CL/kernels/CLSobel7x7Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -105,7 +105,7 @@ void CLSobel7x7HorKernel::configure(const ICLTensor *input, ICLTensor *output_x, output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLSobel7x7HorKernel::run(const Window &window, cl::CommandQueue &queue) @@ -204,7 +204,7 @@ void CLSobel7x7VertKernel::configure(const ICLTensor *input_x, const ICLTensor * output_x_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); output_y_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } void CLSobel7x7VertKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp index b9ebdc9583..403256baae 100644 --- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp @@ -242,7 +242,7 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor build_opts.add_option_if(is_data_type_float(dt) && (beta != 1.0f), "-DBETA=" + float_to_string_with_full_precision(beta)); build_opts.add_options_if(is_data_type_quantized_asymmetric(dt), prepare_quantized_softmax_build_options(input->info()->quantization_info().scale, beta).options()); - _lws_hint = cl::NullRange; + cl::NDRange lws_hint(cl::NullRange); std::string kernel_name = is_data_type_quantized_asymmetric(dt) ? std::string("softmax_layer_max_shift_exp_sum_quantized_serial") : std::string("softmax_layer_max_shift_exp_sum_serial"); ParallelReductionInfo parallel_reduction_info = is_parallel_reduction(reduction_dim_size); @@ -264,7 +264,7 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor build_opts.add_option_if((multiple_grid_size != 0) || ((reduction_dim_size % vector_size) != 0), "-DNON_MULTIPLE_OF_GRID_SIZE"); // Setting _lws_hint in this way can also communicate grid_size to CLLogits1DMaxShiftExpSumKernel::run(). // A single workgroup performs reduction in dimension 0 in the parallel case, hence lws[0]==gws[0]. - _lws_hint = cl::NDRange(_grid_size); + lws_hint = cl::NDRange(_grid_size); } // Create kernel. @@ -277,7 +277,7 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor // Configure window auto win_config = validate_and_configure_window_1DMaxShiftExpSum(input->info(), max->info(), output->info(), sum->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second, lws_hint); } Status CLLogits1DMaxShiftExpSumKernel::validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum) @@ -322,7 +322,7 @@ void CLLogits1DMaxShiftExpSumKernel::run(const Window &window, cl::CommandQueue add_3D_tensor_argument(idx, _max, slice); add_3D_tensor_argument(idx, _output, slice); add_3D_tensor_argument(idx, _sum, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window_collapsed.slide_window_slice_3D(slice)); } @@ -365,7 +365,7 @@ void CLLogits1DNormKernel::configure(const ICLTensor *input, const ICLTensor *su // Configure window auto win_config = validate_and_configure_window_1DNorm(input->info(), output->info(), sum->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } Status CLLogits1DNormKernel::validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output) @@ -394,7 +394,7 @@ void CLLogits1DNormKernel::run(const Window &window, cl::CommandQueue &queue) add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _sum, sum_slice); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window_collapsed.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLTransposeKernel.cpp b/src/core/CL/kernels/CLTransposeKernel.cpp index 3d584345d7..695bdf7f40 100644 --- a/src/core/CL/kernels/CLTransposeKernel.cpp +++ b/src/core/CL/kernels/CLTransposeKernel.cpp @@ -117,9 +117,8 @@ void CLTransposeKernel::configure(const ICLTensor *input, ICLTensor *output) ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info())); - _input = input; - _output = output; - _lws_hint = cl::NDRange(2, 8); + _input = input; + _output = output; std::set build_opts; std::ostringstream data_type_in_bytes; @@ -131,5 +130,5 @@ void CLTransposeKernel::configure(const ICLTensor *input, ICLTensor *output) // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second, cl::NDRange(2, 8)); } diff --git a/src/core/CL/kernels/CLWarpAffineKernel.cpp b/src/core/CL/kernels/CLWarpAffineKernel.cpp index e0e09400af..1fae2b1974 100644 --- a/src/core/CL/kernels/CLWarpAffineKernel.cpp +++ b/src/core/CL/kernels/CLWarpAffineKernel.cpp @@ -98,5 +98,5 @@ void CLWarpAffineKernel::configure(const ICLTensor *input, ICLTensor *output, co output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } diff --git a/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp b/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp index d6fcb09658..e537aec058 100644 --- a/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp +++ b/src/core/CL/kernels/CLWarpPerspectiveKernel.cpp @@ -95,5 +95,5 @@ void CLWarpPerspectiveKernel::configure(const ICLTensor *input, ICLTensor *outpu output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } diff --git a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp index 58ecd9ccb3..5ef0f5b152 100644 --- a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp +++ b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp @@ -115,7 +115,7 @@ void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor * Window win = calculate_max_window(*input->info(), Steps()); // The CLWeightsReshapeKernel doesn't need padding so update_window_and_padding() can be skipped output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); - ICLKernel::configure(win); + ICLKernel::configure_internal(win); } Status CLWeightsReshapeKernel::validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, const unsigned int num_groups) diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp index e2ca05a72a..e5ab8d2304 100644 --- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp @@ -111,7 +111,7 @@ void CLWidthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned i auto win_config = validate_and_configure_window(input->info(), width_offset, output->info()); ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - ICLKernel::configure(std::get<1>(win_config)); + ICLKernel::configure_internal(std::get<1>(win_config)); } void CLWidthConcatenateLayerKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp index e6c713e5e7..818638c89e 100644 --- a/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp +++ b/src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp @@ -125,7 +125,7 @@ void CLWinogradFilterTransformKernel::configure(const ICLTensor *input, ICLTenso // Configure kernel window auto win_config = validate_and_configure_window(input->info(), output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } Status CLWinogradFilterTransformKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info) diff --git a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp index fcfd9e30a1..2309fbfb26 100644 --- a/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp +++ b/src/core/CL/kernels/CLWinogradInputTransformKernel.cpp @@ -180,8 +180,6 @@ void CLWinogradInputTransformKernel::configure(const ICLTensor *input, ICLTensor _step_z = (_input->info()->dimension(2) % 2) != 0 ? 1 : 2; } - _lws_hint = cl::NDRange(1, 1, 8); - // Append stepz and data layout kernel_name += "_stepz"; kernel_name += support::cpp11::to_string(_step_z); @@ -192,7 +190,7 @@ void CLWinogradInputTransformKernel::configure(const ICLTensor *input, ICLTensor // Create window and update padding auto win_config = validate_and_configure_window(input->info(), output->info(), winograd_info); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second, cl::NDRange(1, 1, 8)); _config_id = kernel_name; _config_id += support::cpp11::to_string(input->info()->dimension(0)); @@ -239,7 +237,7 @@ void CLWinogradInputTransformKernel::run(const Window &window, cl::CommandQueue add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp index 40d5f6588f..fa42596604 100644 --- a/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp +++ b/src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp @@ -172,7 +172,7 @@ void CLWinogradOutputTransformKernel::configure(const ICLTensor *input, const IC // Configure kernel window auto win_config = validate_and_configure_window(input->info(), (bias != nullptr ? bias->info() : nullptr), output->info(), winograd_info.output_tile_size); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); // Set config_id for enabling LWS tuning _config_id = kernel_name; @@ -231,7 +231,7 @@ void CLWinogradOutputTransformKernel::run(const Window &window, cl::CommandQueue unsigned int idx = 0; add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window.slide_window_slice_3D(slice) && window.slide_window_slice_3D(slice_out)); } diff --git a/src/graph/GraphContext.cpp b/src/graph/GraphContext.cpp index 3f311145bc..5f33ed3537 100644 --- a/src/graph/GraphContext.cpp +++ b/src/graph/GraphContext.cpp @@ -22,7 +22,9 @@ * SOFTWARE. */ #include "arm_compute/graph/GraphContext.h" -#include + +#include "arm_compute/graph.h" +#include "arm_compute/graph/Utils.h" namespace arm_compute { @@ -33,6 +35,12 @@ GraphContext::GraphContext() { } +GraphContext::~GraphContext() +{ + _memory_managers.clear(); + release_default_graph_context(*this); +} + const GraphConfig &GraphContext::config() const { return _config; @@ -82,4 +90,4 @@ void GraphContext::finalize() } } } // namespace graph -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/src/graph/Utils.cpp b/src/graph/Utils.cpp index d89126272d..4715694f15 100644 --- a/src/graph/Utils.cpp +++ b/src/graph/Utils.cpp @@ -89,6 +89,14 @@ PassManager create_default_pass_manager(Target target) return pm; } +void release_default_graph_context(GraphContext &ctx) +{ + for(const auto &backend : backends::BackendRegistry::get().backends()) + { + backend.second->release_backend_context(ctx); + } +} + void setup_default_graph_context(GraphContext &ctx) { for(const auto &backend : backends::BackendRegistry::get().backends()) @@ -132,4 +140,4 @@ size_t get_dimension_idx(const TensorDescriptor &descriptor, const DataLayoutDim } } } // namespace graph -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/src/graph/backends/CL/CLDeviceBackend.cpp b/src/graph/backends/CL/CLDeviceBackend.cpp index 6717f9f8e3..f35daf4ae5 100644 --- a/src/graph/backends/CL/CLDeviceBackend.cpp +++ b/src/graph/backends/CL/CLDeviceBackend.cpp @@ -63,7 +63,7 @@ bool file_exists(const std::string &filename) static detail::BackendRegistrar CLDeviceBackend_registrar(Target::CL); CLDeviceBackend::CLDeviceBackend() - : _initialized(false), _tuner(), _allocator(nullptr), _tuner_file() + : _context_count(0), _tuner(), _allocator(nullptr), _tuner_file() { } @@ -90,13 +90,23 @@ void CLDeviceBackend::initialize_backend() _allocator = support::cpp14::make_unique(); } +void CLDeviceBackend::release_backend_context(GraphContext &ctx) +{ + ARM_COMPUTE_UNUSED(ctx); + _context_count--; + if(_context_count == 0) // No more context using the backend: free resources + { + _allocator = nullptr; + } +} + void CLDeviceBackend::setup_backend_context(GraphContext &ctx) { // Force backend initialization - if(!_initialized) + _context_count++; + if(_context_count == 1) { initialize_backend(); - _initialized = true; } // Setup tuner diff --git a/src/graph/backends/GLES/GCDeviceBackend.cpp b/src/graph/backends/GLES/GCDeviceBackend.cpp index bfac31ac2e..ec3cf4f21e 100644 --- a/src/graph/backends/GLES/GCDeviceBackend.cpp +++ b/src/graph/backends/GLES/GCDeviceBackend.cpp @@ -63,6 +63,12 @@ void GCDeviceBackend::initialize_backend() GCScheduler::get().default_init(); } +void GCDeviceBackend::release_backend_context(GraphContext &ctx) +{ + //Nothing to do + ARM_COMPUTE_UNUSED(ctx); +} + void GCDeviceBackend::setup_backend_context(GraphContext &ctx) { // Force backend initialization @@ -151,4 +157,4 @@ std::shared_ptr GCDeviceBackend::create_memory_mana } } // namespace backends } // namespace graph -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/src/graph/backends/NEON/NEDeviceBackend.cpp b/src/graph/backends/NEON/NEDeviceBackend.cpp index 7c2db40260..5fc44d0c68 100644 --- a/src/graph/backends/NEON/NEDeviceBackend.cpp +++ b/src/graph/backends/NEON/NEDeviceBackend.cpp @@ -61,6 +61,13 @@ NEDeviceBackend::NEDeviceBackend() void NEDeviceBackend::initialize_backend() { + //Nothing to do +} + +void NEDeviceBackend::release_backend_context(GraphContext &ctx) +{ + //Nothing to do + ARM_COMPUTE_UNUSED(ctx); } void NEDeviceBackend::setup_backend_context(GraphContext &ctx) @@ -155,4 +162,4 @@ std::shared_ptr NEDeviceBackend::create_memory_mana } } // namespace backends } // namespace graph -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/src/graph/frontend/Stream.cpp b/src/graph/frontend/Stream.cpp index 96a166c79c..878d688995 100644 --- a/src/graph/frontend/Stream.cpp +++ b/src/graph/frontend/Stream.cpp @@ -33,7 +33,7 @@ namespace graph namespace frontend { Stream::Stream(size_t id, std::string name) - : _manager(), _ctx(), _g(id, std::move(name)) + : _ctx(), _manager(), _g(id, std::move(name)) { } @@ -66,4 +66,4 @@ Graph &Stream::graph() } } // namespace frontend } // namespace graph -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/src/runtime/CL/CLScheduler.cpp b/src/runtime/CL/CLScheduler.cpp index f524a918e6..a812bad865 100644 --- a/src/runtime/CL/CLScheduler.cpp +++ b/src/runtime/CL/CLScheduler.cpp @@ -29,15 +29,15 @@ using namespace arm_compute; -#if defined(ARM_COMPUTE_DEBUG_ENABLED) namespace { +#if defined(ARM_COMPUTE_DEBUG_ENABLED) void printf_callback(const char *buffer, unsigned int len, size_t complete, void *user_data) { printf("%.*s", len, buffer); } -} // namespace #endif /* defined(ARM_COMPUTE_DEBUG_ENABLED) */ +} // namespace std::once_flag CLScheduler::_initialize_symbols; @@ -57,19 +57,25 @@ void CLScheduler::default_init(ICLTuner *cl_tuner) { if(!_is_initialised) { - cl::Context ctx = cl::Context::getDefault(); - auto queue_properties = cl::CommandQueue::getDefault().getInfo(nullptr); + std::vector platforms; + cl::Platform::get(&platforms); + ARM_COMPUTE_ERROR_ON_MSG(platforms.size() == 0, "Couldn't find any OpenCL platform"); + cl::Platform p = platforms[0]; + cl::Context ctx; + cl::Device device; + std::vector platform_devices; + p.getDevices(CL_DEVICE_TYPE_DEFAULT, &platform_devices); + ARM_COMPUTE_ERROR_ON_MSG(platform_devices.size() == 0, "Couldn't find any OpenCL device"); + device = platform_devices[0]; #if defined(ARM_COMPUTE_DEBUG_ENABLED) - // Query devices in the context for cl_arm_printf support - std::vector def_platform_devices; - cl::Platform::getDefault().getDevices(CL_DEVICE_TYPE_DEFAULT, &def_platform_devices); - if(device_supports_extension(def_platform_devices[0], "cl_arm_printf")) + // Query devices in the context for cl_arm_printf support + if(device_supports_extension(device, "cl_arm_printf")) { // Create a cl_context with a printf_callback and user specified buffer size. cl_context_properties properties[] = { - CL_CONTEXT_PLATFORM, reinterpret_cast(cl::Platform::get()()), + CL_CONTEXT_PLATFORM, reinterpret_cast(p()), // Enable a printf callback function for this context. CL_PRINTF_CALLBACK_ARM, reinterpret_cast(printf_callback), // Request a minimum printf buffer size of 4MB for devices in the @@ -77,13 +83,22 @@ void CLScheduler::default_init(ICLTuner *cl_tuner) CL_PRINTF_BUFFERSIZE_ARM, 0x1000, 0 }; - ctx = cl::Context(CL_DEVICE_TYPE_DEFAULT, properties); + ctx = cl::Context(device, properties); } + else #endif // defined(ARM_COMPUTE_DEBUG_ENABLED) + { + cl_context_properties properties[] = + { + CL_CONTEXT_PLATFORM, reinterpret_cast(p()), + 0 + }; + ctx = cl::Context(device, properties); + }; - cl::CommandQueue queue = cl::CommandQueue(ctx, cl::Device::getDefault(), queue_properties); - CLKernelLibrary::get().init("./cl_kernels/", ctx, cl::Device::getDefault()); - init(ctx, queue, cl::Device::getDefault(), cl_tuner); + cl::CommandQueue queue = cl::CommandQueue(ctx, device); + CLKernelLibrary::get().init("./cl_kernels/", ctx, device); + init(ctx, queue, device, cl_tuner); // Create a default static tuner and set if none was provided _cl_default_static_tuner = tuners::TunerFactory::create_tuner(_target); diff --git a/tests/framework/Framework.cpp b/tests/framework/Framework.cpp index 7e1f2934ff..182e0babcd 100644 --- a/tests/framework/Framework.cpp +++ b/tests/framework/Framework.cpp @@ -538,7 +538,7 @@ bool Framework::run() auto queue_properties = CLScheduler::get().queue().getInfo(nullptr); cl::Context new_ctx = cl::Context(CL_DEVICE_TYPE_DEFAULT, ctx_properties.data()); - cl::CommandQueue new_queue = cl::CommandQueue(new_ctx, cl::Device::getDefault(), queue_properties); + cl::CommandQueue new_queue = cl::CommandQueue(new_ctx, CLKernelLibrary::get().get_device(), queue_properties); CLKernelLibrary::get().clear_programs_cache(); CLScheduler::get().set_context(new_ctx); diff --git a/tests/validation/CL/UNIT/Tuner.cpp b/tests/validation/CL/UNIT/Tuner.cpp index 26d21b54f2..9f760563b5 100644 --- a/tests/validation/CL/UNIT/Tuner.cpp +++ b/tests/validation/CL/UNIT/Tuner.cpp @@ -57,12 +57,11 @@ TEST_CASE(BifrostTunerSimple, framework::DatasetMode::ALL) CLDirectConvolutionLayerKernel conv; conv.set_target(GPUTarget::G72); - // Hard-wire lws to kernel and validate lws - conv.set_lws_hint(fake_lws); - ARM_COMPUTE_EXPECT(conv.lws_hint()[0] == 2000, framework::LogLevel::ERRORS); - // Configure conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 1, 1)); + + // Hard-wire lws to kernel and validate lws + conv.set_lws_hint(fake_lws); ARM_COMPUTE_EXPECT(conv.lws_hint()[0] == 2000, framework::LogLevel::ERRORS); // Tune kernel and validate -- cgit v1.2.1