From df473eab0ab8a52e6b58e0f6442b39ba4c1d68ea Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 31 May 2018 18:53:52 +0100 Subject: COMPMID-1182: printf doesn't work Change-Id: I013d57f6e2becbd6d2d7700ce5fbbeca670443c4 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/133735 Tested-by: Jenkins Reviewed-by: Pablo Tello --- arm_compute/graph/backends/CL/CLDeviceBackend.h | 5 ++- arm_compute/graph/backends/GLES/GCDeviceBackend.h | 3 +- arm_compute/graph/detail/ExecutionHelpers.h | 2 - arm_compute/runtime/CL/CLScheduler.h | 48 +---------------------- examples/graph_squeezenet_v1_1.cpp | 4 -- src/core/CL/CLKernelLibrary.cpp | 5 +++ src/graph/GraphManager.cpp | 1 - src/graph/backends/CL/CLDeviceBackend.cpp | 15 +++++-- src/graph/backends/GLES/GCDeviceBackend.cpp | 9 ++++- src/graph/detail/ExecutionHelpers.cpp | 8 ---- src/runtime/CL/CLMemory.cpp | 2 +- src/runtime/CL/CLScheduler.cpp | 48 +++++++++++++++++++++++ tests/framework/Framework.cpp | 13 +++--- tests/validation/CL/FixedPoint/FixedPointTarget.h | 4 +- 14 files changed, 89 insertions(+), 78 deletions(-) diff --git a/arm_compute/graph/backends/CL/CLDeviceBackend.h b/arm_compute/graph/backends/CL/CLDeviceBackend.h index ab39d0fb1b..0b45dfe479 100644 --- a/arm_compute/graph/backends/CL/CLDeviceBackend.h +++ b/arm_compute/graph/backends/CL/CLDeviceBackend.h @@ -63,8 +63,9 @@ public: std::shared_ptr create_memory_manager(MemoryManagerAffinity affinity) override; private: - CLTuner _tuner; /**< CL kernel tuner */ - CLBufferAllocator _allocator; /**< CL buffer affinity allocator */ + bool _initialized; /**< Flag that specifies if the backend has been default initialized */ + CLTuner _tuner; /**< CL kernel tuner */ + std::unique_ptr _allocator; /**< CL buffer affinity allocator */ }; } // namespace backends } // namespace graph diff --git a/arm_compute/graph/backends/GLES/GCDeviceBackend.h b/arm_compute/graph/backends/GLES/GCDeviceBackend.h index dc0e2b07dc..ba789221e3 100644 --- a/arm_compute/graph/backends/GLES/GCDeviceBackend.h +++ b/arm_compute/graph/backends/GLES/GCDeviceBackend.h @@ -53,7 +53,8 @@ public: std::shared_ptr create_memory_manager(MemoryManagerAffinity affinity) override; private: - GCBufferAllocator _allocator; /**< GLES buffer affinity allocator */ + bool _initialized; /**< Flag that specifies if the backend has been default initialized */ + GCBufferAllocator _allocator; /**< GLES buffer affinity allocator */ }; } // namespace backends } // namespace graph diff --git a/arm_compute/graph/detail/ExecutionHelpers.h b/arm_compute/graph/detail/ExecutionHelpers.h index 27cae4b8ab..23dd207695 100644 --- a/arm_compute/graph/detail/ExecutionHelpers.h +++ b/arm_compute/graph/detail/ExecutionHelpers.h @@ -39,8 +39,6 @@ class INode; namespace detail { -/** Initializes the available backends **/ -void default_initialize_backends(); /** Validates all nodes * * @param[in] g Graph to validate diff --git a/arm_compute/runtime/CL/CLScheduler.h b/arm_compute/runtime/CL/CLScheduler.h index 26b459c42d..bdd779bd91 100644 --- a/arm_compute/runtime/CL/CLScheduler.h +++ b/arm_compute/runtime/CL/CLScheduler.h @@ -32,16 +32,6 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/ICLTuner.h" -#if defined(ARM_COMPUTE_DEBUG_ENABLED) -namespace -{ -void printf_callback(const char *buffer, unsigned int len, size_t complete, void *user_data) -{ - printf("%.*s", len, buffer); -} -} -#endif /* defined(ARM_COMPUTE_DEBUG_ENABLED) */ - namespace arm_compute { class ICLKernel; @@ -68,43 +58,7 @@ public: * * @param[in] cl_tuner (Optional) Pointer to ICLTuner (default=nullptr) */ - void default_init(ICLTuner *cl_tuner = nullptr) - { - if(!_is_initialised) - { -#if defined(ARM_COMPUTE_DEBUG_ENABLED) - bool is_cl_arm_printf_supported = false; - - //query devices in the context for cl_arm_printf support - std::vector def_platform_devices; - cl::Platform::getDefault().getDevices(CL_DEVICE_TYPE_DEFAULT, &def_platform_devices); - is_cl_arm_printf_supported = device_supports_extension(def_platform_devices[0], "cl_arm_printf"); - - if(is_cl_arm_printf_supported) - { - // Create a cl_context with a printf_callback and user specified buffer size. - cl_context_properties properties[] = - { - // Enable a printf callback function for this context. - CL_PRINTF_CALLBACK_ARM, reinterpret_cast(printf_callback), - // Request a minimum printf buffer size of 4MB for devices in the - // context that support this extension. - CL_PRINTF_BUFFERSIZE_ARM, static_cast(0x100000), - CL_CONTEXT_PLATFORM, reinterpret_cast(cl::Platform::get()()), - 0 - }; - cl::Context::setDefault(cl::Context(CL_DEVICE_TYPE_DEFAULT, properties)); - } -#endif // defined(ARM_COMPUTE_DEBUG_ENABLED) - - CLKernelLibrary::get().init("./cl_kernels/", cl::Context::getDefault(), cl::Device::getDefault()); - init(cl::Context::getDefault(), cl::CommandQueue::getDefault(), cl::Device::getDefault(), cl_tuner); - } - else - { - _cl_tuner = cl_tuner; - } - } + void default_init(ICLTuner *cl_tuner = nullptr); /** Schedule the execution of the passed kernel if possible. * * @param[in] kernel Kernel to execute. diff --git a/examples/graph_squeezenet_v1_1.cpp b/examples/graph_squeezenet_v1_1.cpp index 9e3466b993..b2c5a442cd 100644 --- a/examples/graph_squeezenet_v1_1.cpp +++ b/examples/graph_squeezenet_v1_1.cpp @@ -33,10 +33,6 @@ using namespace arm_compute::utils; using namespace arm_compute::graph::frontend; using namespace arm_compute::graph_utils; -namespace -{ -} // namespace - /** Example demonstrating how to implement Squeezenet's v1.1 network using the Compute Library's graph API * * @param[in] argc Number of arguments diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp index 21a0e68958..7f1667a9cd 100644 --- a/src/core/CL/CLKernelLibrary.cpp +++ b/src/core/CL/CLKernelLibrary.cpp @@ -741,6 +741,11 @@ Kernel CLKernelLibrary::create_kernel(const std::string &kernel_name, const Stri } std::string concat_str; +#if defined(ARM_COMPUTE_DEBUG_ENABLED) + // Enable debug properties in CL kernels + concat_str += " -DARM_COMPUTE_DEBUG_ENABLED"; +#endif // defined(ARM_COMPUTE_DEBUG_ENABLED) + if(fp16_supported()) { concat_str += " -DARM_COMPUTE_OPENCL_FP16_ENABLED=1 "; diff --git a/src/graph/GraphManager.cpp b/src/graph/GraphManager.cpp index a67e5b25d6..0ea3254faa 100644 --- a/src/graph/GraphManager.cpp +++ b/src/graph/GraphManager.cpp @@ -38,7 +38,6 @@ namespace graph GraphManager::GraphManager() : _workloads() { - detail::default_initialize_backends(); } void GraphManager::finalize_graph(Graph &graph, GraphContext &ctx, PassManager &pm, Target target) diff --git a/src/graph/backends/CL/CLDeviceBackend.cpp b/src/graph/backends/CL/CLDeviceBackend.cpp index 7f2be674f6..b235c3aa48 100644 --- a/src/graph/backends/CL/CLDeviceBackend.cpp +++ b/src/graph/backends/CL/CLDeviceBackend.cpp @@ -66,7 +66,7 @@ static detail::BackendRegistrar CLDeviceBackend_registrar(Targe static const std::string tuner_data_filename = "acl_tuner.csv"; CLDeviceBackend::CLDeviceBackend() - : _tuner(), _allocator(cl::Context::getDefault()) + : _initialized(false), _tuner(), _allocator(nullptr) { } @@ -96,11 +96,18 @@ void CLDeviceBackend::initialize_backend() CLScheduler::get().default_init(&_tuner); // Create allocator with new context - _allocator = CLBufferAllocator(); + _allocator = support::cpp14::make_unique(); } void CLDeviceBackend::setup_backend_context(GraphContext &ctx) { + // Force backend initialization + if(!_initialized) + { + initialize_backend(); + _initialized = true; + } + // Setup tuner set_kernel_tuning(ctx.config().use_tuner); @@ -124,7 +131,7 @@ bool CLDeviceBackend::is_backend_supported() IAllocator *CLDeviceBackend::backend_allocator() { - return &_allocator; + return _allocator.get(); } std::unique_ptr CLDeviceBackend::create_tensor(const Tensor &tensor) @@ -180,7 +187,7 @@ std::shared_ptr CLDeviceBackend::create_memory_mana auto pool_mgr = std::make_shared(); auto mm = std::make_shared(lifetime_mgr, pool_mgr); - mm->set_allocator(&_allocator); + mm->set_allocator(_allocator.get()); return mm; } diff --git a/src/graph/backends/GLES/GCDeviceBackend.cpp b/src/graph/backends/GLES/GCDeviceBackend.cpp index 770cca5d42..bfac31ac2e 100644 --- a/src/graph/backends/GLES/GCDeviceBackend.cpp +++ b/src/graph/backends/GLES/GCDeviceBackend.cpp @@ -53,7 +53,7 @@ namespace backends static detail::BackendRegistrar GCDeviceBackend_registrar(Target::GC); GCDeviceBackend::GCDeviceBackend() - : _allocator() + : _initialized(false), _allocator() { } @@ -65,6 +65,13 @@ void GCDeviceBackend::initialize_backend() void GCDeviceBackend::setup_backend_context(GraphContext &ctx) { + // Force backend initialization + if(!_initialized) + { + initialize_backend(); + _initialized = true; + } + // Setup a management backend if(ctx.memory_management_ctx(Target::GC) == nullptr) { diff --git a/src/graph/detail/ExecutionHelpers.cpp b/src/graph/detail/ExecutionHelpers.cpp index c370fdf916..d68092a2e7 100644 --- a/src/graph/detail/ExecutionHelpers.cpp +++ b/src/graph/detail/ExecutionHelpers.cpp @@ -35,14 +35,6 @@ namespace graph { namespace detail { -void default_initialize_backends() -{ - for(const auto &backend : backends::BackendRegistry::get().backends()) - { - backend.second->initialize_backend(); - } -} - void validate_all_nodes(Graph &g) { auto &nodes = g.nodes(); diff --git a/src/runtime/CL/CLMemory.cpp b/src/runtime/CL/CLMemory.cpp index 534c4f9e34..bbc513d783 100644 --- a/src/runtime/CL/CLMemory.cpp +++ b/src/runtime/CL/CLMemory.cpp @@ -61,7 +61,7 @@ ICLMemoryRegion *CLMemory::region() const void CLMemory::create_empty_region() { - _region_owned = std::make_shared(cl::Context::getDefault(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, 0); + _region_owned = std::make_shared(cl::Context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, 0); _region = _region_owned.get(); } } // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/CL/CLScheduler.cpp b/src/runtime/CL/CLScheduler.cpp index fdae615108..c348dfab80 100644 --- a/src/runtime/CL/CLScheduler.cpp +++ b/src/runtime/CL/CLScheduler.cpp @@ -28,6 +28,16 @@ using namespace arm_compute; +#if defined(ARM_COMPUTE_DEBUG_ENABLED) +namespace +{ +void printf_callback(const char *buffer, unsigned int len, size_t complete, void *user_data) +{ + printf("%.*s", len, buffer); +} +} // namespace +#endif /* defined(ARM_COMPUTE_DEBUG_ENABLED) */ + std::once_flag CLScheduler::_initialize_symbols; CLScheduler::CLScheduler() @@ -42,6 +52,44 @@ CLScheduler &CLScheduler::get() return scheduler; } +void CLScheduler::default_init(ICLTuner *cl_tuner) +{ + if(!_is_initialised) + { + cl::Context ctx = cl::Context::getDefault(); + auto queue_properties = cl::CommandQueue::getDefault().getInfo(nullptr); +#if defined(ARM_COMPUTE_DEBUG_ENABLED) + // Query devices in the context for cl_arm_printf support + std::vector def_platform_devices; + cl::Platform::getDefault().getDevices(CL_DEVICE_TYPE_DEFAULT, &def_platform_devices); + + if(device_supports_extension(def_platform_devices[0], "cl_arm_printf")) + { + // Create a cl_context with a printf_callback and user specified buffer size. + cl_context_properties properties[] = + { + CL_CONTEXT_PLATFORM, reinterpret_cast(cl::Platform::get()()), + // Enable a printf callback function for this context. + CL_PRINTF_CALLBACK_ARM, reinterpret_cast(printf_callback), + // Request a minimum printf buffer size of 4MB for devices in the + // context that support this extension. + CL_PRINTF_BUFFERSIZE_ARM, 0x1000, + 0 + }; + ctx = cl::Context(CL_DEVICE_TYPE_DEFAULT, properties); + } +#endif // defined(ARM_COMPUTE_DEBUG_ENABLED) + + cl::CommandQueue queue = cl::CommandQueue(ctx, cl::Device::getDefault(), queue_properties); + CLKernelLibrary::get().init("./cl_kernels/", ctx, cl::Device::getDefault()); + init(ctx, queue, cl::Device::getDefault(), cl_tuner); + } + else + { + _cl_tuner = cl_tuner; + } +} + void CLScheduler::enqueue(ICLKernel &kernel, bool flush) { ARM_COMPUTE_ERROR_ON_MSG(!_is_initialised, diff --git a/tests/framework/Framework.cpp b/tests/framework/Framework.cpp index fd0afe9d7f..7e1f2934ff 100644 --- a/tests/framework/Framework.cpp +++ b/tests/framework/Framework.cpp @@ -534,12 +534,15 @@ bool Framework::run() // Every 5000 tests, reset the OpenCL context to release the allocated memory if((id_run_test % 5000) == 0) { - cl::Context::setDefault(cl::Context()); - CLScheduler::get().set_context(cl::Context()); - CLKernelLibrary::get().clear_programs_cache(); + auto ctx_properties = CLScheduler::get().context().getInfo(nullptr); + auto queue_properties = CLScheduler::get().queue().getInfo(nullptr); + + cl::Context new_ctx = cl::Context(CL_DEVICE_TYPE_DEFAULT, ctx_properties.data()); + cl::CommandQueue new_queue = cl::CommandQueue(new_ctx, cl::Device::getDefault(), queue_properties); - cl::Context::setDefault(cl::Context(CL_DEVICE_TYPE_DEFAULT)); - CLScheduler::get().set_context(cl::Context::getDefault()); + CLKernelLibrary::get().clear_programs_cache(); + CLScheduler::get().set_context(new_ctx); + CLScheduler::get().set_queue(new_queue); } #endif // ARM_COMPUTE_CL run_test(test_info, *test_factory); diff --git a/tests/validation/CL/FixedPoint/FixedPointTarget.h b/tests/validation/CL/FixedPoint/FixedPointTarget.h index 38473545d9..920bd374d3 100644 --- a/tests/validation/CL/FixedPoint/FixedPointTarget.h +++ b/tests/validation/CL/FixedPoint/FixedPointTarget.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -110,7 +110,7 @@ void compute_target_impl(const TensorShape &shape, DataType dt, FixedPointOp op, sources.push_back(fixed_point_operation_kernel); // Create program - ::cl::Program program(sources); + ::cl::Program program(CLScheduler::get().context(), sources); // Build program program.build(build_opts.c_str()); -- cgit v1.2.1