aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/CLKernelLibrary.cpp
diff options
context:
space:
mode:
authorGian Marco Iodice <gianmarco.iodice@arm.com>2021-04-16 15:08:59 +0100
committerGian Marco Iodice <gianmarco.iodice@arm.com>2021-06-24 11:16:30 +0000
commit561c176598cd14245e2e7918fdf136d1c888d1da (patch)
tree82adfff6de30292dabbbcc7ced4ae35cac3d45cf /src/core/CL/CLKernelLibrary.cpp
parent31c7c26822270f1c4952c8973aa8bfb38e0a7c68 (diff)
downloadComputeLibrary-561c176598cd14245e2e7918fdf136d1c888d1da.tar.gz
Rework OpenCL Depthwise Convolution
- Remove dedicated kernels for NCHW. Now we only use NHWC with permute - Remove specialized kernels for 3x3 NHWC - Simplify CLDepthwiseConvolutionLayer.cpp to call just the native implementation for both floating-point and quantized data types - Develop two parametric opencl kernels for depthwise convolution layer NHWC (floating-point and quantized) - Add support to export the weights to cl_image - Extend test for depthwise convolution on opencl Resolves COMPMID-4417 Change-Id: I253dd5d959a70783c82e62b1771a5e9f91621cb0 Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5806 Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Diffstat (limited to 'src/core/CL/CLKernelLibrary.cpp')
-rw-r--r--src/core/CL/CLKernelLibrary.cpp37
1 files changed, 5 insertions, 32 deletions
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index bbd4009389..d8983fcae9 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -22,16 +22,13 @@
* SOFTWARE.
*/
#include "arm_compute/core/CL/CLKernelLibrary.h"
-
#include "arm_compute/core/Error.h"
#include "src/core/gpu/cl/ClKernelLibrary.h"
-
#include <algorithm>
#include <array>
#include <fstream>
#include <utility>
#include <vector>
-
namespace arm_compute
{
CLKernelLibrary::CLKernelLibrary()
@@ -39,123 +36,99 @@ CLKernelLibrary::CLKernelLibrary()
{
opencl_is_available(); // Make sure the OpenCL symbols are initialised *before* the CLKernelLibrary is built
}
-
/** Access the single shared CLKernelLibrary object.
 *
 * The object is a function-local static, so it is constructed the first time this
 * function runs and the same object is handed back on every call.
 *
 * @return Reference to the shared CLKernelLibrary.
 */
CLKernelLibrary &CLKernelLibrary::get()
{
    static CLKernelLibrary instance;
    return instance;
}
-
/** Create a kernel from its name and a set of build options.
 *
 * The opencl::ClKernelLibrary singleton is asked for the name of the program associated
 * with @p kernel_name and then for that program. The kernel path is read through
 * CLKernelLibrary::get().get_kernel_path(). Everything is then forwarded to
 * _compile_context.create_kernel(), together with the program's is_binary flag.
 *
 * @param[in] kernel_name       Name of the kernel to create.
 * @param[in] build_options_set Build options forwarded unchanged to the compile context.
 *
 * @return The Kernel returned by the compile context.
 */
Kernel CLKernelLibrary::create_kernel(const std::string &kernel_name, const std::set<std::string> &build_options_set) const
{
- const opencl::ClKernelLibrary &klib = opencl::ClKernelLibrary::get();
-
- const std::string program_name = klib.program_name(kernel_name);
- auto program = klib.program(program_name);
- const std::string &kernel_path = CLKernelLibrary::get().get_kernel_path();
-
+ const opencl::ClKernelLibrary &klib = opencl::ClKernelLibrary::get();
+ const std::string program_name = klib.program_name(kernel_name);
+ auto program = klib.program(program_name);
+ const std::string &kernel_path = CLKernelLibrary::get().get_kernel_path();
return _compile_context.create_kernel(kernel_name, program_name, program.program, kernel_path, build_options_set, program.is_binary);
}
-
/** Look up the name of the program associated with a kernel.
 *
 * The lookup is delegated to the opencl::ClKernelLibrary singleton.
 *
 * @param[in] kernel_name Name of the kernel.
 *
 * @return The program name reported by opencl::ClKernelLibrary::program_name().
 */
std::string CLKernelLibrary::get_program_name(const std::string &kernel_name) const
{
    std::string owning_program = opencl::ClKernelLibrary::get().program_name(kernel_name);
    return owning_program;
}
-
/** Initialise the library with a kernel path, an OpenCL context and an OpenCL device.
 *
 * The compile context is replaced by a new CLCompileContext built from @p context and
 * @p device, and @p kernel_path is stored through the opencl::ClKernelLibrary singleton.
 *
 * @param[in] kernel_path Path handed to opencl::ClKernelLibrary::set_kernel_path().
 * @param[in] context     OpenCL context used to build the new compile context.
 * @param[in] device      OpenCL device used to build the new compile context.
 */
void CLKernelLibrary::init(std::string kernel_path, cl::Context context, cl::Device device)
{
    _compile_context = CLCompileContext(context, device);
    // kernel_path is this function's own by-value copy and is not used afterwards, so it is
    // handed on as an rvalue instead of being copied a second time.
    opencl::ClKernelLibrary::get().set_kernel_path(std::move(kernel_path));
}
-
/** Set the kernel path.
 *
 * The value is stored by the opencl::ClKernelLibrary singleton, not by this object.
 *
 * @param[in] kernel_path Path forwarded to opencl::ClKernelLibrary::set_kernel_path().
 */
void CLKernelLibrary::set_kernel_path(const std::string &kernel_path)
{
    auto &backend = opencl::ClKernelLibrary::get();
    backend.set_kernel_path(kernel_path);
}
-
/** Access the OpenCL context held by the compile context.
 *
 * @return Reference obtained from _compile_context.context().
 */
cl::Context &CLKernelLibrary::context()
{
    auto &cl_context = _compile_context.context();
    return cl_context;
}
-
/** Access the OpenCL device held by the compile context.
 *
 * @return Const reference obtained from _compile_context.get_device().
 */
const cl::Device &CLKernelLibrary::get_device()
{
    const auto &cl_device = _compile_context.get_device();
    return cl_device;
}
-
/** Set the OpenCL device on the compile context.
 *
 * @param[in] device Device forwarded to _compile_context.set_device().
 */
void CLKernelLibrary::set_device(cl::Device device)
{
    auto &compile_ctx = _compile_context;
    compile_ctx.set_device(device);
}
-
/** Set the OpenCL context on the compile context.
 *
 * @param[in] context Context forwarded to _compile_context.set_context().
 */
void CLKernelLibrary::set_context(cl::Context context)
{
    auto &compile_ctx = _compile_context;
    compile_ctx.set_context(context);
}
-
/** Get the kernel path.
 *
 * The value is read from the opencl::ClKernelLibrary singleton.
 *
 * @return The path reported by opencl::ClKernelLibrary::kernel_path().
 */
std::string CLKernelLibrary::get_kernel_path()
{
    std::string path = opencl::ClKernelLibrary::get().kernel_path();
    return path;
}
-
void CLKernelLibrary::clear_programs_cache()
{
_compile_context.clear_programs_cache();
}
-
/** Access the map of built programs kept by the compile context.
 *
 * @return Const reference obtained from _compile_context.get_built_programs(); the map is
 *         keyed by std::string and holds cl::Program values.
 */
const std::map<std::string, cl::Program> &CLKernelLibrary::get_built_programs() const
{
    const auto &built = _compile_context.get_built_programs();
    return built;
}
-
/** Register a built program with the compile context.
 *
 * @param[in] built_program_name Name forwarded to _compile_context.add_built_program().
 * @param[in] program            Program forwarded to _compile_context.add_built_program().
 */
void CLKernelLibrary::add_built_program(const std::string &built_program_name, const cl::Program &program)
{
    auto &compile_ctx = _compile_context;
    compile_ctx.add_built_program(built_program_name, program);
}
-
/** Report the compile context's fp16 support flag.
 *
 * @return The value of _compile_context.fp16_supported().
 */
bool CLKernelLibrary::fp16_supported() const
{
    const bool has_fp16 = _compile_context.fp16_supported();
    return has_fp16;
}
-
/** Report the compile context's int64 base atomics support flag.
 *
 * @return The value of _compile_context.int64_base_atomics_supported().
 */
bool CLKernelLibrary::int64_base_atomics_supported() const
{
    const bool has_int64_atomics = _compile_context.int64_base_atomics_supported();
    return has_int64_atomics;
}
-
/** Report the compile context's WBSM support flag.
 *
 * @return The value of _compile_context.is_wbsm_supported().
 */
bool CLKernelLibrary::is_wbsm_supported()
{
    const bool has_wbsm = _compile_context.is_wbsm_supported();
    return has_wbsm;
}
-
/** Fetch a program by name from the opencl::ClKernelLibrary singleton.
 *
 * @param[in] program_name Name passed to opencl::ClKernelLibrary::program().
 *
 * @return A pair whose first element is the returned program's `program` member (moved out of
 *         the local result) and whose second element is its `is_binary` member.
 */
std::pair<std::string, bool> CLKernelLibrary::get_program(const std::string &program_name) const
{
    auto fetched = opencl::ClKernelLibrary::get().program(program_name);
    return std::pair<std::string, bool>(std::move(fetched.program), fetched.is_binary);
}
-
/** Query the maximum local workgroup size for a kernel.
 *
 * @param[in] kernel Kernel forwarded to _compile_context.max_local_workgroup_size().
 *
 * @return The value reported by the compile context.
 */
size_t CLKernelLibrary::max_local_workgroup_size(const cl::Kernel &kernel) const
{
    const size_t max_size = _compile_context.max_local_workgroup_size(kernel);
    return max_size;
}
-
/** Get the default NDRange from the compile context.
 *
 * @return The value of _compile_context.default_ndrange().
 */
cl::NDRange CLKernelLibrary::default_ndrange() const
{
    cl::NDRange range = _compile_context.default_ndrange();
    return range;
}
-
/** Get the device version string from the compile context.
 *
 * @return The value of _compile_context.get_device_version().
 */
std::string CLKernelLibrary::get_device_version()
{
    std::string version = _compile_context.get_device_version();
    return version;
}
-
/** Get the number of compute units from the compile context.
 *
 * @return The value of _compile_context.get_num_compute_units().
 */
cl_uint CLKernelLibrary::get_num_compute_units()
{
    const cl_uint unit_count = _compile_context.get_num_compute_units();
    return unit_count;
}
-
/** Access the compile context owned by this library object.
 *
 * @return Mutable reference to the _compile_context member.
 */
CLCompileContext &CLKernelLibrary::get_compile_context()
{
    CLCompileContext &compile_ctx = _compile_context;
    return compile_ctx;
}
-} // namespace arm_compute
+} // namespace arm_compute \ No newline at end of file