From 76c8564936a1e0d1be022a2f56dc0a52d638f5d7 Mon Sep 17 00:00:00 2001 From: Vidhya Sudhan Loganathan Date: Fri, 25 May 2018 13:53:02 +0100 Subject: COMPMID-1083 : Compute library should be made usable on non-ARM platforms Added * Compile time switches for kernels using FP16 extensions * Validation for support of atomics extension Change-Id: Ia88e601db054ff35f1508988b5e322bd27511ac5 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/133216 Reviewed-by: Pablo Tello Reviewed-by: Anthony Barbier Tested-by: Jenkins --- src/core/CL/CLKernelLibrary.cpp | 5 +++++ src/core/CL/cl_kernels/gemm.cl | 2 ++ src/core/CL/kernels/CLMeanStdDevKernel.cpp | 3 ++- 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'src/core') diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp index 9b0633061f..21a0e68958 100644 --- a/src/core/CL/CLKernelLibrary.cpp +++ b/src/core/CL/CLKernelLibrary.cpp @@ -799,6 +799,11 @@ bool CLKernelLibrary::fp16_supported() const return ::fp16_supported(_device); } +bool CLKernelLibrary::int64_base_atomics_supported() const +{ + return device_supports_extension(_device, "cl_khr_int64_base_atomics"); +} + const Program &CLKernelLibrary::load_program(const std::string &program_name) const { const auto program_it = _programs_map.find(program_name); diff --git a/src/core/CL/cl_kernels/gemm.cl b/src/core/CL/cl_kernels/gemm.cl index 89d80367d1..ad38c7ebd0 100644 --- a/src/core/CL/cl_kernels/gemm.cl +++ b/src/core/CL/cl_kernels/gemm.cl @@ -2499,6 +2499,7 @@ __kernel void gemm_ma_f32(IMAGE_DECLARATION(src), vstore4(out, 0, (__global float *)dst.ptr); } +#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) /** This OpenCL kernel performs the in-place matrix addition between 2 matrices taking into account that the second matrix might be weighted by a scalar value beta: * * @note The beta's value need to be passed at compile time using -DBETA @@ -2535,6 +2536,7 @@ __kernel void gemm_ma_f16(IMAGE_DECLARATION(src), // Store final result in axb matrix vstore8(out, 0, (__global half *)dst.ptr); } +#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) #if defined(FIXED_POINT_POSITION) /** This OpenCL kernel performs the in-place matrix addition between 2 matrices in 8 bit fixed point taking into account that the second matrix might be weighted by a scalar value beta: diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.cpp b/src/core/CL/kernels/CLMeanStdDevKernel.cpp index 1bf831b9d9..fc8764dbfe 100644 --- a/src/core/CL/kernels/CLMeanStdDevKernel.cpp +++ b/src/core/CL/kernels/CLMeanStdDevKernel.cpp @@ -24,13 +24,13 @@ #include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h" #include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/CLValidate.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" #include @@ -51,6 +51,7 @@ BorderSize CLMeanStdDevKernel::border_size() const void CLMeanStdDevKernel::configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev, cl::Buffer *global_sum_squared) { + ARM_COMPUTE_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED(); ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); ARM_COMPUTE_ERROR_ON(nullptr == mean); -- cgit v1.2.1