aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/core/CL/CLKernelLibrary.cpp5
-rw-r--r--src/core/CL/cl_kernels/gemm.cl2
-rw-r--r--src/core/CL/kernels/CLMeanStdDevKernel.cpp3
3 files changed, 9 insertions, 1 deletions
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 9b0633061f..21a0e68958 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -799,6 +799,11 @@ bool CLKernelLibrary::fp16_supported() const
return ::fp16_supported(_device);
}
+bool CLKernelLibrary::int64_base_atomics_supported() const
+{
+ return device_supports_extension(_device, "cl_khr_int64_base_atomics");
+}
+
const Program &CLKernelLibrary::load_program(const std::string &program_name) const
{
const auto program_it = _programs_map.find(program_name);
diff --git a/src/core/CL/cl_kernels/gemm.cl b/src/core/CL/cl_kernels/gemm.cl
index 89d80367d1..ad38c7ebd0 100644
--- a/src/core/CL/cl_kernels/gemm.cl
+++ b/src/core/CL/cl_kernels/gemm.cl
@@ -2499,6 +2499,7 @@ __kernel void gemm_ma_f32(IMAGE_DECLARATION(src),
vstore4(out, 0, (__global float *)dst.ptr);
}
+#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
/** This OpenCL kernel performs the in-place matrix addition between 2 matrices taking into account that the second matrix might be weighted by a scalar value beta:
*
* @note The beta's value need to be passed at compile time using -DBETA
@@ -2535,6 +2536,7 @@ __kernel void gemm_ma_f16(IMAGE_DECLARATION(src),
// Store final result in axb matrix
vstore8(out, 0, (__global half *)dst.ptr);
}
+#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
#if defined(FIXED_POINT_POSITION)
/** This OpenCL kernel performs the in-place matrix addition between 2 matrices in 8 bit fixed point taking into account that the second matrix might be weighted by a scalar value beta:
diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.cpp b/src/core/CL/kernels/CLMeanStdDevKernel.cpp
index 1bf831b9d9..fc8764dbfe 100644
--- a/src/core/CL/kernels/CLMeanStdDevKernel.cpp
+++ b/src/core/CL/kernels/CLMeanStdDevKernel.cpp
@@ -24,13 +24,13 @@
#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/CLValidate.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include <cmath>
@@ -51,6 +51,7 @@ BorderSize CLMeanStdDevKernel::border_size() const
void CLMeanStdDevKernel::configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev, cl::Buffer *global_sum_squared)
{
+ ARM_COMPUTE_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED();
ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON(nullptr == mean);