aboutsummaryrefslogtreecommitdiff
path: root/src/core
diff options
context:
space:
mode:
author Gian Marco Iodice <gianmarco.iodice@arm.com> 2020-06-02 12:12:35 +0100
committer Gian Marco Iodice <gianmarco.iodice@arm.com> 2020-06-02 17:18:29 +0000
commit a98dee2da0aef1c53a31045b0c681fb0abc8f8ba (patch)
tree b4f0a130b19c47f19d9e5b326c18cb9cc8e765e2 /src/core
parent 16bd6dd97fb704e31cd96e404e8c4148fe24d834 (diff)
download ComputeLibrary-a98dee2da0aef1c53a31045b0c681fb0abc8f8ba.tar.gz
COMPMID-3319: Force padding requirement in CLGEMMReshapeRHSMatrixKernel
Added padding requirement in CLGEMMReshapeRHSMatrixKernel in order to create a 2d image from a cl_buffer. Test extended in order to validate the padding requirement. Change-Id: I36bcaf3e9299ee186602b4e3456851cc8cda6ce6 Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3292 Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core')
-rw-r--r-- src/core/CL/CLHelpers.cpp 21
-rw-r--r-- src/core/CL/OpenCL.cpp 25
-rw-r--r-- src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp 30
3 files changed, 76 insertions, 0 deletions
diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp
index 15f45d52d9..44695d00fe 100644
--- a/src/core/CL/CLHelpers.cpp
+++ b/src/core/CL/CLHelpers.cpp
@@ -370,6 +370,27 @@ bool preferred_dummy_work_items_support(const cl::Device &device)
return true;
}
+bool image2d_from_buffer_supported(const cl::Device &device) // True when device_supports_extension() reports "cl_khr_image2d_from_buffer" for this device.
+{
+ return device_supports_extension(device, "cl_khr_image2d_from_buffer");
+}
+
+size_t get_cl_image_pitch_alignment(const cl::Device &device) // Queries CL_DEVICE_IMAGE_PITCH_ALIGNMENT for the device; returns 0 if the query fails.
+{
+ cl_uint pixel_aligment = 0; // filled in by the clGetDeviceInfo call below
+
+ cl_int err = clGetDeviceInfo(device(), CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof(cl_uint), &pixel_aligment, nullptr); // last argument nullptr: the returned size is not requested
+
+ if(err == CL_SUCCESS)
+ {
+ return pixel_aligment; // widened from cl_uint to size_t on return
+ }
+ else
+ {
+ return 0; // failure is reported as 0 rather than as an error code
+ }
+}
+
cl::Kernel create_opencl_kernel(CLCoreRuntimeContext *ctx, const std::string &kernel_name, const CLBuildOptions &build_opts)
{
if(ctx && ctx->kernel_library())
diff --git a/src/core/CL/OpenCL.cpp b/src/core/CL/OpenCL.cpp
index 9a3e344f1f..809f21b89e 100644
--- a/src/core/CL/OpenCL.cpp
+++ b/src/core/CL/OpenCL.cpp
@@ -133,6 +133,7 @@ bool CLSymbols::load(const std::string &library)
LOAD_FUNCTION_PTR(clEnqueueSVMUnmap, handle);
LOAD_FUNCTION_PTR(clEnqueueMarker, handle);
LOAD_FUNCTION_PTR(clWaitForEvents, handle);
+ LOAD_FUNCTION_PTR(clCreateImage, handle);
// Third-party extensions
LOAD_FUNCTION_PTR(clImportMemoryARM, handle);
@@ -938,6 +939,30 @@ clGetEventProfilingInfo(cl_event event,
}
cl_mem
+clCreateImage(cl_context context,
+ cl_mem_flags flags,
+ const cl_image_format *image_format,
+ const cl_image_desc *image_desc,
+ void *host_ptr,
+ cl_int *errcode_ret)
+{ // Forwards to the clCreateImage pointer held by CLSymbols; reports failure when that pointer is null.
+ arm_compute::CLSymbols::get().load_default(); // invoked on every call, before the function pointer is read
+ auto func = arm_compute::CLSymbols::get().clCreateImage_ptr;
+ if(func != nullptr)
+ {
+ return func(context, flags, image_format, image_desc, host_ptr, errcode_ret); // all arguments passed through unchanged
+ }
+ else
+ {
+ if(errcode_ret != nullptr) // errcode_ret may be null, so it is only written when provided
+ {
+ *errcode_ret = CL_OUT_OF_RESOURCES; // error code reported when clCreateImage_ptr is null
+ }
+ return nullptr; // no image object is returned in this case
+ }
+}
+
+cl_mem
clImportMemoryARM(cl_context context,
cl_mem_flags flags,
const cl_import_properties_arm *properties,
diff --git a/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp b/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp
index 4217932097..43e7b92c6a 100644
--- a/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.cpp
@@ -54,6 +54,23 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
ARM_COMPUTE_RETURN_ERROR_ON(rhs_info.n0 > 16);
ARM_COMPUTE_RETURN_ERROR_ON(rhs_info.k0 > 16);
ARM_COMPUTE_RETURN_ERROR_ON((rhs_info.k0 == 1) && (rhs_info.transpose));
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(rhs_info.export_to_cl_image && ((rhs_info.n0 != 4) || input->data_type() != DataType::F32), "Export to cl_image only supported with n0 = 4 and F32 data type");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(rhs_info.export_to_cl_image
+ && !image2d_from_buffer_supported(CLKernelLibrary::get().get_device()), "The extension cl_khr_image2d_from_buffer is not supported on the target platform");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(rhs_info.export_to_cl_image && (get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) == 0), "Impossible to retrieve the cl_image pitch alignment");
+
+ if(rhs_info.export_to_cl_image)
+ {
+ TensorShape output_shape = compute_rhs_reshaped_shape(*input, rhs_info);
+
+ // Check the width and height of the output tensor.
+ // Since we cannot create a 3d image from a buffer, the third dimension is collapsed with the second dimension
+ size_t max_image_w = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>();
+ size_t max_image_h = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>();
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(output_shape[0] > max_image_w * 4, "Not supported width for cl_image");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(output_shape[1] * output_shape[2] > max_image_h, "Not supported height for cl_image");
+ }
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
@@ -86,6 +103,19 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
window_changed = update_window_and_padding(win, input_access);
output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), output->tensor_shape()));
+ if(rhs_info.export_to_cl_image)
+ {
+ constexpr unsigned int num_floats_per_pixel = 4;
+
+ const unsigned int stride_y_in_elements = output->strides_in_bytes()[1] / output->element_size();
+ const unsigned int pixel_aligment = get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device());
+ const unsigned int row_pitch_alignment = pixel_aligment * num_floats_per_pixel;
+ const unsigned int round_up_width = ((stride_y_in_elements + row_pitch_alignment - 1) / row_pitch_alignment) * row_pitch_alignment;
+ const unsigned int padding = round_up_width - stride_y_in_elements;
+
+ output->extend_padding(PaddingSize(0, padding, 0, 0));
+ }
+
// Collapse along the Z direction
// This collapse needs to be here in order to tune the Z dimension of LWS
Window collapsed = win.collapse(win, Window::DimZ);