author    Gian Marco Iodice <gianmarco.iodice@arm.com>   2017-06-23 09:34:33 +0100
committer Anthony Barbier <anthony.barbier@arm.com>      2018-09-17 14:14:20 +0100
commit    578ab61332aede4e11b7d5d92dbd72993d807a08 (patch)
tree      67e4588a570277a42e8a36c66eca24092e89c362
parent    ec8b45e7333006ae06d834e300c11a4fd1b4c067 (diff)
download  ComputeLibrary-578ab61332aede4e11b7d5d92dbd72993d807a08.tar.gz
COMPMID-414 - Port CLConvolutionLayer to support 8 bit fixed point - CLGEMMMatrixAccumulateBiasesKernel
Change-Id: Idba13b578dc564b8003ce2fa3392eea2af3ce806
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/78664
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
-rw-r--r--  arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h |  2
-rw-r--r--  src/core/CL/CLKernelLibrary.cpp                                   |  3
-rw-r--r--  src/core/CL/cl_kernels/gemm.cl                                    | 44
-rw-r--r--  src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp       | 12
4 files changed, 20 insertions, 41 deletions
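
The change collapses the two type-specific bias kernels (gemm_accumulate_biases_f32 / gemm_accumulate_biases_f16) into a single gemm_accumulate_biases kernel whose element type is chosen at build time, and additionally allows QS8, the library's 8-bit signed fixed-point format in which a raw byte is read as value = raw * 2^-fixed_point_position. A rough host-side illustration of that representation, written for this page rather than taken from the library's own fixed-point helpers:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

// Illustrative QS8 conversion: 8-bit signed value with an implicit scale of
// 2^-fixed_point_position (saturating on overflow).
int8_t float_to_qs8(float value, int fixed_point_position)
{
    const float scaled = value * static_cast<float>(1 << fixed_point_position);
    return static_cast<int8_t>(std::max(-128.0f, std::min(127.0f, std::round(scaled))));
}

float qs8_to_float(int8_t raw, int fixed_point_position)
{
    return static_cast<float>(raw) / static_cast<float>(1 << fixed_point_position);
}

int main()
{
    const int    fpp = 4;                         // 4 fractional bits: resolution 1/16
    const int8_t q   = float_to_qs8(1.25f, fpp);  // 1.25 * 16 = 20
    std::cout << static_cast<int>(q) << " -> " << qs8_to_float(q, fpp) << "\n"; // 20 -> 1.25
    return 0;
}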
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h
index ea1db9f831..74a7a0e4a6 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h
@@ -46,7 +46,7 @@ public:
CLGEMMMatrixAccumulateBiasesKernel &operator=(CLGEMMMatrixAccumulateBiasesKernel &&) = default;
/** Set the accumulate buffer and the biases of the kernel.
*
- * @param[in, out] accum The accumulate tensor to convert. Data types supported: F16/F32
+ * @param[in, out] accum The accumulate tensor to convert. Data types supported: QS8/F16/F32
* @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p input
*/
void configure(ICLTensor *accum, const ICLTensor *biases);
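
For reference, a minimal sketch of how this kernel is driven from host code once QS8 is accepted. The shapes and the fixed-point position below are illustrative, and the TensorInfo overload taking a fixed-point position is assumed from the library's API of this era rather than shown in this diff:

#include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

void configure_bias_accumulation()
{
    // Assumed initialisation of the CL backend so that kernel creation can
    // compile the program with the -DDATA_TYPE build option.
    CLScheduler::get().default_init();

    CLTensor accum;
    CLTensor biases;

    // Illustrative shapes: a 64x32 QS8 accumulator and a 64-element 1D bias
    // vector with the same data type and the same fixed point position (4).
    accum.allocator()->init(TensorInfo(TensorShape(64U, 32U), 1, DataType::QS8, 4));
    biases.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::QS8, 4));

    CLGEMMMatrixAccumulateBiasesKernel kernel;
    kernel.configure(&accum, &biases); // accum is updated in place when the kernel runs
}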
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 45a247db1a..6c64265785 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -151,8 +151,7 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "finalize", "optical_flow_pyramid_lk.cl" },
{ "gaussian1x5_sub_x", "gaussian_pyramid.cl" },
{ "gaussian5x1_sub_y", "gaussian_pyramid.cl" },
- { "gemm_accumulate_biases_f16", "gemm.cl" },
- { "gemm_accumulate_biases_f32", "gemm.cl" },
+ { "gemm_accumulate_biases", "gemm.cl" },
{ "gemm_interleave4x4_8bit", "gemm.cl" },
{ "gemm_interleave4x4_16bit", "gemm.cl" },
{ "gemm_interleave4x4_32bit", "gemm.cl" },
diff --git a/src/core/CL/cl_kernels/gemm.cl b/src/core/CL/cl_kernels/gemm.cl
index d80b5262a7..9bec8d5d92 100644
--- a/src/core/CL/cl_kernels/gemm.cl
+++ b/src/core/CL/cl_kernels/gemm.cl
@@ -248,6 +248,8 @@ __kernel void gemm_interleave4x4_8bit(IMAGE_DECLARATION(src),
/** This kernel accumulates each row with the biases vector
*
+ * @note The data type must be passed at compile time using -DDATA_TYPE=type, e.g. -DDATA_TYPE=short
+ *
* @param[in, out] accum_ptr Pointer to the accumulate tensor. Supported data type: F32
* @param[in] accum_stride_x Stride of the accumulate tensor in X dimension (in bytes)
* @param[in] accum_step_x accum_stride_x * number of elements along X processed per workitem(in bytes)
@@ -259,48 +261,24 @@ __kernel void gemm_interleave4x4_8bit(IMAGE_DECLARATION(src),
* @param[in] biases_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] biases_offset_first_element_in_bytes The offset of the first element in the destination tensor
*/
-__kernel void gemm_accumulate_biases_f32(
- IMAGE_DECLARATION(accum),
- VECTOR_DECLARATION(biases))
-{
- Image accum = CONVERT_TO_IMAGE_STRUCT(accum);
- Vector biases = CONVERT_TO_VECTOR_STRUCT(biases);
-
- float4 accum_value = vload4(0, (__global float *)accum.ptr);
- float4 biases_value = vload4(0, (__global float *)biases.ptr);
- accum_value = biases_value + accum_value;
-
- // Store result in the accummulate buffer
- vstore4(accum_value, 0, (__global float *)accum.ptr);
-}
-
-/** This kernel accumulates each row with the biases vector
- *
- * @param[in, out] accum_ptr Pointer to the accumulate tensor. Supported data type: F16
- * @param[in] accum_stride_x Stride of the accumulate tensor in X dimension (in bytes)
- * @param[in] accum_step_x accum_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] accum_stride_y Stride of the accumlulate tensor in Y dimension (in bytes)
- * @param[in] accum_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] accum_offset_first_element_in_bytes The offset of the first element in the accumulate tensor
- * @param[in] biases_ptr Pointer to the biases vector. Same as input.
- * @param[in] biases_stride_x Stride of the destination tensor in X dimension (in bytes)
- * @param[in] biases_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the destination tensor
- */
-__kernel void gemm_accumulate_biases_f16(
+#if(defined DATA_TYPE)
+__kernel void gemm_accumulate_biases(
IMAGE_DECLARATION(accum),
VECTOR_DECLARATION(biases))
{
Image accum = CONVERT_TO_IMAGE_STRUCT(accum);
Vector biases = CONVERT_TO_VECTOR_STRUCT(biases);
- half8 accum_value = vload8(0, (__global half *)accum.ptr);
- half8 biases_value = vload8(0, (__global half *)biases.ptr);
- accum_value = biases_value + accum_value;
+ VEC_DATA_TYPE(DATA_TYPE, 16)
+ accum_value = vload16(0, (__global DATA_TYPE *)accum.ptr);
+ VEC_DATA_TYPE(DATA_TYPE, 16)
+ biases_value = vload16(0, (__global DATA_TYPE *)biases.ptr);
+ accum_value = biases_value + accum_value;
// Store result in the accumulate buffer
- vstore8(accum_value, 0, (__global half *)accum.ptr);
+ vstore16(accum_value, 0, (__global DATA_TYPE *)accum.ptr);
}
+#endif // defined DATA_TYPE
#if(defined WIDTH_MATRIX_B)
/** This OpenCL kernel computes the matrix multiplication between matrix A (src0) and matrix B (src1)
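
The rewritten kernel leans on the OpenCL C preprocessor: compiled with -DDATA_TYPE=char, VEC_DATA_TYPE(DATA_TYPE, 16) collapses to the vector type char16, so the same vload16/vstore16 body serves QS8, F16 and F32. The snippet below checks that token-pasting behaviour on the host, assuming VEC_DATA_TYPE is the usual two-level paste macro from the library's CL helpers (an assumption, since the helper is not shown in this diff):

#include <iostream>

// Two-level paste so that macro arguments such as DATA_TYPE are expanded
// before concatenation (assumed to mirror the CL-side helper of the same name).
#define VEC_DATA_TYPE_IMPL(type, size) type##size
#define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_IMPL(type, size)

#define STRINGIFY_IMPL(x) #x
#define STRINGIFY(x) STRINGIFY_IMPL(x)

#define DATA_TYPE char // stands in for the -DDATA_TYPE=char build option

int main()
{
    std::cout << STRINGIFY(VEC_DATA_TYPE(DATA_TYPE, 16)) << "\n"; // prints: char16
    std::cout << STRINGIFY(VEC_DATA_TYPE(float, 16)) << "\n";     // prints: float16
    return 0;
}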
diff --git a/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp
index 289873c23f..75c1a6e629 100644
--- a/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp
@@ -43,20 +43,22 @@ CLGEMMMatrixAccumulateBiasesKernel::CLGEMMMatrixAccumulateBiasesKernel()
void CLGEMMMatrixAccumulateBiasesKernel::configure(ICLTensor *accum, const ICLTensor *biases)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::QS8, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(biases, accum);
ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() != 1);
_biases = biases;
_accum = accum;
+ std::set<std::string> build_opts;
+ build_opts.insert(("-DDATA_TYPE=" + get_cl_type_from_data_type(accum->info()->data_type())));
+
// Create kernel
- std::string data_type_name = lower_string(string_from_data_type(accum->info()->data_type()));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("gemm_accumulate_biases_" + data_type_name));
+ _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("gemm_accumulate_biases", build_opts));
// Configure kernel window
- const unsigned int num_elems_processed_per_iteration = max_cl_vector_width / data_size_from_type(accum->info()->data_type());
+ const unsigned int num_elems_processed_per_iteration = 16;
Window win = calculate_max_window(*_accum->info(), Steps(num_elems_processed_per_iteration));
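
The old per-type kernels derived their vector width from max_cl_vector_width (4 floats or 8 halves per 16-byte vector); the generic kernel now always consumes 16 elements per work item, so the execution window in X is rounded up to a multiple of 16 regardless of data type. A quick illustration of that rounding, where ceil_to_multiple is a local stand-in rather than the library helper used inside calculate_max_window:

#include <initializer_list>
#include <iostream>

// Local stand-in: round a dimension up to the per-work-item processing step.
unsigned int ceil_to_multiple(unsigned int value, unsigned int step)
{
    return ((value + step - 1) / step) * step;
}

int main()
{
    const unsigned int step = 16; // elements handled per work item (vload16/vstore16)
    for(unsigned int width : { 24u, 32u, 100u })
    {
        // 24 -> 32, 32 -> 32, 100 -> 112; lanes beyond the tensor's width are
        // expected to be covered by the tensor's padding.
        std::cout << width << " -> " << ceil_to_multiple(width, step) << "\n";
    }
    return 0;
}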