aboutsummaryrefslogtreecommitdiff
path: root/src/core
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2021-05-29 04:01:51 +0100
committerGeorgios Pinitas <georgios.pinitas@arm.com>2021-06-01 16:16:45 +0000
commit9fc3be6250964a2da74cb7a05cf8e352a896ac80 (patch)
treecdfeb80dfb556fb05851a8bb36377dcd3a808256 /src/core
parentb4bb6a03f717a320b935809fde795b3d6ec5a69f (diff)
downloadComputeLibrary-9fc3be6250964a2da74cb7a05cf8e352a896ac80.tar.gz
Fuse activation in ClDirectConv2dKernel for float types
Resolves: COMPMID-4430 Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Change-Id: I9a40033e09223d601460a7e52cc297c58c9a2737 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5757 Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core')
-rw-r--r--src/core/CL/cl_kernels/direct_convolution.cl4
-rw-r--r--src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp22
-rw-r--r--src/core/gpu/cl/kernels/ClDirectConv2dKernel.h10
3 files changed, 22 insertions, 14 deletions
diff --git a/src/core/CL/cl_kernels/direct_convolution.cl b/src/core/CL/cl_kernels/direct_convolution.cl
index e303d2067d..c5444cd7cc 100644
--- a/src/core/CL/cl_kernels/direct_convolution.cl
+++ b/src/core/CL/cl_kernels/direct_convolution.cl
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
+#include "activation_float_helpers.h"
#include "helpers.h"
#include "helpers_asymm.h"
#include "tile_helpers.h"
@@ -256,6 +257,9 @@ __kernel void direct_convolution_nhwc(
T_QUANTIZE8_ASYMMETRIC(ACC_DATA_TYPE, DST_DATA_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, c, cq);
#endif // defined(IS_QUANTIZED)
+ // Apply activation
+ T_ACTIVATION(DST_DATA_TYPE, M0, N0, ACTIVATION_TYPE, A_VAL, B_VAL, _IOUTPUT_TILE, _IOUTPUT_TILE);
+
// _IOUTPUT_TILE: c = fp32/fp16, cq=qasymm8
// Store the tile in reverse order so the invalid values are overwritten with the valid ones
T_STORE_INDIRECT_WIDTH_SELECT(DST_DATA_TYPE, M0, N0, PARTIAL_N0, DST_TENSOR_TYPE, dst, cout, dst_stride_y, x_cond, _IOUTPUT_TILE, dst_indirect_y);
diff --git a/src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp b/src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp
index 2c9a4f301b..94c4044bff 100644
--- a/src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp
@@ -48,7 +48,8 @@ namespace kernels
{
namespace
{
-Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info)
+Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
+ const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::F16, DataType::F32);
@@ -67,6 +68,8 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, co
ARM_COMPUTE_RETURN_ERROR_ON_MSG((weights->dimension(width_idx) == 3 || weights->dimension(width_idx) == 5 || weights->dimension(width_idx) == 9)
&& std::get<0>(conv_info.stride()) > 2,
"Strides larger than 2 not supported for 3x3, 5x5, 9x9 convolution.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(data_layout != DataLayout::NHWC && !is_data_type_float(src->data_type()) && act_info.enabled(),
+ "Activation supported only for floating point and NHWC.");
if(data_layout == DataLayout::NCHW)
{
@@ -375,16 +378,12 @@ BorderSize ClDirectConv2dKernel::border_size() const
}
void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst,
- const PadStrideInfo &conv_info)
+ const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
// Perform validation
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src,
- weights,
- (biases != nullptr) ? biases : nullptr,
- dst,
- conv_info));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, weights, biases, dst, conv_info, act_info));
const int conv_stride_x = std::get<0>(conv_info.stride());
const int conv_stride_y = std::get<1>(conv_info.stride());
@@ -457,6 +456,7 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT
build_options.add_option("-DM0=" + support::cpp11::to_string(m0));
build_options.add_option("-DK0=" + support::cpp11::to_string(k0));
build_options.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
+ build_options.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_info.activation())));
if(is_data_type_quantized(data_type))
{
@@ -488,6 +488,8 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT
build_options.add_option("-DSRC_OFFSET=" + support::cpp11::to_string(0));
build_options.add_option("-DWEI_OFFSET=" + support::cpp11::to_string(0));
build_options.add_option("-DDST_OFFSET=" + support::cpp11::to_string(0));
+ build_options.add_option_if(act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(act_info.a()));
+ build_options.add_option_if(act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(act_info.b()));
}
}
else
@@ -564,10 +566,10 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT
_config_id += lower_string(string_from_data_layout(_data_layout));
}
-Status ClDirectConv2dKernel::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info,
- const GPUTarget target)
+Status ClDirectConv2dKernel::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
+ const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, const GPUTarget target)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, weights, biases, dst, conv_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, weights, biases, dst, conv_info, act_info));
ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(src->clone().get(), weights->clone().get(), dst->clone().get(), conv_info, target).first);
return Status{};
diff --git a/src/core/gpu/cl/kernels/ClDirectConv2dKernel.h b/src/core/gpu/cl/kernels/ClDirectConv2dKernel.h
index ec76624e5c..e76666fd36 100644
--- a/src/core/gpu/cl/kernels/ClDirectConv2dKernel.h
+++ b/src/core/gpu/cl/kernels/ClDirectConv2dKernel.h
@@ -34,8 +34,7 @@ namespace opencl
{
namespace kernels
{
-/** Interface for the direct convolution kernel.
- */
+/** Interface for the direct convolution kernel. */
class ClDirectConv2dKernel : public IClKernel
{
public:
@@ -62,15 +61,18 @@ public:
* @param[out] dst Output tensor info.
* The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p src.
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] act_info Contains activaton information described in @ref ActivationLayerInfo.
*/
- void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info);
+ void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst,
+ const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info);
/** Static function to check if given info will lead to a valid configuration
*
* Similar to ClDirectConv2dKernel::configure()
*
* @return a status
*/
- static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info, const GPUTarget target);
+ static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst,
+ const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, const GPUTarget target);
// Inherited methods overridden:
void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;