diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/core/CL/cl_kernels/direct_convolution.cl | 4 | ||||
-rw-r--r-- | src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp | 22 | ||||
-rw-r--r-- | src/core/gpu/cl/kernels/ClDirectConv2dKernel.h | 10 | ||||
-rw-r--r-- | src/runtime/gpu/cl/operators/ClDirectConv2d.cpp | 12 |
4 files changed, 30 insertions, 18 deletions
diff --git a/src/core/CL/cl_kernels/direct_convolution.cl b/src/core/CL/cl_kernels/direct_convolution.cl index e303d2067d..c5444cd7cc 100644 --- a/src/core/CL/cl_kernels/direct_convolution.cl +++ b/src/core/CL/cl_kernels/direct_convolution.cl @@ -22,6 +22,7 @@ * SOFTWARE. */ +#include "activation_float_helpers.h" #include "helpers.h" #include "helpers_asymm.h" #include "tile_helpers.h" @@ -256,6 +257,9 @@ __kernel void direct_convolution_nhwc( T_QUANTIZE8_ASYMMETRIC(ACC_DATA_TYPE, DST_DATA_TYPE, M0, N0, DST_OFFSET, DST_SHIFT, DST_MULTIPLIER, c, cq); #endif // defined(IS_QUANTIZED) + // Apply activation + T_ACTIVATION(DST_DATA_TYPE, M0, N0, ACTIVATION_TYPE, A_VAL, B_VAL, _IOUTPUT_TILE, _IOUTPUT_TILE); + // _IOUTPUT_TILE: c = fp32/fp16, cq=qasymm8 // Store the tile in reverse order so the invalid values are overwritten with the valid ones T_STORE_INDIRECT_WIDTH_SELECT(DST_DATA_TYPE, M0, N0, PARTIAL_N0, DST_TENSOR_TYPE, dst, cout, dst_stride_y, x_cond, _IOUTPUT_TILE, dst_indirect_y); diff --git a/src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp b/src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp index 2c9a4f301b..94c4044bff 100644 --- a/src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp +++ b/src/core/gpu/cl/kernels/ClDirectConv2dKernel.cpp @@ -48,7 +48,8 @@ namespace kernels { namespace { -Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info) +Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, + const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info) { ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::F16, DataType::F32); @@ -67,6 +68,8 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, co 
ARM_COMPUTE_RETURN_ERROR_ON_MSG((weights->dimension(width_idx) == 3 || weights->dimension(width_idx) == 5 || weights->dimension(width_idx) == 9) && std::get<0>(conv_info.stride()) > 2, "Strides larger than 2 not supported for 3x3, 5x5, 9x9 convolution."); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(data_layout != DataLayout::NHWC && !is_data_type_float(src->data_type()) && act_info.enabled(), + "Activation supported only for floating point and NHWC."); if(data_layout == DataLayout::NCHW) { @@ -375,16 +378,12 @@ BorderSize ClDirectConv2dKernel::border_size() const } void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, - const PadStrideInfo &conv_info) + const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst); // Perform validation - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, - weights, - (biases != nullptr) ? biases : nullptr, - dst, - conv_info)); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, weights, biases, dst, conv_info, act_info)); const int conv_stride_x = std::get<0>(conv_info.stride()); const int conv_stride_y = std::get<1>(conv_info.stride()); @@ -457,6 +456,7 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT build_options.add_option("-DM0=" + support::cpp11::to_string(m0)); build_options.add_option("-DK0=" + support::cpp11::to_string(k0)); build_options.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); + build_options.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_info.activation()))); if(is_data_type_quantized(data_type)) { @@ -488,6 +488,8 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT build_options.add_option("-DSRC_OFFSET=" + support::cpp11::to_string(0)); build_options.add_option("-DWEI_OFFSET=" + support::cpp11::to_string(0)); 
build_options.add_option("-DDST_OFFSET=" + support::cpp11::to_string(0)); + build_options.add_option_if(act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(act_info.a())); + build_options.add_option_if(act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(act_info.b())); } } else @@ -564,10 +566,10 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT _config_id += lower_string(string_from_data_layout(_data_layout)); } -Status ClDirectConv2dKernel::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info, - const GPUTarget target) +Status ClDirectConv2dKernel::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, + const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, const GPUTarget target) { - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, weights, biases, dst, conv_info)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, weights, biases, dst, conv_info, act_info)); ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(src->clone().get(), weights->clone().get(), dst->clone().get(), conv_info, target).first); return Status{}; diff --git a/src/core/gpu/cl/kernels/ClDirectConv2dKernel.h b/src/core/gpu/cl/kernels/ClDirectConv2dKernel.h index ec76624e5c..e76666fd36 100644 --- a/src/core/gpu/cl/kernels/ClDirectConv2dKernel.h +++ b/src/core/gpu/cl/kernels/ClDirectConv2dKernel.h @@ -34,8 +34,7 @@ namespace opencl { namespace kernels { -/** Interface for the direct convolution kernel. - */ +/** Interface for the direct convolution kernel. */ class ClDirectConv2dKernel : public IClKernel { public: @@ -62,15 +61,18 @@ public: * @param[out] dst Output tensor info. * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p src. 
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] act_info Contains activation information described in @ref ActivationLayerInfo. */ - void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info); + void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, + const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info); /** Static function to check if given info will lead to a valid configuration * * Similar to ClDirectConv2dKernel::configure() * * @return a status */ - static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info, const GPUTarget target); + static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, + const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, const GPUTarget target); // Inherited methods overridden: void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; diff --git a/src/runtime/gpu/cl/operators/ClDirectConv2d.cpp b/src/runtime/gpu/cl/operators/ClDirectConv2d.cpp index 527b3a65f9..13ef42a640 100644 --- a/src/runtime/gpu/cl/operators/ClDirectConv2d.cpp +++ b/src/runtime/gpu/cl/operators/ClDirectConv2d.cpp @@ -47,10 +47,13 @@ ITensorPack select_activation_src_dst(ITensorPack &tensors) void ClDirectConv2d::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info) { + ARM_COMPUTE_ERROR_ON_NULLPTR(src); + // Configure direct convolution kernel - auto k = std::make_unique<kernels::ClDirectConv2dKernel>(); + const ActivationLayerInfo conv2d_act_info = 
(src->data_layout() == DataLayout::NHWC && is_data_type_float(src->data_type())) ? act_info : ActivationLayerInfo(); + auto k = std::make_unique<kernels::ClDirectConv2dKernel>(); k->set_target(CLScheduler::get().target()); - k->configure(compile_context, src, weights, biases, dst, conv_info); + k->configure(compile_context, src, weights, biases, dst, conv_info, conv2d_act_info); _direct_conv_kernel = std::move(k); // Configure border handler @@ -63,7 +66,8 @@ void ClDirectConv2d::configure(const CLCompileContext &compile_context, ITensorI b->configure(compile_context, src, _direct_conv_kernel->border_size(), BorderMode::CONSTANT, zero_value); _src_border_handler = std::move(b); - if(act_info.enabled()) + // Fused activation is currently supported for NHWC and floating point types + if(act_info.enabled() && !conv2d_act_info.enabled()) { auto a = std::make_unique<kernels::ClActivationKernel>(); a->configure(compile_context, dst, dst, act_info); @@ -77,7 +81,7 @@ void ClDirectConv2d::configure(const CLCompileContext &compile_context, ITensorI Status ClDirectConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info) { - ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClDirectConv2dKernel::validate(src, weights, biases, dst, conv_info, CLScheduler::get().target())); + ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClDirectConv2dKernel::validate(src, weights, biases, dst, conv_info, ActivationLayerInfo(), CLScheduler::get().target())); if(act_info.enabled()) { ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClActivationKernel::validate(dst, dst, act_info)); |