From 06d1efd05a55f40867d68f9583053a40b9a0b55c Mon Sep 17 00:00:00 2001 From: Sheri Zhang Date: Wed, 28 Jul 2021 11:20:04 +0100 Subject: Port CLConvolutionLayer Resolves: COMPMID-4508 Signed-off-by: Sheri Zhang Change-Id: Ifcb6d8ab290655dc8f47a067f23324cf227736f1 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6015 Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- Android.bp | 1 + .../runtime/CL/functions/CLConvolutionLayer.h | 23 +- filelist.json | 7 + src/runtime/CL/functions/CLConvolutionLayer.cpp | 220 +++++------------ src/runtime/CL/functions/CLDeconvolutionLayer.cpp | 1 + src/runtime/gpu/cl/operators/ClConcatenate.h | 2 +- src/runtime/gpu/cl/operators/ClConv2d.cpp | 262 +++++++++++++++++++++ src/runtime/gpu/cl/operators/ClConv2d.h | 152 ++++++++++++ tests/validation/CL/ConvolutionLayer.cpp | 74 +++--- 9 files changed, 539 insertions(+), 203 deletions(-) create mode 100644 src/runtime/gpu/cl/operators/ClConv2d.cpp create mode 100644 src/runtime/gpu/cl/operators/ClConv2d.h diff --git a/Android.bp b/Android.bp index dd8ea63d70..d6179cb3c0 100644 --- a/Android.bp +++ b/Android.bp @@ -663,6 +663,7 @@ cc_library_static { "src/runtime/gpu/cl/operators/ClAdd.cpp", "src/runtime/gpu/cl/operators/ClCast.cpp", "src/runtime/gpu/cl/operators/ClConcatenate.cpp", + "src/runtime/gpu/cl/operators/ClConv2d.cpp", "src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.cpp", "src/runtime/gpu/cl/operators/ClCopy.cpp", "src/runtime/gpu/cl/operators/ClCrop.cpp", diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h index 6884754d83..12b3ca1fd2 100644 --- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h @@ -24,10 +24,9 @@ #ifndef ARM_COMPUTE_CLCONVOLUTIONLAYER_H #define ARM_COMPUTE_CLCONVOLUTIONLAYER_H -#include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h" -#include 
"arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h" -#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h" -#include "arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/core/CL/CLCompileContext.h" +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -35,11 +34,15 @@ namespace arm_compute { +class CLCompileContext; +class ICLTensor; +class ITensorInfo; + /** Basic function to compute the convolution layer. This function calls the following OpenCL kernels/functions: * - * -# @ref CLGEMMConvolutionLayer - * -# @ref CLWinogradConvolutionLayer - * -# @ref CLDirectConvolutionLayer + * -# @ref opencl::ClGemmConvolution + * -# @ref opencl::ClWinogradConv2d + * -# @ref opencl::ClDirectConv2d * -# @ref CLFFTConvolutionLayer * * The function selects one of the algorithms mentioned above based on: @@ -182,7 +185,7 @@ public: * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation * available which may introduce a drop of accuracy as well. 
Default is false * - * @return a status + * @return the Convolution Method Hint */ static ConvolutionMethod get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, const ActivationLayerInfo &act_info, const GPUTarget gpu_target, const Size2D &dilation = Size2D(1U, 1U), bool enable_fast_math = false); @@ -191,8 +194,8 @@ public: void prepare() override; private: - std::shared_ptr _memory_manager; - std::unique_ptr _function; + struct Impl; + std::unique_ptr _impl; }; } #endif /* ARM_COMPUTE_CLCONVOLUTIONLAYER_H */ diff --git a/filelist.json b/filelist.json index 73ef4c73f5..f255d76962 100644 --- a/filelist.json +++ b/filelist.json @@ -177,6 +177,13 @@ ] } }, + "Conv2d": { + "files": { + "operator": [ + "src/runtime/gpu/cl/operators/ClConv2d.cpp" + ] + } + }, "PRelu": { "files": { "operator": [ diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp index cc6d9e142d..b295a274bd 100644 --- a/src/runtime/CL/functions/CLConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp @@ -23,24 +23,36 @@ */ #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/utils/quantization/AsymmHelpers.h" -#include "arm_compute/runtime/CL/CLScheduler.h" - -#include -#include -#include +#include "arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h" +#include "src/core/CL/ICLKernel.h" +#include "src/core/helpers/MemoryHelpers.h" +#include "src/runtime/gpu/cl/operators/ClConv2d.h" +#include "support/Cast.h" namespace arm_compute { using namespace 
arm_compute::misc::shape_calculator; +using namespace arm_compute::experimental; +struct CLConvolutionLayer::Impl +{ + MemoryGroup memory_group{}; + std::shared_ptr memory_manager{}; + std::unique_ptr op{ nullptr }; + ITensorPack run_pack{}; + ITensorPack prep_pack{}; + WorkspaceData workspace{}; + experimental::MemoryRequirements aux_mem_req{}; + std::unique_ptr func{ nullptr }; +}; CLConvolutionLayer::CLConvolutionLayer(std::shared_ptr memory_manager) - : _memory_manager(std::move(memory_manager)), _function() + : _impl(std::make_unique()) { + _impl->memory_manager = std::move(memory_manager); } CLConvolutionLayer::~CLConvolutionLayer() = default; @@ -59,43 +71,40 @@ void CLConvolutionLayer::configure(const CLCompileContext &compile_context, ICLT ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation, act_info, enable_fast_math, num_groups)); - switch(CLConvolutionLayer::get_convolution_method(input->info(), weights->info(), output->info(), conv_info, - weights_info, act_info, CLScheduler::get().target(), dilation, enable_fast_math)) + const Conv2dInfo conv2d_info = Conv2dInfo(conv_info, dilation, act_info, enable_fast_math, num_groups); + + switch(opencl::ClConv2d::get_convolution_method(input->info(), weights->info(), output->info(), conv2d_info, + weights_info, CLScheduler::get().target())) { case ConvolutionMethod::WINOGRAD: - { - ARM_COMPUTE_ERROR_ON(num_groups != 1); - auto f = std::make_unique(_memory_manager); - f->configure(compile_context, input, weights, biases, output, conv_info, act_info, enable_fast_math); - _function = std::move(f); - break; - } case ConvolutionMethod::DIRECT: - { - ARM_COMPUTE_ERROR_ON(num_groups != 1); - auto f = std::make_unique(); - f->configure(compile_context, input, weights, biases, output, conv_info, act_info); - _function = std::move(f); - break; - } case ConvolutionMethod::GEMM: { - auto f = 
std::make_unique(_memory_manager); - f->configure(compile_context, input, weights, biases, output, conv_info, weights_info, dilation, act_info, num_groups); - _function = std::move(f); + auto f = std::make_unique(); + f->configure(compile_context, input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv2d_info, weights_info); + _impl->op = std::move(f); break; } case ConvolutionMethod::FFT: { - auto f = std::make_unique(_memory_manager); + auto f = std::make_unique(_impl->memory_manager); f->configure(compile_context, input, weights, biases, output, conv_info, act_info, enable_fast_math); - _function = std::move(f); + _impl->func = std::move(f); break; } default: ARM_COMPUTE_ERROR("Not supported."); break; } + + if(_impl->op) + { + _impl->memory_group = MemoryGroup(std::move(_impl->memory_manager)); + _impl->aux_mem_req = _impl->op->workspace(); + _impl->run_pack = { { ACL_SRC_0, input }, { ACL_SRC_1, weights }, { ACL_SRC_2, biases }, { ACL_DST, output } }; + _impl->prep_pack = { { ACL_SRC_1, weights }, { ACL_SRC_2, biases } }; + _impl->workspace = manage_workspace(_impl->aux_mem_req, _impl->memory_group, _impl->run_pack, _impl->prep_pack); + } } Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, @@ -104,28 +113,16 @@ Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1) && (input->data_layout() != DataLayout::NCHW), "Grouping (num_groups != 1) with NHWC data layout is not supported"); - const GPUTarget gpu_target = CLScheduler::get().target(); + const GPUTarget gpu_target = CLScheduler::get().target(); + const Conv2dInfo conv2d_info = Conv2dInfo(conv_info, dilation, act_info, enable_fast_math, num_groups); - 
switch(CLConvolutionLayer::get_convolution_method(input, weights, output, conv_info, weights_info, act_info, gpu_target, dilation, enable_fast_math)) + switch(opencl::ClConv2d::get_convolution_method(input, weights, output, conv2d_info, weights_info, gpu_target)) { case ConvolutionMethod::WINOGRAD: - { - //Validate Winograd - ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups != 1, "Grouping (num_groups != 1) with CLWinogradConvolutionLayer is not supported"); - ARM_COMPUTE_RETURN_ON_ERROR(CLWinogradConvolutionLayer::validate(input, weights, biases, output, conv_info, act_info, enable_fast_math)); - break; - } case ConvolutionMethod::DIRECT: - { - // Validate direct convolution layer - ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups != 1, "Grouping (num_groups != 1) with CLDirectConvolutionLayer is not supported"); - ARM_COMPUTE_RETURN_ON_ERROR(CLDirectConvolutionLayer::validate(input, weights, biases, output, conv_info, act_info)); - break; - } case ConvolutionMethod::GEMM: { - // Validate gemm-based convolution layer - ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation, act_info, num_groups)); + ARM_COMPUTE_RETURN_ON_ERROR(opencl::ClConv2d::validate(input, weights, biases, output, conv2d_info, weights_info)); break; } case ConvolutionMethod::FFT: @@ -145,125 +142,38 @@ Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, const ActivationLayerInfo &act_info, const GPUTarget gpu_target, const Size2D &dilation, bool enable_fast_math) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input); - ARM_COMPUTE_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_ERROR_ON_NULLPTR(weights); - ARM_COMPUTE_UNUSED(weights_info); - ARM_COMPUTE_UNUSED(gpu_target); - - const size_t idx_w = 
get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH); - const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT); - const size_t idx_c = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL); + const Conv2dInfo conv2d_info = Conv2dInfo(conv_info, dilation, act_info, enable_fast_math, 1); + return opencl::ClConv2d::get_convolution_method(input, weights, output, conv2d_info, weights_info, gpu_target); +} - /* Input spatial dims, kernel size, IFM/OFM, conv info*/ - using ConvolutionConfiguration = std::tuple<Size2D, Size2D, Size2D, PadStrideInfo, DataLayout>; - using ConfigurationMethod = std::pair<ConvolutionConfiguration, ConvolutionMethod>; +void CLConvolutionLayer::run() +{ + prepare(); - const std::vector<ConfigurationMethod> known_configs = - { - // Alexnet - ConfigurationMethod(ConvolutionConfiguration(Size2D(27U, 27U), Size2D(5U, 5U), Size2D(48U, 128U), PadStrideInfo(1U, 1U, 2U, 2U), DataLayout::NCHW), ConvolutionMethod::DIRECT), - // VGG16 / VGG19 - ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 64U), PadStrideInfo(1U, 1U, 1U, 1U), DataLayout::NCHW), ConvolutionMethod::DIRECT), - // Mobilenet 224 - ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NCHW), ConvolutionMethod::GEMM), - // Mobilenet 160 - ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NCHW), ConvolutionMethod::GEMM), - // Mobilenet 224 - ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NHWC), ConvolutionMethod::GEMM), - // Mobilenet 160 - ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, 
DimensionRoundingType::FLOOR), DataLayout::NHWC), ConvolutionMethod::GEMM), - }; + MemoryGroupResourceScope scope_mg(_impl->memory_group); - const auto find_config = [&](ConfigurationMethod c) + if(_impl->func) { - const ConvolutionConfiguration config = c.first; - const PadStrideInfo info = std::get<3>(config); - const DataLayout data_layout = std::get<4>(config); - - return std::get<0>(config) == Size2D(input->dimension(idx_w), input->dimension(idx_h)) && std::get<1>(config) == Size2D(weights->dimension(idx_w), weights->dimension(idx_h)) - && std::get<2>(config) == Size2D(weights->dimension(idx_c), weights->dimension(3)) && info.pad_top() == conv_info.pad_top() && info.pad_right() == conv_info.pad_right() - && info.pad_bottom() == conv_info.pad_bottom() && info.pad_left() == conv_info.pad_left() && info.stride() == conv_info.stride() && (data_layout == input->data_layout()); - }; - - std::vector<ConfigurationMethod>::const_iterator found; - if((found = std::find_if(known_configs.begin(), known_configs.end(), find_config)) != known_configs.end()) + _impl->func->run(); + } + else { - return (*found).second; + _impl->op->run(_impl->run_pack); } +} - if(dilation != Size2D(1U, 1U)) +void CLConvolutionLayer::prepare() +{ + if(_impl->func) { - return ConvolutionMethod::GEMM; + _impl->func->prepare(); } else { - if(input->data_layout() == DataLayout::NCHW) - { - // SRGAN - if((input->dimension(idx_h) > 720U) && (output->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && (conv_info.pad_top() < 3) - && (CLDirectConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info))) - { - return ConvolutionMethod::DIRECT; - } - if((weights->dimension(idx_h) > 5) && (input->dimension(idx_c) > output->dimension(idx_c)) && (CLFFTConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math))) - { - return ConvolutionMethod::FFT; - } - if(input->dimension(idx_c) < 16) - { - return ConvolutionMethod::GEMM; - } - return 
bool(CLWinogradConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math)) ? ConvolutionMethod::WINOGRAD : ConvolutionMethod::GEMM; - } - else - { - const bool is_direct_valid = bool(CLDirectConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info)); - const bool is_wino_valid = bool(CLWinogradConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math)); - - // SRGAN case - if((input->dimension(idx_h) > 720U) && (output->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && (conv_info.pad_top() < 3) - && is_direct_valid) - { - return ConvolutionMethod::DIRECT; - } - - // Floating-point case: GeMM/Direct/Winograd - if(is_data_type_float(input->data_type())) - { - const bool is_large_kernel_sz = (weights->dimension(idx_w) >= 5) && (weights->dimension(idx_h) >= 5); - const bool is_ifm_ge_16 = input->dimension(idx_c) >= 16; - const bool are_ifm_ge_ofm = input->dimension(idx_c) >= output->dimension(idx_c); + _impl->op->prepare(_impl->prep_pack); - // Run Winograd if valid and IFM >= 16 - if(is_wino_valid && is_ifm_ge_16) - { - return ConvolutionMethod::WINOGRAD; - } - // Run Direct for Large kernel size - if(is_direct_valid && is_large_kernel_sz && is_ifm_ge_16 && are_ifm_ge_ofm) - { - return ConvolutionMethod::DIRECT; - } - - // Default case - return ConvolutionMethod::GEMM; - } - - // Generic case for quantized. 
Only GeMM - return ConvolutionMethod::GEMM; - } + // Release temporary tensors that are only used in prepare stage + release_temporaries(_impl->aux_mem_req, _impl->workspace); } } - -void CLConvolutionLayer::run() -{ - prepare(); - _function->run(); -} - -void CLConvolutionLayer::prepare() -{ - _function->prepare(); -} -} // namespace arm_compute +} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp index 918848745e..8038a75dc1 100644 --- a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp @@ -28,6 +28,7 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "src/core/CL/ICLKernel.h" #include #include diff --git a/src/runtime/gpu/cl/operators/ClConcatenate.h b/src/runtime/gpu/cl/operators/ClConcatenate.h index fb1235b9be..153400bd73 100644 --- a/src/runtime/gpu/cl/operators/ClConcatenate.h +++ b/src/runtime/gpu/cl/operators/ClConcatenate.h @@ -75,5 +75,5 @@ private: unsigned int _axis{ 0 }; }; } // namespace opencl -} // namespace arm_comPUTE +} // namespace arm_compute #endif /* ARM_COMPUTE_CL_CONCATENATE_H */ diff --git a/src/runtime/gpu/cl/operators/ClConv2d.cpp b/src/runtime/gpu/cl/operators/ClConv2d.cpp new file mode 100644 index 0000000000..4cd65290f3 --- /dev/null +++ b/src/runtime/gpu/cl/operators/ClConv2d.cpp @@ -0,0 +1,262 @@ +/* + * Copyright (c) 2021 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "src/runtime/gpu/cl/operators/ClConv2d.h" + +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h" +#include "src/runtime/gpu/cl/operators/ClDirectConv2d.h" +#include "src/runtime/gpu/cl/operators/ClGemmConvolution.h" +#include "src/runtime/gpu/cl/operators/ClWinogradConv2d.h" + +#include + +namespace arm_compute +{ +namespace opencl +{ +using namespace arm_compute::misc::shape_calculator; + +ClConv2d::ClConv2d() + : _operator() +{ +} + +ClConv2d::~ClConv2d() = default; + +void ClConv2d::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const Conv2dInfo &conv2d_info, + const WeightsInfo &weights_info) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst); + ARM_COMPUTE_ERROR_THROW_ON(ClConv2d::validate(src, weights, ((biases != nullptr) ? 
biases : nullptr), dst, conv2d_info, weights_info)); + + switch(ClConv2d::get_convolution_method(src, weights, dst, conv2d_info, weights_info, CLScheduler::get().target())) + { + case ConvolutionMethod::WINOGRAD: + { + ARM_COMPUTE_ERROR_ON(conv2d_info.num_groups != 1); + auto f = std::make_unique<ClWinogradConv2d>(); + f->configure(compile_context, src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info, conv2d_info.enable_fast_math); + _operator = std::move(f); + break; + } + case ConvolutionMethod::DIRECT: + { + ARM_COMPUTE_ERROR_ON(conv2d_info.num_groups != 1); + auto f = std::make_unique<ClDirectConv2d>(); + f->configure(compile_context, src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info); + _operator = std::move(f); + break; + } + case ConvolutionMethod::GEMM: + { + auto f = std::make_unique<ClGemmConvolution>(); + f->configure(compile_context, src, weights, biases, dst, conv2d_info, weights_info); + _operator = std::move(f); + break; + } + default: + ARM_COMPUTE_ERROR("Not supported."); + break; + } + _aux_mem = _operator->workspace(); +} + +Status ClConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const Conv2dInfo &conv2d_info, + const WeightsInfo &weights_info) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst); + ARM_COMPUTE_RETURN_ERROR_ON_MSG((conv2d_info.num_groups != 1) && (src->data_layout() != DataLayout::NCHW), "Grouping (num_groups != 1) with NHWC data layout is not supported"); + + const GPUTarget gpu_target = CLScheduler::get().target(); + + switch(ClConv2d::get_convolution_method(src, weights, dst, conv2d_info, weights_info, gpu_target)) + { + case ConvolutionMethod::WINOGRAD: + { + //Validate Winograd + ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.num_groups != 1, "Grouping (num_groups != 1) with ClWinogradConv2d is not supported"); + ARM_COMPUTE_RETURN_ON_ERROR(ClWinogradConv2d::validate(src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info, 
conv2d_info.enable_fast_math)); + break; + } + case ConvolutionMethod::DIRECT: + { + // Validate direct convolution layer + ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.num_groups != 1, "Grouping (num_groups != 1) with ClDirectConv2d is not supported"); + ARM_COMPUTE_RETURN_ON_ERROR(ClDirectConv2d::validate(src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info)); + break; + } + case ConvolutionMethod::GEMM: + { + // Validate gemm-based convolution layer + ARM_COMPUTE_RETURN_ON_ERROR(ClGemmConvolution::validate(src, weights, biases, dst, conv2d_info, weights_info)); + break; + } + default: + ARM_COMPUTE_ERROR("Not supported."); + break; + } + + return Status{}; +} + +ConvolutionMethod ClConv2d::get_convolution_method(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const Conv2dInfo &conv2d_info, + const WeightsInfo &weights_info, const GPUTarget gpu_target) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(src); + ARM_COMPUTE_ERROR_ON_NULLPTR(dst); + ARM_COMPUTE_ERROR_ON_NULLPTR(weights); + ARM_COMPUTE_UNUSED(weights_info); + ARM_COMPUTE_UNUSED(gpu_target); + + const PadStrideInfo conv_info = conv2d_info.conv_info; + const ActivationLayerInfo act_info = conv2d_info.act_info; + const Size2D dilation = conv2d_info.dilation; + bool enable_fast_math = conv2d_info.enable_fast_math; + + const size_t idx_w = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT); + const size_t idx_c = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::CHANNEL); + + /* Input spatial dims, kernel size, IFM/OFM, conv info*/ + using ConvolutionConfiguration = std::tuple<Size2D, Size2D, Size2D, PadStrideInfo, DataLayout>; + using ConfigurationMethod = std::pair<ConvolutionConfiguration, ConvolutionMethod>; + + const std::vector<ConfigurationMethod> known_configs = + { + // Alexnet + ConfigurationMethod(ConvolutionConfiguration(Size2D(27U, 27U), Size2D(5U, 5U), Size2D(48U, 128U), PadStrideInfo(1U, 1U, 2U, 2U), DataLayout::NCHW), 
ConvolutionMethod::DIRECT), + // VGG16 / VGG19 + ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 64U), PadStrideInfo(1U, 1U, 1U, 1U), DataLayout::NCHW), ConvolutionMethod::DIRECT), + // Mobilenet 224 + ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NCHW), ConvolutionMethod::GEMM), + // Mobilenet 160 + ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NCHW), ConvolutionMethod::GEMM), + // Mobilenet 224 + ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NHWC), ConvolutionMethod::GEMM), + // Mobilenet 160 + ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NHWC), ConvolutionMethod::GEMM), + }; + + const auto find_config = [&](ConfigurationMethod c) + { + const ConvolutionConfiguration config = c.first; + const PadStrideInfo info = std::get<3>(config); + const DataLayout data_layout = std::get<4>(config); + + return std::get<0>(config) == Size2D(src->dimension(idx_w), src->dimension(idx_h)) && std::get<1>(config) == Size2D(weights->dimension(idx_w), weights->dimension(idx_h)) + && std::get<2>(config) == Size2D(weights->dimension(idx_c), weights->dimension(3)) && info.pad_top() == conv_info.pad_top() && info.pad_right() == conv_info.pad_right() + && info.pad_bottom() == conv_info.pad_bottom() && info.pad_left() == conv_info.pad_left() && info.stride() == conv_info.stride() && (data_layout == src->data_layout()); + }; + + std::vector<ConfigurationMethod>::const_iterator found; + if((found = std::find_if(known_configs.begin(), 
known_configs.end(), find_config)) != known_configs.end()) + { + return (*found).second; + } + + if(dilation != Size2D(1U, 1U)) + { + return ConvolutionMethod::GEMM; + } + else + { + if(src->data_layout() == DataLayout::NCHW) + { + // SRGAN + if((src->dimension(idx_h) > 720U) && (dst->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && (conv_info.pad_top() < 3) + && (ClDirectConv2d::validate(src, weights, nullptr, dst, conv_info, act_info))) + { + return ConvolutionMethod::DIRECT; + } + if((weights->dimension(idx_h) > 5) && (src->dimension(idx_c) > dst->dimension(idx_c)) && (CLFFTConvolutionLayer::validate(src, weights, nullptr, dst, conv_info, act_info, enable_fast_math))) + { + return ConvolutionMethod::FFT; + } + if(src->dimension(idx_c) < 16) + { + return ConvolutionMethod::GEMM; + } + return bool(ClWinogradConv2d::validate(src, weights, nullptr, dst, conv_info, act_info, enable_fast_math)) ? ConvolutionMethod::WINOGRAD : ConvolutionMethod::GEMM; + } + else + { + const bool is_direct_valid = bool(ClDirectConv2d::validate(src, weights, nullptr, dst, conv_info, act_info)); + const bool is_wino_valid = bool(ClWinogradConv2d::validate(src, weights, nullptr, dst, conv_info, act_info, enable_fast_math)); + + // SRGAN case + if((src->dimension(idx_h) > 720U) && (dst->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && (conv_info.pad_top() < 3) + && is_direct_valid) + { + return ConvolutionMethod::DIRECT; + } + + // Floating-point case: GeMM/Direct/Winograd + if(is_data_type_float(src->data_type())) + { + const bool is_large_kernel_sz = (weights->dimension(idx_w) >= 7) && (weights->dimension(idx_h) >= 7); + const bool is_ifm_ge_16 = src->dimension(idx_c) >= 16; + + // Run Winograd if valid and IFM >= 16 + if(is_wino_valid && is_ifm_ge_16) + { + return ConvolutionMethod::WINOGRAD; + } + // Run Direct for Large kernel size + if(is_large_kernel_sz && is_ifm_ge_16 && is_direct_valid) + { + return ConvolutionMethod::DIRECT; + } + + // Default case + 
return ConvolutionMethod::GEMM; + } + + // Generic case for quantized. Only GeMM + return ConvolutionMethod::GEMM; + } + } +} + +void ClConv2d::run(ITensorPack &tensors) +{ + prepare(tensors); + _operator->run(tensors); +} + +void ClConv2d::prepare(ITensorPack &tensors) +{ + _operator->prepare(tensors); +} + +experimental::MemoryRequirements ClConv2d::workspace() const +{ + return _aux_mem; +} +} // namespace opencl +} // namespace arm_compute diff --git a/src/runtime/gpu/cl/operators/ClConv2d.h b/src/runtime/gpu/cl/operators/ClConv2d.h new file mode 100644 index 0000000000..0888c2f47b --- /dev/null +++ b/src/runtime/gpu/cl/operators/ClConv2d.h @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#ifndef ARM_COMPUTE_CLCONV2D_H
+#define ARM_COMPUTE_CLCONV2D_H
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/FunctionDescriptors.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+#include "src/runtime/gpu/cl/IClOperator.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+/** Basic function to compute the convolution layer. This function calls the following OpenCL kernels/functions:
+ *
+ * -# @ref opencl::ClGemmConvolution
+ * -# @ref opencl::ClWinogradConv2d
+ * -# @ref opencl::ClDirectConv2d
+ * -# @ref CLFFTConvolutionLayer
+ *
+ * The function selects one of the algorithms mentioned above based on:
+ *      - The size of the kernel
+ *      - Number of src/dst feature maps
+ *      - Amount of memory needed
+ *
+ * Generally GEMM-based convolution is executed when neither Winograd nor FFT nor Direct convolution can be performed.
+ *
+ * FP32 Algorithm| Filter Size                                                 | Input/Output feature maps                 |
+ * --------------|-------------------------------------------------------------|-------------------------------------------|
+ * Winograd      | 3x3 1x3 3x1 5x1 1x5 5x5(fast maths) 7x1 1x7                 | Input channels is greater than 3          |
+ * FFT           | Squared kernels and greater than 9x9                        | Input feature maps > Output feature maps  |
+ * DirectConv    | 9x9                                                         |                                           |
+ * GEMM          | Any size                                                    |                                           |
+ *
+ * Winograd 5x5 requires fast maths enabled.
+ *
+ * FP16 Algorithm| Filter Size                | Input/Output feature maps                 |
+ * --------------|----------------------------|-------------------------------------------|
+ * Winograd      | 3x3 1x3 3x1 5x1 1x5 5x5    | Input channels is greater than 3          |
+ * FFT           | Not supported              |                                           |
+ * DirectConv    | 9x9                        |                                           |
+ * GEMM          | Any size                   |                                           |
+ *
+ * Winograd FP16 requires fast maths enabled.
+ *
+ */
+class ClConv2d : public IClOperator
+{
+public:
+    /** Default constructor */
+    ClConv2d();
+    /** Default destructor */
+    ~ClConv2d();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    ClConv2d(const ClConv2d &) = delete;
+    /** Default move constructor */
+    ClConv2d(ClConv2d &&) = default;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    ClConv2d &operator=(const ClConv2d &) = delete;
+    /** Default move assignment operator */
+    ClConv2d &operator=(ClConv2d &&) = default;
+    /** Set the src and dst tensors.
+     *
+     * Valid data layouts:
+     * - NHWC
+     * - NCHW
+     *
+     * Valid data type configurations:
+     * |src0           |src1               |src2   |dst            |
+     * |:--------------|:------------------|:------|:--------------|
+     * |F16            |F16                |F16    |F16            |
+     * |F32            |F32                |F32    |F32            |
+     * |QASYMM8        |QASYMM8            |S32    |QASYMM8        |
+     * |QASYMM8        |QSYMM8_PER_CHANNEL |S32    |QASYMM8        |
+     * |QASYMM8_SIGNED |QASYMM8_SIGNED     |S32    |QASYMM8_SIGNED |
+     * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32    |QASYMM8_SIGNED |
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  src             Source tensor info. 3 lower dimensions represent a single src [width, height, IFM],
+     *                             while every optional dimension from 4 and above represent a batch of srcs.
+     *                             Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in]  weights         Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+     *                             Data type supported: Same as @p src, also could be QSYMM8_PER_CHANNEL if src is QASYMM8/QASYMM8_SIGNED.
+     * @param[in]  biases          Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+     *                             Data type supported: Same as @p src, except for src of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+     * @param[out] dst             Destination tensor info. 3 lower dimensions represent a single dst [width, height, OFM], while the rest represent batch of dsts.
+     *                             Data types supported: Same as @p src.
+     * @param[in]  conv2d_info     Contains convolution 2d info described in @ref Conv2dInfo.
+     * @param[in]  weights_info    Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. Data type supported: Same as @p src.
+     */
+    void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const Conv2dInfo &conv2d_info,
+                   const WeightsInfo &weights_info = WeightsInfo());
+    /** Static function to check if given info will lead to a valid configuration of @ref ClConv2d
+     *
+     * Similar to ClConv2d::configure()
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const Conv2dInfo &conv2d_info,
+                           const WeightsInfo &weights_info = WeightsInfo());
+    /** Static function to check if given info will return the convolution called by @ref ClConv2d
+     *
+     * @param[in] src          Source tensor. 3 lower dimensions represent a single src [width, height, IFM],
+     *                         while every optional dimension from 4 and above represent a batch of srcs.
+     *                         Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in] weights      Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+     *                         Data type supported: Same as @p src, also could be QSYMM8_PER_CHANNEL if src is QASYMM8/QASYMM8_SIGNED.
+     * @param[in] dst          Destination tensor. 3 lower dimensions represent a single dst [width, height, OFM], while the rest represent batch of dsts.
+     *                         Data types supported: Same as @p src.
+     * @param[in] conv2d_info  Contains convolution 2d info described in @ref Conv2dInfo.
+     * @param[in] weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel.
+     * @param[in] gpu_target   Specifies the @p GPUTarget.
+     *
+     * @return the Convolution Method Hint
+     */
+    static ConvolutionMethod get_convolution_method(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const Conv2dInfo &conv2d_info,
+                                                    const WeightsInfo &weights_info, const GPUTarget gpu_target);
+    // Inherited methods overridden:
+    void run(ITensorPack &tensors) override;
+    void prepare(ITensorPack &tensors) override;
+    experimental::MemoryRequirements workspace() const override;
+
+private:
+    std::unique_ptr<IClOperator>     _operator;  /**< NOTE(review): presumably the backend operator chosen in configure() - confirm in ClConv2d.cpp */
+    experimental::MemoryRequirements _aux_mem{}; /**< NOTE(review): presumably the requirements reported by workspace() - confirm in ClConv2d.cpp */
+};
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLCONV2D_H */
diff --git a/tests/validation/CL/ConvolutionLayer.cpp b/tests/validation/CL/ConvolutionLayer.cpp
index 31eed7646c..6824ce1413 100644
--- a/tests/validation/CL/ConvolutionLayer.cpp
+++ b/tests/validation/CL/ConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -216,18 +216,18 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerFixture, framework
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLGEMMConvolutionLayerMixedDataLayoutFixture, framework::DatasetMode::ALL,
-    combine(combine(combine(combine(combine(combine(combine(combine(combine(
-        framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
-        framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))),
-        framework::dataset::make("Bias", TensorShape(2U))),
-        framework::dataset::make("Output", TensorShape(11U, 25U, 2U))),
-        framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
-        framework::dataset::make("Dilation", Size2D(1, 1))),
-        framework::dataset::make("ReshapeWeights", { true })),
-        framework::dataset::make("DataType",DataType::F32)),
-        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-        ActivationFunctionsSmallDataset))
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLGEMMConvolutionLayerMixedDataLayoutFixture, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(combine(combine( + framework::dataset::make("Input", TensorShape(23U, 27U, 5U)), + framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))), + framework::dataset::make("Bias", TensorShape(2U))), + framework::dataset::make("Output", TensorShape(11U, 25U, 2U))), + framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))), + framework::dataset::make("Dilation", Size2D(1, 1))), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsSmallDataset)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); @@ -288,18 +288,18 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerQuantizedFixture validate(CLAccessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLGEMMConvolutionLayerQuantizedMixedDataLayoutFixture, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( - framework::dataset::make("Input", TensorShape(23U, 27U, 5U)), - framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))), - framework::dataset::make("Bias", TensorShape(2U))), - framework::dataset::make("Output", TensorShape(11U, 25U, 2U))), - framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))), - framework::dataset::make("Dilation", Size2D(1, 1))), - framework::dataset::make("ReshapeWeights", { true })), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - QuantizationData), - QuantizedActivationFunctionsSmallDataset)) + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + 
framework::dataset::make("Input", TensorShape(23U, 27U, 5U)), + framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))), + framework::dataset::make("Bias", TensorShape(2U))), + framework::dataset::make("Output", TensorShape(11U, 25U, 2U))), + framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))), + framework::dataset::make("Dilation", Size2D(1, 1))), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + QuantizationData), + QuantizedActivationFunctionsSmallDataset)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); @@ -318,18 +318,18 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerQuantizedFixture, validate(CLAccessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLGEMMConvolutionLayerQuantizedMixedDataLayoutFixture, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( - framework::dataset::make("Input", TensorShape(23U, 27U, 5U)), - framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))), - framework::dataset::make("Bias", TensorShape(2U))), - framework::dataset::make("Output", TensorShape(11U, 25U, 2U))), - framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))), - framework::dataset::make("Dilation", Size2D(1, 1))), - framework::dataset::make("ReshapeWeights", { true })), - framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - QuantizationData), - QuantizedActivationFunctionsSmallDataset)) + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + framework::dataset::make("Input", TensorShape(23U, 27U, 5U)), + framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))), + 
framework::dataset::make("Bias", TensorShape(2U))), + framework::dataset::make("Output", TensorShape(11U, 25U, 2U))), + framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))), + framework::dataset::make("Dilation", Size2D(1, 1))), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + QuantizationData), + QuantizedActivationFunctionsSmallDataset)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); -- cgit v1.2.1