aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSheri Zhang <sheri.zhang@arm.com>2021-07-28 11:20:04 +0100
committerSheri Zhang <sheri.zhang@arm.com>2021-08-02 10:21:15 +0000
commit06d1efd05a55f40867d68f9583053a40b9a0b55c (patch)
treefebdc5b5238c68feadec9b9548b5dd25cfb2aab2
parent529b5a2355ce6354af3ea9f97af810a94908e7fe (diff)
downloadComputeLibrary-06d1efd05a55f40867d68f9583053a40b9a0b55c.tar.gz
Port CLConvolutionLayer
Resolves: COMPMID-4508 Signed-off-by: Sheri Zhang <sheri.zhang@arm.com> Change-Id: Ifcb6d8ab290655dc8f47a067f23324cf227736f1 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6015 Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--Android.bp1
-rw-r--r--arm_compute/runtime/CL/functions/CLConvolutionLayer.h23
-rw-r--r--filelist.json7
-rw-r--r--src/runtime/CL/functions/CLConvolutionLayer.cpp220
-rw-r--r--src/runtime/CL/functions/CLDeconvolutionLayer.cpp1
-rw-r--r--src/runtime/gpu/cl/operators/ClConcatenate.h2
-rw-r--r--src/runtime/gpu/cl/operators/ClConv2d.cpp262
-rw-r--r--src/runtime/gpu/cl/operators/ClConv2d.h152
-rw-r--r--tests/validation/CL/ConvolutionLayer.cpp74
9 files changed, 539 insertions, 203 deletions
diff --git a/Android.bp b/Android.bp
index dd8ea63d70..d6179cb3c0 100644
--- a/Android.bp
+++ b/Android.bp
@@ -663,6 +663,7 @@ cc_library_static {
"src/runtime/gpu/cl/operators/ClAdd.cpp",
"src/runtime/gpu/cl/operators/ClCast.cpp",
"src/runtime/gpu/cl/operators/ClConcatenate.cpp",
+ "src/runtime/gpu/cl/operators/ClConv2d.cpp",
"src/runtime/gpu/cl/operators/ClConvertFullyConnectedWeights.cpp",
"src/runtime/gpu/cl/operators/ClCopy.cpp",
"src/runtime/gpu/cl/operators/ClCrop.cpp",
diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
index 6884754d83..12b3ca1fd2 100644
--- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
@@ -24,10 +24,9 @@
#ifndef ARM_COMPUTE_CLCONVOLUTIONLAYER_H
#define ARM_COMPUTE_CLCONVOLUTIONLAYER_H
-#include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h"
-#include "arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h"
-#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"
-#include "arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/core/CL/CLCompileContext.h"
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
@@ -35,11 +34,15 @@
namespace arm_compute
{
+class CLCompileContext;
+class ICLTensor;
+class ITensorInfo;
+
/** Basic function to compute the convolution layer. This function calls the following OpenCL kernels/functions:
*
- * -# @ref CLGEMMConvolutionLayer
- * -# @ref CLWinogradConvolutionLayer
- * -# @ref CLDirectConvolutionLayer
+ * -# @ref opencl::ClGemmConvolution
+ * -# @ref opencl::ClWinogradConv2d
+ * -# @ref opencl::ClDirectConv2d
* -# @ref CLFFTConvolutionLayer
*
* The function selects one of the algorithms mentioned above based on:
@@ -182,7 +185,7 @@ public:
* @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
* available which may introduce a drop of accuracy as well. Default is false
*
- * @return a status
+ * @return the Convolution Method Hint
*/
static ConvolutionMethod get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info,
const WeightsInfo &weights_info, const ActivationLayerInfo &act_info, const GPUTarget gpu_target, const Size2D &dilation = Size2D(1U, 1U), bool enable_fast_math = false);
@@ -191,8 +194,8 @@ public:
void prepare() override;
private:
- std::shared_ptr<IMemoryManager> _memory_manager;
- std::unique_ptr<IFunction> _function;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
}
#endif /* ARM_COMPUTE_CLCONVOLUTIONLAYER_H */
diff --git a/filelist.json b/filelist.json
index 73ef4c73f5..f255d76962 100644
--- a/filelist.json
+++ b/filelist.json
@@ -177,6 +177,13 @@
]
}
},
+ "Conv2d": {
+ "files": {
+ "operator": [
+ "src/runtime/gpu/cl/operators/ClConv2d.cpp"
+ ]
+ }
+ },
"PRelu": {
"files": {
"operator": [
diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp
index cc6d9e142d..b295a274bd 100644
--- a/src/runtime/CL/functions/CLConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp
@@ -23,24 +23,36 @@
*/
#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-
-#include <cmath>
-#include <memory>
-#include <tuple>
+#include "arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h"
+#include "src/core/CL/ICLKernel.h"
+#include "src/core/helpers/MemoryHelpers.h"
+#include "src/runtime/gpu/cl/operators/ClConv2d.h"
+#include "support/Cast.h"
namespace arm_compute
{
using namespace arm_compute::misc::shape_calculator;
+using namespace arm_compute::experimental;
+/** Internal state of CLConvolutionLayer, reached through the _impl pointer. */
+struct CLConvolutionLayer::Impl
+{
+ // Rebuilt from memory_manager in configure() when the operator path is taken; run() opens a resource scope on it.
+ MemoryGroup memory_group{};
+ // Manager received by the constructor; handed to CLFFTConvolutionLayer or moved into memory_group by configure().
+ std::shared_ptr<IMemoryManager> memory_manager{};
+ // Operator used for the WINOGRAD / DIRECT / GEMM methods (an opencl::ClConv2d); stays null on the FFT path.
+ std::unique_ptr<opencl::IClOperator> op{ nullptr };
+ // Tensors passed to op->run(): input, weights, biases and output as set up in configure().
+ ITensorPack run_pack{};
+ // Tensors passed to op->prepare(): weights and biases as set up in configure().
+ ITensorPack prep_pack{};
+ // Result of manage_workspace() for aux_mem_req; later passed to release_temporaries() in prepare().
+ WorkspaceData<CLTensor> workspace{};
+ // Copy of op->workspace() taken in configure().
+ experimental::MemoryRequirements aux_mem_req{};
+ // Function used for the FFT method (a CLFFTConvolutionLayer); stays null on the operator path.
+ std::unique_ptr<IFunction> func{ nullptr };
+};
CLConvolutionLayer::CLConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_manager(std::move(memory_manager)), _function()
+ : _impl(std::make_unique<Impl>())
{
+ _impl->memory_manager = std::move(memory_manager);
}
CLConvolutionLayer::~CLConvolutionLayer() = default;
@@ -59,43 +71,40 @@ void CLConvolutionLayer::configure(const CLCompileContext &compile_context, ICLT
ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation, act_info,
enable_fast_math, num_groups));
- switch(CLConvolutionLayer::get_convolution_method(input->info(), weights->info(), output->info(), conv_info,
- weights_info, act_info, CLScheduler::get().target(), dilation, enable_fast_math))
+ const Conv2dInfo conv2d_info = Conv2dInfo(conv_info, dilation, act_info, enable_fast_math, num_groups);
+
+ switch(opencl::ClConv2d::get_convolution_method(input->info(), weights->info(), output->info(), conv2d_info,
+ weights_info, CLScheduler::get().target()))
{
case ConvolutionMethod::WINOGRAD:
- {
- ARM_COMPUTE_ERROR_ON(num_groups != 1);
- auto f = std::make_unique<CLWinogradConvolutionLayer>(_memory_manager);
- f->configure(compile_context, input, weights, biases, output, conv_info, act_info, enable_fast_math);
- _function = std::move(f);
- break;
- }
case ConvolutionMethod::DIRECT:
- {
- ARM_COMPUTE_ERROR_ON(num_groups != 1);
- auto f = std::make_unique<CLDirectConvolutionLayer>();
- f->configure(compile_context, input, weights, biases, output, conv_info, act_info);
- _function = std::move(f);
- break;
- }
case ConvolutionMethod::GEMM:
{
- auto f = std::make_unique<CLGEMMConvolutionLayer>(_memory_manager);
- f->configure(compile_context, input, weights, biases, output, conv_info, weights_info, dilation, act_info, num_groups);
- _function = std::move(f);
+ auto f = std::make_unique<opencl::ClConv2d>();
+ f->configure(compile_context, input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv2d_info, weights_info);
+ _impl->op = std::move(f);
break;
}
case ConvolutionMethod::FFT:
{
- auto f = std::make_unique<CLFFTConvolutionLayer>(_memory_manager);
+ auto f = std::make_unique<CLFFTConvolutionLayer>(_impl->memory_manager);
f->configure(compile_context, input, weights, biases, output, conv_info, act_info, enable_fast_math);
- _function = std::move(f);
+ _impl->func = std::move(f);
break;
}
default:
ARM_COMPUTE_ERROR("Not supported.");
break;
}
+
+ if(_impl->op)
+ {
+ _impl->memory_group = MemoryGroup(std::move(_impl->memory_manager));
+ _impl->aux_mem_req = _impl->op->workspace();
+ _impl->run_pack = { { ACL_SRC_0, input }, { ACL_SRC_1, weights }, { ACL_SRC_2, biases }, { ACL_DST, output } };
+ _impl->prep_pack = { { ACL_SRC_1, weights }, { ACL_SRC_2, biases } };
+ _impl->workspace = manage_workspace<CLTensor>(_impl->aux_mem_req, _impl->memory_group, _impl->run_pack, _impl->prep_pack);
+ }
}
Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
@@ -104,28 +113,16 @@ Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1) && (input->data_layout() != DataLayout::NCHW), "Grouping (num_groups != 1) with NHWC data layout is not supported");
- const GPUTarget gpu_target = CLScheduler::get().target();
+ const GPUTarget gpu_target = CLScheduler::get().target();
+ const Conv2dInfo conv2d_info = Conv2dInfo(conv_info, dilation, act_info, enable_fast_math, num_groups);
- switch(CLConvolutionLayer::get_convolution_method(input, weights, output, conv_info, weights_info, act_info, gpu_target, dilation, enable_fast_math))
+ switch(opencl::ClConv2d::get_convolution_method(input, weights, output, conv2d_info, weights_info, gpu_target))
{
case ConvolutionMethod::WINOGRAD:
- {
- //Validate Winograd
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups != 1, "Grouping (num_groups != 1) with CLWinogradConvolutionLayer is not supported");
- ARM_COMPUTE_RETURN_ON_ERROR(CLWinogradConvolutionLayer::validate(input, weights, biases, output, conv_info, act_info, enable_fast_math));
- break;
- }
case ConvolutionMethod::DIRECT:
- {
- // Validate direct convolution layer
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups != 1, "Grouping (num_groups != 1) with CLDirectConvolutionLayer is not supported");
- ARM_COMPUTE_RETURN_ON_ERROR(CLDirectConvolutionLayer::validate(input, weights, biases, output, conv_info, act_info));
- break;
- }
case ConvolutionMethod::GEMM:
{
- // Validate gemm-based convolution layer
- ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation, act_info, num_groups));
+ ARM_COMPUTE_RETURN_ON_ERROR(opencl::ClConv2d::validate(input, weights, biases, output, conv2d_info, weights_info));
break;
}
case ConvolutionMethod::FFT:
@@ -145,125 +142,38 @@ Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo
ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info,
const WeightsInfo &weights_info, const ActivationLayerInfo &act_info, const GPUTarget gpu_target, const Size2D &dilation, bool enable_fast_math)
{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input);
- ARM_COMPUTE_ERROR_ON_NULLPTR(output);
- ARM_COMPUTE_ERROR_ON_NULLPTR(weights);
- ARM_COMPUTE_UNUSED(weights_info);
- ARM_COMPUTE_UNUSED(gpu_target);
-
- const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
- const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
- const size_t idx_c = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
+ const Conv2dInfo conv2d_info = Conv2dInfo(conv_info, dilation, act_info, enable_fast_math, 1);
+ return opencl::ClConv2d::get_convolution_method(input, weights, output, conv2d_info, weights_info, gpu_target);
+}
- /* Input spatial dims, kernel size, IFM/OFM, conv info*/
- using ConvolutionConfiguration = std::tuple<Size2D, Size2D, Size2D, PadStrideInfo, DataLayout>;
- using ConfigurationMethod = std::pair<ConvolutionConfiguration, ConvolutionMethod>;
+void CLConvolutionLayer::run()
+{
+ prepare();
- const std::vector<ConfigurationMethod> known_configs =
- {
- // Alexnet
- ConfigurationMethod(ConvolutionConfiguration(Size2D(27U, 27U), Size2D(5U, 5U), Size2D(48U, 128U), PadStrideInfo(1U, 1U, 2U, 2U), DataLayout::NCHW), ConvolutionMethod::DIRECT),
- // VGG16 / VGG19
- ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 64U), PadStrideInfo(1U, 1U, 1U, 1U), DataLayout::NCHW), ConvolutionMethod::DIRECT),
- // Mobilenet 224
- ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NCHW), ConvolutionMethod::GEMM),
- // Mobilenet 160
- ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NCHW), ConvolutionMethod::GEMM),
- // Mobilenet 224
- ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NHWC), ConvolutionMethod::GEMM),
- // Mobilenet 160
- ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NHWC), ConvolutionMethod::GEMM),
- };
+ MemoryGroupResourceScope scope_mg(_impl->memory_group);
- const auto find_config = [&](ConfigurationMethod c)
+ if(_impl->func)
{
- const ConvolutionConfiguration config = c.first;
- const PadStrideInfo info = std::get<3>(config);
- const DataLayout data_layout = std::get<4>(config);
-
- return std::get<0>(config) == Size2D(input->dimension(idx_w), input->dimension(idx_h)) && std::get<1>(config) == Size2D(weights->dimension(idx_w), weights->dimension(idx_h))
- && std::get<2>(config) == Size2D(weights->dimension(idx_c), weights->dimension(3)) && info.pad_top() == conv_info.pad_top() && info.pad_right() == conv_info.pad_right()
- && info.pad_bottom() == conv_info.pad_bottom() && info.pad_left() == conv_info.pad_left() && info.stride() == conv_info.stride() && (data_layout == input->data_layout());
- };
-
- std::vector<ConfigurationMethod>::const_iterator found;
- if((found = std::find_if(known_configs.begin(), known_configs.end(), find_config)) != known_configs.end())
+ _impl->func->run();
+ }
+ else
{
- return (*found).second;
+ _impl->op->run(_impl->run_pack);
}
+}
- if(dilation != Size2D(1U, 1U))
+void CLConvolutionLayer::prepare()
+{
+ if(_impl->func)
{
- return ConvolutionMethod::GEMM;
+ _impl->func->prepare();
}
else
{
- if(input->data_layout() == DataLayout::NCHW)
- {
- // SRGAN
- if((input->dimension(idx_h) > 720U) && (output->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && (conv_info.pad_top() < 3)
- && (CLDirectConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info)))
- {
- return ConvolutionMethod::DIRECT;
- }
- if((weights->dimension(idx_h) > 5) && (input->dimension(idx_c) > output->dimension(idx_c)) && (CLFFTConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math)))
- {
- return ConvolutionMethod::FFT;
- }
- if(input->dimension(idx_c) < 16)
- {
- return ConvolutionMethod::GEMM;
- }
- return bool(CLWinogradConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math)) ? ConvolutionMethod::WINOGRAD : ConvolutionMethod::GEMM;
- }
- else
- {
- const bool is_direct_valid = bool(CLDirectConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info));
- const bool is_wino_valid = bool(CLWinogradConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math));
-
- // SRGAN case
- if((input->dimension(idx_h) > 720U) && (output->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && (conv_info.pad_top() < 3)
- && is_direct_valid)
- {
- return ConvolutionMethod::DIRECT;
- }
-
- // Floating-point case: GeMM/Direct/Winograd
- if(is_data_type_float(input->data_type()))
- {
- const bool is_large_kernel_sz = (weights->dimension(idx_w) >= 5) && (weights->dimension(idx_h) >= 5);
- const bool is_ifm_ge_16 = input->dimension(idx_c) >= 16;
- const bool are_ifm_ge_ofm = input->dimension(idx_c) >= output->dimension(idx_c);
+ _impl->op->prepare(_impl->prep_pack);
- // Run Winograd if valid and IFM >= 16
- if(is_wino_valid && is_ifm_ge_16)
- {
- return ConvolutionMethod::WINOGRAD;
- }
- // Run Direct for Large kernel size
- if(is_direct_valid && is_large_kernel_sz && is_ifm_ge_16 && are_ifm_ge_ofm)
- {
- return ConvolutionMethod::DIRECT;
- }
-
- // Default case
- return ConvolutionMethod::GEMM;
- }
-
- // Generic case for quantized. Only GeMM
- return ConvolutionMethod::GEMM;
- }
+ // Release temporary tensors that are only used in prepare stage
+ release_temporaries(_impl->aux_mem_req, _impl->workspace);
}
}
-
-void CLConvolutionLayer::run()
-{
- prepare();
- _function->run();
-}
-
-void CLConvolutionLayer::prepare()
-{
- _function->prepare();
-}
-} // namespace arm_compute
+} // namespace arm_compute \ No newline at end of file
diff --git a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
index 918848745e..8038a75dc1 100644
--- a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
@@ -28,6 +28,7 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/ICLKernel.h"
#include <cmath>
#include <memory>
diff --git a/src/runtime/gpu/cl/operators/ClConcatenate.h b/src/runtime/gpu/cl/operators/ClConcatenate.h
index fb1235b9be..153400bd73 100644
--- a/src/runtime/gpu/cl/operators/ClConcatenate.h
+++ b/src/runtime/gpu/cl/operators/ClConcatenate.h
@@ -75,5 +75,5 @@ private:
unsigned int _axis{ 0 };
};
} // namespace opencl
-} // namespace arm_comPUTE
+} // namespace arm_compute
#endif /* ARM_COMPUTE_CL_CONCATENATE_H */
diff --git a/src/runtime/gpu/cl/operators/ClConv2d.cpp b/src/runtime/gpu/cl/operators/ClConv2d.cpp
new file mode 100644
index 0000000000..4cd65290f3
--- /dev/null
+++ b/src/runtime/gpu/cl/operators/ClConv2d.cpp
@@ -0,0 +1,262 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/gpu/cl/operators/ClConv2d.h"
+
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h"
+#include "src/runtime/gpu/cl/operators/ClDirectConv2d.h"
+#include "src/runtime/gpu/cl/operators/ClGemmConvolution.h"
+#include "src/runtime/gpu/cl/operators/ClWinogradConv2d.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+namespace opencl
+{
+using namespace arm_compute::misc::shape_calculator;
+
+/** Default constructor: no operator is selected until configure() is called. */
+ClConv2d::ClConv2d() = default;
+
+/** Default destructor. */
+ClConv2d::~ClConv2d() = default;
+
+/** Validate the arguments, pick a convolution method and configure the matching operator.
+ *
+ * The method returned by get_convolution_method() decides which operator is created:
+ * ClWinogradConv2d, ClDirectConv2d or ClGemmConvolution. Any other method raises "Not supported.".
+ * After configuration the selected operator's workspace requirements are cached in _aux_mem.
+ *
+ * @param compile_context Compile context forwarded to the selected operator.
+ * @param src             Source tensor info. Must not be null.
+ * @param weights         Weights tensor info. Must not be null.
+ * @param biases          Biases tensor info. May be null; it is forwarded unchanged.
+ * @param dst             Destination tensor info. Must not be null.
+ * @param conv2d_info     Convolution descriptor (conv_info, act_info, enable_fast_math, num_groups are read here).
+ * @param weights_info    Weights information, forwarded to validation, method selection and the GEMM operator.
+ */
+void ClConv2d::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const Conv2dInfo &conv2d_info,
+                         const WeightsInfo &weights_info)
+{
+    ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
+    ARM_COMPUTE_ERROR_THROW_ON(ClConv2d::validate(src, weights, biases, dst, conv2d_info, weights_info));
+
+    switch(ClConv2d::get_convolution_method(src, weights, dst, conv2d_info, weights_info, CLScheduler::get().target()))
+    {
+        case ConvolutionMethod::WINOGRAD:
+        {
+            // Grouped convolution is only forwarded to the GEMM operator.
+            ARM_COMPUTE_ERROR_ON(conv2d_info.num_groups != 1);
+            auto f = std::make_unique<ClWinogradConv2d>();
+            f->configure(compile_context, src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info, conv2d_info.enable_fast_math);
+            _operator = std::move(f);
+            break;
+        }
+        case ConvolutionMethod::DIRECT:
+        {
+            // Grouped convolution is only forwarded to the GEMM operator.
+            ARM_COMPUTE_ERROR_ON(conv2d_info.num_groups != 1);
+            auto f = std::make_unique<ClDirectConv2d>();
+            f->configure(compile_context, src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info);
+            _operator = std::move(f);
+            break;
+        }
+        case ConvolutionMethod::GEMM:
+        {
+            auto f = std::make_unique<ClGemmConvolution>();
+            f->configure(compile_context, src, weights, biases, dst, conv2d_info, weights_info);
+            _operator = std::move(f);
+            break;
+        }
+        default:
+            ARM_COMPUTE_ERROR("Not supported.");
+            break;
+    }
+    // Cache the selected operator's auxiliary memory requirements for workspace().
+    _aux_mem = _operator->workspace();
+}
+
+/** Check whether the given configuration can be executed by ClConv2d.
+ *
+ * The convolution method is selected with get_convolution_method() and validation is delegated to the
+ * matching operator (ClWinogradConv2d, ClDirectConv2d or ClGemmConvolution). A method this operator
+ * cannot run (for example FFT) is reported through the returned status rather than raised as an error.
+ *
+ * @param[in] src          Source tensor info. Must not be null.
+ * @param[in] weights      Weights tensor info. Must not be null.
+ * @param[in] biases       Biases tensor info. May be null; it is forwarded unchanged.
+ * @param[in] dst          Destination tensor info. Must not be null.
+ * @param[in] conv2d_info  Convolution descriptor (conv_info, act_info, enable_fast_math, num_groups are read here).
+ * @param[in] weights_info Weights information, forwarded to method selection and to the GEMM validation.
+ *
+ * @return a status
+ */
+Status ClConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const Conv2dInfo &conv2d_info,
+                          const WeightsInfo &weights_info)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, weights, dst);
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG((conv2d_info.num_groups != 1) && (src->data_layout() != DataLayout::NCHW), "Grouping (num_groups != 1) with NHWC data layout is not supported");
+
+    const GPUTarget gpu_target = CLScheduler::get().target();
+
+    switch(ClConv2d::get_convolution_method(src, weights, dst, conv2d_info, weights_info, gpu_target))
+    {
+        case ConvolutionMethod::WINOGRAD:
+        {
+            // Validate Winograd
+            ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.num_groups != 1, "Grouping (num_groups != 1) with ClWinogradConv2d is not supported");
+            ARM_COMPUTE_RETURN_ON_ERROR(ClWinogradConv2d::validate(src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info, conv2d_info.enable_fast_math));
+            break;
+        }
+        case ConvolutionMethod::DIRECT:
+        {
+            // Validate direct convolution layer
+            ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.num_groups != 1, "Grouping (num_groups != 1) with ClDirectConv2d is not supported");
+            ARM_COMPUTE_RETURN_ON_ERROR(ClDirectConv2d::validate(src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info));
+            break;
+        }
+        case ConvolutionMethod::GEMM:
+        {
+            // Validate gemm-based convolution layer
+            ARM_COMPUTE_RETURN_ON_ERROR(ClGemmConvolution::validate(src, weights, biases, dst, conv2d_info, weights_info));
+            break;
+        }
+        default:
+            // A validator reports unsupported configurations through its Status instead of aborting.
+            ARM_COMPUTE_RETURN_ERROR_MSG("Not supported.");
+    }
+
+    return Status{};
+}
+
+/** Heuristically choose the convolution method for a configuration.
+ *
+ * Decision order:
+ *  -# A table of known network configurations (input size, kernel size, IFM/OFM, padding/stride, layout);
+ *     a match returns the method stored in the table.
+ *  -# Any dilation other than 1x1 returns GEMM.
+ *  -# NCHW: DIRECT for the SRGAN-like case when direct convolution validates; FFT when the kernel height
+ *     is above 5, the source has more channels than the destination and FFT validates; GEMM when the
+ *     source has fewer than 16 channels; otherwise WINOGRAD when it validates, else GEMM.
+ *  -# Other layouts: DIRECT for the SRGAN-like case when direct convolution validates; for floating-point
+ *     data WINOGRAD when it validates and IFM >= 16, then DIRECT for kernels of at least 7x7 with
+ *     IFM >= 16 when it validates; GEMM in every remaining case.
+ *
+ * @param[in] src          Source tensor info. Must not be null.
+ * @param[in] weights      Weights tensor info. Must not be null.
+ * @param[in] dst          Destination tensor info. Must not be null.
+ * @param[in] conv2d_info  Convolution descriptor; conv_info, act_info, dilation and enable_fast_math are read.
+ * @param[in] weights_info Unused.
+ * @param[in] gpu_target   Unused.
+ *
+ * @return the selected convolution method
+ */
+ConvolutionMethod ClConv2d::get_convolution_method(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const Conv2dInfo &conv2d_info,
+                                                   const WeightsInfo &weights_info, const GPUTarget gpu_target)
+{
+    ARM_COMPUTE_ERROR_ON_NULLPTR(src);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(dst);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(weights);
+    ARM_COMPUTE_UNUSED(weights_info);
+    ARM_COMPUTE_UNUSED(gpu_target);
+
+    // Bind to the descriptor members instead of copying them; they are only read below.
+    const PadStrideInfo       &conv_info        = conv2d_info.conv_info;
+    const ActivationLayerInfo &act_info         = conv2d_info.act_info;
+    const Size2D              &dilation         = conv2d_info.dilation;
+    const bool                 enable_fast_math = conv2d_info.enable_fast_math;
+
+    const size_t idx_w = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH);
+    const size_t idx_h = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT);
+    const size_t idx_c = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::CHANNEL);
+
+    /* Input spatial dims, kernel size, IFM/OFM, conv info, data layout */
+    using ConvolutionConfiguration = std::tuple<Size2D, Size2D, Size2D, PadStrideInfo, DataLayout>;
+    using ConfigurationMethod      = std::pair<ConvolutionConfiguration, ConvolutionMethod>;
+
+    const std::vector<ConfigurationMethod> known_configs =
+    {
+        // Alexnet
+        ConfigurationMethod(ConvolutionConfiguration(Size2D(27U, 27U), Size2D(5U, 5U), Size2D(48U, 128U), PadStrideInfo(1U, 1U, 2U, 2U), DataLayout::NCHW), ConvolutionMethod::DIRECT),
+        // VGG16 / VGG19
+        ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 64U), PadStrideInfo(1U, 1U, 1U, 1U), DataLayout::NCHW), ConvolutionMethod::DIRECT),
+        // Mobilenet 224
+        ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NCHW), ConvolutionMethod::GEMM),
+        // Mobilenet 160
+        ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NCHW), ConvolutionMethod::GEMM),
+        // Mobilenet 224
+        ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NHWC), ConvolutionMethod::GEMM),
+        // Mobilenet 160
+        ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), DataLayout::NHWC), ConvolutionMethod::GEMM),
+    };
+
+    // Predicate: does a table entry describe exactly the requested configuration?
+    const auto find_config = [&](const ConfigurationMethod &c)
+    {
+        const ConvolutionConfiguration &config      = c.first;
+        const PadStrideInfo            &info        = std::get<3>(config);
+        const DataLayout                data_layout = std::get<4>(config);
+
+        return std::get<0>(config) == Size2D(src->dimension(idx_w), src->dimension(idx_h)) && std::get<1>(config) == Size2D(weights->dimension(idx_w), weights->dimension(idx_h))
+               && std::get<2>(config) == Size2D(weights->dimension(idx_c), weights->dimension(3)) && info.pad_top() == conv_info.pad_top() && info.pad_right() == conv_info.pad_right()
+               && info.pad_bottom() == conv_info.pad_bottom() && info.pad_left() == conv_info.pad_left() && info.stride() == conv_info.stride() && (data_layout == src->data_layout());
+    };
+
+    const auto found = std::find_if(known_configs.begin(), known_configs.end(), find_config);
+    if(found != known_configs.end())
+    {
+        return found->second;
+    }
+
+    // Dilated convolution always goes through GEMM.
+    if(dilation != Size2D(1U, 1U))
+    {
+        return ConvolutionMethod::GEMM;
+    }
+
+    if(src->data_layout() == DataLayout::NCHW)
+    {
+        // SRGAN
+        if((src->dimension(idx_h) > 720U) && (dst->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && (conv_info.pad_top() < 3)
+           && (ClDirectConv2d::validate(src, weights, nullptr, dst, conv_info, act_info)))
+        {
+            return ConvolutionMethod::DIRECT;
+        }
+        if((weights->dimension(idx_h) > 5) && (src->dimension(idx_c) > dst->dimension(idx_c)) && (CLFFTConvolutionLayer::validate(src, weights, nullptr, dst, conv_info, act_info, enable_fast_math)))
+        {
+            return ConvolutionMethod::FFT;
+        }
+        if(src->dimension(idx_c) < 16)
+        {
+            return ConvolutionMethod::GEMM;
+        }
+        return bool(ClWinogradConv2d::validate(src, weights, nullptr, dst, conv_info, act_info, enable_fast_math)) ? ConvolutionMethod::WINOGRAD : ConvolutionMethod::GEMM;
+    }
+
+    // Remaining layouts: both validations are evaluated up front, as the checks below share them.
+    const bool is_direct_valid = bool(ClDirectConv2d::validate(src, weights, nullptr, dst, conv_info, act_info));
+    const bool is_wino_valid   = bool(ClWinogradConv2d::validate(src, weights, nullptr, dst, conv_info, act_info, enable_fast_math));
+
+    // SRGAN case
+    if((src->dimension(idx_h) > 720U) && (dst->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) && (conv_info.pad_top() < 3)
+       && is_direct_valid)
+    {
+        return ConvolutionMethod::DIRECT;
+    }
+
+    // Floating-point case: GeMM/Direct/Winograd
+    if(is_data_type_float(src->data_type()))
+    {
+        const bool is_large_kernel_sz = (weights->dimension(idx_w) >= 7) && (weights->dimension(idx_h) >= 7);
+        const bool is_ifm_ge_16       = src->dimension(idx_c) >= 16;
+
+        // Run Winograd if valid and IFM >= 16
+        if(is_wino_valid && is_ifm_ge_16)
+        {
+            return ConvolutionMethod::WINOGRAD;
+        }
+        // Run Direct for Large kernel size
+        if(is_large_kernel_sz && is_ifm_ge_16 && is_direct_valid)
+        {
+            return ConvolutionMethod::DIRECT;
+        }
+
+        // Default case
+        return ConvolutionMethod::GEMM;
+    }
+
+    // Generic case for quantized. Only GeMM
+    return ConvolutionMethod::GEMM;
+}
+
+/** Run the selected operator on the given tensor pack.
+ *
+ * prepare() is invoked on every call before the operator is run.
+ *
+ * @param[in,out] tensors Tensor pack forwarded unchanged to prepare() and to the selected operator.
+ */
+void ClConv2d::run(ITensorPack &tensors)
+{
+ prepare(tensors);
+ _operator->run(tensors);
+}
+
+/** Forward the preparation step to the operator selected in configure().
+ *
+ * @param[in,out] tensors Tensor pack forwarded unchanged to the selected operator.
+ */
+void ClConv2d::prepare(ITensorPack &tensors)
+{
+ _operator->prepare(tensors);
+}
+
+/** Return the auxiliary memory requirements cached from the selected operator in configure().
+ *
+ * @return a copy of the cached memory requirements
+ */
+experimental::MemoryRequirements ClConv2d::workspace() const
+{
+ return _aux_mem;
+}
+} // namespace opencl
+} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClConv2d.h b/src/runtime/gpu/cl/operators/ClConv2d.h
new file mode 100644
index 0000000000..0888c2f47b
--- /dev/null
+++ b/src/runtime/gpu/cl/operators/ClConv2d.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLCONV2D_H
+#define ARM_COMPUTE_CLCONV2D_H
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/FunctionDescriptors.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+#include "src/runtime/gpu/cl/IClOperator.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+/** Basic function to compute the convolution layer. This function calls the following OpenCL kernels/functions:
+ *
+ * -# @ref opencl::ClGemmConvolution
+ * -# @ref opencl::ClWinogradConv2d
+ * -# @ref opencl::ClDirectConv2d
+ * -# @ref CLFFTConvolutionLayer
+ *
+ * The function selects one of the algorithms mentioned above based on:
+ * - The size of the kernel
+ * - Number of src/dst feature maps
+ * - Amount of memory needed
+ *
+ * Generally GEMM-based convolution is executed when neither Winograd nor FFT nor Direct convolution can be performed.
+ *
+ * FP32 Algorithm| Filter Size | Input/Output feature maps |
+ * --------------|-------------------------------------------------------------|-------------------------------------------|
+ * Winograd | 3x3 1x3 3x1 5x1 1x5 5x5(fast maths) 7x1 1x7 | Input channels is greater than 3 |
+ * FFT | Squared kernels and greater than 9x9 | Input feature maps > Output feature maps |
+ * DirectConv | 9x9 | |
+ * GEMM | Any size | |
+ *
+ * Winograd 5x5 requires fast maths enabled.
+ *
+ * FP16 Algorithm| Filter Size | Input/Output feature maps |
+ * --------------|----------------------------|-------------------------------------------|
+ * Winograd | 3x3 1x3 3x1 5x1 1x5 5x5 | Input channels is greater than 3 |
+ * FFT | Not supported | |
+ * DirectConv | 9x9 | |
+ * GEMM | Any size | |
+ *
+ * Winograd FP16 requires fast maths enabled.
+ * @note (review) The heuristics visible in ClConv2d::get_convolution_method() only return Winograd when IFM >= 16 and test the kernel height against 5 before choosing FFT; confirm the thresholds listed in the tables above.
+ */
+class ClConv2d : public IClOperator
+{
+public:
+ /** Default constructor */
+ ClConv2d();
+ /** Default destructor */
+ ~ClConv2d();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ ClConv2d(const ClConv2d &) = delete;
+ /** Default move constructor */
+ ClConv2d(ClConv2d &&) = default;
+ /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */
+ ClConv2d &operator=(const ClConv2d &) = delete;
+ /** Default move assignment operator */
+ ClConv2d &operator=(ClConv2d &&) = default;
+ /** Set the src and dst tensor infos.
+ *
+ * Valid data layouts:
+ * - NHWC
+ * - NCHW
+ *
+ * Valid data type configurations:
+ * |src0 |src1 |src2 |dst |
+ * |:--------------|:------------------|:------|:--------------|
+ * |F16 |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |F32 |
+ * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
+ * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
+ * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
+ * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] src Source tensor info. 3 lower dimensions represent a single src [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of srcs.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: Same as @p src, also could be QSYMM8_PER_CHANNEL if src is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+ * Data type supported: Same as @p src, except for src of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.
+ * @param[out] dst Destination tensor info. 3 lower dimensions represent a single dst [width, height, OFM], while the rest represent batch of dsts.
+ * Data types supported: Same as @p src.
+ * @param[in] conv2d_info Contains convolution 2d info described in @ref Conv2dInfo.
+ * @param[in] weights_info (Optional) Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. Defaults to a default-constructed WeightsInfo.
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const Conv2dInfo &conv2d_info,
+ const WeightsInfo &weights_info = WeightsInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref ClConv2d
+ *
+ * Takes the same arguments as ClConv2d::configure(), as const tensor infos and without the compile context.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const Conv2dInfo &conv2d_info,
+ const WeightsInfo &weights_info = WeightsInfo());
+ /** Static function to query which convolution method would be used by @ref ClConv2d for the given info
+ *
+ * @param[in] src Source tensor info. 3 lower dimensions represent a single src [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of srcs.
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: Same as @p src, also could be QSYMM8_PER_CHANNEL if src is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] dst Destination tensor info. 3 lower dimensions represent a single dst [width, height, OFM], while the rest represent batch of dsts.
+ * Data types supported: Same as @p src.
+ * @param[in] conv2d_info Contains convolution 2d info described in @ref Conv2dInfo.
+ * @param[in] weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel.
+ * @param[in] gpu_target Specifies the @p GPUTarget.
+ *
+ * @return the Convolution Method Hint
+ */
+ static ConvolutionMethod get_convolution_method(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const Conv2dInfo &conv2d_info,
+ const WeightsInfo &weights_info, const GPUTarget gpu_target);
+ // Inherited methods overridden:
+ void run(ITensorPack &tensors) override;
+ void prepare(ITensorPack &tensors) override;
+ experimental::MemoryRequirements workspace() const override;
+
+private:
+ std::unique_ptr<IClOperator> _operator; // Operator that prepare() and run() forward to
+ experimental::MemoryRequirements _aux_mem{}; // Memory requirements reported by workspace()
+};
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLCONV2D_H */
diff --git a/tests/validation/CL/ConvolutionLayer.cpp b/tests/validation/CL/ConvolutionLayer.cpp
index 31eed7646c..6824ce1413 100644
--- a/tests/validation/CL/ConvolutionLayer.cpp
+++ b/tests/validation/CL/ConvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -216,18 +216,18 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerFixture<float>, framework
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLGEMMConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::ALL,
- combine(combine(combine(combine(combine(combine(combine(combine(combine(
- framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
- framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))),
- framework::dataset::make("Bias", TensorShape(2U))),
- framework::dataset::make("Output", TensorShape(11U, 25U, 2U))),
- framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
- framework::dataset::make("Dilation", Size2D(1, 1))),
- framework::dataset::make("ReshapeWeights", { true })),
- framework::dataset::make("DataType",DataType::F32)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- ActivationFunctionsSmallDataset))
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLGEMMConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::ALL,
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
+ framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))),
+ framework::dataset::make("Bias", TensorShape(2U))),
+ framework::dataset::make("Output", TensorShape(11U, 25U, 2U))),
+ framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
+ framework::dataset::make("Dilation", Size2D(1, 1))),
+ framework::dataset::make("ReshapeWeights", { true })),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ ActivationFunctionsSmallDataset))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f32);
@@ -288,18 +288,18 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerQuantizedFixture<uint8_t>
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLGEMMConvolutionLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::ALL,
- combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
- framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
- framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))),
- framework::dataset::make("Bias", TensorShape(2U))),
- framework::dataset::make("Output", TensorShape(11U, 25U, 2U))),
- framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
- framework::dataset::make("Dilation", Size2D(1, 1))),
- framework::dataset::make("ReshapeWeights", { true })),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- QuantizationData),
- QuantizedActivationFunctionsSmallDataset))
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
+ framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))),
+ framework::dataset::make("Bias", TensorShape(2U))),
+ framework::dataset::make("Output", TensorShape(11U, 25U, 2U))),
+ framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
+ framework::dataset::make("Dilation", Size2D(1, 1))),
+ framework::dataset::make("ReshapeWeights", { true })),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ QuantizationData),
+ QuantizedActivationFunctionsSmallDataset))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
@@ -318,18 +318,18 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerQuantizedFixture<int8_t>,
validate(CLAccessor(_target), _reference, tolerance_qasymm8);
}
FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLGEMMConvolutionLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::ALL,
- combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
- framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
- framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))),
- framework::dataset::make("Bias", TensorShape(2U))),
- framework::dataset::make("Output", TensorShape(11U, 25U, 2U))),
- framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
- framework::dataset::make("Dilation", Size2D(1, 1))),
- framework::dataset::make("ReshapeWeights", { true })),
- framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- QuantizationData),
- QuantizedActivationFunctionsSmallDataset))
+ combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+ framework::dataset::make("Input", TensorShape(23U, 27U, 5U)),
+ framework::dataset::make("Weights", TensorShape(3U, 3U, 5U, 2U))),
+ framework::dataset::make("Bias", TensorShape(2U))),
+ framework::dataset::make("Output", TensorShape(11U, 25U, 2U))),
+ framework::dataset::make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
+ framework::dataset::make("Dilation", Size2D(1, 1))),
+ framework::dataset::make("ReshapeWeights", { true })),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ QuantizationData),
+ QuantizedActivationFunctionsSmallDataset))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_qasymm8);