diff options
Diffstat (limited to 'src/gpu/cl/operators')
-rw-r--r-- | src/gpu/cl/operators/ClConv2d.cpp | 32 | ||||
-rw-r--r-- | src/gpu/cl/operators/ClGemm.cpp | 16 | ||||
-rw-r--r-- | src/gpu/cl/operators/ClGemmConv2d.cpp | 50 | ||||
-rw-r--r-- | src/gpu/cl/operators/ClGemmConv2d.h | 18 |
4 files changed, 45 insertions, 71 deletions
diff --git a/src/gpu/cl/operators/ClConv2d.cpp b/src/gpu/cl/operators/ClConv2d.cpp index 51248d4a7a..eb9475ccaa 100644 --- a/src/gpu/cl/operators/ClConv2d.cpp +++ b/src/gpu/cl/operators/ClConv2d.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Arm Limited. + * Copyright (c) 2021-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -90,7 +90,6 @@ void ClConv2d::configure(const CLCompileContext &compile_context, ITensorInfo *s case ConvolutionMethod::WINOGRAD: { ARM_COMPUTE_ERROR_ON(conv2d_info.num_groups != 1); - ARM_COMPUTE_ERROR_ON(conv2d_info.post_ops.size() > 0); auto f = std::make_unique<ClWinogradConv2d>(); f->configure(compile_context, src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info, conv2d_info.enable_fast_math); _operator = std::move(f); @@ -99,7 +98,6 @@ void ClConv2d::configure(const CLCompileContext &compile_context, ITensorInfo *s case ConvolutionMethod::DIRECT: { ARM_COMPUTE_ERROR_ON(conv2d_info.num_groups != 1); - ARM_COMPUTE_ERROR_ON(conv2d_info.post_ops.size() > 0); auto f = std::make_unique<ClDirectConv2d>(); f->configure(compile_context, src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info); _operator = std::move(f); @@ -108,7 +106,6 @@ void ClConv2d::configure(const CLCompileContext &compile_context, ITensorInfo *s case ConvolutionMethod::INDIRECT: { ARM_COMPUTE_ERROR_ON(conv2d_info.num_groups != 1); - ARM_COMPUTE_ERROR_ON(conv2d_info.post_ops.size() > 0); auto f = std::make_unique<ClIndirectConv2d>(); f->configure(compile_context, src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info); _operator = std::move(f); @@ -142,7 +139,6 @@ Status ClConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, co { //Validate Winograd ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.num_groups != 1, "Grouping (num_groups != 1) with ClWinogradConv2d is not supported"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.post_ops.size() > 0, "ClWinogradConv2d does not support PostOps"); ARM_COMPUTE_RETURN_ON_ERROR(ClWinogradConv2d::validate(src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info, conv2d_info.enable_fast_math)); break; } @@ -150,7 +146,6 @@ Status ClConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, co { // Validate direct convolution layer ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.num_groups != 1, "Grouping (num_groups != 1) with ClDirectConv2d is not supported"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.post_ops.size() > 0, "ClDirectConv2d does not support PostOps"); ARM_COMPUTE_RETURN_ON_ERROR(ClDirectConv2d::validate(src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info)); break; } @@ -158,7 +153,6 @@ Status ClConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights, co { // Validate indirect convolution layer ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.num_groups != 1, "Grouping (num_groups != 1) with ClIndirectConv2d is not supported"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv2d_info.post_ops.size() > 0, "ClIndirectConv2d does not support PostOps"); ARM_COMPUTE_RETURN_ON_ERROR(ClIndirectConv2d::validate(src, weights, biases, dst, conv2d_info.conv_info, conv2d_info.act_info)); break; } @@ -271,17 +265,17 @@ ConvolutionMethod ClConv2d::get_convolution_method(const ITensorInfo *src, const if(is_data_type_float(src->data_type())) { // Get dst shape - TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *weights, conv_info); - const bool is_large_kernel_sz = (weights->dimension(idx_w) >= kernel_sz_direct_conv_thr) && (weights->dimension(idx_h) >= kernel_sz_direct_conv_thr); - const bool is_ifm_ge_8 = src->dimension(idx_c) >= 8; - const bool is_ifm_ge_16 = src->dimension(idx_c) >= 16; - const bool is_ofm_lte_8 = weights->dimension(3U) <= 8; - const bool is_ofm_lt_64 = weights->dimension(3U) < 64; - const bool workload_gte_8192 = (output_shape[0] * output_shape[1] * output_shape[2]) / 16 >= 8192; - const bool is_ifm_gt_ofm = src->dimension(idx_c) > weights->dimension(3U); - const bool is_m_one = output_shape[1] * output_shape[2] == 1; - const bool is_unit_stride = (conv2d_info.conv_info.stride().first == 1) && (conv2d_info.conv_info.stride().second == 1); - const int32_t kernel_sz = weights->dimension(idx_w) * weights->dimension(idx_h); + TensorShape output_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *weights, conv_info); + const bool is_large_kernel_sz = (weights->dimension(idx_w) >= kernel_sz_direct_conv_thr) && (weights->dimension(idx_h) >= kernel_sz_direct_conv_thr); + const bool is_ifm_ge_8 = src->dimension(idx_c) >= 8; + const bool is_ifm_ge_16 = src->dimension(idx_c) >= 16; + const bool is_ofm_lte_8 = weights->dimension(3U) <= 8; + const bool is_ofm_lt_64 = weights->dimension(3U) < 64; + const bool workload_gte_8192 = (output_shape[0] * output_shape[1] * output_shape[2]) / 16 >= 8192; + const bool is_ifm_gt_ofm = src->dimension(idx_c) > weights->dimension(3U); + const bool is_m_one = output_shape[1] * output_shape[2] == 1; + const bool is_unit_stride = (conv2d_info.conv_info.stride().first == 1) && (conv2d_info.conv_info.stride().second == 1); + const int32_t kernel_sz = weights->dimension(idx_w) * weights->dimension(idx_h); // Run Winograd if valid and IFM >= 8 if(is_wino_valid && is_ifm_ge_8) @@ -330,7 +324,7 @@ ConvolutionMethod ClConv2d::get_convolution_method(const ITensorInfo *src, const { const bool is_kernel_sz_odd = kernel_sz % 2; const bool is_g77 = gpu_target == GPUTarget::G77; - preferred_conv_method = (kernel_sz > 1) && (kernel_sz <= 81) && is_kernel_sz_odd && is_g77? ConvolutionMethod::INDIRECT : ConvolutionMethod::DIRECT; + preferred_conv_method = (kernel_sz > 1) && (kernel_sz <= 81) && is_kernel_sz_odd && is_g77 ? ConvolutionMethod::INDIRECT : ConvolutionMethod::DIRECT; } // Direct/indirect convolution used for the first layer of the network diff --git a/src/gpu/cl/operators/ClGemm.cpp b/src/gpu/cl/operators/ClGemm.cpp index 8db6dabe58..7e331a86f3 100644 --- a/src/gpu/cl/operators/ClGemm.cpp +++ b/src/gpu/cl/operators/ClGemm.cpp @@ -38,7 +38,6 @@ #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/ITensorAllocator.h" -#include "arm_compute/core/experimental/IPostOp.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/MemoryHelpers.h" #include "src/core/utils/helpers/float_ops.h" @@ -222,7 +221,6 @@ void ClGemm::configure_native(const CLCompileContext &compile_context, ITensorIn kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d; kernel_info.broadcast_bias = broadcast_bias; kernel_info.activation_info = gemm_info.activation_info(); - kernel_info.post_ops = gemm_info.post_ops(); // Set the target for the kernels _mm_native_kernel->set_target(gpu_target); @@ -254,7 +252,6 @@ void ClGemm::configure_reshaped(const CLCompileContext &compile_context, ITensor kernel_info.reinterpret_input_as_3d = false; kernel_info.broadcast_bias = broadcast_bias; kernel_info.activation_info = gemm_info.activation_info(); - kernel_info.post_ops = gemm_info.post_ops(); // Set the target for the kernels _reshape_lhs_kernel->set_target(gpu_target); @@ -299,7 +296,6 @@ void ClGemm::configure_reshaped_only_rhs(const CLCompileContext &compile_context kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d; kernel_info.broadcast_bias = broadcast_bias; kernel_info.activation_info = gemm_info.activation_info(); - kernel_info.post_ops = gemm_info.post_ops(); // Set the target for the kernels _mm_reshaped_only_rhs_kernel->set_target(gpu_target); @@ -346,7 +342,6 @@ void ClGemm::configure_reshaped_only_rhs_mmul(const CLCompileContext &compile_co kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d; kernel_info.broadcast_bias = broadcast_bias; kernel_info.activation_info = gemm_info.activation_info(); - kernel_info.post_ops = gemm_info.post_ops(); // Set the target for the kernels _mm_reshaped_only_rhs_mmul_kernel->set_target(gpu_target); @@ -396,7 +391,6 @@ Status ClGemm::validate_native(const ITensorInfo *a, const ITensorInfo *b, const kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d; kernel_info.broadcast_bias = broadcast_bias; kernel_info.activation_info = gemm_info.activation_info(); - kernel_info.post_ops = gemm_info.post_ops(); auto config = auto_heuristics::select_mlgo_gemm_config_reshaped_only_rhs(auto_heuristics::CommonQuery{ gpu_target, data_type, m, n, k, batch_size }); @@ -433,7 +427,6 @@ Status ClGemm::validate_reshaped(const ITensorInfo *a, const ITensorInfo *b, con kernel_info.reinterpret_input_as_3d = false; kernel_info.broadcast_bias = broadcast_bias; kernel_info.activation_info = gemm_info.activation_info(); - kernel_info.post_ops = gemm_info.post_ops(); GEMMLHSMatrixInfo lhs_info; GEMMRHSMatrixInfo rhs_info; @@ -482,7 +475,6 @@ Status ClGemm::validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInf kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d; kernel_info.broadcast_bias = broadcast_bias; kernel_info.activation_info = gemm_info.activation_info(); - kernel_info.post_ops = gemm_info.post_ops(); GEMMLHSMatrixInfo lhs_info; GEMMRHSMatrixInfo rhs_info; @@ -531,7 +523,6 @@ Status ClGemm::validate_reshaped_only_rhs_mmul(const ITensorInfo *a, const ITens kernel_info.reinterpret_input_as_3d = reinterpret_input_as_3d; kernel_info.broadcast_bias = broadcast_bias; kernel_info.activation_info = gemm_info.activation_info(); - kernel_info.post_ops = gemm_info.post_ops(); GEMMLHSMatrixInfo lhs_info; GEMMRHSMatrixInfo rhs_info; @@ -624,7 +615,12 @@ Status ClGemm::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso // Select GEMMType CLGEMMKernelType gemm_kernel_type = auto_select_gemm_kernel(auto_heuristics::CommonQuery { - CLScheduler::get().target(), a->data_type(), m, n, k, batch_size, + CLScheduler::get().target(), + a->data_type(), + m, + n, + k, + batch_size, }, gemm_info.reshape_b_only_on_first_run(), b->are_values_constant()); diff --git a/src/gpu/cl/operators/ClGemmConv2d.cpp b/src/gpu/cl/operators/ClGemmConv2d.cpp index 682477e4ea..5620471ff9 100644 --- a/src/gpu/cl/operators/ClGemmConv2d.cpp +++ b/src/gpu/cl/operators/ClGemmConv2d.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -54,14 +54,14 @@ namespace opencl { ClGemmConv2d::ClGemmConv2d() : _weights_reshape_kernel(nullptr), _im2col_kernel(nullptr), _mm_gemm(nullptr), _mm_gemmlowp(nullptr), _col2im_kernel(nullptr), _activation_kernel(nullptr), _im2col_output(), _weights_reshaped(), - _gemm_output(), _skip_im2col(false), _skip_col2im(false), _is_quantized(false), _fuse_activation(true), _append_bias(false), _is_prepared(false), _use_post_ops(false), _aux_mem(AuxTensorIdx::Count) + _gemm_output(), _skip_im2col(false), _skip_col2im(false), _is_quantized(false), _fuse_activation(true), _append_bias(false), _is_prepared(false), _aux_mem(AuxTensorIdx::Count) { } ClGemmConv2d::~ClGemmConv2d() = default; void ClGemmConv2d::configure_mm(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const GEMMLowpOutputStageInfo &gemmlowp_output_stage, - int gemm_3d_depth, const ActivationLayerInfo &act_info, const experimental::PostOpList<ITensorInfo *> &post_ops) + int gemm_3d_depth, const ActivationLayerInfo &act_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights); ARM_COMPUTE_ERROR_THROW_ON(validate_mm(src, weights, biases, dst, gemmlowp_output_stage, gemm_3d_depth, _skip_im2col, act_info)); @@ -76,14 +76,12 @@ void ClGemmConv2d::configure_mm(const ClCompileContext &compile_context, const I false, // fast_math false, // fp_mixed_precision true, // broadcast_bias - act_info, // activation_info - post_ops // post ops + act_info // activation_info ); TensorInfo tmp_src{ *src }; if(_is_quantized) { - ARM_COMPUTE_ERROR_ON_MSG(post_ops.size() > 0, "ClGemmConv2d quantized types do not support post ops"); // Since we need negative offsets for computing convolution, we need to change QuantizationInfo() // Extract and negate input and weights offset const QuantizationInfo input_quantization_info = src->quantization_info(); @@ -118,7 +116,7 @@ void ClGemmConv2d::configure_mm(const ClCompileContext &compile_context, const I } Status ClGemmConv2d::validate_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, - const GEMMLowpOutputStageInfo &gemmlowp_output_stage, int gemm_3d_depth, bool skip_im2col, const ActivationLayerInfo &act_info, const experimental::PostOpList<ITensorInfo *> &post_ops) + const GEMMLowpOutputStageInfo &gemmlowp_output_stage, int gemm_3d_depth, bool skip_im2col, const ActivationLayerInfo &act_info) { const bool is_quantized = is_data_type_quantized_asymmetric(src->data_type()); @@ -132,13 +130,11 @@ Status ClGemmConv2d::validate_mm(const ITensorInfo *src, const ITensorInfo *weig false, // fast_math false, // fp_mixed_precision true, // broadcast_bias - act_info, // activation_info - post_ops // post ops + act_info // activation_info ); if(is_quantized) { - ARM_COMPUTE_RETURN_ERROR_ON_MSG(post_ops.size() > 0, "ClGemmConv2d quantized types do not support post ops"); // Since we need negative offsets for computing convolution, we need to change QuantizationInfo() // Extract and negate input and weights offset const QuantizationInfo input_quantization_info = src->quantization_info(); @@ -189,19 +185,18 @@ void ClGemmConv2d::configure(const CLCompileContext &compile_context, ITensorInf // Only for quantize there are few cases where we cannot fuse the activation function in GEMM _fuse_activation = true; - _use_post_ops = conv2d_info.post_ops.size() > 0; const ITensorInfo *gemm_input_to_use = src; ITensorInfo *gemm_output_to_use = dst; // Get parameters from conv_info - unsigned int stride_x = 0; - unsigned int stride_y = 0; + unsigned int stride_x = 0; + unsigned int stride_y = 0; std::tie(stride_x, stride_y) = conv2d_info.conv_info.stride(); // Get convolved dimensions - unsigned int conv_w = 0; - unsigned int conv_h = 0; + unsigned int conv_w = 0; + unsigned int conv_h = 0; std::tie(conv_w, conv_h) = scaled_dimensions(src->dimension(idx_width), src->dimension(idx_height), kernel_width, @@ -318,11 +313,10 @@ void ClGemmConv2d::configure(const CLCompileContext &compile_context, ITensorInf // In case of NHWC, we need to run GEMM3D (gemm_3d_depth != 0) in order to avoid reshaping the output matrix const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0; - configure_mm(compile_context, gemm_input_to_use, &_weights_reshaped, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, conv2d_info.act_info, conv2d_info.post_ops); + configure_mm(compile_context, gemm_input_to_use, &_weights_reshaped, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, conv2d_info.act_info); if(!_skip_col2im) { - ARM_COMPUTE_ERROR_ON_MSG(conv2d_info.post_ops.size() > 0, "ClGemmConv2d does not support post ops with col2im operation"); // Post ops must be performed after every other op // Set the GPU target for col2im _col2im_kernel = std::make_unique<opencl::kernels::ClCol2ImKernel>(); _col2im_kernel->set_target(CLScheduler::get().target()); @@ -334,8 +328,7 @@ void ClGemmConv2d::configure(const CLCompileContext &compile_context, ITensorInf ARM_COMPUTE_ERROR_ON_MSG((dst->dimension(idx_width) != conv_w) || (dst->dimension(idx_height) != conv_h), "Output shape does not match the expected one"); - // Disable running of activation kernel if post ops are used - if(!_fuse_activation && !_use_post_ops) + if(!_fuse_activation) { _activation_kernel = std::make_unique<opencl::kernels::ClActivationKernel>(); _activation_kernel->configure(compile_context, dst, nullptr, conv2d_info.act_info); @@ -383,15 +376,11 @@ Status ClGemmConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights const bool is_quantized = is_data_type_quantized_asymmetric(data_type); const bool skip_im2col = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv2d_info.conv_info.stride().first == 1 && conv2d_info.conv_info.stride().second == 1); - const bool skip_col2im = data_layout == DataLayout::NHWC; - bool fuse_activation = true; - bool use_post_ops = conv2d_info.post_ops.size() > 0; + const bool skip_col2im = data_layout == DataLayout::NHWC; + bool fuse_activation = true; ARM_COMPUTE_RETURN_ERROR_ON((weights->dimension(idx_channel) * conv2d_info.num_groups) != src->dimension(idx_channel)); ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(!skip_im2col - && conv2d_info.post_ops.size() > 0, - "ClGemmConv2d does not support post ops with col2im or im2col operation"); // Post ops must be performed after every other op // Validate biases if(biases != nullptr) @@ -520,8 +509,7 @@ Status ClGemmConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights // In case of NHWC, we need to run GEMM3D (gemm_3d_depth != 0) in order to avoid reshaping the output matrix const unsigned int gemm_3d_depth = (data_layout == DataLayout::NHWC) ? conv_h : 0; - ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, skip_im2col, conv2d_info.act_info, - conv2d_info.post_ops)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemm_input_to_use, weights_to_use, biases_to_use, gemm_output_to_use, gemmlowp_output_stage, gemm_3d_depth, skip_im2col, conv2d_info.act_info)); // Validate Col2Im if(!skip_col2im) @@ -530,8 +518,7 @@ Status ClGemmConv2d::validate(const ITensorInfo *src, const ITensorInfo *weights } // Validate Activation Layer - // Disable running (thus validation) of activation kernel if post ops are used - if(!fuse_activation && !use_post_ops) + if(!fuse_activation) { ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClActivationKernel::validate(dst, nullptr, conv2d_info.act_info)); } @@ -600,8 +587,7 @@ void ClGemmConv2d::run(ITensorPack &tensors) } //Run Activation Layer if we cannot fuse in GEMM - // Disable running of activation kernel if post ops are used - if(!_fuse_activation && !_use_post_ops) + if(!_fuse_activation) { ITensorPack pack = { @@ -620,7 +606,7 @@ void ClGemmConv2d::prepare(ITensorPack &tensors) ICLTensor *weights_reshaped_p = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(offset_int_vec(WeightsReshaped))); CLAuxTensorHandler weights_reshaped(_weights_reshaped, *weights_reshaped_p); auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1); - ITensorPack pack = + ITensorPack pack = { { TensorType::ACL_SRC, weights }, { TensorType::ACL_DST, weights_reshaped.get() } diff --git a/src/gpu/cl/operators/ClGemmConv2d.h b/src/gpu/cl/operators/ClGemmConv2d.h index afde7c511d..8a46ee2dc3 100644 --- a/src/gpu/cl/operators/ClGemmConv2d.h +++ b/src/gpu/cl/operators/ClGemmConv2d.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,12 +21,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CL_GEMM_CONV2D_H -#define ARM_COMPUTE_CL_GEMM_CONV2D_H +#ifndef ACL_SRC_GPU_CL_OPERATORS_CLGEMMCONV2D_H +#define ACL_SRC_GPU_CL_OPERATORS_CLGEMMCONV2D_H #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" -#include "arm_compute/core/experimental/IPostOp.h" #include "arm_compute/runtime/FunctionDescriptors.h" #include "src/gpu/cl/ClCompileContext.h" #include "src/gpu/cl/IClOperator.h" @@ -113,8 +112,8 @@ public: const WeightsInfo &weights_info = WeightsInfo()); // Inherited methods overridden: - void run(ITensorPack &tensors) override; - void prepare(ITensorPack &constants) override; + void run(ITensorPack &tensors) override; + void prepare(ITensorPack &constants) override; experimental::MemoryRequirements workspace() const override; private: @@ -133,7 +132,7 @@ private: */ void configure_mm(const CLCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *weights, ITensorInfo *biases, ITensorInfo *dst, const GEMMLowpOutputStageInfo &gemmlowp_output_stage, - int gemm_3d_depth, const ActivationLayerInfo &act_info, const experimental::PostOpList<ITensorInfo *> &post_ops = experimental::PostOpList<ITensorInfo *> {}); + int gemm_3d_depth, const ActivationLayerInfo &act_info); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer matrix multiply routines * * @param[in] src Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. @@ -150,7 +149,7 @@ private: * @return a status */ static Status validate_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const GEMMLowpOutputStageInfo &gemmlowp_output_stage, - int gemm_3d_depth, bool skip_im2col, const ActivationLayerInfo &act_info, const experimental::PostOpList<ITensorInfo *> &post_ops = experimental::PostOpList<ITensorInfo *> {}); + int gemm_3d_depth, bool skip_im2col, const ActivationLayerInfo &act_info); enum AuxTensorIdx { @@ -178,10 +177,9 @@ private: bool _fuse_activation; bool _append_bias; bool _is_prepared; - bool _use_post_ops; experimental::MemoryRequirements _aux_mem; }; } // namespace opencl } // namespace arm_compute -#endif /* ARM_COMPUTE_CL_GEMM_CONV2D_H */ +#endif // ACL_SRC_GPU_CL_OPERATORS_CLGEMMCONV2D_H |