From 2213d4b334567d0cb7f283090d42b5fb1b70f66b Mon Sep 17 00:00:00 2001
From: Gian Marco Iodice
Date: Fri, 27 Apr 2018 10:39:06 +0100
Subject: COMPMID-1096 - Add fast_math flag to CLConvolutionLayer
 COMPMID-1103 - CLWinogradConvolutionLayer mismatches

Change-Id: Iceaa9482a1790ec39d2720c220261aaea8043978
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/129398
Tested-by: Jenkins
Reviewed-by: Giorgio Arena
Reviewed-by: Georgios Pinitas
---
 src/runtime/CL/functions/CLConvolutionLayer.cpp | 37 ++++++++++++-------------
 1 file changed, 17 insertions(+), 20 deletions(-)

(limited to 'src/runtime/CL/functions/CLConvolutionLayer.cpp')

diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp
index 97ef895434..83281e1747 100644
--- a/src/runtime/CL/functions/CLConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp
@@ -43,32 +43,33 @@ CLConvolutionLayer::CLConvolutionLayer(std::shared_ptr memory_ma
 }
 
 void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
-                                   const Size2D &dilation, const ActivationLayerInfo &act_info)
+                                   const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
-    ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation, act_info));
+    ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation, act_info,
+                                                            enable_fast_math));
 
     switch(CLConvolutionLayer::get_convolution_method(input->info(), weights->info(), output->info(), conv_info,
-                                                      weights_info, act_info, CLScheduler::get().target(), dilation))
+                                                      weights_info, act_info, CLScheduler::get().target(), dilation, enable_fast_math))
     {
         case ConvolutionMethod::WINOGRAD:
         {
             auto f = arm_compute::support::cpp14::make_unique<CLWinogradConvolutionLayer>(_memory_manager);
-            f->configure(input, weights, biases, output, conv_info);
+            f->configure(input, weights, biases, output, conv_info, act_info, enable_fast_math);
             _function = std::move(f);
             break;
         }
         case ConvolutionMethod::DIRECT:
         {
             auto f = arm_compute::support::cpp14::make_unique<CLDirectConvolutionLayer>();
-            f->configure(input, weights, biases, output, conv_info);
+            f->configure(input, weights, biases, output, conv_info, act_info);
             _function = std::move(f);
             break;
         }
         case ConvolutionMethod::GEMM:
         {
             auto f = arm_compute::support::cpp14::make_unique<CLGEMMConvolutionLayer>(_memory_manager);
-            f->configure(input, weights, biases, output, conv_info, weights_info, dilation);
+            f->configure(input, weights, biases, output, conv_info, weights_info, dilation, act_info);
             _function = std::move(f);
             break;
         }
@@ -79,19 +80,18 @@ void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, c
 }
 
 Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
-                                    const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info)
+                                    const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
 
-    //Configure if the parameters match the direct convolution or the gemm-based
     const GPUTarget gpu_target = CLScheduler::get().target();
 
-    switch(CLConvolutionLayer::get_convolution_method(input, weights, output, conv_info, weights_info, act_info, gpu_target, dilation))
+    switch(CLConvolutionLayer::get_convolution_method(input, weights, output, conv_info, weights_info, act_info, gpu_target, dilation, enable_fast_math))
     {
         case ConvolutionMethod::WINOGRAD:
         {
             //Validate Winograd
-            CLWinogradConvolutionLayer::validate(input, weights, biases, output, conv_info);
+            CLWinogradConvolutionLayer::validate(input, weights, biases, output, conv_info, act_info, enable_fast_math);
             break;
         }
         case ConvolutionMethod::DIRECT:
@@ -115,25 +115,22 @@ Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo
 }
 
 ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info,
-                                                             const WeightsInfo &weights_info, const ActivationLayerInfo &act_info, const GPUTarget gpu_target, const Size2D &dilation)
+                                                             const WeightsInfo &weights_info, const ActivationLayerInfo &act_info, const GPUTarget gpu_target, const Size2D &dilation, bool enable_fast_math)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input);
     ARM_COMPUTE_ERROR_ON_NULLPTR(output);
     ARM_COMPUTE_ERROR_ON_NULLPTR(weights);
-    ARM_COMPUTE_UNUSED(output);
     ARM_COMPUTE_UNUSED(weights_info);
     ARM_COMPUTE_UNUSED(gpu_target);
 
-    const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
-    const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
-    const size_t idx_c = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
-
-    if((input->data_type() == DataType::F32) && (input->data_layout() == DataLayout::NCHW) && (input->dimension(idx_c) > 3) && (weights->dimension(idx_w) == 3) && (weights->dimension(idx_h) == 3)
-       && (weights->num_dimensions() <= 4) && (conv_info.stride().first == 1) && (conv_info.stride().second == 1) && (dilation == Size2D(1U, 1U)) && (!act_info.enabled()))
+    if(dilation != Size2D(1U, 1U))
+    {
+        return ConvolutionMethod::GEMM;
+    }
+    else
     {
-        return ConvolutionMethod::WINOGRAD;
+        return bool(CLWinogradConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math)) ? ConvolutionMethod::WINOGRAD : ConvolutionMethod::GEMM;
     }
-    return ConvolutionMethod::GEMM;
 }
 
 void CLConvolutionLayer::run()
-- 
cgit v1.2.1
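
Usage note (not part of the patch): a minimal sketch of how a caller might pass the new enable_fast_math flag added above. It assumes CLScheduler has already been initialised and that the tensors are allocated with compatible shapes; the helper function name and the padding/stride values below are illustrative only, not taken from the patch.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"

using namespace arm_compute;

// Hypothetical helper: configure a convolution and let the runtime pick
// Winograd (or another fast implementation) when enable_fast_math is true,
// accepting a possible small accuracy trade-off.
void configure_conv_with_fast_math(CLTensor &src, CLTensor &weights, CLTensor &biases, CLTensor &dst)
{
    CLConvolutionLayer conv;

    conv.configure(&src, &weights, &biases, &dst,
                   PadStrideInfo(1, 1, 1, 1),  // stride 1, pad 1 (example values)
                   WeightsInfo(),
                   Size2D(1U, 1U),             // dilation other than 1x1 forces the GEMM path
                   ActivationLayerInfo(),
                   true /* enable_fast_math */);

    conv.run();  // enqueue the convolution function selected by get_convolution_method()
}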