From e083771a1f28c34485f0d0054e2645070df98846 Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Tue, 12 Jun 2018 11:30:50 +0100 Subject: COMPMID-1160 Turn Graph hints into heuristics Change-Id: Id24c2f07c59d863f8e1af6a1afbf6a542b2b9954 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/135142 Tested-by: Jenkins Reviewed-by: Pablo Tello Reviewed-by: Anthony Barbier --- examples/graph_alexnet.cpp | 12 ++------ examples/graph_mobilenet.cpp | 4 +-- examples/graph_vgg16.cpp | 12 ++------ examples/graph_vgg19.cpp | 6 ---- src/runtime/CL/functions/CLConvolutionLayer.cpp | 34 +++++++++++++++++++++ src/runtime/NEON/functions/NEConvolutionLayer.cpp | 36 +++++++++++++++++++++++ 6 files changed, 77 insertions(+), 27 deletions(-) diff --git a/examples/graph_alexnet.cpp b/examples/graph_alexnet.cpp index 9e6d91962e..5328662b6d 100644 --- a/examples/graph_alexnet.cpp +++ b/examples/graph_alexnet.cpp @@ -53,13 +53,9 @@ public: std::unique_ptr preprocessor = arm_compute::support::cpp14::make_unique(mean_rgb); // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON - const int target = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0; - Target target_hint = set_target_hint(target); - - const bool is_neon = (target_hint == Target::NEON); - ConvolutionMethod convolution_5x5_hint = is_neon ? ConvolutionMethod::GEMM : ConvolutionMethod::DIRECT; - ConvolutionMethod convolution_3x3_hint = ConvolutionMethod::DEFAULT; - FastMathHint fast_math_hint = FastMathHint::DISABLED; + const int target = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0; + Target target_hint = set_target_hint(target); + FastMathHint fast_math_hint = FastMathHint::DISABLED; // Parse arguments if(argc < 2) @@ -117,7 +113,6 @@ public: << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)).set_name("norm1") << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0))).set_name("pool1") // Layer 2 - << convolution_5x5_hint << ConvolutionLayer( 5U, 5U, 256U, get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv2_w.npy"), @@ -127,7 +122,6 @@ public: << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu2") << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)).set_name("norm2") << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0))).set_name("pool2") - << convolution_3x3_hint // Layer 3 << ConvolutionLayer( 3U, 3U, 384U, diff --git a/examples/graph_mobilenet.cpp b/examples/graph_mobilenet.cpp index 50dc02482f..40243bb111 100644 --- a/examples/graph_mobilenet.cpp +++ b/examples/graph_mobilenet.cpp @@ -35,7 +35,7 @@ using namespace arm_compute::graph_utils; /** Example demonstrating how to implement MobileNet's network using the Compute Library's graph API * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] data layout, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) + * @param[in] argv Arguments ( [optional] Target (0 = NEON, 1 = OpenCL, 2 = OpenCL with Tuner), [optional] Model ID (0 = MobileNetV1_1.0_224, 1 = MobileNetV1_0.75_160), [optional] Path to the weights folder, [optional] image, [optional] labels, [optional] data layout, [optional] Fast math for convolution layer (0 = DISABLED, 1 = ENABLED) ) */ class GraphMobilenetExample : public Example { @@ -52,7 +52,6 @@ public: // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON const int target = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0; Target target_hint = set_target_hint(target); - ConvolutionMethod convolution_hint = ConvolutionMethod::GEMM; DepthwiseConvolutionMethod depthwise_convolution_hint = DepthwiseConvolutionMethod::OPTIMIZED_3x3; FastMathHint fast_math_hint = FastMathHint::DISABLED; @@ -133,7 +132,6 @@ public: } graph << target_hint - << convolution_hint << depthwise_convolution_hint << fast_math_hint << InputLayer(input_descriptor, diff --git a/examples/graph_vgg16.cpp b/examples/graph_vgg16.cpp index 72e724025b..d70c56eadd 100644 --- a/examples/graph_vgg16.cpp +++ b/examples/graph_vgg16.cpp @@ -51,13 +51,9 @@ public: std::unique_ptr preprocessor = arm_compute::support::cpp14::make_unique(mean_rgb); // Set target. 0 (NEON), 1 (OpenCL), 2 (OpenCL with Tuner). By default it is NEON - const int target = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0; - Target target_hint = set_target_hint(target); - const bool is_opencl = target_hint == Target::CL; - - ConvolutionMethod first_convolution3x3_hint = is_opencl ? ConvolutionMethod::DIRECT : ConvolutionMethod::GEMM; - ConvolutionMethod convolution3x3_hint = ConvolutionMethod::DEFAULT; - FastMathHint fast_math_hint = FastMathHint::DISABLED; + const int target = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0; + Target target_hint = set_target_hint(target); + FastMathHint fast_math_hint = FastMathHint::DISABLED; // Parse arguments if(argc < 2) @@ -102,7 +98,6 @@ public: graph << target_hint << fast_math_hint - << first_convolution3x3_hint << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::F32), get_input_accessor(image, std::move(preprocessor))) // Layer 1 @@ -113,7 +108,6 @@ public: PadStrideInfo(1, 1, 1, 1)) .set_name("conv1_1") << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1_1/Relu") - << convolution3x3_hint // Layer 2 << ConvolutionLayer( 3U, 3U, 64U, diff --git a/examples/graph_vgg19.cpp b/examples/graph_vgg19.cpp index b15c3f2def..8a0ec6fdbd 100644 --- a/examples/graph_vgg19.cpp +++ b/examples/graph_vgg19.cpp @@ -54,10 +54,6 @@ public: const int target = argc > 1 ? std::strtol(argv[1], nullptr, 10) : 0; Target target_hint = set_target_hint(target); FastMathHint fast_math_hint = FastMathHint::DISABLED; - const bool is_opencl = target_hint == Target::CL; - - ConvolutionMethod first_convolution3x3_hint = is_opencl ? ConvolutionMethod::DIRECT : ConvolutionMethod::GEMM; - ConvolutionMethod convolution3x3_hint = ConvolutionMethod::DEFAULT; // Parse arguments if(argc < 2) @@ -101,7 +97,6 @@ public: } graph << target_hint - << first_convolution3x3_hint << fast_math_hint << InputLayer(TensorDescriptor(TensorShape(224U, 224U, 3U, 1U), DataType::F32), get_input_accessor(image, std::move(preprocessor))) @@ -113,7 +108,6 @@ public: PadStrideInfo(1, 1, 1, 1)) .set_name("conv1_1") << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1_1/Relu") - << convolution3x3_hint << ConvolutionLayer( 3U, 3U, 64U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_2_w.npy"), diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp index 47a8d5fa8f..a9a05b69ed 100644 --- a/src/runtime/CL/functions/CLConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp @@ -123,8 +123,42 @@ ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo * ARM_COMPUTE_UNUSED(weights_info); ARM_COMPUTE_UNUSED(gpu_target); + const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT); const size_t idx_c = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL); + /* Input spatial dims, kernel size, IFM/OFM, conv info*/ + using ConvolutionConfiguration = std::tuple; + using ConfigurationMethod = std::pair; + + const std::vector known_configs = + { + // Alexnet + ConfigurationMethod(ConvolutionConfiguration(Size2D(27U, 27U), Size2D(5U, 5U), Size2D(48U, 128U), PadStrideInfo(1U, 1U, 2U, 2U)), ConvolutionMethod::DIRECT), + // VGG16 / VGG19 + ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 64U), PadStrideInfo(1U, 1U, 1U, 1U)), ConvolutionMethod::DIRECT), + // Mobilenet 224 + ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR)), ConvolutionMethod::GEMM), + // Mobilenet 160 + ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR)), ConvolutionMethod::GEMM) + }; + + const auto find_config = [&](ConfigurationMethod c) + { + const ConvolutionConfiguration config = c.first; + const PadStrideInfo info = std::get<3>(config); + + return std::get<0>(config) == Size2D(input->dimension(idx_w), input->dimension(idx_h)) && std::get<1>(config) == Size2D(weights->dimension(idx_w), weights->dimension(idx_h)) + && std::get<2>(config) == Size2D(weights->dimension(idx_c), weights->dimension(3)) && info.pad_top() == conv_info.pad_top() && info.pad_right() == conv_info.pad_right() + && info.pad_bottom() == conv_info.pad_bottom() && info.pad_left() == conv_info.pad_left() && info.stride() == conv_info.stride(); + }; + + std::vector::const_iterator found; + if((found = std::find_if(known_configs.begin(), known_configs.end(), find_config)) != known_configs.end()) + { + return (*found).second; + } + if(dilation != Size2D(1U, 1U) || (input->dimension(idx_c) < 16)) { return ConvolutionMethod::GEMM; diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp index 7053c7e345..96ac95f00c 100644 --- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp @@ -108,6 +108,42 @@ ConvolutionMethod NEConvolutionLayer::get_convolution_method(const ITensorInfo * ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, weights); ARM_COMPUTE_UNUSED(weights_info); + const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT); + const size_t idx_c = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL); + + /* Input spatial dims, kernel size, IFM/OFM, conv info*/ + using ConvolutionConfiguration = std::tuple; + using ConfigurationMethod = std::pair; + + const std::vector known_configs = + { + // Alexnet + ConfigurationMethod(ConvolutionConfiguration(Size2D(27U, 27U), Size2D(5U, 5U), Size2D(48U, 128U), PadStrideInfo(1U, 1U, 2U, 2U)), ConvolutionMethod::GEMM), + // VGG16 / VGG19 + ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 64U), PadStrideInfo(1U, 1U, 1U, 1U)), ConvolutionMethod::GEMM), + // Mobilenet 224 + ConfigurationMethod(ConvolutionConfiguration(Size2D(224U, 224U), Size2D(3U, 3U), Size2D(3U, 32U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR)), ConvolutionMethod::GEMM), + // Mobilenet 160 + ConfigurationMethod(ConvolutionConfiguration(Size2D(160U, 160U), Size2D(3U, 3U), Size2D(3U, 24U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR)), ConvolutionMethod::GEMM) + }; + + const auto find_config = [&](ConfigurationMethod c) + { + const ConvolutionConfiguration config = c.first; + const PadStrideInfo info = std::get<3>(config); + + return std::get<0>(config) == Size2D(input->dimension(idx_w), input->dimension(idx_h)) && std::get<1>(config) == Size2D(weights->dimension(idx_w), weights->dimension(idx_h)) + && std::get<2>(config) == Size2D(weights->dimension(idx_c), weights->dimension(3)) && info.pad_top() == conv_info.pad_top() && info.pad_right() == conv_info.pad_right() + && info.pad_bottom() == conv_info.pad_bottom() && info.pad_left() == conv_info.pad_left() && info.stride() == conv_info.stride(); + }; + + std::vector::const_iterator found; + if((found = std::find_if(known_configs.begin(), known_configs.end(), find_config)) != known_configs.end()) + { + return (*found).second; + } + if(dilation != Size2D(1U, 1U) || Scheduler::get().cpu_info().get_cpu_model() == CPUModel::A53 || input->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL)) <= 16) { -- cgit v1.2.1