diff options
-rw-r--r-- | tests/benchmark_new/CL/DirectConvolutionLayer.cpp | 63 | ||||
-rw-r--r-- | tests/benchmark_new/CL/SYSTEM/AlexNet.cpp | 2 | ||||
-rw-r--r-- | tests/benchmark_new/NEON/DirectConvolutionLayer.cpp | 33 | ||||
-rw-r--r-- | tests/benchmark_new/NEON/SYSTEM/AlexNet.cpp | 2 | ||||
-rw-r--r-- | tests/datasets_new/AlexNetConvolutionLayerDataset.h | 11 | ||||
-rw-r--r-- | tests/datasets_new/GoogLeNetConvolutionLayerDataset.h | 89 | ||||
-rw-r--r-- | tests/fixtures_new/AlexNetFixture.h | 6 | ||||
-rw-r--r-- | tests/networks_new/AlexNetNetwork.h | 242 |
8 files changed, 351 insertions, 97 deletions
diff --git a/tests/benchmark_new/CL/DirectConvolutionLayer.cpp b/tests/benchmark_new/CL/DirectConvolutionLayer.cpp new file mode 100644 index 0000000000..5a4536ceef --- /dev/null +++ b/tests/benchmark_new/CL/DirectConvolutionLayer.cpp @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h" +#include "framework/Macros.h" +#include "framework/datasets/Datasets.h" +#include "tests/CL/CLAccessor.h" +#include "tests/TypePrinter.h" +#include "tests/datasets_new/AlexNetConvolutionLayerDataset.h" +#include "tests/datasets_new/GoogLeNetConvolutionLayerDataset.h" +#include "tests/datasets_new/SqueezeNetConvolutionLayerDataset.h" +#include "tests/fixtures_new/ConvolutionLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +using CLConvolutionLayerFixture = ConvolutionLayerFixture<CLTensor, CLDirectConvolutionLayer, CLAccessor>; + +TEST_SUITE(CL) + +REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetDirectConvolutionLayer, CLConvolutionLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::AlexNetDirectConvolutionLayerDataset(), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("Batches", { 1, 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetDirectConvolutionLayer, CLConvolutionLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetDirectConvolutionLayerDataset(), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("Batches", { 1, 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(SqueezeNetDirectConvolutionLayer, CLConvolutionLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::SqueezeNetConvolutionLayerDataset(), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("Batches", { 1, 4, 8 }))); + +TEST_SUITE_END() +} // namespace test +} // namespace arm_compute diff --git a/tests/benchmark_new/CL/SYSTEM/AlexNet.cpp b/tests/benchmark_new/CL/SYSTEM/AlexNet.cpp index e6d91d9706..271ed9904e 100644 --- a/tests/benchmark_new/CL/SYSTEM/AlexNet.cpp +++ b/tests/benchmark_new/CL/SYSTEM/AlexNet.cpp @@ -28,6 +28,7 @@ #include "arm_compute/runtime/CL/CLTensorAllocator.h" #include "arm_compute/runtime/CL/functions/CLActivationLayer.h" #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" +#include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h" #include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" #include "arm_compute/runtime/CL/functions/CLNormalizationLayer.h" #include "arm_compute/runtime/CL/functions/CLPoolingLayer.h" @@ -48,6 +49,7 @@ using CLAlexNetFixture = AlexNetFixture<ICLTensor, CLAccessor, CLActivationLayer, CLConvolutionLayer, + CLDirectConvolutionLayer, CLFullyConnectedLayer, CLNormalizationLayer, CLPoolingLayer, diff --git a/tests/benchmark_new/NEON/DirectConvolutionLayer.cpp b/tests/benchmark_new/NEON/DirectConvolutionLayer.cpp index dcefbc7512..c4eec697af 100644 --- a/tests/benchmark_new/NEON/DirectConvolutionLayer.cpp +++ b/tests/benchmark_new/NEON/DirectConvolutionLayer.cpp @@ -30,7 +30,9 @@ #include "framework/datasets/Datasets.h" #include "tests/NEON/Accessor.h" #include "tests/TypePrinter.h" -#include "tests/datasets_new/DirectConvolutionLayerDataset.h" +#include "tests/datasets_new/AlexNetConvolutionLayerDataset.h" +#include "tests/datasets_new/GoogLeNetConvolutionLayerDataset.h" +#include "tests/datasets_new/SqueezeNetConvolutionLayerDataset.h" #include "tests/fixtures_new/ConvolutionLayerFixture.h" namespace arm_compute @@ -39,19 +41,32 @@ namespace test { namespace { -#ifdef ARM_COMPUTE_ENABLE_FP16 -const auto data_types = framework::dataset::make("DataType", { DataType::QS8, DataType::F16, DataType::F32 }); -#else /* ARM_COMPUTE_ENABLE_FP16 */ -const auto data_types = framework::dataset::make("DataType", { DataType::QS8, DataType::F32 }); -#endif /* ARM_COMPUTE_ENABLE_FP16 */ +#ifdef ARM_COMPUTE_ENABLE_F16 +const auto alexnet_data_types = framework::dataset::make("DataType", { DataType::QS8, DataType::F16, DataType::F32 }); +const auto googlenet_data_types = framework::dataset::make("DataType", { DataType::QS8, DataType::F16, DataType::F32 }); +const auto squeezenet_data_types = framework::dataset::make("DataType", { DataType::QS8, DataType::F16, DataType::F32 }); +#else /* ARM_COMPUTE_ENABLE_F16 */ +const auto alexnet_data_types = framework::dataset::make("DataType", { DataType::QS8, DataType::F32 }); +const auto googlenet_data_types = framework::dataset::make("DataType", { DataType::QS8, DataType::F32 }); +const auto squeezenet_data_types = framework::dataset::make("DataType", { DataType::QS8, DataType::F32 }); +#endif /* ARM_COMPUTE_ENABLE_F16 */ } // namespace -using NEDirectConvolutionLayerFixture = ConvolutionLayerFixture<Tensor, NEDirectConvolutionLayer, Accessor>; +using NEConvolutionLayerFixture = ConvolutionLayerFixture<Tensor, NEDirectConvolutionLayer, Accessor>; TEST_SUITE(NEON) -REGISTER_FIXTURE_DATA_TEST_CASE(DirectConvolutionLayer, NEDirectConvolutionLayerFixture, framework::DatasetMode::ALL, - framework::dataset::combine(framework::dataset::combine(datasets::DirectConvolutionLayerDataset(), data_types), framework::dataset::make("Batches", { 1, 4, 8 }))); +REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetDirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::AlexNetDirectConvolutionLayerDataset(), alexnet_data_types), + framework::dataset::make("Batches", { 1, 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetDirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetDirectConvolutionLayerDataset(), googlenet_data_types), + framework::dataset::make("Batches", { 1, 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(SqueezeNetDirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::SqueezeNetConvolutionLayerDataset(), squeezenet_data_types), + framework::dataset::make("Batches", { 1, 4, 8 }))); TEST_SUITE_END() } // namespace test diff --git a/tests/benchmark_new/NEON/SYSTEM/AlexNet.cpp b/tests/benchmark_new/NEON/SYSTEM/AlexNet.cpp index 282d3e67bd..b186c9b966 100644 --- a/tests/benchmark_new/NEON/SYSTEM/AlexNet.cpp +++ b/tests/benchmark_new/NEON/SYSTEM/AlexNet.cpp @@ -25,6 +25,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" +#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" #include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h" #include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h" @@ -57,6 +58,7 @@ using NEAlexNetFixture = AlexNetFixture<ITensor, Accessor, NEActivationLayer, NEConvolutionLayer, + NEDirectConvolutionLayer, NEFullyConnectedLayer, NENormalizationLayer, NEPoolingLayer, diff --git a/tests/datasets_new/AlexNetConvolutionLayerDataset.h b/tests/datasets_new/AlexNetConvolutionLayerDataset.h index 0341555638..18421cffe6 100644 --- a/tests/datasets_new/AlexNetConvolutionLayerDataset.h +++ b/tests/datasets_new/AlexNetConvolutionLayerDataset.h @@ -49,6 +49,17 @@ public: add_config(TensorShape(13U, 13U, 384U), TensorShape(3U, 3U, 384U, 256U), TensorShape(256U), TensorShape(13U, 13U, 256U), PadStrideInfo(1, 1, 1, 1)); } }; + +class AlexNetDirectConvolutionLayerDataset final : public ConvolutionLayerDataset +{ +public: + AlexNetDirectConvolutionLayerDataset() + { + add_config(TensorShape(13U, 13U, 256U), TensorShape(3U, 3U, 256U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(13U, 13U, 384U), TensorShape(3U, 3U, 384U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(13U, 13U, 384U), TensorShape(3U, 3U, 384U, 256U), TensorShape(256U), TensorShape(13U, 13U, 256U), PadStrideInfo(1, 1, 1, 1)); + } +}; } // namespace datasets } // namespace test } // namespace arm_compute diff --git a/tests/datasets_new/GoogLeNetConvolutionLayerDataset.h b/tests/datasets_new/GoogLeNetConvolutionLayerDataset.h index e69178a042..fd0c8a6bfe 100644 --- a/tests/datasets_new/GoogLeNetConvolutionLayerDataset.h +++ b/tests/datasets_new/GoogLeNetConvolutionLayerDataset.h @@ -40,6 +40,7 @@ namespace datasets class GoogLeNetConvolutionLayerDataset final : public ConvolutionLayerDataset { public: + // GoogLeNet inception v1 dataset GoogLeNetConvolutionLayerDataset() { // conv1/7x7_s2 @@ -142,6 +143,94 @@ public: add_config(TensorShape(7U, 7U, 48U), TensorShape(5U, 5U, 48U, 128U), TensorShape(128U), TensorShape(7U, 7U, 128U), PadStrideInfo(1, 1, 2, 2)); } }; + +class GoogLeNetDirectConvolutionLayerDataset final : public ConvolutionLayerDataset +{ +public: + // subset of GoogLeNet inception v1 dataset + GoogLeNetDirectConvolutionLayerDataset() + { + // conv2/3x3_reduce + add_config(TensorShape(56U, 56U, 64U), TensorShape(1U, 1U, 64U, 64U), TensorShape(64U), TensorShape(56U, 56U, 64U), PadStrideInfo(1, 1, 0, 0)); + // conv2/3x3 + add_config(TensorShape(56U, 56U, 64U), TensorShape(3U, 3U, 64U, 192U), TensorShape(192U), TensorShape(56U, 56U, 192U), PadStrideInfo(1, 1, 1, 1)); + // inception_3a/1x1 + add_config(TensorShape(28U, 28U, 192U), TensorShape(1U, 1U, 192U, 64U), TensorShape(64U), TensorShape(28U, 28U, 64U), PadStrideInfo(1, 1, 0, 0)); + // inception_3a/3x3_reduce + add_config(TensorShape(28U, 28U, 192U), TensorShape(1U, 1U, 192U, 96U), TensorShape(96U), TensorShape(28U, 28U, 96U), PadStrideInfo(1, 1, 0, 0)); + // inception_3a/3x3 + add_config(TensorShape(28U, 28U, 96U), TensorShape(3U, 3U, 96U, 128U), TensorShape(128U), TensorShape(28U, 28U, 128U), PadStrideInfo(1, 1, 1, 1)); + // inception_3a/5x5_reduce + add_config(TensorShape(28U, 28U, 192U), TensorShape(1U, 1U, 192U, 16U), TensorShape(16U), TensorShape(28U, 28U, 16U), PadStrideInfo(1, 1, 0, 0)); + // inception_3a/pool_proj + add_config(TensorShape(28U, 28U, 192U), TensorShape(1U, 1U, 192U, 32U), TensorShape(32U), TensorShape(28U, 28U, 32U), PadStrideInfo(1, 1, 0, 0)); + // inception_3b/1x1, inception_3b/3x3_reduce + add_config(TensorShape(28U, 28U, 256U), TensorShape(1U, 1U, 256U, 128U), TensorShape(128U), TensorShape(28U, 28U, 128U), PadStrideInfo(1, 1, 0, 0)); + // inception_3b/3x3 + add_config(TensorShape(28U, 28U, 128U), TensorShape(3U, 3U, 128U, 192U), TensorShape(192U), TensorShape(28U, 28U, 192U), PadStrideInfo(1, 1, 1, 1)); + // inception_3b/5x5_reduce + add_config(TensorShape(28U, 28U, 256U), TensorShape(1U, 1U, 256U, 32U), TensorShape(32U), TensorShape(28U, 28U, 32U), PadStrideInfo(1, 1, 0, 0)); + // inception_3b/pool_proj + add_config(TensorShape(28U, 28U, 256U), TensorShape(1U, 1U, 256U, 64U), TensorShape(64U), TensorShape(28U, 28U, 64U), PadStrideInfo(1, 1, 0, 0)); + // inception_4a/1x1 + add_config(TensorShape(14U, 14U, 480U), TensorShape(1U, 1U, 480U, 192U), TensorShape(192U), TensorShape(14U, 14U, 192U), PadStrideInfo(1, 1, 0, 0)); + // inception_4a/3x3_reduce + add_config(TensorShape(14U, 14U, 480U), TensorShape(1U, 1U, 480U, 96U), TensorShape(96U), TensorShape(14U, 14U, 96U), PadStrideInfo(1, 1, 0, 0)); + // inception_4a/3x3 + add_config(TensorShape(14U, 14U, 96U), TensorShape(3U, 3U, 96U, 208U), TensorShape(208U), TensorShape(14U, 14U, 208U), PadStrideInfo(1, 1, 1, 1)); + // inception_4a/pool_proj + add_config(TensorShape(14U, 14U, 480U), TensorShape(1U, 1U, 480U, 64U), TensorShape(64U), TensorShape(14U, 14U, 64U), PadStrideInfo(1, 1, 0, 0)); + // inception_4b/1x1 + add_config(TensorShape(14U, 14U, 512U), TensorShape(1U, 1U, 512U, 160U), TensorShape(160U), TensorShape(14U, 14U, 160U), PadStrideInfo(1, 1, 0, 0)); + // inception_4b/3x3_reduce, inception_4d/1x1 + add_config(TensorShape(14U, 14U, 512U), TensorShape(1U, 1U, 512U, 112U), TensorShape(112U), TensorShape(14U, 14U, 112U), PadStrideInfo(1, 1, 0, 0)); + // inception_4b/3x3 + add_config(TensorShape(14U, 14U, 112U), TensorShape(3U, 3U, 112U, 224U), TensorShape(224U), TensorShape(14U, 14U, 224U), PadStrideInfo(1, 1, 1, 1)); + // inception_4b/5x5_reduce, inception_4c/5x5_reduce + add_config(TensorShape(14U, 14U, 512U), TensorShape(1U, 1U, 512U, 24U), TensorShape(24U), TensorShape(14U, 14U, 24U), PadStrideInfo(1, 1, 0, 0)); + // inception_4b/pool_proj, inception_4c/pool_proj, inception_4d/pool_proj + add_config(TensorShape(14U, 14U, 512U), TensorShape(1U, 1U, 512U, 64U), TensorShape(64U), TensorShape(14U, 14U, 64U), PadStrideInfo(1, 1, 0, 0)); + // inception_4c/1x1, inception_4c/3x3_reduce + add_config(TensorShape(14U, 14U, 512U), TensorShape(1U, 1U, 512U, 128U), TensorShape(128U), TensorShape(14U, 14U, 128U), PadStrideInfo(1, 1, 0, 0)); + // inception_4c/3x3 + add_config(TensorShape(14U, 14U, 128U), TensorShape(3U, 3U, 128U, 256U), TensorShape(256U), TensorShape(14U, 14U, 256U), PadStrideInfo(1, 1, 1, 1)); + // inception_4d/3x3_reduce + add_config(TensorShape(14U, 14U, 512U), TensorShape(1U, 1U, 512U, 144U), TensorShape(144U), TensorShape(14U, 14U, 144U), PadStrideInfo(1, 1, 0, 0)); + // inception_4d/3x3 + add_config(TensorShape(14U, 14U, 144U), TensorShape(3U, 3U, 144U, 288U), TensorShape(288U), TensorShape(14U, 14U, 288U), PadStrideInfo(1, 1, 1, 1)); + // inception_4d/5x5_reduce + add_config(TensorShape(14U, 14U, 512U), TensorShape(1U, 1U, 512U, 32U), TensorShape(32U), TensorShape(14U, 14U, 32U), PadStrideInfo(1, 1, 0, 0)); + // inception_4e/1x1 + add_config(TensorShape(14U, 14U, 528U), TensorShape(1U, 1U, 528U, 256U), TensorShape(256U), TensorShape(14U, 14U, 256U), PadStrideInfo(1, 1, 0, 0)); + // inception_4e/3x3_reduce + add_config(TensorShape(14U, 14U, 528U), TensorShape(1U, 1U, 528U, 160U), TensorShape(160U), TensorShape(14U, 14U, 160U), PadStrideInfo(1, 1, 0, 0)); + // inception_4e/3x3 + add_config(TensorShape(14U, 14U, 160U), TensorShape(3U, 3U, 160U, 320U), TensorShape(320U), TensorShape(14U, 14U, 320U), PadStrideInfo(1, 1, 1, 1)); + // inception_4e/5x5_reduce + add_config(TensorShape(14U, 14U, 528U), TensorShape(1U, 1U, 528U, 32U), TensorShape(32U), TensorShape(14U, 14U, 32U), PadStrideInfo(1, 1, 0, 0)); + // inception_4e/pool_proj + add_config(TensorShape(14U, 14U, 528U), TensorShape(1U, 1U, 528U, 128U), TensorShape(128U), TensorShape(14U, 14U, 128U), PadStrideInfo(1, 1, 0, 0)); + // inception_5a/1x1 + add_config(TensorShape(7U, 7U, 832U), TensorShape(1U, 1U, 832U, 256U), TensorShape(256U), TensorShape(7U, 7U, 256U), PadStrideInfo(1, 1, 0, 0)); + // inception_5a/3x3_reduce + add_config(TensorShape(7U, 7U, 832U), TensorShape(1U, 1U, 832U, 160U), TensorShape(160U), TensorShape(7U, 7U, 160U), PadStrideInfo(1, 1, 0, 0)); + // inception_5a/3x3 + add_config(TensorShape(7U, 7U, 160U), TensorShape(3U, 3U, 160U, 320U), TensorShape(320U), TensorShape(7U, 7U, 320U), PadStrideInfo(1, 1, 1, 1)); + // inception_5a/5x5_reduce + add_config(TensorShape(7U, 7U, 832U), TensorShape(1U, 1U, 832U, 32U), TensorShape(32U), TensorShape(7U, 7U, 32U), PadStrideInfo(1, 1, 0, 0)); + // inception_5a/pool_proj, inception_5b/pool_proj + add_config(TensorShape(7U, 7U, 832U), TensorShape(1U, 1U, 832U, 128U), TensorShape(128U), TensorShape(7U, 7U, 128U), PadStrideInfo(1, 1, 0, 0)); + // inception_5b/1x1 + add_config(TensorShape(7U, 7U, 832U), TensorShape(1U, 1U, 832U, 384U), TensorShape(384U), TensorShape(7U, 7U, 384U), PadStrideInfo(1, 1, 0, 0)); + // inception_5b/3x3_reduce + add_config(TensorShape(7U, 7U, 832U), TensorShape(1U, 1U, 832U, 192U), TensorShape(192U), TensorShape(7U, 7U, 192U), PadStrideInfo(1, 1, 0, 0)); + // inception_5b/3x3 + add_config(TensorShape(7U, 7U, 192U), TensorShape(3U, 3U, 192U, 384U), TensorShape(384U), TensorShape(7U, 7U, 384U), PadStrideInfo(1, 1, 1, 1)); + // inception_5b/5x5_reduce + add_config(TensorShape(7U, 7U, 832U), TensorShape(1U, 1U, 832U, 48U), TensorShape(48U), TensorShape(7U, 7U, 48U), PadStrideInfo(1, 1, 0, 0)); + } +}; + } // namespace datasets } // namespace test } // namespace arm_compute diff --git a/tests/fixtures_new/AlexNetFixture.h b/tests/fixtures_new/AlexNetFixture.h index 75384e53f0..0fbc1b77e3 100644 --- a/tests/fixtures_new/AlexNetFixture.h +++ b/tests/fixtures_new/AlexNetFixture.h @@ -39,6 +39,7 @@ template <typename ITensorType, typename Accessor, typename ActivationLayerFunction, typename ConvolutionLayerFunction, + typename DirectConvolutionLayerFunction, typename FullyConnectedLayerFunction, typename NormalizationLayerFunction, typename PoolingLayerFunction, @@ -50,10 +51,10 @@ public: template <typename...> void setup(DataType data_type, int batches) { - constexpr bool weights_transposed = true; + constexpr bool weights_reshaped = true; constexpr int fixed_point_position = 4; - network.init(data_type, fixed_point_position, batches, weights_transposed); + network.init(data_type, fixed_point_position, batches, weights_reshaped); network.build(); network.allocate(); network.fill_random(); @@ -76,6 +77,7 @@ private: Accessor, ActivationLayerFunction, ConvolutionLayerFunction, + DirectConvolutionLayerFunction, FullyConnectedLayerFunction, NormalizationLayerFunction, PoolingLayerFunction, diff --git a/tests/networks_new/AlexNetNetwork.h b/tests/networks_new/AlexNetNetwork.h index 7e1a855f07..8c801f70d3 100644 --- a/tests/networks_new/AlexNetNetwork.h +++ b/tests/networks_new/AlexNetNetwork.h @@ -43,6 +43,7 @@ template <typename ITensorType, typename Accessor, typename ActivationLayerFunction, typename ConvolutionLayerFunction, + typename DirectConvolutionLayerFunction, typename FullyConnectedLayerFunction, typename NormalizationLayerFunction, typename PoolingLayerFunction, @@ -60,11 +61,104 @@ public: // Initialize weights and biases if(!_reshaped_weights) { - init_weights(); + w[0].allocator()->init(TensorInfo(TensorShape(11U, 11U, 3U, 96U), 1, _data_type, _fixed_point_position)); + b[0].allocator()->init(TensorInfo(TensorShape(96U), 1, _data_type, _fixed_point_position)); + w[1].allocator()->init(TensorInfo(TensorShape(5U, 5U, 48U, 256U), 1, _data_type, _fixed_point_position)); + b[1].allocator()->init(TensorInfo(TensorShape(256U), 1, _data_type, _fixed_point_position)); + w[2].allocator()->init(TensorInfo(TensorShape(3U, 3U, 256U, 384U), 1, _data_type, _fixed_point_position)); + b[2].allocator()->init(TensorInfo(TensorShape(384U), 1, _data_type, _fixed_point_position)); + w[3].allocator()->init(TensorInfo(TensorShape(3U, 3U, 192U, 384U), 1, _data_type, _fixed_point_position)); + b[3].allocator()->init(TensorInfo(TensorShape(384U), 1, _data_type, _fixed_point_position)); + w[4].allocator()->init(TensorInfo(TensorShape(3U, 3U, 192U, 256U), 1, _data_type, _fixed_point_position)); + b[4].allocator()->init(TensorInfo(TensorShape(256U), 1, _data_type, _fixed_point_position)); + w[5].allocator()->init(TensorInfo(TensorShape(9216U, 4096U), 1, _data_type, _fixed_point_position)); + b[5].allocator()->init(TensorInfo(TensorShape(4096U), 1, _data_type, _fixed_point_position)); + w[6].allocator()->init(TensorInfo(TensorShape(4096U, 4096U), 1, _data_type, _fixed_point_position)); + b[6].allocator()->init(TensorInfo(TensorShape(4096U), 1, _data_type, _fixed_point_position)); + w[7].allocator()->init(TensorInfo(TensorShape(4096U, 1000U), 1, _data_type, _fixed_point_position)); + b[7].allocator()->init(TensorInfo(TensorShape(1000U), 1, _data_type, _fixed_point_position)); + + w21 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[1], TensorShape(5U, 5U, 48U, 128U), Coordinates())); + w22 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[1], TensorShape(5U, 5U, 48U, 128U), Coordinates(0, 0, 0, 128))); + b21 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[1], TensorShape(128U), Coordinates())); + b22 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[1], TensorShape(128U), Coordinates(128))); + + w41 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[3], TensorShape(3U, 3U, 192U, 192U), Coordinates())); + w42 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[3], TensorShape(3U, 3U, 192U, 192U), Coordinates(0, 0, 0, 192))); + b41 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[3], TensorShape(192U), Coordinates())); + b42 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[3], TensorShape(192U), Coordinates(192))); + + w51 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[4], TensorShape(3U, 3U, 192U, 128U), Coordinates())); + w52 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[4], TensorShape(3U, 3U, 192U, 128U), Coordinates(0, 0, 0, 128))); + b51 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[4], TensorShape(128U), Coordinates())); + b52 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[4], TensorShape(128U), Coordinates(128))); } else { - init_reshaped_weights(); + const unsigned int data_type_size = 16 / arm_compute::data_size_from_type(_data_type); + + // Create tensor for the reshaped weights + auto w21_tensor = std::unique_ptr<TensorType>(new TensorType()); + auto w22_tensor = std::unique_ptr<TensorType>(new TensorType()); + + w[0].allocator()->init(TensorInfo(TensorShape(366U * data_type_size, 96U / data_type_size), 1, _data_type, _fixed_point_position)); + w21_tensor->allocator()->init(TensorInfo(TensorShape(1248U * data_type_size, 128U / data_type_size), 1, _data_type, _fixed_point_position)); + w22_tensor->allocator()->init(TensorInfo(TensorShape(1248U * data_type_size, 128U / data_type_size), 1, _data_type, _fixed_point_position)); + w21 = std::move(w21_tensor); + w22 = std::move(w22_tensor); + + // Configure the direct convolution's weights. Direct convolution doesn't need reshape weights + if(!_is_direct_conv) + { + auto w41_tensor = std::unique_ptr<TensorType>(new TensorType()); + auto w42_tensor = std::unique_ptr<TensorType>(new TensorType()); + auto w51_tensor = std::unique_ptr<TensorType>(new TensorType()); + auto w52_tensor = std::unique_ptr<TensorType>(new TensorType()); + w41_tensor->allocator()->init(TensorInfo(TensorShape(1920U * data_type_size, 192U / data_type_size), 1, _data_type, _fixed_point_position)); + w42_tensor->allocator()->init(TensorInfo(TensorShape(1920U * data_type_size, 192U / data_type_size), 1, _data_type, _fixed_point_position)); + w51_tensor->allocator()->init(TensorInfo(TensorShape(1920U * data_type_size, 128U / data_type_size), 1, _data_type, _fixed_point_position)); + w52_tensor->allocator()->init(TensorInfo(TensorShape(1920U * data_type_size, 128U / data_type_size), 1, _data_type, _fixed_point_position)); + w[2].allocator()->init(TensorInfo(TensorShape(2560U * data_type_size, 384U / data_type_size), 1, _data_type, _fixed_point_position)); + w41 = std::move(w41_tensor); + w42 = std::move(w42_tensor); + w51 = std::move(w51_tensor); + w52 = std::move(w52_tensor); + } + else + { + w[2].allocator()->init(TensorInfo(TensorShape(3U, 3U, 256U, 384U), 1, _data_type, _fixed_point_position)); + b[2].allocator()->init(TensorInfo(TensorShape(384U), 1, _data_type, _fixed_point_position)); + w[3].allocator()->init(TensorInfo(TensorShape(3U, 3U, 192U, 384U), 1, _data_type, _fixed_point_position)); + b[3].allocator()->init(TensorInfo(TensorShape(384U), 1, _data_type, _fixed_point_position)); + w[4].allocator()->init(TensorInfo(TensorShape(3U, 3U, 192U, 256U), 1, _data_type, _fixed_point_position)); + b[4].allocator()->init(TensorInfo(TensorShape(256U), 1, _data_type, _fixed_point_position)); + w41 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[3], TensorShape(3U, 3U, 192U, 192U), Coordinates())); + w42 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[3], TensorShape(3U, 3U, 192U, 192U), Coordinates(0, 0, 0, 192))); + b41 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[3], TensorShape(192U), Coordinates())); + b42 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[3], TensorShape(192U), Coordinates(192))); + + w51 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[4], TensorShape(3U, 3U, 192U, 128U), Coordinates())); + w52 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[4], TensorShape(3U, 3U, 192U, 128U), Coordinates(0, 0, 0, 128))); + b51 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[4], TensorShape(128U), Coordinates())); + b52 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[4], TensorShape(128U), Coordinates(128))); + } + + b[5].allocator()->init(TensorInfo(TensorShape(4096U), 1, _data_type, _fixed_point_position)); + b[6].allocator()->init(TensorInfo(TensorShape(4096U), 1, _data_type, _fixed_point_position)); + b[7].allocator()->init(TensorInfo(TensorShape(1000U), 1, _data_type, _fixed_point_position)); + + if(_batches > 1) + { + w[5].allocator()->init(TensorInfo(TensorShape(9216U * data_type_size, 4096U / data_type_size), 1, _data_type, _fixed_point_position)); + w[6].allocator()->init(TensorInfo(TensorShape(4096U * data_type_size, 4096U / data_type_size), 1, _data_type, _fixed_point_position)); + w[7].allocator()->init(TensorInfo(TensorShape(4096U * data_type_size, 1000U / data_type_size), 1, _data_type, _fixed_point_position)); + } + else + { + w[5].allocator()->init(TensorInfo(TensorShape(4096U, 9216U), 1, _data_type, _fixed_point_position)); + w[6].allocator()->init(TensorInfo(TensorShape(4096U, 4096U), 1, _data_type, _fixed_point_position)); + w[7].allocator()->init(TensorInfo(TensorShape(1000U, 4096U), 1, _data_type, _fixed_point_position)); + } } } @@ -129,7 +223,7 @@ public: norm2.configure(&act2_out, &norm2_out, NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)); pool2.configure(&norm2_out, &pool2_out, PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0))); // Layer 3 - TensorType *b2 = _reshaped_weights ? nullptr : &b[2]; + TensorType *b2 = (_reshaped_weights && !_is_direct_conv) ? nullptr : &b[2]; conv3.configure(&pool2_out, &w[2], b2, &conv3_out, PadStrideInfo(1, 1, 1, 1), WeightsInfo(_reshaped_weights, 3U, 3U)); act3.configure(&conv3_out, &act3_out, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); // Layer 4 @@ -184,10 +278,21 @@ public: dynamic_cast<TensorType *>(w21.get())->allocator()->allocate(); dynamic_cast<TensorType *>(w22.get())->allocator()->allocate(); - dynamic_cast<TensorType *>(w41.get())->allocator()->allocate(); - dynamic_cast<TensorType *>(w42.get())->allocator()->allocate(); - dynamic_cast<TensorType *>(w51.get())->allocator()->allocate(); - dynamic_cast<TensorType *>(w52.get())->allocator()->allocate(); + if(!_is_direct_conv) + { + dynamic_cast<TensorType *>(w41.get())->allocator()->allocate(); + dynamic_cast<TensorType *>(w42.get())->allocator()->allocate(); + dynamic_cast<TensorType *>(w51.get())->allocator()->allocate(); + dynamic_cast<TensorType *>(w52.get())->allocator()->allocate(); + } + else + { + b[2].allocator()->allocate(); + b[3].allocator()->allocate(); + b[4].allocator()->allocate(); + w[3].allocator()->allocate(); + w[4].allocator()->allocate(); + } } conv1_out.allocator()->allocate(); @@ -239,10 +344,21 @@ public: library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w21.get())), 9); library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w22.get())), 10); - library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w41.get())), 11); - library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w42.get())), 12); - library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w51.get())), 13); - library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w52.get())), 14); + + if(!_is_direct_conv) + { + library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w41.get())), 11); + library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w42.get())), 12); + library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w51.get())), 13); + library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w52.get())), 14); + } + else + { + library->fill_tensor_uniform(Accessor(w[3]), 11); + library->fill_tensor_uniform(Accessor(b[3]), 12); + library->fill_tensor_uniform(Accessor(w[4]), 13); + library->fill_tensor_uniform(Accessor(b[4]), 14); + } } } @@ -340,6 +456,15 @@ public: b[5].allocator()->free(); b[6].allocator()->free(); b[7].allocator()->free(); + + if(_is_direct_conv) + { + w[3].allocator()->free(); + w[4].allocator()->free(); + b[2].allocator()->free(); + b[3].allocator()->free(); + b[4].allocator()->free(); + } } w21.reset(); @@ -416,94 +541,39 @@ public: } private: - void init_weights() + struct DirectConv { - w[0].allocator()->init(TensorInfo(TensorShape(11U, 11U, 3U, 96U), 1, _data_type, _fixed_point_position)); - b[0].allocator()->init(TensorInfo(TensorShape(96U), 1, _data_type, _fixed_point_position)); - w[1].allocator()->init(TensorInfo(TensorShape(5U, 5U, 48U, 256U), 1, _data_type, _fixed_point_position)); - b[1].allocator()->init(TensorInfo(TensorShape(256U), 1, _data_type, _fixed_point_position)); - w[2].allocator()->init(TensorInfo(TensorShape(3U, 3U, 256U, 384U), 1, _data_type, _fixed_point_position)); - b[2].allocator()->init(TensorInfo(TensorShape(384U), 1, _data_type, _fixed_point_position)); - w[3].allocator()->init(TensorInfo(TensorShape(3U, 3U, 192U, 384U), 1, _data_type, _fixed_point_position)); - b[3].allocator()->init(TensorInfo(TensorShape(384U), 1, _data_type, _fixed_point_position)); - w[4].allocator()->init(TensorInfo(TensorShape(3U, 3U, 192U, 256U), 1, _data_type, _fixed_point_position)); - b[4].allocator()->init(TensorInfo(TensorShape(256U), 1, _data_type, _fixed_point_position)); - w[5].allocator()->init(TensorInfo(TensorShape(9216U, 4096U), 1, _data_type, _fixed_point_position)); - b[5].allocator()->init(TensorInfo(TensorShape(4096U), 1, _data_type, _fixed_point_position)); - w[6].allocator()->init(TensorInfo(TensorShape(4096U, 4096U), 1, _data_type, _fixed_point_position)); - b[6].allocator()->init(TensorInfo(TensorShape(4096U), 1, _data_type, _fixed_point_position)); - w[7].allocator()->init(TensorInfo(TensorShape(4096U, 1000U), 1, _data_type, _fixed_point_position)); - b[7].allocator()->init(TensorInfo(TensorShape(1000U), 1, _data_type, _fixed_point_position)); - - w21 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[1], TensorShape(5U, 5U, 48U, 128U), Coordinates())); - w22 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[1], TensorShape(5U, 5U, 48U, 128U), Coordinates(0, 0, 0, 128))); - b21 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[1], TensorShape(128U), Coordinates())); - b22 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[1], TensorShape(128U), Coordinates(128))); - - w41 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[3], TensorShape(3U, 3U, 192U, 192U), Coordinates())); - w42 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[3], TensorShape(3U, 3U, 192U, 192U), Coordinates(0, 0, 0, 192))); - b41 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[3], TensorShape(192U), Coordinates())); - b42 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[3], TensorShape(192U), Coordinates(192))); - - w51 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[4], TensorShape(3U, 3U, 192U, 128U), Coordinates())); - w52 = std::unique_ptr<SubTensorType>(new SubTensorType(&w[4], TensorShape(3U, 3U, 192U, 128U), Coordinates(0, 0, 0, 128))); - b51 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[4], TensorShape(128U), Coordinates())); - b52 = std::unique_ptr<SubTensorType>(new SubTensorType(&b[4], TensorShape(128U), Coordinates(128))); - } + template <typename ConvolutionLayerFunction1 = ConvolutionLayerFunction, typename DirectConvolutionLayerFunction1 = DirectConvolutionLayerFunction> + typename std::enable_if < !std::is_same<ConvolutionLayerFunction1, DirectConvolutionLayerFunction1>::value, void >::type + configure(ITensorType *input, const ITensorType *weights, const ITensorType *biases, ITensorType *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo()) + { + _func.configure(input, weights, biases, output, conv_info); + } - void init_reshaped_weights() - { - const unsigned int data_type_size = 16 / arm_compute::data_size_from_type(_data_type); - - // Create tensor for the reshaped weights - auto w21_tensor = std::unique_ptr<TensorType>(new TensorType()); - auto w22_tensor = std::unique_ptr<TensorType>(new TensorType()); - auto w41_tensor = std::unique_ptr<TensorType>(new TensorType()); - auto w42_tensor = std::unique_ptr<TensorType>(new TensorType()); - auto w51_tensor = std::unique_ptr<TensorType>(new TensorType()); - auto w52_tensor = std::unique_ptr<TensorType>(new TensorType()); - - w[0].allocator()->init(TensorInfo(TensorShape(366U * data_type_size, 96U / data_type_size), 1, _data_type, _fixed_point_position)); - w21_tensor->allocator()->init(TensorInfo(TensorShape(1248U * data_type_size, 128U / data_type_size), 1, _data_type, _fixed_point_position)); - w22_tensor->allocator()->init(TensorInfo(TensorShape(1248U * data_type_size, 128U / data_type_size), 1, _data_type, _fixed_point_position)); - w[2].allocator()->init(TensorInfo(TensorShape(2560U * data_type_size, 384U / data_type_size), 1, _data_type, _fixed_point_position)); - w41_tensor->allocator()->init(TensorInfo(TensorShape(1920U * data_type_size, 192U / data_type_size), 1, _data_type, _fixed_point_position)); - w42_tensor->allocator()->init(TensorInfo(TensorShape(1920U * data_type_size, 192U / data_type_size), 1, _data_type, _fixed_point_position)); - w51_tensor->allocator()->init(TensorInfo(TensorShape(1920U * data_type_size, 128U / data_type_size), 1, _data_type, _fixed_point_position)); - w52_tensor->allocator()->init(TensorInfo(TensorShape(1920U * data_type_size, 128U / data_type_size), 1, _data_type, _fixed_point_position)); - - w21 = std::move(w21_tensor); - w22 = std::move(w22_tensor); - w41 = std::move(w41_tensor); - w42 = std::move(w42_tensor); - w51 = std::move(w51_tensor); - w52 = std::move(w52_tensor); - - b[5].allocator()->init(TensorInfo(TensorShape(4096U), 1, _data_type, _fixed_point_position)); - b[6].allocator()->init(TensorInfo(TensorShape(4096U), 1, _data_type, _fixed_point_position)); - b[7].allocator()->init(TensorInfo(TensorShape(1000U), 1, _data_type, _fixed_point_position)); - - if(_batches > 1) + template <typename ConvolutionLayerFunction1 = ConvolutionLayerFunction, typename DirectConvolutionLayerFunction1 = DirectConvolutionLayerFunction> + typename std::enable_if<std::is_same<ConvolutionLayerFunction1, DirectConvolutionLayerFunction1>::value, void>::type + configure(ITensorType *input, const ITensorType *weights, const ITensorType *biases, ITensorType *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo()) { - w[5].allocator()->init(TensorInfo(TensorShape(9216U * data_type_size, 4096U / data_type_size), 1, _data_type, _fixed_point_position)); - w[6].allocator()->init(TensorInfo(TensorShape(4096U * data_type_size, 4096U / data_type_size), 1, _data_type, _fixed_point_position)); - w[7].allocator()->init(TensorInfo(TensorShape(4096U * data_type_size, 1000U / data_type_size), 1, _data_type, _fixed_point_position)); + _func.configure(input, weights, biases, output, conv_info, weights_info); } - else + + void run() { - w[5].allocator()->init(TensorInfo(TensorShape(4096U, 9216U), 1, _data_type, _fixed_point_position)); - w[6].allocator()->init(TensorInfo(TensorShape(4096U, 4096U), 1, _data_type, _fixed_point_position)); - w[7].allocator()->init(TensorInfo(TensorShape(1000U, 4096U), 1, _data_type, _fixed_point_position)); + _func.run(); } - } + + DirectConvolutionLayerFunction _func{}; + }; DataType _data_type{ DataType::UNKNOWN }; int _fixed_point_position{ 0 }; unsigned int _batches{ 0 }; bool _reshaped_weights{ false }; + bool _is_direct_conv{ !std::is_same<ConvolutionLayerFunction, DirectConvolutionLayerFunction>::value }; ActivationLayerFunction act1{}, act2{}, act3{}, act4{}, act5{}, act6{}, act7{}; - ConvolutionLayerFunction conv1{}, conv21{}, conv22{}, conv3{}, conv41{}, conv42{}, conv51{}, conv52{}; + ConvolutionLayerFunction conv1{}, conv21{}, conv22{}; + DirectConv conv3{}, conv41{}, conv42{}, conv51{}, conv52{}; FullyConnectedLayerFunction fc6{}, fc7{}, fc8{}; NormalizationLayerFunction norm1{}, norm2{}; PoolingLayerFunction pool1{}, pool2{}, pool5{}; |