diff options
-rw-r--r-- | arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h | 1 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp | 52 | ||||
-rw-r--r-- | tests/benchmark/NEON/DirectConvolutionLayer.cpp | 70 | ||||
-rw-r--r-- | tests/benchmark/NEON/SYSTEM/AlexNet.cpp | 7 | ||||
-rw-r--r-- | tests/validation/NEON/DirectConvolutionLayer.cpp | 3 | ||||
-rw-r--r-- | tests/validation/NEON/SYSTEM/AlexNet.cpp | 3 |
6 files changed, 59 insertions, 77 deletions
diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h index 4de6c12218..928ac3569d 100644 --- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h @@ -74,6 +74,7 @@ private: PadStrideInfo _conv_info; BorderSize _border_size; unsigned int _kernel_size; + unsigned int _num_weight_elems_read_per_row; unsigned int _num_elems_read_per_iteration; unsigned int _num_elems_written_per_iteration; }; diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp index d4171c5a67..d23a2e5847 100644 --- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp @@ -1234,7 +1234,8 @@ inline void convolve_5x5(const Window &window, unsigned int num_elems_read_per_i } // namespace NEDirectConvolutionLayerKernel::NEDirectConvolutionLayerKernel() - : _input(nullptr), _weights(nullptr), _output(nullptr), _conv_info(), _border_size(0), _kernel_size(0), _num_elems_read_per_iteration(0), _num_elems_written_per_iteration(0) + : _input(nullptr), _weights(nullptr), _output(nullptr), _conv_info(), _border_size(0), _kernel_size(0), _num_weight_elems_read_per_row(0), _num_elems_read_per_iteration(0), + _num_elems_written_per_iteration(0) { } @@ -1296,8 +1297,6 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, output->info()->data_type()); - Window win = calculate_max_window(*output->info()); - switch(_kernel_size) { case 1: @@ -1318,13 +1317,8 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens ARM_COMPUTE_ERROR("Data type not supported."); break; } - - _num_elems_read_per_iteration = conv_stride_x * _num_elems_written_per_iteration; - win = calculate_max_window(*output->info(), Steps(_num_elems_written_per_iteration)); - AccessWindowHorizontal input_access(input->info(), 0, _num_elems_read_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, _num_elems_written_per_iteration); - update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); + _num_weight_elems_read_per_row = kernel_size; + _num_elems_read_per_iteration = conv_stride_x * _num_elems_written_per_iteration; break; } case 3: @@ -1333,6 +1327,7 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens switch(input->info()->data_type()) { case DataType::F32: + _num_weight_elems_read_per_row = 4 + _kernel_size - 1; _num_elems_read_per_iteration = 12; _num_elems_written_per_iteration = 16 >> conv_stride_x; break; @@ -1341,6 +1336,7 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens #endif /* ARM_COMPUTE_ENABLE_FP16 */ case DataType::QS8: case DataType::QS16: + _num_weight_elems_read_per_row = 8 + _kernel_size - 1; _num_elems_read_per_iteration = 24; _num_elems_written_per_iteration = 32 >> conv_stride_x; break; @@ -1348,26 +1344,8 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens ARM_COMPUTE_ERROR("Data type not supported."); break; } - - // Calculate right and bottom border - const unsigned int conv_stride_y = std::get<1>(_conv_info.stride()); - const int input_width = input->info()->dimension(0); - const int input_height = input->info()->dimension(1); - const int upper_bound_w = ceil_to_multiple(((output->info()->dimension(0) - 1) * conv_stride_x + _kernel_size), _num_elems_read_per_iteration) - conv_pad_x - input_width; - const int upper_bound_h = ((output->info()->dimension(1) - 1) * conv_stride_y - conv_pad_y + _kernel_size) - input_height; - _border_size.right = std::max(upper_bound_w, static_cast<int>(_kernel_size)); - _border_size.bottom = std::max(upper_bound_h, static_cast<int>(_kernel_size)); - - // Create window and update padding - win = calculate_max_window(*output->info(), Steps(_num_elems_written_per_iteration)); - AccessWindowStatic input_access(input->info(), -conv_pad_x, -conv_pad_y, input_width + _border_size.right, input_height + _border_size.bottom); - AccessWindowStatic weights_access(weights->info(), 0, 0, _kernel_size, _kernel_size); - AccessWindowHorizontal output_access(output->info(), 0, _num_elems_written_per_iteration); - update_window_and_padding(win, input_access, weights_access, output_access); - output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - - break; } + break; default: { ARM_COMPUTE_ERROR("Not implemented"); @@ -1375,6 +1353,22 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens } } + // Calculate right and bottom border + const unsigned int conv_stride_y = std::get<1>(_conv_info.stride()); + const int input_width = input->info()->dimension(0); + const int input_height = input->info()->dimension(1); + const int upper_bound_w = ceil_to_multiple(((output->info()->dimension(0) - 1) * conv_stride_x + _kernel_size), _num_elems_read_per_iteration) - conv_pad_x - input_width; + const int upper_bound_h = ((output->info()->dimension(1) - 1) * conv_stride_y - conv_pad_y + _kernel_size) - input_height; + _border_size.right = std::max(upper_bound_w, static_cast<int>(_kernel_size)); + _border_size.bottom = std::max(upper_bound_h, static_cast<int>(_kernel_size)); + + Window win = calculate_max_window(*output->info(), Steps(_num_elems_written_per_iteration)); + AccessWindowStatic input_access(input->info(), -conv_pad_x, -conv_pad_y, input_width + _border_size.right, input_height + _border_size.bottom); + AccessWindowStatic weights_access(weights->info(), 0, 0, _num_weight_elems_read_per_row, _kernel_size); + AccessWindowHorizontal output_access(output->info(), 0, _num_elems_written_per_iteration); + update_window_and_padding(win, input_access, weights_access, output_access); + output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); + INEKernel::configure(win); } diff --git a/tests/benchmark/NEON/DirectConvolutionLayer.cpp b/tests/benchmark/NEON/DirectConvolutionLayer.cpp index 2c94118623..a9dd4b94ea 100644 --- a/tests/benchmark/NEON/DirectConvolutionLayer.cpp +++ b/tests/benchmark/NEON/DirectConvolutionLayer.cpp @@ -56,56 +56,46 @@ using NEConvolutionLayerFixture = ConvolutionLayerFixture<Tensor, NEDirectConvol TEST_SUITE(NEON) -//FIXME: COMPMID-526 -DISABLED_REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetDirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL, - framework::dataset::combine(framework::dataset::combine(datasets::AlexNetDirectConvolutionLayerDataset(), data_types), - framework::dataset::make("Batches", 1))); +REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetDirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::AlexNetDirectConvolutionLayerDataset(), data_types), + framework::dataset::make("Batches", 1))); -//FIXME: COMPMID-526 -DISABLED_REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL, - framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1DirectConvolutionLayerDataset(), data_types), - framework::dataset::make("Batches", 1))); +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1DirectConvolutionLayerDataset(), data_types), + framework::dataset::make("Batches", 1))); -//FIXME: COMPMID-526 -DISABLED_REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL, - framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4DirectConvolutionLayerDataset(), data_types), - framework::dataset::make("Batches", 1))); +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4DirectConvolutionLayerDataset(), data_types), + framework::dataset::make("Batches", 1))); -//FIXME: COMPMID-526 -DISABLED_REGISTER_FIXTURE_DATA_TEST_CASE(SqueezeNetDirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL, - framework::dataset::combine(framework::dataset::combine(datasets::SqueezeNetConvolutionLayerDataset(), data_types), - framework::dataset::make("Batches", 1))); +REGISTER_FIXTURE_DATA_TEST_CASE(SqueezeNetDirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::SqueezeNetConvolutionLayerDataset(), data_types), + framework::dataset::make("Batches", 1))); TEST_SUITE(NIGHTLY) -//FIXME: COMPMID-526 -DISABLED_REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetDirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, - framework::dataset::combine(framework::dataset::combine(datasets::AlexNetDirectConvolutionLayerDataset(), data_types), - framework::dataset::make("Batches", { 4, 8 }))); +REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetDirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::AlexNetDirectConvolutionLayerDataset(), data_types), + framework::dataset::make("Batches", { 4, 8 }))); -//FIXME: COMPMID-526 -DISABLED_REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, - framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1DirectConvolutionLayerDataset(), data_types), - framework::dataset::make("Batches", { 4, 8 }))); +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1DirectConvolutionLayerDataset(), data_types), + framework::dataset::make("Batches", { 4, 8 }))); -//FIXME: COMPMID-526 -DISABLED_REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, - framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4DirectConvolutionLayerDataset(), data_types), - framework::dataset::make("Batches", { 4, 8 }))); +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4DirectConvolutionLayerDataset(), data_types), + framework::dataset::make("Batches", { 4, 8 }))); -//FIXME: COMPMID-526 -DISABLED_REGISTER_FIXTURE_DATA_TEST_CASE(SqueezeNetDirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, - framework::dataset::combine(framework::dataset::combine(datasets::SqueezeNetConvolutionLayerDataset(), data_types), - framework::dataset::make("Batches", { 4, 8 }))); +REGISTER_FIXTURE_DATA_TEST_CASE(SqueezeNetDirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::SqueezeNetConvolutionLayerDataset(), data_types), + framework::dataset::make("Batches", { 4, 8 }))); -//FIXME: COMPMID-526 -DISABLED_REGISTER_FIXTURE_DATA_TEST_CASE(VGG16DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, - framework::dataset::combine(framework::dataset::combine(datasets::VGG16ConvolutionLayerDataset(), data_types), - framework::dataset::make("Batches", { 1, 4, 8 }))); +REGISTER_FIXTURE_DATA_TEST_CASE(VGG16DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::VGG16ConvolutionLayerDataset(), data_types), + framework::dataset::make("Batches", { 1, 4, 8 }))); -//FIXME: COMPMID-526 -DISABLED_REGISTER_FIXTURE_DATA_TEST_CASE(YOLOV2DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, - framework::dataset::combine(framework::dataset::combine(datasets::YOLOV2ConvolutionLayerDataset(), data_types), - framework::dataset::make("Batches", { 1, 4, 8 }))); +REGISTER_FIXTURE_DATA_TEST_CASE(YOLOV2DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::YOLOV2ConvolutionLayerDataset(), data_types), + framework::dataset::make("Batches", { 1, 4, 8 }))); TEST_SUITE_END() TEST_SUITE_END() diff --git a/tests/benchmark/NEON/SYSTEM/AlexNet.cpp b/tests/benchmark/NEON/SYSTEM/AlexNet.cpp index 9de5244ed2..cd48e5db25 100644 --- a/tests/benchmark/NEON/SYSTEM/AlexNet.cpp +++ b/tests/benchmark/NEON/SYSTEM/AlexNet.cpp @@ -67,10 +67,9 @@ using NEAlexNetFixture = AlexNetFixture<ITensor, TEST_SUITE(NEON) TEST_SUITE(SYSTEM_TEST) -//FIXME: COMPMID-526 -DISABLED_REGISTER_FIXTURE_DATA_TEST_CASE(AlexNet, NEAlexNetFixture, framework::DatasetMode::ALL, - framework::dataset::combine(alex_net_data_types, - framework::dataset::make("Batches", { 1, 4, 8 }))); +REGISTER_FIXTURE_DATA_TEST_CASE(AlexNet, NEAlexNetFixture, framework::DatasetMode::ALL, + framework::dataset::combine(alex_net_data_types, + framework::dataset::make("Batches", { 1, 4, 8 }))); TEST_SUITE_END() TEST_SUITE_END() diff --git a/tests/validation/NEON/DirectConvolutionLayer.cpp b/tests/validation/NEON/DirectConvolutionLayer.cpp index a0051d8286..6211d31c45 100644 --- a/tests/validation/NEON/DirectConvolutionLayer.cpp +++ b/tests/validation/NEON/DirectConvolutionLayer.cpp @@ -109,8 +109,7 @@ TEST_SUITE_END() #endif /* ARM_COMPUTE_ENABLE_FP16 */ TEST_SUITE(FP32) -//FIXME: COMPMID-526 -DISABLED_FIXTURE_DATA_TEST_CASE(Run, NEDirectConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(data_f32, framework::dataset::make("DataType", DataType::F32))) +FIXTURE_DATA_TEST_CASE(Run, NEDirectConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(data_f32, framework::dataset::make("DataType", DataType::F32))) { // Validate output validate(Accessor(_target), _reference, tolerance_fp32); diff --git a/tests/validation/NEON/SYSTEM/AlexNet.cpp b/tests/validation/NEON/SYSTEM/AlexNet.cpp index dbc38883e7..7a2b0d22b0 100644 --- a/tests/validation/NEON/SYSTEM/AlexNet.cpp +++ b/tests/validation/NEON/SYSTEM/AlexNet.cpp @@ -96,8 +96,7 @@ std::vector<unsigned int> compute_alexnet(DataType dt, unsigned int batches, std TEST_SUITE(NEON) TEST_SUITE(SYSTEM_TESTS) -//FIXME: COMPMID-526 -DISABLED_TEST_CASE(AlexNet, framework::DatasetMode::PRECOMMIT) +TEST_CASE(AlexNet, framework::DatasetMode::PRECOMMIT) { // Compute alexnet std::vector<unsigned int> classified_labels = compute_alexnet(DataType::F32, 1, "cnn_data/imagenet_data/cat.npy"); |