about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h | 1
-rw-r--r-- src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp | 52
-rw-r--r-- tests/benchmark/NEON/DirectConvolutionLayer.cpp | 70
-rw-r--r-- tests/benchmark/NEON/SYSTEM/AlexNet.cpp | 7
-rw-r--r-- tests/validation/NEON/DirectConvolutionLayer.cpp | 3
-rw-r--r-- tests/validation/NEON/SYSTEM/AlexNet.cpp | 3
6 files changed, 59 insertions, 77 deletions
diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
index 4de6c12218..928ac3569d 100644
--- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
@@ -74,6 +74,7 @@ private:
PadStrideInfo _conv_info;
BorderSize _border_size;
unsigned int _kernel_size;
+ unsigned int _num_weight_elems_read_per_row;
unsigned int _num_elems_read_per_iteration;
unsigned int _num_elems_written_per_iteration;
};
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
index d4171c5a67..d23a2e5847 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
@@ -1234,7 +1234,8 @@ inline void convolve_5x5(const Window &window, unsigned int num_elems_read_per_i
} // namespace
NEDirectConvolutionLayerKernel::NEDirectConvolutionLayerKernel()
- : _input(nullptr), _weights(nullptr), _output(nullptr), _conv_info(), _border_size(0), _kernel_size(0), _num_elems_read_per_iteration(0), _num_elems_written_per_iteration(0)
+ : _input(nullptr), _weights(nullptr), _output(nullptr), _conv_info(), _border_size(0), _kernel_size(0), _num_weight_elems_read_per_row(0), _num_elems_read_per_iteration(0),
+ _num_elems_written_per_iteration(0)
{
}
@@ -1296,8 +1297,6 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens
ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, output->info()->data_type());
- Window win = calculate_max_window(*output->info());
-
switch(_kernel_size)
{
case 1:
@@ -1318,13 +1317,8 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens
ARM_COMPUTE_ERROR("Data type not supported.");
break;
}
-
- _num_elems_read_per_iteration = conv_stride_x * _num_elems_written_per_iteration;
- win = calculate_max_window(*output->info(), Steps(_num_elems_written_per_iteration));
- AccessWindowHorizontal input_access(input->info(), 0, _num_elems_read_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, _num_elems_written_per_iteration);
- update_window_and_padding(win, input_access, output_access);
- output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
+ _num_weight_elems_read_per_row = kernel_size;
+ _num_elems_read_per_iteration = conv_stride_x * _num_elems_written_per_iteration;
break;
}
case 3:
@@ -1333,6 +1327,7 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens
switch(input->info()->data_type())
{
case DataType::F32:
+ _num_weight_elems_read_per_row = 4 + _kernel_size - 1;
_num_elems_read_per_iteration = 12;
_num_elems_written_per_iteration = 16 >> conv_stride_x;
break;
@@ -1341,6 +1336,7 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens
#endif /* ARM_COMPUTE_ENABLE_FP16 */
case DataType::QS8:
case DataType::QS16:
+ _num_weight_elems_read_per_row = 8 + _kernel_size - 1;
_num_elems_read_per_iteration = 24;
_num_elems_written_per_iteration = 32 >> conv_stride_x;
break;
@@ -1348,26 +1344,8 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens
ARM_COMPUTE_ERROR("Data type not supported.");
break;
}
-
- // Calculate right and bottom border
- const unsigned int conv_stride_y = std::get<1>(_conv_info.stride());
- const int input_width = input->info()->dimension(0);
- const int input_height = input->info()->dimension(1);
- const int upper_bound_w = ceil_to_multiple(((output->info()->dimension(0) - 1) * conv_stride_x + _kernel_size), _num_elems_read_per_iteration) - conv_pad_x - input_width;
- const int upper_bound_h = ((output->info()->dimension(1) - 1) * conv_stride_y - conv_pad_y + _kernel_size) - input_height;
- _border_size.right = std::max(upper_bound_w, static_cast<int>(_kernel_size));
- _border_size.bottom = std::max(upper_bound_h, static_cast<int>(_kernel_size));
-
- // Create window and update padding
- win = calculate_max_window(*output->info(), Steps(_num_elems_written_per_iteration));
- AccessWindowStatic input_access(input->info(), -conv_pad_x, -conv_pad_y, input_width + _border_size.right, input_height + _border_size.bottom);
- AccessWindowStatic weights_access(weights->info(), 0, 0, _kernel_size, _kernel_size);
- AccessWindowHorizontal output_access(output->info(), 0, _num_elems_written_per_iteration);
- update_window_and_padding(win, input_access, weights_access, output_access);
- output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
-
- break;
}
+ break;
default:
{
ARM_COMPUTE_ERROR("Not implemented");
@@ -1375,6 +1353,22 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens
}
}
+ // Calculate right and bottom border
+ const unsigned int conv_stride_y = std::get<1>(_conv_info.stride());
+ const int input_width = input->info()->dimension(0);
+ const int input_height = input->info()->dimension(1);
+ const int upper_bound_w = ceil_to_multiple(((output->info()->dimension(0) - 1) * conv_stride_x + _kernel_size), _num_elems_read_per_iteration) - conv_pad_x - input_width;
+ const int upper_bound_h = ((output->info()->dimension(1) - 1) * conv_stride_y - conv_pad_y + _kernel_size) - input_height;
+ _border_size.right = std::max(upper_bound_w, static_cast<int>(_kernel_size));
+ _border_size.bottom = std::max(upper_bound_h, static_cast<int>(_kernel_size));
+
+ Window win = calculate_max_window(*output->info(), Steps(_num_elems_written_per_iteration));
+ AccessWindowStatic input_access(input->info(), -conv_pad_x, -conv_pad_y, input_width + _border_size.right, input_height + _border_size.bottom);
+ AccessWindowStatic weights_access(weights->info(), 0, 0, _num_weight_elems_read_per_row, _kernel_size);
+ AccessWindowHorizontal output_access(output->info(), 0, _num_elems_written_per_iteration);
+ update_window_and_padding(win, input_access, weights_access, output_access);
+ output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape()));
+
INEKernel::configure(win);
}
diff --git a/tests/benchmark/NEON/DirectConvolutionLayer.cpp b/tests/benchmark/NEON/DirectConvolutionLayer.cpp
index 2c94118623..a9dd4b94ea 100644
--- a/tests/benchmark/NEON/DirectConvolutionLayer.cpp
+++ b/tests/benchmark/NEON/DirectConvolutionLayer.cpp
@@ -56,56 +56,46 @@ using NEConvolutionLayerFixture = ConvolutionLayerFixture<Tensor, NEDirectConvol
TEST_SUITE(NEON)
-//FIXME: COMPMID-526
-DISABLED_REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetDirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL,
- framework::dataset::combine(framework::dataset::combine(datasets::AlexNetDirectConvolutionLayerDataset(), data_types),
- framework::dataset::make("Batches", 1)));
+REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetDirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL,
+ framework::dataset::combine(framework::dataset::combine(datasets::AlexNetDirectConvolutionLayerDataset(), data_types),
+ framework::dataset::make("Batches", 1)));
-//FIXME: COMPMID-526
-DISABLED_REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL,
- framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1DirectConvolutionLayerDataset(), data_types),
- framework::dataset::make("Batches", 1)));
+REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL,
+ framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1DirectConvolutionLayerDataset(), data_types),
+ framework::dataset::make("Batches", 1)));
-//FIXME: COMPMID-526
-DISABLED_REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL,
- framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4DirectConvolutionLayerDataset(), data_types),
- framework::dataset::make("Batches", 1)));
+REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL,
+ framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4DirectConvolutionLayerDataset(), data_types),
+ framework::dataset::make("Batches", 1)));
-//FIXME: COMPMID-526
-DISABLED_REGISTER_FIXTURE_DATA_TEST_CASE(SqueezeNetDirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL,
- framework::dataset::combine(framework::dataset::combine(datasets::SqueezeNetConvolutionLayerDataset(), data_types),
- framework::dataset::make("Batches", 1)));
+REGISTER_FIXTURE_DATA_TEST_CASE(SqueezeNetDirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::ALL,
+ framework::dataset::combine(framework::dataset::combine(datasets::SqueezeNetConvolutionLayerDataset(), data_types),
+ framework::dataset::make("Batches", 1)));
TEST_SUITE(NIGHTLY)
-//FIXME: COMPMID-526
-DISABLED_REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetDirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
- framework::dataset::combine(framework::dataset::combine(datasets::AlexNetDirectConvolutionLayerDataset(), data_types),
- framework::dataset::make("Batches", { 4, 8 })));
+REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetDirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
+ framework::dataset::combine(framework::dataset::combine(datasets::AlexNetDirectConvolutionLayerDataset(), data_types),
+ framework::dataset::make("Batches", { 4, 8 })));
-//FIXME: COMPMID-526
-DISABLED_REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
- framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1DirectConvolutionLayerDataset(), data_types),
- framework::dataset::make("Batches", { 4, 8 })));
+REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
+ framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1DirectConvolutionLayerDataset(), data_types),
+ framework::dataset::make("Batches", { 4, 8 })));
-//FIXME: COMPMID-526
-DISABLED_REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
- framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4DirectConvolutionLayerDataset(), data_types),
- framework::dataset::make("Batches", { 4, 8 })));
+REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
+ framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4DirectConvolutionLayerDataset(), data_types),
+ framework::dataset::make("Batches", { 4, 8 })));
-//FIXME: COMPMID-526
-DISABLED_REGISTER_FIXTURE_DATA_TEST_CASE(SqueezeNetDirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
- framework::dataset::combine(framework::dataset::combine(datasets::SqueezeNetConvolutionLayerDataset(), data_types),
- framework::dataset::make("Batches", { 4, 8 })));
+REGISTER_FIXTURE_DATA_TEST_CASE(SqueezeNetDirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
+ framework::dataset::combine(framework::dataset::combine(datasets::SqueezeNetConvolutionLayerDataset(), data_types),
+ framework::dataset::make("Batches", { 4, 8 })));
-//FIXME: COMPMID-526
-DISABLED_REGISTER_FIXTURE_DATA_TEST_CASE(VGG16DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
- framework::dataset::combine(framework::dataset::combine(datasets::VGG16ConvolutionLayerDataset(), data_types),
- framework::dataset::make("Batches", { 1, 4, 8 })));
+REGISTER_FIXTURE_DATA_TEST_CASE(VGG16DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
+ framework::dataset::combine(framework::dataset::combine(datasets::VGG16ConvolutionLayerDataset(), data_types),
+ framework::dataset::make("Batches", { 1, 4, 8 })));
-//FIXME: COMPMID-526
-DISABLED_REGISTER_FIXTURE_DATA_TEST_CASE(YOLOV2DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
- framework::dataset::combine(framework::dataset::combine(datasets::YOLOV2ConvolutionLayerDataset(), data_types),
- framework::dataset::make("Batches", { 1, 4, 8 })));
+REGISTER_FIXTURE_DATA_TEST_CASE(YOLOV2DirectConvolutionLayer, NEConvolutionLayerFixture, framework::DatasetMode::NIGHTLY,
+ framework::dataset::combine(framework::dataset::combine(datasets::YOLOV2ConvolutionLayerDataset(), data_types),
+ framework::dataset::make("Batches", { 1, 4, 8 })));
TEST_SUITE_END()
TEST_SUITE_END()
diff --git a/tests/benchmark/NEON/SYSTEM/AlexNet.cpp b/tests/benchmark/NEON/SYSTEM/AlexNet.cpp
index 9de5244ed2..cd48e5db25 100644
--- a/tests/benchmark/NEON/SYSTEM/AlexNet.cpp
+++ b/tests/benchmark/NEON/SYSTEM/AlexNet.cpp
@@ -67,10 +67,9 @@ using NEAlexNetFixture = AlexNetFixture<ITensor,
TEST_SUITE(NEON)
TEST_SUITE(SYSTEM_TEST)
-//FIXME: COMPMID-526
-DISABLED_REGISTER_FIXTURE_DATA_TEST_CASE(AlexNet, NEAlexNetFixture, framework::DatasetMode::ALL,
- framework::dataset::combine(alex_net_data_types,
- framework::dataset::make("Batches", { 1, 4, 8 })));
+REGISTER_FIXTURE_DATA_TEST_CASE(AlexNet, NEAlexNetFixture, framework::DatasetMode::ALL,
+ framework::dataset::combine(alex_net_data_types,
+ framework::dataset::make("Batches", { 1, 4, 8 })));
TEST_SUITE_END()
TEST_SUITE_END()
diff --git a/tests/validation/NEON/DirectConvolutionLayer.cpp b/tests/validation/NEON/DirectConvolutionLayer.cpp
index a0051d8286..6211d31c45 100644
--- a/tests/validation/NEON/DirectConvolutionLayer.cpp
+++ b/tests/validation/NEON/DirectConvolutionLayer.cpp
@@ -109,8 +109,7 @@ TEST_SUITE_END()
#endif /* ARM_COMPUTE_ENABLE_FP16 */
TEST_SUITE(FP32)
-//FIXME: COMPMID-526
-DISABLED_FIXTURE_DATA_TEST_CASE(Run, NEDirectConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(data_f32, framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(Run, NEDirectConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(data_f32, framework::dataset::make("DataType", DataType::F32)))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_fp32);
diff --git a/tests/validation/NEON/SYSTEM/AlexNet.cpp b/tests/validation/NEON/SYSTEM/AlexNet.cpp
index dbc38883e7..7a2b0d22b0 100644
--- a/tests/validation/NEON/SYSTEM/AlexNet.cpp
+++ b/tests/validation/NEON/SYSTEM/AlexNet.cpp
@@ -96,8 +96,7 @@ std::vector<unsigned int> compute_alexnet(DataType dt, unsigned int batches, std
TEST_SUITE(NEON)
TEST_SUITE(SYSTEM_TESTS)
-//FIXME: COMPMID-526
-DISABLED_TEST_CASE(AlexNet, framework::DatasetMode::PRECOMMIT)
+TEST_CASE(AlexNet, framework::DatasetMode::PRECOMMIT)
{
// Compute alexnet
std::vector<unsigned int> classified_labels = compute_alexnet(DataType::F32, 1, "cnn_data/imagenet_data/cat.npy");