author     Gian Marco Iodice <gianmarco.iodice@arm.com>    2017-08-15 11:45:22 +0100
committer  Anthony Barbier <anthony.barbier@arm.com>       2018-11-02 16:35:24 +0000
commit     edfa9f463bed084f8b0953557202b2a1e56da817 (patch)
tree       5d1e92926d112fde05dcbc61324d96f73f692390 /tests
parent     dc460f13ee65e27b2a428e44c2d80afb1f516a99 (diff)
COMPMID-477 - Optimized batched case in CLConvolutionLayer
Change-Id: I4ef18f49f1da0cb816aaa0762466b940792c15ed
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/84162
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'tests')
-rw-r--r--  tests/model_objects/AlexNet.h                                 4
-rw-r--r--  tests/networks_new/AlexNetNetwork.h                           4
-rw-r--r--  tests/validation_new/CL/FullyConnectedLayer.cpp              14
-rw-r--r--  tests/validation_new/NEON/FullyConnectedLayer.cpp             4
-rw-r--r--  tests/validation_new/fixtures/FullyConnectedLayerFixture.h   15
5 files changed, 18 insertions(+), 23 deletions(-)
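
Note (illustrative, not part of the patch): the core of this change is a compile-time gate on the backend tensor type. The batched weight pre-reshape in the AlexNet test objects now runs only when the tensor type is the NEON Tensor, since the optimized CL path accepts the original weight shape for batched runs. A minimal sketch of that gate, with a hypothetical helper name, assuming the same std::is_same check used in tests/model_objects/AlexNet.h and tests/networks_new/AlexNetNetwork.h:

    // Sketch only; use_reshaped_fc_weights is a hypothetical helper, the gate
    // itself mirrors the condition added by this patch.
    #include <type_traits>
    #include "arm_compute/runtime/Tensor.h"

    template <typename TensorType>
    bool use_reshaped_fc_weights(int batches)
    {
        // Pre-reshape the fully connected weights for batched runs only on the
        // NEON backend; with CLTensor the CL backend now handles the batched
        // case using the plain weight shape.
        return batches > 1 && std::is_same<TensorType, arm_compute::Tensor>::value;
    }
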
diff --git a/tests/model_objects/AlexNet.h b/tests/model_objects/AlexNet.h
index c9fd448d5d..45622e2118 100644
--- a/tests/model_objects/AlexNet.h
+++ b/tests/model_objects/AlexNet.h
@@ -24,6 +24,8 @@
#ifndef __ARM_COMPUTE_TEST_MODEL_OBJECTS_ALEXNET_H__
#define __ARM_COMPUTE_TEST_MODEL_OBJECTS_ALEXNET_H__
+#include "arm_compute/runtime/Tensor.h"
+
#include "tests/AssetsLibrary.h"
#include "tests/Globals.h"
#include "tests/Utils.h"
@@ -149,7 +151,7 @@ public:
b[6]->allocator()->init(TensorInfo(TensorShape(4096U), 1, dt, fixed_point_position));
b[7]->allocator()->init(TensorInfo(TensorShape(1000U), 1, dt, fixed_point_position));
- if(_batches > 1)
+ if(_batches > 1 && std::is_same<TensorType, Tensor>::value)
{
w[5]->allocator()->init(TensorInfo(TensorShape(9216U * dt_size, 4096U / dt_size), 1, dt, fixed_point_position));
w[6]->allocator()->init(TensorInfo(TensorShape(4096U * dt_size, 4096U / dt_size), 1, dt, fixed_point_position));
diff --git a/tests/networks_new/AlexNetNetwork.h b/tests/networks_new/AlexNetNetwork.h
index 39c69daf60..b3a719671d 100644
--- a/tests/networks_new/AlexNetNetwork.h
+++ b/tests/networks_new/AlexNetNetwork.h
@@ -24,6 +24,8 @@
#ifndef __ARM_COMPUTE_TEST_MODEL_OBJECTS_ALEXNET_H__
#define __ARM_COMPUTE_TEST_MODEL_OBJECTS_ALEXNET_H__
+#include "arm_compute/runtime/Tensor.h"
+
#include "AssetsLibrary.h"
#include "Globals.h"
#include "Utils.h"
@@ -153,7 +155,7 @@ public:
b[6].allocator()->init(TensorInfo(TensorShape(4096U), 1, _data_type, _fixed_point_position));
b[7].allocator()->init(TensorInfo(TensorShape(1000U), 1, _data_type, _fixed_point_position));
- if(_batches > 1)
+ if(_batches > 1 && std::is_same<TensorType, Tensor>::value)
{
w[5].allocator()->init(TensorInfo(TensorShape(9216U * data_type_size, 4096U / data_type_size), 1, _data_type, _fixed_point_position));
w[6].allocator()->init(TensorInfo(TensorShape(4096U * data_type_size, 4096U / data_type_size), 1, _data_type, _fixed_point_position));
diff --git a/tests/validation_new/CL/FullyConnectedLayer.cpp b/tests/validation_new/CL/FullyConnectedLayer.cpp
index 9bf3a75d88..e43997c47b 100644
--- a/tests/validation_new/CL/FullyConnectedLayer.cpp
+++ b/tests/validation_new/CL/FullyConnectedLayer.cpp
@@ -80,16 +80,6 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(frame
const size_t shape_x = ws.x();
ws.set(0, ws.y());
ws.set(1, shape_x);
-
- // Weights have to be passed reshaped
- // Transpose 1xW for batched version
- if(!reshape_weights && dst_shape.y() > 1)
- {
- const float transpose_width = 16.0f / data_size_from_type(data_type);
- const size_t shape_x = ws.x();
- ws.set(0, ws.y() * static_cast<unsigned int>(transpose_width));
- ws.set(1, static_cast<unsigned int>(std::ceil(shape_x / transpose_width)));
- }
}
// Create tensors
@@ -113,7 +103,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(frame
}
template <typename T>
-using CLFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture<CLTensor, CLAccessor, CLFullyConnectedLayer, T>;
+using CLFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture<CLTensor, CLAccessor, CLFullyConnectedLayer, T, false>;
TEST_SUITE(Float)
TEST_SUITE(FP16)
@@ -150,7 +140,7 @@ TEST_SUITE_END()
TEST_SUITE_END()
template <typename T>
-using CLFullyConnectedLayerFixedPointFixture = FullyConnectedLayerValidationFixedPointFixture<CLTensor, CLAccessor, CLFullyConnectedLayer, T>;
+using CLFullyConnectedLayerFixedPointFixture = FullyConnectedLayerValidationFixedPointFixture<CLTensor, CLAccessor, CLFullyConnectedLayer, T, false>;
TEST_SUITE(Quantized)
TEST_SUITE(QS8)
diff --git a/tests/validation_new/NEON/FullyConnectedLayer.cpp b/tests/validation_new/NEON/FullyConnectedLayer.cpp
index 6eb18ebc6a..e859fb3872 100644
--- a/tests/validation_new/NEON/FullyConnectedLayer.cpp
+++ b/tests/validation_new/NEON/FullyConnectedLayer.cpp
@@ -117,7 +117,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(frame
}
template <typename T>
-using NEFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture<Tensor, Accessor, NEFullyConnectedLayer, T>;
+using NEFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture<Tensor, Accessor, NEFullyConnectedLayer, T, true>;
TEST_SUITE(Float)
#ifdef ARM_COMPUTE_ENABLE_FP16
@@ -156,7 +156,7 @@ TEST_SUITE_END()
TEST_SUITE_END()
template <typename T>
-using NEFullyConnectedLayerFixedPointFixture = FullyConnectedLayerValidationFixedPointFixture<Tensor, Accessor, NEFullyConnectedLayer, T>;
+using NEFullyConnectedLayerFixedPointFixture = FullyConnectedLayerValidationFixedPointFixture<Tensor, Accessor, NEFullyConnectedLayer, T, true>;
TEST_SUITE(Quantized)
TEST_SUITE(QS8)
diff --git a/tests/validation_new/fixtures/FullyConnectedLayerFixture.h b/tests/validation_new/fixtures/FullyConnectedLayerFixture.h
index eb4aad8952..0953b0b67e 100644
--- a/tests/validation_new/fixtures/FullyConnectedLayerFixture.h
+++ b/tests/validation_new/fixtures/FullyConnectedLayerFixture.h
@@ -76,7 +76,7 @@ RawTensor transpose(const RawTensor &src, int interleave = 1)
}
} // namespace
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool run_interleave>
class FullyConnectedLayerValidationFixedPointFixture : public framework::Fixture
{
public:
@@ -131,7 +131,7 @@ protected:
// Weights have to be passed reshaped
// Transpose 1xW for batched version
- if(!reshape_weights && output_shape.y() > 1)
+ if(!reshape_weights && output_shape.y() > 1 && run_interleave)
{
const int transpose_width = 16 / data_size_from_type(data_type);
const float shape_x = reshaped_weights_shape.x();
@@ -182,7 +182,7 @@ protected:
tmp = transpose(tmp);
// Reshape weights for batched runs
- if(!reshape_weights && output_shape.y() > 1)
+ if(!reshape_weights && output_shape.y() > 1 && run_interleave)
{
// Transpose with interleave
const int interleave_size = 16 / tmp.element_size();
@@ -232,15 +232,16 @@ protected:
DataType _data_type{};
};
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class FullyConnectedLayerValidationFixture : public FullyConnectedLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool run_interleave>
+class FullyConnectedLayerValidationFixture : public FullyConnectedLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T, run_interleave>
{
public:
template <typename...>
void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights, DataType data_type)
{
- FullyConnectedLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, transpose_weights, reshape_weights, data_type,
- 0);
+ FullyConnectedLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T, run_interleave>::setup(input_shape, weights_shape, bias_shape, output_shape, transpose_weights,
+ reshape_weights, data_type,
+ 0);
}
};
} // namespace validation
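
As a closing note, a short usage sketch of how the new run_interleave template parameter is consumed by the suites changed above: the NEON suites pass true to keep the transpose-with-interleave of the 1xW weights for batched runs, while the CL suites pass false and exercise the optimized batched path directly. The alias names and the float element type below are chosen for illustration only:

    // Illustrative instantiations mirroring the fixtures above; any tested
    // element type works the same way.
    using NEFullyConnectedLayerFixtureF32 =
        FullyConnectedLayerValidationFixture<Tensor, Accessor, NEFullyConnectedLayer, float, true>;     // run_interleave = true  (NEON)
    using CLFullyConnectedLayerFixtureF32 =
        FullyConnectedLayerValidationFixture<CLTensor, CLAccessor, CLFullyConnectedLayer, float, false>; // run_interleave = false (CL)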