aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--examples/graph_squeezenet_v1_1.cpp2
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp23
-rw-r--r--tests/datasets/LargeGEMMDataset.h8
-rw-r--r--tests/datasets/SmallGEMMDataset.h2
4 files changed, 26 insertions, 9 deletions
diff --git a/examples/graph_squeezenet_v1_1.cpp b/examples/graph_squeezenet_v1_1.cpp
index bf6324698f..e52062927b 100644
--- a/examples/graph_squeezenet_v1_1.cpp
+++ b/examples/graph_squeezenet_v1_1.cpp
@@ -87,7 +87,6 @@ public:
graph << common_params.target
<< common_params.fast_math_hint
<< InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor)))
- << ConvolutionMethod::Direct
<< ConvolutionLayer(
3U, 3U, 64U,
get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv1_w.npy", weights_layout),
@@ -95,7 +94,6 @@ public:
PadStrideInfo(2, 2, 0, 0))
<< ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
<< PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)))
- << ConvolutionMethod::Default
<< ConvolutionLayer(
1U, 1U, 16U,
get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire2_squeeze1x1_w.npy", weights_layout),
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
index c9e6bb34b2..79e2f8b11a 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
@@ -57,6 +57,7 @@ inline Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *i
ARM_COMPUTE_RETURN_ERROR_ON_MSG(input0->num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->num_dimensions() > 3, "The number of dimensions for the matrix B must be <= 3");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_interleaved_transposed && reshape_info.reinterpret_input_as_3d(), "The input tensor cannot be reinterpreted as 3D if is_interleaved_transposed is true");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->num_dimensions() > 2 && reshape_info.reinterpret_input_as_3d(), "The input1 tensor cannot have more than 2 dimensions if input0 has to be reinterpreted as 3D");
if(!is_interleaved_transposed)
{
@@ -109,13 +110,23 @@ inline std::pair<Status, Window> validate_and_configure_window(ITensorInfo *inpu
const DataType data_type = input0->data_type();
unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0];
unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1];
+ bool reinterpret_input_as_3d = reshape_info.reinterpret_input_as_3d();
+ bool reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 1);
+
+ // In case both input and output have to be reinterpreted as 3D tensors,
+ // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false.
+ if(reinterpret_input_as_3d == reinterpret_output_as_3d)
+ {
+ reinterpret_input_as_3d = false;
+ reinterpret_output_as_3d = false;
+ }
// Output tensor auto inizialitation if not yet initialized
auto_init_if_empty(*output, input0->clone()->set_tensor_shape(compute_mm_shape(*input0, *input1, is_interleaved_transposed, reshape_info)));
TensorInfo tmp_info(*output);
- if(reshape_info.depth_output_gemm3d() != 1)
+ if(reinterpret_output_as_3d)
{
// Since the output tensor has to be reinterpreted as 3D and the execute window is based on a 2D GEMM,
// the window needs to be constructed on the 2D collapsed version of the tensor
@@ -162,7 +173,7 @@ inline std::pair<Status, Window> validate_and_configure_window(ITensorInfo *inpu
// Note: bottom paddings are calculated manually as the output can be reinterpreted as 3D tensor
// The only way to set properly the paddings, it is to set those explicitly through the AccessWindowStatic
- const int m = reshape_info.reinterpret_input_as_3d() ? input0->tensor_shape()[1] * input0->tensor_shape()[2] : input0->tensor_shape()[1];
+ const int m = reinterpret_input_as_3d ? input0->tensor_shape()[1] * input0->tensor_shape()[2] : input0->tensor_shape()[1];
const int bottom_pad = (num_elems_processed_per_iteration_y - (m % num_elems_processed_per_iteration_y)) % num_elems_processed_per_iteration_y;
// Create kernels according to the architecture, data type and input size.
@@ -219,6 +230,14 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen
_reinterpret_input_as_3d = reshape_info.reinterpret_input_as_3d();
_reinterpret_output_as_3d = (reshape_info.depth_output_gemm3d() != 1);
+ // In case both input and output have to be reinterpreted as 3D tensors,
+ // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false.
+ if(_reinterpret_input_as_3d == _reinterpret_output_as_3d)
+ {
+ _reinterpret_input_as_3d = false;
+ _reinterpret_output_as_3d = false;
+ }
+
// Check if we need to slide the matrix B
const unsigned int num_dimensions_input0 = _reinterpret_input_as_3d ? _input0->info()->num_dimensions() - 1 : _input0->info()->num_dimensions();
diff --git a/tests/datasets/LargeGEMMDataset.h b/tests/datasets/LargeGEMMDataset.h
index 9c45cb0657..bbf362ca21 100644
--- a/tests/datasets/LargeGEMMDataset.h
+++ b/tests/datasets/LargeGEMMDataset.h
@@ -71,11 +71,11 @@ public:
{
add_config(TensorShape(923U, 143U, 3U), TensorShape(871U, 923U), TensorShape(871U, 143U, 3U), TensorShape(871U, 143U, 3U), 1.0f, 0.0f);
add_config(TensorShape(681U, 205U, 5U), TensorShape(213U, 681U), TensorShape(213U, 205U, 5U), TensorShape(213U, 205U, 5U), 1.0f, 0.0f);
- add_config(TensorShape(364U, 605U, 5U), TensorShape(96U, 364U), TensorShape(96U, 605U, 5U), TensorShape(96U, 605U, 5U), 1.0f, 0.0f);
+ add_config(TensorShape(364U, 605U, 5U), TensorShape(96U, 364U), TensorShape(96U, 605U, 5U), TensorShape(96U, 605U, 5U), 0.2f, 1.2f);
add_config(TensorShape(1201U, 243U, 3U), TensorShape(128U, 1201U), TensorShape(128U, 243U, 3U), TensorShape(128U, 243U, 3U), 1.0f, 0.0f);
- add_config(TensorShape(2305U, 13U, 13U), TensorShape(384U, 2305U), TensorShape(384U, 13U, 13U), TensorShape(384U, 13U, 13U), 1.0f, 0.0f);
- add_config(TensorShape(1729U, 85U, 2U, 2U), TensorShape(192U, 1729U, 2U), TensorShape(192U, 85U, 2U, 2U), TensorShape(192U, 85U, 2U, 2U), 1.0f, 0.0f);
- add_config(TensorShape(1729U, 17U, 10U, 3U), TensorShape(128U, 1729U, 3U), TensorShape(128U, 17U, 10U, 3U), TensorShape(128U, 17U, 10U, 3U), 1.0f, 0.0f);
+ add_config(TensorShape(2305U, 13U, 13U), TensorShape(384U, 2305U), TensorShape(384U, 13U, 13U), TensorShape(384U, 13U, 13U), 0.4f, 0.7f);
+ add_config(TensorShape(1729U, 85U, 2U, 2U), TensorShape(192U, 1729U), TensorShape(192U, 85U, 2U, 2U), TensorShape(192U, 85U, 2U, 2U), 1.0f, 0.0f);
+ add_config(TensorShape(1729U, 17U, 10U, 3U), TensorShape(128U, 1729U), TensorShape(128U, 17U, 10U, 3U), TensorShape(128U, 17U, 10U, 3U), 1.0f, 0.3f);
}
};
} // namespace datasets
diff --git a/tests/datasets/SmallGEMMDataset.h b/tests/datasets/SmallGEMMDataset.h
index e108fcc1ca..15a3504e85 100644
--- a/tests/datasets/SmallGEMMDataset.h
+++ b/tests/datasets/SmallGEMMDataset.h
@@ -73,7 +73,7 @@ public:
add_config(TensorShape(38U, 12U, 2U), TensorShape(21U, 38U), TensorShape(21U, 12U, 2U), TensorShape(21U, 12U, 2U), 0.2f, 1.2f);
add_config(TensorShape(32U, 1U, 4U, 3U), TensorShape(17U, 32U), TensorShape(17U, 1U, 4U, 3U), TensorShape(17U, 1U, 4U, 3U), 0.4f, 0.7f);
add_config(TensorShape(16U, 16U, 3U, 2U), TensorShape(8U, 16U), TensorShape(8U, 16U, 3U, 2U), TensorShape(8U, 16U, 3U, 2U), 1.0f, 0.0f);
- add_config(TensorShape(16U, 16U, 5U, 3U), TensorShape(8U, 16U, 3U), TensorShape(8U, 16U, 5U, 3U), TensorShape(8U, 16U, 5U, 3U), 1.0f, 0.0f);
+ add_config(TensorShape(16U, 16U, 5U, 3U), TensorShape(8U, 16U), TensorShape(8U, 16U, 5U, 3U), TensorShape(8U, 16U, 5U, 3U), 1.0f, 0.3f);
}
};
} // namespace datasets