about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michele Di Giorgio <michele.digiorgio@arm.com> 2018-08-22 14:28:30 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:54:54 +0000
commit: ba1ffe96eb4563ba7e18b39728d9db373c62f7c3 (patch)
tree: 0e931ef35b271f353d98aec59f9e3042471b4aea
parent: 3ada2b7a29e1ab2058ab7dc701cacff548d2aae9 (diff)
download: ComputeLibrary-ba1ffe96eb4563ba7e18b39728d9db373c62f7c3.tar.gz
COMPMID-1537: Fix weights retention in CLFullyConnectedLayer
Change-Id: Id978c34889b86fa8b9184d3349cc9b12837141a2
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145403
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
-rw-r--r-- arm_compute/core/Types.h 16
-rw-r--r-- arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h 6
-rw-r--r-- src/runtime/CL/functions/CLFullyConnectedLayer.cpp 20
-rw-r--r-- src/runtime/CL/functions/CLGEMM.cpp 2
-rw-r--r-- tests/validation/CL/UNIT/WeightsRetention.cpp 66
-rw-r--r-- tests/validation/fixtures/UNIT/WeightsRetentionFixture.h 150
6 files changed, 242 insertions, 18 deletions
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index d9109e4565..37a8850237 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -1141,7 +1141,7 @@ class GEMMInfo
public:
/** Default constructor */
GEMMInfo()
- : _is_a_reshaped(false), _is_b_reshaped(false), _reshape_b_only_on_first_run(false), _depth_output_gemm3d(1), _reinterpret_input_as_3d(false)
+ : _is_a_reshaped(false), _is_b_reshaped(false), _reshape_b_only_on_first_run(false), _depth_output_gemm3d(1), _reinterpret_input_as_3d(false), _retain_internal_weights(false)
{
}
/** Constructor
@@ -1152,11 +1152,12 @@ public:
* @param[in] depth_output_gemm3d (Optional) Depth (third dimension) of the output tensor to be used with the GEMM3D kernel
* @param[in] reinterpret_input_as_3d (Optional) Reinterpret the input as 3D tensor. (i.e. this flag should be set to true when GEMM is used
* to perform 1x1 convolutions with the NHWC data layout)
+ * @param[in] retain_internal_weights (Optional) Retain the weights tensor from previous run
*
*/
- GEMMInfo(bool is_a_reshaped, bool is_b_reshaped, bool reshape_b_only_on_first_run, int depth_output_gemm3d = 1, bool reinterpret_input_as_3d = false)
+ GEMMInfo(bool is_a_reshaped, bool is_b_reshaped, bool reshape_b_only_on_first_run, int depth_output_gemm3d = 1, bool reinterpret_input_as_3d = false, bool retain_internal_weights = false)
: _is_a_reshaped(is_a_reshaped), _is_b_reshaped(is_b_reshaped), _reshape_b_only_on_first_run(reshape_b_only_on_first_run), _depth_output_gemm3d(depth_output_gemm3d),
- _reinterpret_input_as_3d(reinterpret_input_as_3d)
+ _reinterpret_input_as_3d(reinterpret_input_as_3d), _retain_internal_weights(retain_internal_weights)
{
}
/** Flag which specifies if the matrix A has been reshaped
@@ -1201,6 +1202,14 @@ public:
{
return _reinterpret_input_as_3d;
};
+ /** Flag which specifies if the weights tensor has to be retained from previous run
+ *
+ * @return True if the weights tensor has to be retained
+ */
+ bool retain_internal_weights() const
+ {
+ return _retain_internal_weights;
+ };
private:
const bool _is_a_reshaped;
@@ -1208,6 +1217,7 @@ private:
const bool _reshape_b_only_on_first_run;
const int _depth_output_gemm3d;
const bool _reinterpret_input_as_3d;
+ const bool _retain_internal_weights;
};
/** Winograd information */
diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
index 450cd831ee..d6d88cec55 100644
--- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
@@ -125,9 +125,9 @@ public:
void prepare() override;
private:
- void configure_fc_fc(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output);
- void configure_conv_fc(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output);
- void configure_mm(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output);
+ void configure_fc_fc(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output, bool retain_internal_weights);
+ void configure_conv_fc(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output, bool retain_internal_weights);
+ void configure_mm(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output, bool retain_internal_weights);
CLMemoryGroup _memory_group;
CLConvertFullyConnectedWeights _convert_weights;
diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
index 60c28a0874..010985db06 100644
--- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
@@ -78,7 +78,7 @@ CLFullyConnectedLayer::CLFullyConnectedLayer(std::shared_ptr<IMemoryManager> mem
_is_fc_after_conv(true), _accumulate_biases(false), _is_quantized(false), _is_prepared(false), _original_weights(nullptr)
{
}
-void CLFullyConnectedLayer::configure_mm(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output)
+void CLFullyConnectedLayer::configure_mm(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output, bool retain_internal_weights)
{
if(_is_quantized)
{
@@ -100,11 +100,11 @@ void CLFullyConnectedLayer::configure_mm(const ICLTensor *input, const ICLTensor
else
{
// Configure matrix multiply kernel
- _mm_gemm.configure(input, weights, nullptr, output, 1.f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run */));
+ _mm_gemm.configure(input, weights, nullptr, output, 1.f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run */, 1, false, retain_internal_weights));
}
}
-void CLFullyConnectedLayer::configure_conv_fc(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output)
+void CLFullyConnectedLayer::configure_conv_fc(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output, bool retain_internal_weights)
{
ARM_COMPUTE_ERROR_ON((weights->info()->dimension(1) != (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));
@@ -119,18 +119,18 @@ void CLFullyConnectedLayer::configure_conv_fc(const ICLTensor *input, const ICLT
_flatten_layer.configure(input, &_flatten_output);
// Configure matrix multiply kernel
- configure_mm(&_flatten_output, weights, output);
+ configure_mm(&_flatten_output, weights, output, retain_internal_weights);
// Allocate the output tensor for flatten once all the configure methods have been called
_flatten_output.allocator()->allocate();
}
-void CLFullyConnectedLayer::configure_fc_fc(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output)
+void CLFullyConnectedLayer::configure_fc_fc(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output, bool retain_internal_weights)
{
ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1));
// Configure matrix multiply kernel
- configure_mm(input, weights, output);
+ configure_mm(input, weights, output, retain_internal_weights);
}
void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
@@ -150,7 +150,7 @@ void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *w
_is_fc_after_conv = true;
_accumulate_biases = false;
_is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
- _is_prepared = false;
+ _is_prepared = fc_info.retain_internal_weights;
_original_weights = weights;
// Configure gemmlowp output
@@ -218,12 +218,12 @@ void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *w
if(_is_fc_after_conv)
{
// Fully Connected layer after a Convolution Layer without batches
- configure_conv_fc(input, weights_to_use, tmp_output);
+ configure_conv_fc(input, weights_to_use, tmp_output, fc_info.retain_internal_weights);
}
else
{
// Fully Connected layer after a Fully Connected Layer without batches
- configure_fc_fc(input, weights_to_use, tmp_output);
+ configure_fc_fc(input, weights_to_use, tmp_output, fc_info.retain_internal_weights);
}
// Configure output stage for asymmetric quantized types
@@ -235,8 +235,6 @@ void CLFullyConnectedLayer::configure(const ICLTensor *input, const ICLTensor *w
_gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier, output_shift, output->info()->quantization_info().offset);
_gemmlowp_output.allocator()->allocate();
}
-
- _are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights;
}
Status CLFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
diff --git a/src/runtime/CL/functions/CLGEMM.cpp b/src/runtime/CL/functions/CLGEMM.cpp
index 1ad8531920..85d90a05e1 100644
--- a/src/runtime/CL/functions/CLGEMM.cpp
+++ b/src/runtime/CL/functions/CLGEMM.cpp
@@ -86,7 +86,7 @@ void CLGEMM::configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *
// Check if we need to reshape the matrix B only on the first run
_reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
- _is_prepared = false;
+ _is_prepared = gemm_info.retain_internal_weights();
_original_b = b;
const ICLTensor *matrix_a = a;
diff --git a/tests/validation/CL/UNIT/WeightsRetention.cpp b/tests/validation/CL/UNIT/WeightsRetention.cpp
new file mode 100644
index 0000000000..bfaef56fbb
--- /dev/null
+++ b/tests/validation/CL/UNIT/WeightsRetention.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
+#include "support/ToolchainSupport.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/Globals.h"
+#include "tests/Utils.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/UNIT/WeightsRetentionFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+RelativeTolerance<float> tolerance_f32(0.05f); // Tolerance passed to validate() for the F32 comparison below
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(UNIT)
+TEST_SUITE(WeightsRetention)
+
+using CLWeightsRetentionFixture = WeightsRetentionReconfigureTestCaseFixture<CLTensor,
+ CLAccessor,
+ CLFullyConnectedLayer>;
+FIXTURE_TEST_CASE(WeightsRetention,
+ CLWeightsRetentionFixture,
+ framework::DatasetMode::ALL)
+{
+ // Compare the fixture's target output (_target) with its reference (_reference), within tolerance_f32
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/fixtures/UNIT/WeightsRetentionFixture.h b/tests/validation/fixtures/UNIT/WeightsRetentionFixture.h
new file mode 100644
index 0000000000..b17c003342
--- /dev/null
+++ b/tests/validation/fixtures/UNIT/WeightsRetentionFixture.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_UNIT_WEIGHTS_RETENTION
+#define ARM_COMPUTE_TEST_UNIT_WEIGHTS_RETENTION
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/FullyConnectedLayer.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/** Fixture that runs a fully connected layer, reconfigures it with a smaller
+ * batch size while asking it to retain its internal weights, and runs it again.
+ *
+ * Per the original author, the first run is meant to exercise the
+ * is_interleaved_transpose path of CLGEMM before the layer is reconfigured.
+ */
+template <typename TensorType, typename AccessorType, typename FullyConnectedFunction>
+class WeightsRetentionReconfigureTestCaseFixture : public framework::Fixture
+{
+ using T = float;
+
+public:
+ void setup()
+ {
+ _max_batches = 8; // Batch count used for the 1st configure/run
+ _cur_batches = 6; // Smaller batch count used after reconfiguration
+ _target = compute_target();
+ _reference = compute_reference();
+ };
+
+protected:
+ template <typename U>
+ void fill(U &&tensor, int i)
+ {
+ std::uniform_real_distribution<> distribution(0.5f, 1.f);
+ library->fill(tensor, distribution, i);
+ }
+
+ TensorType compute_target()
+ {
+ // Create tensors (src and dst are sized for _max_batches)
+ TensorType w1 = create_tensor<TensorType>(TensorShape(180000U, 150U), DataType::F32, 1);
+ TensorType b1 = create_tensor<TensorType>(TensorShape(150U), DataType::F32, 1);
+ TensorType src = create_tensor<TensorType>(TensorShape(1U, 150U, 1200U, _max_batches), DataType::F32, 1);
+ TensorType dst = create_tensor<TensorType>(TensorShape(150U, _max_batches), DataType::F32, 1);
+
+ // Create and configure function (1st iteration, no fc_info passed)
+ FullyConnectedFunction fc_layer_1;
+ fc_layer_1.configure(&src, &w1, &b1, &dst);
+
+ // Allocate persistent tensors (weights and biases)
+ w1.allocator()->allocate();
+ b1.allocator()->allocate();
+
+ // Allocate tensors (1st iteration)
+ src.allocator()->allocate();
+ dst.allocator()->allocate();
+
+ // Fill tensors (1st iteration)
+ fill(AccessorType(src), 0);
+ fill(AccessorType(w1), 1);
+ fill(AccessorType(b1), 2);
+
+ // Compute functions (1st iteration)
+ fc_layer_1.run();
+
+ // Shrink src/dst to _cur_batches (2nd iteration) and grow their bottom padding by the number of removed batches
+ auto src_padding = src.allocator()->info().padding();
+ auto dst_padding = dst.allocator()->info().padding();
+ int diff = _max_batches - _cur_batches;
+ auto new_src_padding = PaddingSize(src_padding.top, src_padding.right, src_padding.bottom + diff, src_padding.left);
+ auto new_dst_padding = PaddingSize(dst_padding.top, dst_padding.right, dst_padding.bottom + diff, dst_padding.left);
+ src.allocator()->info().set_tensor_shape(TensorShape(1U, 150U, 1200U, _cur_batches)).set_is_resizable(true).extend_padding(new_src_padding);
+ src.allocator()->info().set_is_resizable(false);
+ dst.allocator()->info().set_tensor_shape(TensorShape(150U, _cur_batches)).set_is_resizable(true).extend_padding(new_dst_padding);
+ dst.allocator()->info().set_is_resizable(false);
+
+ // Configure FC info: ask the layer to retain its internal weights from the previous run
+ FullyConnectedLayerInfo fc_info;
+ fc_info.retain_internal_weights = true;
+
+ // Reconfigure the same function object with the resized tensors (2nd iteration)
+ fc_layer_1.configure(&src, &w1, &b1, &dst, fc_info);
+
+ // Fill tensors (2nd iteration): only src is refilled, w1/b1 keep their 1st-iteration contents
+ fill(AccessorType(src), 5);
+
+ // Compute functions (2nd iteration)
+ fc_layer_1.run();
+
+ return dst;
+ }
+
+ SimpleTensor<T> compute_reference()
+ {
+ // Create reference tensors; src uses the 2nd-iteration batch count (_cur_batches)
+ SimpleTensor<T> w1{ TensorShape(180000U, 150U), DataType::F32 };
+ SimpleTensor<T> b1{ TensorShape(150U), DataType::F32 };
+ SimpleTensor<T> src{ TensorShape(1U, 150U, 1200U, _cur_batches), DataType::F32 };
+
+ // Fill reference with the same fill indices the target uses: src (5, 2nd iteration), w1 (1), b1 (2)
+ fill(src, 5);
+ fill(w1, 1);
+ fill(b1, 2);
+
+ return reference::fully_connected_layer(src, w1, b1, TensorShape(150U, _cur_batches));
+ }
+
+protected:
+ TensorType _target{}; // dst tensor returned by compute_target() after the 2nd run
+ SimpleTensor<T> _reference{}; // Output of the reference fully connected layer
+ unsigned int _max_batches{}; // Batch count of the 1st iteration (set in setup())
+ unsigned int _cur_batches{}; // Batch count of the 2nd iteration (set in setup())
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_UNIT_WEIGHTS_RETENTION */