about summary refs log tree commit diff
path: root/src/runtime/GLES_COMPUTE
diff options
context:
space:
mode:
Diffstat (limited to 'src/runtime/GLES_COMPUTE')
-rw-r--r-- src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp 5
-rw-r--r-- src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp 10
-rw-r--r-- src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp 4
-rw-r--r-- src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp 2
4 files changed, 17 insertions, 4 deletions
diff --git a/src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp b/src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp
index 50e3cc7c1c..d8f6867634 100644
--- a/src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp
+++ b/src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp
@@ -29,8 +29,8 @@
#include <cstddef>
-using namespace arm_compute;
-
+namespace arm_compute
+{
void *GCBufferAllocator::allocate(size_t size, size_t alignment)
{
ARM_COMPUTE_UNUSED(alignment);
@@ -48,3 +48,4 @@ void GCBufferAllocator::free(void *ptr)
auto *gl_buffer = reinterpret_cast<GLBufferWrapper *>(ptr);
delete gl_buffer;
}
+} // namespace arm_compute
diff --git a/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp
index 0f8f8e6c94..a300033bb2 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp
@@ -79,7 +79,8 @@ void GCFullyConnectedLayer::configure_fc_fc(const IGCTensor *input, const IGCTen
_mm_kernel.configure(input, weights, output, 1.0f, false);
}
-void GCFullyConnectedLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, bool transpose_weights, bool are_weights_reshaped)
+void GCFullyConnectedLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output,
+ bool transpose_weights, bool are_weights_reshaped, bool retain_internal_weights)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
@@ -141,11 +142,14 @@ void GCFullyConnectedLayer::configure(const IGCTensor *input, const IGCTensor *w
}
// Allocate the transpose tensor if the are_weights_reshaped flag is false and once all the configure methods have been called
- if(!_are_weights_reshaped)
+ if(!_are_weights_reshaped && !retain_internal_weights)
{
// Allocate the tensor for the weights reshaped
_reshape_weights_output.allocator()->allocate();
}
+
+ ARM_COMPUTE_ERROR_ON(retain_internal_weights && _reshape_weights_output.gc_buffer() == 0);
+ _are_weights_reshaped = _are_weights_reshaped || retain_internal_weights;
}
void GCFullyConnectedLayer::run()
@@ -158,6 +162,7 @@ void GCFullyConnectedLayer::run()
}
_memory_group.acquire();
+
// Linearize input if it comes from a convolutional layer
if(_is_fc_after_conv)
{
@@ -179,5 +184,6 @@ void GCFullyConnectedLayer::run()
GCScheduler::get().dispatch(_accumulate_biases_kernel);
}
+
_memory_group.release();
}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp
index 46424a59f5..9c8568a329 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp
@@ -92,6 +92,10 @@ void GCGEMM::configure(const IGCTensor *a, const IGCTensor *b, const IGCTensor *
TensorInfo info_b(shape_tmp_b, 1, b->info()->data_type(), b->info()->fixed_point_position());
_tmp_b.allocator()->init(info_b);
+ if(!gemm_info.reshape_b_only_on_first_run())
+ {
+ _memory_group.manage(&_tmp_b);
+ }
// Configure interleave kernel
_interleave_kernel.configure(a, &_tmp_a);
diff --git a/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp
index 13213d2b54..b2e69ee8c6 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp
@@ -57,10 +57,12 @@ void GCNormalizationLayer::configure(const IGCTensor *input, IGCTensor *output,
void GCNormalizationLayer::run()
{
_memory_group.acquire();
+
GCScheduler::get().dispatch(_multiply_kernel, false);
GCScheduler::get().memory_barrier();
GCScheduler::get().dispatch(_border_handler, false);
GCScheduler::get().memory_barrier();
GCScheduler::get().dispatch(_norm_kernel, true);
+
_memory_group.release();
}