aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/runtime
diff options
context:
space:
mode:
Diffstat (limited to 'arm_compute/runtime')
-rw-r--r--arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h3
-rw-r--r--arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h3
-rw-r--r--arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h14
-rw-r--r--arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h3
-rw-r--r--arm_compute/runtime/CL/functions/CLRNNLayer.h2
-rw-r--r--arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h4
-rw-r--r--arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h10
-rw-r--r--arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h13
-rw-r--r--arm_compute/runtime/NEON/AssemblyHelper.h52
-rw-r--r--arm_compute/runtime/NEON/functions/NEConvolutionLayer.h1
-rw-r--r--arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h2
-rw-r--r--arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h3
-rw-r--r--arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h3
-rw-r--r--arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h7
-rw-r--r--arm_compute/runtime/NEON/functions/NEGEMM.h15
-rw-r--r--arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h2
-rw-r--r--arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h12
-rw-r--r--arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h3
-rw-r--r--arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h3
21 files changed, 122 insertions, 36 deletions
diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
index 82969301b0..7767b73e10 100644
--- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
@@ -94,12 +94,14 @@ public:
// Inherited methods overridden:
void run() override;
+ void prepare() override;
private:
CLMemoryGroup _memory_group;
CLDeconvolutionLayerUpsample _scale_f;
CLConvolutionLayer _conv_f;
CLTensor _scaled_output;
+ bool _is_prepared;
};
}
#endif /* __ARM_COMPUTE_CLDECONVOLUTIONLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
index b1eb4b9e04..229fb24010 100644
--- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
@@ -140,6 +140,7 @@ public:
// Inherited methods overriden:
void run() override;
+ void prepare() override;
private:
CLDepthwiseIm2ColKernel _im2col_kernel;
@@ -153,7 +154,7 @@ private:
CLTensor _weights_reshaped;
CLTensor _v2mm_output;
CLTensor _output_reshaped;
- bool _is_first_run;
+ bool _is_prepared;
bool _is_quantized;
const ICLTensor *_original_weights;
};
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h
index 27cee5ed3b..a43461048a 100644
--- a/arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -70,6 +70,7 @@ public:
// Inherited methods overriden:
void run() override;
+ void prepare() override;
private:
CLDepthwiseConvolutionLayer _depthwise_conv;
diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
index aaa432616d..3dde52989b 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
@@ -195,7 +195,6 @@ private:
bool _is_quantized;
bool _is_activationlayer_enabled;
bool _is_prepared;
- bool _retain_internal_weights;
};
}
#endif /* __ARM_COMPUTE_CLGEMMCONVOLUTIONLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
index 3976704907..f404ccdf4c 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -53,6 +53,14 @@ class CLGEMMLowpMatrixMultiplyCore : public IFunction
public:
/** Constructor */
CLGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMLowpMatrixMultiplyCore(const CLGEMMLowpMatrixMultiplyCore &) = delete;
+ /** Default move constructor */
+ CLGEMMLowpMatrixMultiplyCore(CLGEMMLowpMatrixMultiplyCore &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMLowpMatrixMultiplyCore &operator=(const CLGEMMLowpMatrixMultiplyCore &) = delete;
+ /** Default move assignment operator */
+ CLGEMMLowpMatrixMultiplyCore &operator=(CLGEMMLowpMatrixMultiplyCore &&) = default;
/** Initialise the kernel's inputs, output
*
* @note GEMM_LOWP: low precision GEMM kernel
@@ -83,6 +91,7 @@ public:
// Inherited methods overridden:
void run() override;
+ void prepare() override;
private:
CLMemoryGroup _memory_group;
@@ -96,11 +105,12 @@ private:
CLTensor _vector_sum_row;
CLTensor _tmp_a;
CLTensor _tmp_b;
+ const ICLTensor *_original_b;
int32_t _a_offset;
int32_t _b_offset;
bool _is_interleaved_transposed;
- bool _is_first_run;
bool _reshape_b_only_on_first_run;
+ bool _is_prepared;
};
}
#endif /*__ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCORE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
index b7b2587454..c2bb47c550 100644
--- a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
@@ -90,6 +90,7 @@ public:
// Inherited methods overridden:
void run() override;
+ void prepare() override;
private:
CLMemoryGroup _memory_group;
@@ -100,7 +101,7 @@ private:
CLTensor _input_im2col_reshaped;
CLTensor _weights_reshaped;
CLTensor _gemm_output;
- bool _is_first_run;
+ bool _is_prepared;
const ICLTensor *_original_weights;
};
}
diff --git a/arm_compute/runtime/CL/functions/CLRNNLayer.h b/arm_compute/runtime/CL/functions/CLRNNLayer.h
index 9f239a9e64..ab7407dbfc 100644
--- a/arm_compute/runtime/CL/functions/CLRNNLayer.h
+++ b/arm_compute/runtime/CL/functions/CLRNNLayer.h
@@ -69,6 +69,7 @@ public:
// Inherited methods overridden:
void run() override;
+ void prepare() override;
private:
CLMemoryGroup _memory_group;
@@ -80,6 +81,7 @@ private:
CLTensor _fully_connected_out;
CLTensor _gemm_output;
CLTensor _add_output;
+ bool _is_prepared;
};
}
#endif /* __ARM_COMPUTE_CLRNN_LAYER_H__ */
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h
index fa29f447c8..45a883948c 100644
--- a/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h
@@ -63,7 +63,6 @@ public:
private:
GCWeightsReshapeKernel _weights_reshape_kernel;
- GCTensor _weights_reshaped;
};
/** Basic function to compute the convolution layer. This function calls the following GLES kernels:
@@ -128,6 +127,7 @@ public:
// Inherited methods overridden:
void run() override;
+ void prepare() override;
private:
/** Configures the appropriate matrix multiply routine
@@ -166,8 +166,8 @@ private:
GCTensor _gemm_output;
GCTensor _tmp_output;
- bool _is_first_run;
bool _is_activationlayer_enabled;
+ bool _is_prepared;
};
}
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h
index 1f8dc3e1a0..cd108c3eab 100644
--- a/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h
@@ -65,6 +65,14 @@ class GCFullyConnectedLayer : public IFunction
public:
/** Constructor */
GCFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ GCFullyConnectedLayer(const GCFullyConnectedLayer &) = delete;
+ /** Default move constructor */
+ GCFullyConnectedLayer(GCFullyConnectedLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ GCFullyConnectedLayer &operator=(const GCFullyConnectedLayer &) = delete;
+ /** Default move assignment operator */
+ GCFullyConnectedLayer &operator=(GCFullyConnectedLayer &&) = default;
/** Set the input and output tensors.
*
* @param[in] input Source tensor. Data type supported: F16/F32.
@@ -81,6 +89,7 @@ public:
//Inherited methods override
void run() override;
+ void prepare() override;
private:
void configure_fc_fc(const IGCTensor *input, const IGCTensor *weights, IGCTensor *output);
@@ -93,6 +102,7 @@ private:
GCGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel;
GCTensor _im2col_output;
GCTensor _reshape_weights_output;
+ const IGCTensor *_original_weights;
bool _are_weights_reshaped;
bool _is_fc_after_conv;
bool _accumulate_biases;
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h
index a1d6c8a438..2db254527f 100644
--- a/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h
@@ -50,7 +50,14 @@ class GCGEMM : public IFunction
public:
/** Default constructor. */
GCGEMM(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
-
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ GCGEMM(const GCGEMM &) = delete;
+ /** Default move constructor */
+ GCGEMM(GCGEMM &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ GCGEMM &operator=(const GCGEMM &) = delete;
+ /** Default move assignment operator */
+ GCGEMM &operator=(GCGEMM &&) = default;
/** Initialise the kernel's inputs and output
*
* @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
@@ -86,6 +93,7 @@ public:
// Inherited methods overridden:
void run() override;
+ void prepare() override;
private:
GCMemoryGroup _memory_group;
@@ -95,10 +103,11 @@ private:
GCGEMMMatrixAdditionKernel _ma_kernel;
GCTensor _tmp_a;
GCTensor _tmp_b;
+ const IGCTensor *_original_b;
bool _is_interleaved_transposed;
bool _run_addition;
- bool _is_first_run;
bool _reshape_b_only_on_first_run;
+ bool _is_prepared;
};
}
diff --git a/arm_compute/runtime/NEON/AssemblyHelper.h b/arm_compute/runtime/NEON/AssemblyHelper.h
index 3aa43ec96e..c4ba1a584e 100644
--- a/arm_compute/runtime/NEON/AssemblyHelper.h
+++ b/arm_compute/runtime/NEON/AssemblyHelper.h
@@ -51,7 +51,7 @@ public:
using TypeResult = TypeOutput;
/** Default constructor. */
AssemblyKernelGlue()
- : _gemm_kernel_asm(nullptr), _optimised_kernel(nullptr), _a(nullptr), _b(nullptr), _d(nullptr), _workspace(nullptr), _pretranspose(nullptr)
+ : _gemm_kernel_asm(nullptr), _optimised_kernel(nullptr), _a(nullptr), _b(nullptr), _d(nullptr), _workspace(nullptr), _pretranspose(nullptr), _is_prepared(false)
{
}
/** Assembly Gemm */
@@ -76,6 +76,31 @@ public:
ITensor *_workspace;
/** Pre-transpose tensor */
ITensor *_pretranspose;
+ /** Prepared flag */
+ bool _is_prepared;
+
+    /** Runs a preparation step (pre-transposes matrix b when the assembly kernel requires it) and sets the prepared flag */
+    void prepare()
+    {
+        // Pretranspose B if required
+        if(_gemm_kernel_asm->B_pretranspose_required())
+        {
+            // Check the destination tensor first: its buffer and info are dereferenced just below
+            ARM_COMPUTE_ERROR_ON(_pretranspose == nullptr || _pretranspose->buffer() == nullptr);
+            const int  ldb            = _b->info()->strides_in_bytes().y() / sizeof(TypeInput);
+            const auto in1_ptr        = reinterpret_cast<const TypeInput *>(_b->buffer());
+            const int  multi_stride_b = _b->info()->strides_in_bytes().z() / sizeof(TypeInput);
+            // Forcing 128-byte alignment (required by 32-bit kernels)
+            const unsigned int alignment   = 128;
+            void              *raw_ptr     = reinterpret_cast<void *>(_pretranspose->buffer());
+            size_t             space       = _pretranspose->info()->total_size();
+            void              *aligned_ptr = support::cpp11::align(alignment, _gemm_kernel_asm->get_B_pretransposed_array_size(), raw_ptr, space);
+            _gemm_kernel_asm->pretranspose_B_array(aligned_ptr, in1_ptr, ldb, multi_stride_b);
+            // B has been consumed into the pre-transposed buffer, so flag the source tensor as unused
+            _b->mark_as_unused();
+        }
+
+        _is_prepared = true;
+    }
/** Configures the arrays pointers and strides in the assembly kernel and executes the assembly kernel.
* The call to set_arrays is needed to deal with the input sizes containing batches (dims > 2)
@@ -102,28 +127,25 @@ public:
const auto in1_ptr = reinterpret_cast<const TypeInput *>(_b->buffer());
auto out_ptr = reinterpret_cast<TypeOutput *>(_d->buffer());
- // Set workspace if needed
+ // Set workspace if needed and reset number of threads as buffer manager gets re-created with max_threads
if(_workspace != nullptr)
{
_gemm_kernel_asm->set_working_space(reinterpret_cast<void *>(_workspace->buffer()));
+ const unsigned int window_size = _gemm_kernel_asm->get_window_size();
+ unsigned int num_threads = NEScheduler::get().num_threads();
+ if(window_size < num_threads)
+ {
+ num_threads = window_size;
+ _gemm_kernel_asm->set_nthreads(num_threads);
+ }
}
+ // Prepare assembly kernel
+ prepare();
+
// Set gemm parameters
_gemm_kernel_asm->set_arrays(in0_ptr, lda, batch_stride_a, multi_stride_a, in1_ptr, ldb, multi_stride_b, out_ptr, ldd, batch_stride_d, multi_stride_d);
- // Pretranspose B if required
- if(_gemm_kernel_asm->B_pretranspose_required())
- {
- // Forcing 128-byte alignment (required by 32-bit kernels)
- const unsigned int alignment = 128;
- void *raw_ptr = reinterpret_cast<void *>(_pretranspose->buffer());
- size_t space = _pretranspose->info()->total_size();
- void *aligned_ptr = support::cpp11::align(alignment, _gemm_kernel_asm->get_B_pretransposed_array_size(), raw_ptr, space);
- ARM_COMPUTE_ERROR_ON(_pretranspose == nullptr || _pretranspose->buffer() == nullptr);
- _gemm_kernel_asm->pretranspose_B_array(aligned_ptr, in1_ptr, ldb, multi_stride_b);
- _b->mark_as_unused();
- }
-
// Schedule assembly kernel
NEScheduler::get().schedule(_optimised_kernel.get(), Window::DimX);
}
diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
index ff41f0c985..e143814a4e 100644
--- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
@@ -112,6 +112,7 @@ public:
const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
// Inherited methods overridden:
void run() override;
+ void prepare() override;
private:
std::shared_ptr<IMemoryManager> _memory_manager;
diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
index 66c6d427ba..3e527168c1 100644
--- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
@@ -108,6 +108,7 @@ public:
// Inherited methods overridden:
void run() override;
+ void prepare() override;
private:
MemoryGroup _memory_group;
@@ -117,6 +118,7 @@ private:
ITensor *_input;
PadStrideInfo _info;
std::pair<unsigned int, unsigned int> _inner_border;
+ bool _is_prepared;
};
} // arm_compute
#endif /* __ARM_COMPUTE_NEDECONVOLUTIONLAYER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index b80fb7f2c8..aa4cace7c2 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -122,6 +122,7 @@ public:
// Inherited methods overriden:
void run() override;
+ void prepare() override;
private:
NEDepthwiseIm2ColKernel _im2col_kernel;
@@ -135,7 +136,7 @@ private:
Tensor _weights_reshaped;
Tensor _v2mm_output;
Tensor _output_reshaped;
- bool _is_first_run;
+ bool _is_prepared;
bool _is_quantized;
const ITensor *_original_weights;
};
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h
index 0562c07515..99e93ccece 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -70,6 +70,7 @@ public:
// Inherited methods overriden:
void run() override;
+ void prepare() override;
private:
NEDepthwiseConvolutionLayer _depthwise_conv;
diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
index 071eecc3f7..2739f5ebef 100644
--- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
@@ -127,22 +127,23 @@ public:
//Inherited methods override
void run() override;
+ void prepare() override;
private:
MemoryGroup _memory_group;
NEIm2ColKernel _im2col_kernel;
- NEFullyConnectedLayerReshapeWeights _reshape_weights_kernel;
+ NEFullyConnectedLayerReshapeWeights _reshape_weights_function;
NEGEMMInterleave4x4Kernel _interleave4x4_kernel;
NEGEMMMatrixMultiplyKernel _mm_kernel;
NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel;
Tensor _im2col_output;
Tensor _interleave4x4_output;
Tensor _reshape_weights_output;
- bool _are_weights_reshaped;
+ const ITensor *_original_weights;
bool _is_batched_fc_layer;
bool _linearize_input;
bool _accumulate_biases;
- const ITensor *_original_weights;
+ bool _is_prepared;
};
} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index e2263c2307..5d108b2c14 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -53,7 +53,14 @@ class NEGEMM : public IFunction
public:
/** Constructor */
NEGEMM(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
-
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMM(const NEGEMM &) = delete;
+ /** Default move constructor */
+ NEGEMM(NEGEMM &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMM &operator=(const NEGEMM &) = delete;
+ /** Default move assignment operator */
+ NEGEMM &operator=(NEGEMM &&) = default;
/** Initialise the kernel's inputs, output
*
* @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
@@ -72,6 +79,7 @@ public:
// Inherited methods overridden:
void run() override;
+ void prepare() override;
private:
MemoryGroup _memory_group;
@@ -84,10 +92,11 @@ private:
Tensor _tmp_b;
Tensor _workspace;
Tensor _B_pretransposed;
+ const ITensor *_original_b;
bool _run_vector_matrix_multiplication;
bool _run_addition;
- bool _is_first_run;
bool _reshape_b_only_on_first_run;
+ bool _is_prepared;
};
-}
+} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEGEMM_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
index d64fd9e771..7075becf75 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
@@ -153,6 +153,7 @@ public:
// Inherited methods overridden:
void run() override;
+ void prepare() override;
private:
/** Configures the appropriate matrix multiply routine
@@ -197,6 +198,7 @@ private:
bool _is_interleaved;
bool _is_activationlayer_enabled;
bool _skip_im2col;
+ bool _is_prepared;
};
}
#endif /* __ARM_COMPUTE_NECONVOLUTIONGEMMLAYER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
index adcddb8263..f32eb3c757 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
@@ -56,6 +56,14 @@ class NEGEMMLowpMatrixMultiplyCore : public IFunction
public:
/** Constructor */
NEGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMLowpMatrixMultiplyCore(const NEGEMMLowpMatrixMultiplyCore &) = delete;
+ /** Default move constructor */
+ NEGEMMLowpMatrixMultiplyCore(NEGEMMLowpMatrixMultiplyCore &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEGEMMLowpMatrixMultiplyCore &operator=(const NEGEMMLowpMatrixMultiplyCore &) = delete;
+ /** Default move assignment operator */
+ NEGEMMLowpMatrixMultiplyCore &operator=(NEGEMMLowpMatrixMultiplyCore &&) = default;
/** Initialise the kernel's inputs, output
*
* @note GEMM_LOWP: low precision GEMM kernel
@@ -86,6 +94,7 @@ public:
// Inherited methods overridden
void run() override;
+ void prepare() override;
private:
MemoryGroup _memory_group;
@@ -103,12 +112,13 @@ private:
Tensor _tmp_b;
Tensor _workspace;
Tensor _B_pretranspose;
+ const ITensor *_original_b;
int32_t _a_offset;
int32_t _b_offset;
bool _run_vector_matrix_multiplication;
bool _dot_product_path;
- bool _is_first_run;
bool _reshape_b_only_on_first_run;
+ bool _is_prepared;
};
}
#endif /*__ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h
index 18cd27414e..7d1f124bb3 100644
--- a/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h
+++ b/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h
@@ -90,6 +90,7 @@ public:
// Inherited methods overridden:
void run() override;
+ void prepare() override;
private:
MemoryGroup _memory_group;
@@ -100,7 +101,7 @@ private:
Tensor _input_im2col_reshaped;
Tensor _weights_reshaped;
Tensor _gemm_output;
- bool _is_first_run;
+ bool _is_prepared;
const ITensor *_original_weights;
};
}
diff --git a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
index 55921f78f3..c1260977c0 100644
--- a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
@@ -74,6 +74,7 @@ public:
// Inherited methods overridden:
void run() override;
+ void prepare() override;
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer
*
@@ -122,7 +123,7 @@ private:
const ITensor *_input;
const ITensor *_weights;
ITensor *_output;
- bool _reshaped_kernel;
+ bool _is_prepared;
bool _is_activationlayer_enabled;
};
}