aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/GLES_COMPUTE
diff options
context:
space:
mode:
author: Anthony Barbier <anthony.barbier@arm.com> 2017-10-26 15:23:08 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:35:24 +0000
commit: 7068f9900d136312318ff430aef588b14e0c87ad (patch)
tree: b57ca81231860f1d8755e6f18e5be7c959fb60c6 /src/runtime/GLES_COMPUTE
parent: d60737592736715dcfd0520535c48190d4ac77d2 (diff)
download: ComputeLibrary-7068f9900d136312318ff430aef588b14e0c87ad.tar.gz
COMPMID-631: Merge branches/gles_compute branch
Last commit: commit b25c5f68042b0c81bf611d59a1bb8535e1c42497 Author: Xinghang Zhou <xinghang.zhou@arm.com> Date: Wed Oct 25 18:48:10 2017 +0800 Synced validation's tolerances of GCSoftmax from cl side Change-Id: Ibe72054205c1c8721845d679a31af7ed0a7c5cf6 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/93283 Reviewed-by: Anthony Barbier <anthony.barbier@arm.com> Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Diffstat (limited to 'src/runtime/GLES_COMPUTE')
-rw-r--r--src/runtime/GLES_COMPUTE/GCScheduler.cpp61
-rw-r--r--src/runtime/GLES_COMPUTE/GCTensor.cpp77
-rw-r--r--src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp94
-rw-r--r--src/runtime/GLES_COMPUTE/IGCSimpleFunction.cpp45
-rw-r--r--src/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.cpp40
-rw-r--r--src/runtime/GLES_COMPUTE/functions/GCActivationLayer.cpp37
-rwxr-xr-xsrc/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.cpp48
-rwxr-xr-xsrc/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.cpp69
-rw-r--r--src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp64
-rw-r--r--src/runtime/GLES_COMPUTE/functions/GCDropoutLayer.cpp50
-rw-r--r--src/runtime/GLES_COMPUTE/functions/GCFillBorder.cpp40
-rw-r--r--src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp177
-rw-r--r--src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp133
-rw-r--r--src/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.cpp36
-rw-r--r--src/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.cpp38
-rw-r--r--src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp61
-rwxr-xr-xsrc/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.cpp38
-rw-r--r--src/runtime/GLES_COMPUTE/functions/GCPoolingLayer.cpp42
-rw-r--r--src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp66
-rw-r--r--src/runtime/GLES_COMPUTE/functions/GCTranspose.cpp38
20 files changed, 1254 insertions, 0 deletions
diff --git a/src/runtime/GLES_COMPUTE/GCScheduler.cpp b/src/runtime/GLES_COMPUTE/GCScheduler.cpp
new file mode 100644
index 0000000000..b2235ea6f9
--- /dev/null
+++ b/src/runtime/GLES_COMPUTE/GCScheduler.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
+
+#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
+
+using namespace arm_compute;
+
+/** Default constructor: nothing to do beyond default member initialisation. */
+GCScheduler::GCScheduler()
+{
+}
+
+/** Initialise the GLES kernel library, passing only the shader directory. */
+void GCScheduler::default_init()
+{
+    const char *const shader_dir = "./cs_shaders/";
+    GCKernelLibrary::get().init(shader_dir);
+}
+
+/** Initialise the GLES kernel library with a caller-supplied EGL display and context.
+ *
+ * @param dpy EGL display forwarded to the kernel library.
+ * @param ctx EGL context forwarded to the kernel library.
+ */
+void GCScheduler::init(EGLDisplay dpy, EGLContext ctx)
+{
+    const char *const shader_dir = "./cs_shaders/";
+    GCKernelLibrary::get().init(shader_dir, dpy, ctx);
+}
+
+/** Access the scheduler singleton.
+ *
+ * @return Reference to the function-local static scheduler instance.
+ */
+GCScheduler &GCScheduler::get()
+{
+    static GCScheduler instance;
+    return instance;
+}
+
+/** Run a kernel over its own window and optionally flush the GL command stream.
+ *
+ * @param kernel Kernel to run; executed over kernel.window().
+ * @param flush  When true, glFlush() is issued after the kernel has been run.
+ */
+void GCScheduler::enqueue(IGCKernel &kernel, bool flush)
+{
+    kernel.run(kernel.window());
+
+    if(!flush)
+    {
+        return;
+    }
+
+    ARM_COMPUTE_GL_CHECK(glFlush());
+}
+
+/** Issue a GL memory barrier covering shader storage buffer accesses. */
+void GCScheduler::sync()
+{
+    ARM_COMPUTE_GL_CHECK(glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT));
+}
diff --git a/src/runtime/GLES_COMPUTE/GCTensor.cpp b/src/runtime/GLES_COMPUTE/GCTensor.cpp
new file mode 100644
index 0000000000..edbd16dc1d
--- /dev/null
+++ b/src/runtime/GLES_COMPUTE/GCTensor.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+
+using namespace arm_compute;
+
+/** Default constructor: value-initialises the embedded allocator. */
+GCTensor::GCTensor() : _allocator()
+{
+}
+
+/** Expose the tensor's allocator through its generic interface.
+ *
+ * @return Pointer to the allocator member owned by this tensor.
+ */
+ITensorAllocator *GCTensor::allocator()
+{
+    ITensorAllocator *const tensor_allocator = &_allocator;
+    return tensor_allocator;
+}
+
+/** Tensor metadata accessor (const overload).
+ *
+ * @return Pointer to the TensorInfo held by the allocator.
+ */
+TensorInfo *GCTensor::info() const
+{
+    TensorInfo &metadata = _allocator.info();
+    return &metadata;
+}
+
+/** Tensor metadata accessor (non-const overload).
+ *
+ * @return Pointer to the TensorInfo held by the allocator.
+ */
+TensorInfo *GCTensor::info()
+{
+    TensorInfo &metadata = _allocator.info();
+    return &metadata;
+}
+
+/** CPU-side data pointer as reported by the allocator.
+ *
+ * @return Whatever the allocator's data() currently returns.
+ */
+uint8_t *GCTensor::buffer() const
+{
+    uint8_t *const mapped_ptr = _allocator.data();
+    return mapped_ptr;
+}
+
+/** GL shader storage buffer name backing this tensor.
+ *
+ * @return The SSBO name reported by the allocator.
+ */
+GLuint GCTensor::gc_buffer() const
+{
+    const GLuint ssbo_name = _allocator.get_gl_ssbo_name();
+    return ssbo_name;
+}
+
+/** Map the tensor by delegating to the IGCTensor base implementation.
+ *
+ * @param blocking Forwarded unchanged to IGCTensor::map().
+ */
+void GCTensor::map(bool blocking)
+{
+    IGCTensor::map(blocking);
+}
+
+/** Unmap the tensor by delegating to the IGCTensor base implementation. */
+void GCTensor::unmap()
+{
+    IGCTensor::unmap();
+}
+
+/** Mapping hook: forwards the request to the allocator.
+ *
+ * @param blocking Forwarded unchanged to the allocator's map().
+ *
+ * @return Pointer returned by the allocator's map().
+ */
+uint8_t *GCTensor::do_map(bool blocking)
+{
+    uint8_t *const mapped_ptr = _allocator.map(blocking);
+    return mapped_ptr;
+}
+
+/** Unmapping hook: forwards the request to the allocator. */
+void GCTensor::do_unmap()
+{
+    _allocator.unmap();
+}
\ No newline at end of file
diff --git a/src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp b/src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp
new file mode 100644
index 0000000000..694b34f1ec
--- /dev/null
+++ b/src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+/** Default constructor: starts with no GL buffer and no active mapping. */
+GCTensorAllocator::GCTensorAllocator()
+    : _gl_buffer(),
+      _mapping(nullptr)
+{
+}
+
+/** Current CPU mapping of the buffer.
+ *
+ * @return The stored mapping pointer; nullptr when no mapping is active.
+ */
+uint8_t *GCTensorAllocator::data()
+{
+    uint8_t *const current_mapping = _mapping;
+    return current_mapping;
+}
+
+/** Create the backing shader storage buffer for the tensor.
+ *
+ * A new GLBufferWrapper replaces any previous one, storage of info().total_size() bytes is
+ * requested without initial data, and the tensor info is then marked as non-resizable.
+ */
+void GCTensorAllocator::allocate()
+{
+    _gl_buffer = support::cpp14::make_unique<GLBufferWrapper>();
+
+    // Bind, size the data store, then restore the default (0) binding.
+    ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, _gl_buffer->_ssbo_name));
+    const auto size_in_bytes = static_cast<GLsizeiptr>(info().total_size());
+    ARM_COMPUTE_GL_CHECK(glBufferData(GL_SHADER_STORAGE_BUFFER, size_in_bytes, nullptr, GL_STATIC_DRAW));
+    ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0));
+
+    info().set_is_resizable(false);
+}
+
+/** Release the backing GL buffer and make the tensor info resizable again.
+ *
+ * Any cached mapping pointer is cleared as well: once the buffer wrapper is gone the pointer
+ * would otherwise dangle, data() would keep returning it, and a later map() would trip its
+ * "already mapped" check.
+ */
+void GCTensorAllocator::free()
+{
+    _gl_buffer.reset();
+    // The mapping (if any) referred to the buffer that has just been released.
+    _mapping = nullptr;
+    info().set_is_resizable(true);
+}
+
+/** Lock the buffer for CPU access; implemented as a map() call with blocking set to true.
+ *
+ * @return Pointer returned by map().
+ */
+uint8_t *GCTensorAllocator::lock()
+{
+    constexpr bool blocking = true;
+    return map(blocking);
+}
+
+/** Release a lock taken with lock(); implemented as an unmap() call. */
+void GCTensorAllocator::unlock()
+{
+    unmap();
+}
+
+/** Name of the GL shader storage buffer owned by this allocator.
+ *
+ * @return The SSBO name, or 0 when no buffer is currently held (before allocate() or after
+ *         free()). Previously that case dereferenced a null pointer.
+ */
+GLuint GCTensorAllocator::get_gl_ssbo_name() const
+{
+    return (_gl_buffer == nullptr) ? static_cast<GLuint>(0) : _gl_buffer->_ssbo_name;
+}
+
+/** Map the whole buffer into CPU address space for reading and writing.
+ *
+ * Must not be called while a mapping is already active. The buffer is left bound to
+ * GL_SHADER_STORAGE_BUFFER on return.
+ *
+ * @param blocking Unused by this implementation.
+ *
+ * @return Pointer to the mapped range, also cached for data().
+ */
+uint8_t *GCTensorAllocator::map(bool blocking)
+{
+    ARM_COMPUTE_ERROR_ON(_mapping != nullptr);
+    ARM_COMPUTE_UNUSED(blocking);
+
+    ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, _gl_buffer->_ssbo_name));
+
+    const auto length = static_cast<GLsizeiptr>(info().total_size());
+    void      *mapped = ARM_COMPUTE_GL_CHECK(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, length, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT));
+
+    _mapping = static_cast<uint8_t *>(mapped);
+    return _mapping;
+}
+
+/** Undo a previous map(): unmap the buffer, restore the default binding and drop the cached pointer.
+ *
+ * Must only be called while a mapping is active.
+ */
+void GCTensorAllocator::unmap()
+{
+    ARM_COMPUTE_ERROR_ON(_mapping == nullptr);
+
+    const GLuint ssbo_name = _gl_buffer->_ssbo_name;
+
+    ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo_name));
+    ARM_COMPUTE_GL_CHECK(glUnmapBuffer(GL_SHADER_STORAGE_BUFFER));
+    ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0));
+
+    _mapping = nullptr;
+}
\ No newline at end of file
diff --git a/src/runtime/GLES_COMPUTE/IGCSimpleFunction.cpp b/src/runtime/GLES_COMPUTE/IGCSimpleFunction.cpp
new file mode 100644
index 0000000000..19f178f445
--- /dev/null
+++ b/src/runtime/GLES_COMPUTE/IGCSimpleFunction.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
+
+using namespace arm_compute;
+
+/** Default constructor: no kernel set yet and a default-constructed border handler. */
+IGCSimpleFunction::IGCSimpleFunction() //NOLINT
+    : _kernel(), _border_handler()
+{
+}
+
+/** Execute the function: border handler first, then a memory barrier, then the main kernel.
+ *
+ * The derived class must have set the kernel before this is called.
+ */
+void IGCSimpleFunction::run()
+{
+    ARM_COMPUTE_ERROR_ON_MSG(!_kernel, "The child class didn't set the GLES kernel or function isn't configured");
+
+    GCScheduler &scheduler = GCScheduler::get();
+
+    // FIXME(APPBROWSER-300): We may need to rename "enqueue" to "dispatch" and "sync" to "memory_barrier".
+    // The border handler is enqueued without a flush; the barrier orders its writes before the kernel.
+    scheduler.enqueue(_border_handler, false);
+    scheduler.sync();
+    scheduler.enqueue(*_kernel);
+}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.cpp b/src/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.cpp
new file mode 100644
index 0000000000..781b357ce7
--- /dev/null
+++ b/src/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.h"
+
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h"
+#include "arm_compute/core/Helpers.h"
+#include "support/ToolchainSupport.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+/** Create and configure the absolute-difference kernel, then hand it to the base function.
+ *
+ * @param input1 First input tensor, forwarded to the kernel.
+ * @param input2 Second input tensor, forwarded to the kernel.
+ * @param output Output tensor, forwarded to the kernel.
+ */
+void GCAbsoluteDifference::configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output)
+{
+    auto abs_diff_kernel = arm_compute::support::cpp14::make_unique<GCAbsoluteDifferenceKernel>();
+    abs_diff_kernel->configure(input1, input2, output);
+
+    _kernel = std::move(abs_diff_kernel);
+}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCActivationLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCActivationLayer.cpp
new file mode 100644
index 0000000000..8686416616
--- /dev/null
+++ b/src/runtime/GLES_COMPUTE/functions/GCActivationLayer.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h"
+
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h"
+#include "arm_compute/core/Helpers.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+/** Create and configure the activation kernel, then hand it to the base function.
+ *
+ * @param input    Input tensor, forwarded to the kernel.
+ * @param output   Output tensor, forwarded to the kernel.
+ * @param act_info Activation description, forwarded to the kernel.
+ */
+void GCActivationLayer::configure(IGCTensor *input, IGCTensor *output, ActivationLayerInfo act_info)
+{
+    auto activation_kernel = arm_compute::support::cpp14::make_unique<GCActivationLayerKernel>();
+    activation_kernel->configure(input, output, act_info);
+
+    _kernel = std::move(activation_kernel);
+}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.cpp
new file mode 100755
index 0000000000..2e546a663a
--- /dev/null
+++ b/src/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
+
+using namespace arm_compute;
+
+/** Default constructor: value-initialises the normalisation kernel. */
+GCBatchNormalizationLayer::GCBatchNormalizationLayer() : _norm_kernel()
+{
+}
+
+/** Configure the batch-normalisation kernel; every argument is forwarded unchanged.
+ *
+ * @param input   Input tensor.
+ * @param output  Output tensor.
+ * @param mean    Mean tensor.
+ * @param var     Variance tensor.
+ * @param beta    Beta tensor.
+ * @param gamma   Gamma tensor.
+ * @param epsilon Epsilon value.
+ */
+void GCBatchNormalizationLayer::configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *var, const IGCTensor *beta, const IGCTensor *gamma, float epsilon)
+{
+    _norm_kernel.configure(input, output, mean, var, beta, gamma, epsilon);
+}
+
+/** Enqueue the normalisation kernel and request a flush afterwards. */
+void GCBatchNormalizationLayer::run()
+{
+    constexpr bool flush = true;
+    GCScheduler::get().enqueue(_norm_kernel, flush);
+}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.cpp b/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.cpp
new file mode 100755
index 0000000000..ed756cf261
--- /dev/null
+++ b/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+/** Default constructor: no kernels allocated and zero inputs registered. */
+GCDepthConcatenate::GCDepthConcatenate() //NOLINT
+    : _concat_kernels_vector(), _border_handlers_vector(), _num_inputs(0)
+{
+}
+
+/** Set up one concatenate kernel and one border handler per input tensor.
+ *
+ * Each input is written into @p output at a depth offset equal to the summed size of
+ * dimension 2 of all preceding inputs. Borders are filled with a constant zero value.
+ *
+ * @param inputs_vector Input tensors; at least two are required.
+ * @param output        Destination tensor shared by all concatenate kernels.
+ */
+void GCDepthConcatenate::configure(std::vector<IGCTensor *> inputs_vector, IGCTensor *output) //NOLINT
+{
+    ARM_COMPUTE_ERROR_ON(inputs_vector.size() < 2);
+
+    _num_inputs = inputs_vector.size();
+
+    _concat_kernels_vector  = arm_compute::support::cpp14::make_unique<GCDepthConcatenateKernel[]>(_num_inputs);
+    _border_handlers_vector = arm_compute::support::cpp14::make_unique<GCFillBorderKernel[]>(_num_inputs);
+
+    // Running offset along dimension 2 at which the next input is placed.
+    unsigned int depth_offset = 0;
+
+    for(unsigned int idx = 0; idx < _num_inputs; ++idx)
+    {
+        IGCTensor *const current_input = inputs_vector.at(idx);
+        auto            &concat_kernel = _concat_kernels_vector[idx];
+
+        concat_kernel.configure(current_input, depth_offset, output);
+        _border_handlers_vector[idx].configure(current_input, concat_kernel.border_size(), BorderMode::CONSTANT, PixelValue(0));
+
+        depth_offset += current_input->info()->dimension(2);
+    }
+}
+
+/** Run the border handler and the concatenate kernel for every configured input.
+ *
+ * A memory barrier is issued between the two dispatches, as IGCSimpleFunction::run() does,
+ * because the concatenate kernel is configured on the same input tensor whose border the
+ * handler has just written.
+ */
+void GCDepthConcatenate::run()
+{
+    GCScheduler &scheduler = GCScheduler::get();
+
+    for(unsigned int i = 0; i < _num_inputs; ++i)
+    {
+        scheduler.enqueue(_border_handlers_vector[i], false);
+        // Make the border writes visible before the concatenate kernel reads the input.
+        scheduler.sync();
+        scheduler.enqueue(_concat_kernels_vector[i], true);
+    }
+}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp
new file mode 100644
index 0000000000..ae9dd51b8e
--- /dev/null
+++ b/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h"
+
+#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Utils.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+/** Select and configure the direct convolution kernel that matches the weights' size.
+ *
+ * The kernel is chosen from dimension 0 of @p weights: 1, 3 and 5 are supported, any other
+ * value raises an error. The border handler is then configured on @p input with the chosen
+ * kernel's border size and a constant zero fill.
+ *
+ * @param input     Input tensor.
+ * @param weights   Weights tensor; its dimension 0 selects the kernel variant.
+ * @param biases    Biases tensor, forwarded to the kernel.
+ * @param output    Output tensor.
+ * @param conv_info Padding and stride information, forwarded to the kernel.
+ */
+void GCDirectConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info)
+{
+    const int kernel_size = weights->info()->dimension(0);
+
+    switch(kernel_size)
+    {
+        case 1:
+        {
+            auto kernel_1x1 = arm_compute::support::cpp14::make_unique<GCDirectConvolutionLayer1x1Kernel>();
+            kernel_1x1->configure(input, weights, biases, output, conv_info);
+            _kernel = std::move(kernel_1x1);
+            break;
+        }
+        case 3:
+        {
+            auto kernel_3x3 = arm_compute::support::cpp14::make_unique<GCDirectConvolutionLayer3x3Kernel>();
+            kernel_3x3->configure(input, weights, biases, output, conv_info);
+            _kernel = std::move(kernel_3x3);
+            break;
+        }
+        case 5:
+        {
+            auto kernel_5x5 = arm_compute::support::cpp14::make_unique<GCDirectConvolutionLayer5x5Kernel>();
+            kernel_5x5->configure(input, weights, biases, output, conv_info);
+            _kernel = std::move(kernel_5x5);
+            break;
+        }
+        default:
+        {
+            ARM_COMPUTE_ERROR("kernel size unsupported!");
+            return;
+        }
+    }
+
+    _border_handler.configure(input, _kernel->border_size(), BorderMode::CONSTANT, PixelValue(0));
+}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCDropoutLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCDropoutLayer.cpp
new file mode 100644
index 0000000000..032c2fdb1e
--- /dev/null
+++ b/src/runtime/GLES_COMPUTE/functions/GCDropoutLayer.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h"
+
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+
+using namespace arm_compute;
+
+/** Default constructor: value-initialises the dropout kernel. */
+GCDropoutLayer::GCDropoutLayer() : _dropout_kernel()
+{
+}
+
+/** Validate the tensors and configure the dropout kernel.
+ *
+ * @param input   Input tensor; must be single-channel F16 or F32.
+ * @param mask    Mask tensor; must have the same data type as @p input.
+ * @param output  Output tensor; must have the same data type as @p input.
+ * @param ratio   Ratio value, forwarded to the kernel.
+ * @param forward Forward flag, forwarded to the kernel.
+ */
+void GCDropoutLayer::configure(const IGCTensor *input, IGCTensor *mask, IGCTensor *output, float ratio, bool forward)
+{
+    // Argument checks
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, mask, output);
+
+    // Kernel set-up
+    _dropout_kernel.configure(input, mask, output, ratio, forward);
+}
+
+/** Enqueue the dropout kernel using the scheduler's default flush setting. */
+void GCDropoutLayer::run()
+{
+    GCScheduler::get().enqueue(_dropout_kernel);
+}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCFillBorder.cpp b/src/runtime/GLES_COMPUTE/functions/GCFillBorder.cpp
new file mode 100644
index 0000000000..5c2431fa13
--- /dev/null
+++ b/src/runtime/GLES_COMPUTE/functions/GCFillBorder.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCFillBorder.h"
+
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h"
+#include "arm_compute/core/Helpers.h"
+#include "support/ToolchainSupport.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+/** Create and configure the border-filling kernel, then hand it to the base function.
+ *
+ * @param tensor                Tensor whose border is filled.
+ * @param border_width          Border width, wrapped in a BorderSize before being passed on.
+ * @param border_mode           Border mode, forwarded to the kernel.
+ * @param constant_border_value Constant border value, forwarded to the kernel.
+ */
+void GCFillBorder::configure(IGCTensor *tensor, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value)
+{
+    auto fill_kernel = arm_compute::support::cpp14::make_unique<GCFillBorderKernel>();
+    fill_kernel->configure(tensor, BorderSize(border_width), border_mode, constant_border_value);
+
+    _kernel = std::move(fill_kernel);
+}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp
new file mode 100644
index 0000000000..63cb40e616
--- /dev/null
+++ b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h"
+
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
+#include "support/ToolchainSupport.h"
+
+#include <algorithm>
+
+using namespace arm_compute;
+
+/** Configure the weights reshape, which is implemented with a transpose kernel.
+ *
+ * @param[in]  input  Tensor passed to the transpose kernel as source.
+ * @param[out] output Tensor passed to the transpose kernel as destination.
+ */
+void GCFullyConnectedLayerReshapeWeights::configure(const IGCTensor *input, IGCTensor *output)
+{
+    auto transpose_kernel = arm_compute::support::cpp14::make_unique<GCTransposeKernel>();
+    transpose_kernel->configure(input, output);
+
+    // The function object takes ownership of the configured kernel
+    _kernel = std::move(transpose_kernel);
+}
+
+/** Default constructor.
+ *
+ * Kernels and intermediate tensors are default-constructed; the weights are flagged as
+ * already reshaped, the "after convolution" path is selected and bias accumulation is off
+ * until configure() decides otherwise.
+ */
+GCFullyConnectedLayer::GCFullyConnectedLayer()
+    : _im2col_kernel(),
+      _reshape_weights_kernel(),
+      _mm_kernel(),
+      _accumulate_biases_kernel(),
+      _im2col_output(),
+      _reshape_weights_output(),
+      _are_weights_reshaped(true),
+      _is_fc_after_conv(true),
+      _accumulate_biases(false)
+{
+}
+
+/** Configure the path in which the input is linearized with im2col before the matrix multiply.
+ *
+ * @param[in]  input   Input tensor; its first three dimensions are collapsed into one.
+ * @param[in]  weights Weights tensor; dimension 1 must equal the collapsed input size.
+ * @param[out] output  Destination tensor of the matrix multiply.
+ */
+void GCFullyConnectedLayer::configure_conv_fc(const IGCTensor *input, const IGCTensor *weights, IGCTensor *output)
+{
+    const auto *src_info = input->info();
+
+    // Size of the first three input dimensions once they are flattened into a single row
+    const auto linear_size = src_info->dimension(0) * src_info->dimension(1) * src_info->dimension(2);
+
+    ARM_COMPUTE_ERROR_ON((weights->info()->dimension(1) != linear_size));
+
+    // The im2col output keeps input dimensions 3..5 and replaces dimensions 0..2 by their product
+    TensorShape shape_im2col;
+    shape_im2col.set(0, linear_size);
+    shape_im2col.set(1, src_info->dimension(3));
+    shape_im2col.set(2, src_info->dimension(4));
+    shape_im2col.set(3, src_info->dimension(5));
+    _im2col_output.allocator()->init(TensorInfo(shape_im2col, 1, src_info->data_type()));
+
+    // Linearize the input: 1x1 kernel dimensions, PadStrideInfo(1, 1, 0, 0), last argument false
+    _im2col_kernel.configure(input, &_im2col_output, std::make_pair(1, 1), PadStrideInfo(1, 1, 0, 0), false);
+
+    // Multiply the linearized input by the weights
+    _mm_kernel.configure(&_im2col_output, weights, output, 1.0f, false);
+
+    // The intermediate tensor is only allocated after every kernel using it has been configured
+    _im2col_output.allocator()->allocate();
+}
+
+/** Configure the path in which the input is fed to the matrix multiply directly (no im2col).
+ *
+ * @param[in]  input   Input tensor; dimension 0 must match dimension 1 of @p weights.
+ * @param[in]  weights Weights tensor.
+ * @param[out] output  Destination tensor of the matrix multiply.
+ */
+void GCFullyConnectedLayer::configure_fc_fc(const IGCTensor *input, const IGCTensor *weights, IGCTensor *output)
+{
+    ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1));
+
+    // No linearization step: the matrix multiply consumes the input as it is
+    _mm_kernel.configure(input, weights, output, 1.0f, false);
+}
+
+/** Configure the fully connected layer: optional weights reshape, optional im2col, matrix multiply and optional bias accumulation.
+ *
+ * @param[in]  input                Input tensor. Must be single channel F32 or F16.
+ * @param[in]  weights              Weights tensor. Same data type as @p input, at most 2 dimensions.
+ * @param[in]  biases               Biases tensor, may be nullptr. When provided it must have the same data type as @p input.
+ * @param[out] output               Destination tensor. Same data type as @p input.
+ * @param[in]  transpose_weights    When false the weights are always treated as already reshaped, whatever @p are_weights_reshaped says.
+ * @param[in]  are_weights_reshaped Only read when @p transpose_weights is true: false requests a reshape of the weights on the first run().
+ */
+void GCFullyConnectedLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, bool transpose_weights, bool are_weights_reshaped)
+{
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
+ ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 2);
+
+ // Reset the state flags so that a previous configuration cannot leak into this one
+ _are_weights_reshaped = transpose_weights ? are_weights_reshaped : true;
+ _is_fc_after_conv = true;
+ _accumulate_biases = false;
+
+ if(biases != nullptr)
+ {
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
+
+ _accumulate_biases = true;
+
+ // Configure accumulate biases kernel
+ _accumulate_biases_kernel.configure(output, biases);
+ }
+
+ // With the Fully Connected layer we can have 4 different cases:
+ // 1) Convolution layer -> Fully Connected layer without batches
+ // 2) Fully Connected layer -> Fully Connected layer without batches
+ // 3) Convolution layer -> Fully Connected layer with batches
+ // 4) Fully Connected layer -> Fully Connected layer with batches
+
+ // Weights consumed by the matrix multiply: the caller's tensor, or the reshaped copy when a reshape is needed
+ const IGCTensor *weights_to_use = weights;
+
+ if(!_are_weights_reshaped)
+ {
+ weights_to_use = &_reshape_weights_output;
+
+ // Reshape the weights
+ _reshape_weights_kernel.configure(weights, &_reshape_weights_output);
+ }
+
+ // Check if we have a fully connected layer with batches
+ const bool is_batched_fc_layer = output->info()->dimension(1) > 1;
+
+ if(is_batched_fc_layer)
+ {
+ // Batched: treat the input as coming from a convolution when its dimensions from index 3 onwards
+ // match the output dimensions from index 1 onwards
+ _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(input->info()->tensor_shape().cbegin() + 3,
+ input->info()->tensor_shape().cend(),
+ output->info()->tensor_shape().cbegin() + 1));
+ }
+ else
+ {
+ // Not batched: any input with more than one dimension has to be linearized first
+ _is_fc_after_conv = input->info()->num_dimensions() > 1;
+ }
+
+ if(_is_fc_after_conv)
+ {
+ // Fully Connected layer after a Convolution Layer (with or without batches): im2col + matrix multiply
+ configure_conv_fc(input, weights_to_use, output);
+ }
+ else
+ {
+ // Fully Connected layer after a Fully Connected Layer (with or without batches): matrix multiply only
+ configure_fc_fc(input, weights_to_use, output);
+ }
+
+ // Allocate the transpose tensor if the are_weights_reshaped flag is false and once all the configure methods have been called
+ if(!_are_weights_reshaped)
+ {
+ // Allocate the tensor for the weights reshaped
+ _reshape_weights_output.allocator()->allocate();
+ }
+}
+
+/** Execute the layer: one-off weights reshape, optional im2col, matrix multiply and optional bias accumulation. */
+void GCFullyConnectedLayer::run()
+{
+    auto &scheduler = GCScheduler::get();
+
+    // The weights only need reshaping on the first run; the flag is flipped so later runs skip it
+    if(!_are_weights_reshaped)
+    {
+        _are_weights_reshaped = true;
+        _reshape_weights_kernel.run();
+    }
+
+    // Inputs that need linearizing go through im2col first
+    if(_is_fc_after_conv)
+    {
+        scheduler.enqueue(_im2col_kernel, false);
+    }
+
+    // Synchronize before the matrix multiply is enqueued
+    scheduler.sync();
+
+    // Matrix multiply; the second argument is true only when no bias kernel follows
+    scheduler.enqueue(_mm_kernel, !_accumulate_biases);
+
+    // Add the biases on top of the matrix multiply result when they were provided
+    if(_accumulate_biases)
+    {
+        scheduler.sync();
+
+        scheduler.enqueue(_accumulate_biases_kernel);
+    }
+}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp
new file mode 100644
index 0000000000..c47a0e71fb
--- /dev/null
+++ b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h"
+#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
+#include "arm_compute/runtime/ITensorAllocator.h"
+
+using namespace arm_compute;
+
+/** Default constructor.
+ *
+ * Kernels and temporary tensors are default-constructed; neither the interleaved/transposed
+ * path nor the matrix addition is enabled until configure() is called.
+ */
+GCGEMM::GCGEMM()
+    : _interleave_kernel(),
+      _transpose_kernel(),
+      _mm_kernel(),
+      _ma_kernel(),
+      _tmp_a(),
+      _tmp_b(),
+      _is_interleaved_transposed(false),
+      _run_addition(false)
+{
+}
+
+/** Configure the GEMM function.
+ *
+ * The matrix multiply kernel receives @p alpha and the optional matrix addition kernel receives
+ * @p c and @p beta (presumably computing output = alpha * A * B + beta * C).
+ * Throughout this function dimension(0) is the number of columns and dimension(1) the number of rows.
+ *
+ * @param[in]  a      First input matrix. Must be single channel F32.
+ * @param[in]  b      Second input matrix. Same data type as @p a; its number of rows must equal the number of columns of @p a.
+ * @param[in]  c      Optional third matrix, may be nullptr. When provided it must match the data type of @p a and the shape of @p output.
+ * @param[out] output Destination matrix. Same data type as @p a.
+ * @param[in]  alpha  Scalar forwarded to the matrix multiply kernel.
+ * @param[in]  beta   Scalar forwarded to the matrix addition kernel; the addition is skipped when it is 0 or @p c is nullptr.
+ */
+void GCGEMM::configure(const IGCTensor *a, const IGCTensor *b, const IGCTensor *c, IGCTensor *output, float alpha, float beta)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(a, b, output);
+
+    if(c != nullptr)
+    {
+        ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(a, c);
+        ARM_COMPUTE_ERROR_ON_MSG(a->info()->dimension(1) != c->info()->dimension(1), "The C matrix must have the same number of rows as the matrix A");
+        ARM_COMPUTE_ERROR_ON_MSG(b->info()->dimension(0) != c->info()->dimension(0), "The C matrix must have the same number of columns as the matrix B");
+        ARM_COMPUTE_ERROR_ON_MSG(c->info()->dimension(0) != output->info()->dimension(0), "The C matrix must have the same number of columns as the output matrix");
+        ARM_COMPUTE_ERROR_ON_MSG(c->info()->dimension(1) != output->info()->dimension(1), "The C matrix must have the same number of rows as the output matrix");
+    }
+
+    ARM_COMPUTE_ERROR_ON_MSG(a->info()->dimension(0) != b->info()->dimension(1), "The product AB is defined only if the number of columns in A is equal to the number of rows in B");
+
+    // Start from a clean state so that reconfiguring without C (or with beta == 0) does not keep a stale addition
+    _run_addition = false;
+
+    // If the input tensor has 16 rows or fewer, we run a special version of GEMM without reshaping the input tensors
+    _is_interleaved_transposed = a->info()->dimension(1) > 16;
+
+    const IGCTensor *matrix_a = a;
+    const IGCTensor *matrix_b = b;
+
+    if(_is_interleaved_transposed)
+    {
+        matrix_a = &_tmp_a;
+        matrix_b = &_tmp_b;
+
+        TensorShape shape_tmp_a = a->info()->tensor_shape();
+        TensorShape shape_tmp_b = b->info()->tensor_shape();
+
+        // Interleaved A: rows are 4 times wider and there are ceil(rows / 4) of them
+        shape_tmp_a.set(0, a->info()->dimension(0) * 4);
+        shape_tmp_a.set(1, std::ceil(a->info()->dimension(1) / 4.0f));
+
+        // Transposed B: the block width is the number of elements that fit in max_gc_vector_width
+        const unsigned int transpose_w = max_gc_vector_width / data_size_from_type(b->info()->data_type());
+        shape_tmp_b.set(0, b->info()->dimension(1) * transpose_w);
+        shape_tmp_b.set(1, std::ceil(b->info()->dimension(0) / static_cast<float>(transpose_w)));
+
+        TensorInfo info_a(shape_tmp_a, 1, a->info()->data_type(), a->info()->fixed_point_position());
+        _tmp_a.allocator()->init(info_a);
+
+        TensorInfo info_b(shape_tmp_b, 1, b->info()->data_type(), b->info()->fixed_point_position());
+        _tmp_b.allocator()->init(info_b);
+
+        // Configure interleave kernel
+        _interleave_kernel.configure(a, &_tmp_a);
+
+        // Configure transpose kernel
+        _transpose_kernel.configure(b, &_tmp_b);
+    }
+
+    // The multiply kernel reads either the original matrices or their reshaped copies
+    _mm_kernel.configure(matrix_a, matrix_b, output, alpha, _is_interleaved_transposed);
+
+    if(_is_interleaved_transposed)
+    {
+        // Allocate intermediate tensors once every kernel using them has been configured
+        _tmp_a.allocator()->allocate();
+        _tmp_b.allocator()->allocate();
+    }
+
+    // Configure matrix addition kernel
+    if(beta != 0 && c != nullptr)
+    {
+        _ma_kernel.configure(c, output, beta);
+        _run_addition = true;
+    }
+}
+
+/** Execute the GEMM: optional interleave and transpose, matrix multiply, optional matrix addition. */
+void GCGEMM::run()
+{
+    auto &scheduler = GCScheduler::get();
+
+    // The reshaped path first interleaves A and transposes B into the temporary tensors
+    if(_is_interleaved_transposed)
+    {
+        scheduler.enqueue(_interleave_kernel, false);
+        scheduler.enqueue(_transpose_kernel, false);
+    }
+
+    // Matrix multiply; the second argument is true only when no addition kernel follows
+    scheduler.enqueue(_mm_kernel, !_run_addition);
+
+    // Matrix addition, only when configure() enabled it
+    if(_run_addition)
+    {
+        scheduler.enqueue(_ma_kernel);
+    }
+}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.cpp b/src/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.cpp
new file mode 100644
index 0000000000..44c940e126
--- /dev/null
+++ b/src/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.h"
+
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+/** Create an interleave 4x4 kernel, configure it and store it as this function's kernel.
+ *
+ * @param[in]  input  Tensor passed to the kernel as source.
+ * @param[out] output Tensor passed to the kernel as destination.
+ */
+void GCGEMMInterleave4x4::configure(const IGCTensor *input, IGCTensor *output)
+{
+    auto interleave_kernel = arm_compute::support::cpp14::make_unique<GCGEMMInterleave4x4Kernel>();
+    interleave_kernel->configure(input, output);
+
+    // The function object takes ownership of the configured kernel
+    _kernel = std::move(interleave_kernel);
+}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.cpp b/src/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.cpp
new file mode 100644
index 0000000000..893fa5572b
--- /dev/null
+++ b/src/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.h"
+
+#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h"
+#include "arm_compute/core/Types.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+/** Create a transpose 1xW kernel, configure it and store it as this function's kernel.
+ *
+ * @param[in]  input  Tensor passed to the kernel as source.
+ * @param[out] output Tensor passed to the kernel as destination.
+ */
+void GCGEMMTranspose1xW::configure(const IGCTensor *input, IGCTensor *output)
+{
+    auto transpose_kernel = arm_compute::support::cpp14::make_unique<GCGEMMTranspose1xWKernel>();
+    transpose_kernel->configure(input, output);
+
+    // The function object takes ownership of the configured kernel
+    _kernel = std::move(transpose_kernel);
+}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp
new file mode 100644
index 0000000000..d30ed52d5c
--- /dev/null
+++ b/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
+
+using namespace arm_compute;
+
+/** Default constructor: the intermediate tensor, both kernels and the border handler are default-constructed. */
+GCNormalizationLayer::GCNormalizationLayer()
+    : _squared_input(),
+      _norm_kernel(),
+      _multiply_kernel(),
+      _border_handler()
+{
+}
+
+/** Configure the normalization layer.
+ *
+ * The input is multiplied by itself into an intermediate tensor, the border of that tensor is
+ * filled with zeros, and the normalization kernel reads both the input and the intermediate tensor.
+ *
+ * @param[in]  input     Source tensor. Must not be nullptr.
+ * @param[out] output    Destination tensor handed to the normalization kernel.
+ * @param[in]  norm_info Normalization parameters forwarded to the normalization kernel.
+ */
+void GCNormalizationLayer::configure(const IGCTensor *input, IGCTensor *output, const NormalizationLayerInfo &norm_info)
+{
+    ARM_COMPUTE_ERROR_ON(input == nullptr);
+
+    // The intermediate tensor has the shape and data type of the input, with a single channel
+    const auto *src_info = input->info();
+    _squared_input.allocator()->init(TensorInfo(src_info->tensor_shape(), 1, src_info->data_type()));
+
+    _norm_kernel.configure(input, &_squared_input, output, norm_info);
+
+    // input * input with a scale of 1 gives the squared input
+    _multiply_kernel.configure(input, input, &_squared_input, 1.0f);
+
+    // Fill the border by 3 elements since we need vload4 in the IN_MAP normalization kernel
+    _border_handler.configure(&_squared_input, _norm_kernel.border_size(), BorderMode::CONSTANT, PixelValue(0));
+
+    // The intermediate tensor is allocated only after every kernel using it has been configured
+    _squared_input.allocator()->allocate();
+}
+
+/** Execute the layer: square the input, fill the border of the squared tensor, then normalize. */
+void GCNormalizationLayer::run()
+{
+    auto &scheduler = GCScheduler::get();
+
+    // All three kernels are enqueued with the second argument set to false
+    scheduler.enqueue(_multiply_kernel, false);
+    scheduler.enqueue(_border_handler, false);
+    scheduler.enqueue(_norm_kernel, false);
+}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.cpp b/src/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.cpp
new file mode 100755
index 0000000000..0cd87ea875
--- /dev/null
+++ b/src/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.h"
+
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h"
+#include "support/ToolchainSupport.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+/** Create a pixel-wise multiplication kernel, configure it and store it as this function's kernel.
+ *
+ * @param[in]  input1 First tensor passed to the kernel.
+ * @param[in]  input2 Second tensor passed to the kernel.
+ * @param[out] output Destination tensor passed to the kernel.
+ * @param[in]  scale  Scale factor forwarded to the kernel.
+ */
+void GCPixelWiseMultiplication::configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output, float scale)
+{
+    auto multiply_kernel = arm_compute::support::cpp14::make_unique<GCPixelWiseMultiplicationKernel>();
+    multiply_kernel->configure(input1, input2, output, scale);
+
+    // The function object takes ownership of the configured kernel
+    _kernel = std::move(multiply_kernel);
+}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCPoolingLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCPoolingLayer.cpp
new file mode 100644
index 0000000000..46a60cddef
--- /dev/null
+++ b/src/runtime/GLES_COMPUTE/functions/GCPoolingLayer.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h"
+
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h"
+#include "arm_compute/core/PixelValue.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+/** Configure the pooling kernel and the border handler that prepares its input.
+ *
+ * @param[in,out] input     Source tensor; the border handler is also configured on it.
+ * @param[out]    output    Destination tensor passed to the pooling kernel.
+ * @param[in]     pool_info Pooling parameters; the pooling type also selects the border mode.
+ */
+void GCPoolingLayer::configure(IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info)
+{
+    // Build the pooling kernel and hand it over to the function object
+    auto pooling_kernel = arm_compute::support::cpp14::make_unique<GCPoolingLayerKernel>();
+    pooling_kernel->configure(input, output, pool_info);
+    _kernel = std::move(pooling_kernel);
+
+    // Max pooling replicates the edge values, every other pooling type uses a constant border of 0
+    BorderMode border_mode = BorderMode::CONSTANT;
+    if(pool_info.pool_type() == PoolingType::MAX)
+    {
+        border_mode = BorderMode::REPLICATE;
+    }
+
+    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(0.0f));
+}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp
new file mode 100644
index 0000000000..d7d47d2802
--- /dev/null
+++ b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h"
+
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
+
+using namespace arm_compute;
+
+/** Default constructor: the three kernels and the three intermediate tensors are default-constructed. */
+GCSoftmaxLayer::GCSoftmaxLayer()
+    : _max_kernel(),
+      _shift_exp_sum_kernel(),
+      _norm_kernel(),
+      _max(),
+      _sum(),
+      _tmp()
+{
+}
+
+/** Configure the softmax layer as a chain of three kernels: max, shift/exp/sum and normalization.
+ *
+ * @param[in]  input  Source tensor. Must be single channel F16 or F32.
+ * @param[out] output Destination tensor written by the normalization kernel.
+ */
+void GCSoftmaxLayer::configure(const IGCTensor *input, IGCTensor *output)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
+
+    const auto *src_info = input->info();
+
+    // _tmp mirrors the input: same shape, channels, data type and fixed point position
+    _tmp.allocator()->init(TensorInfo(src_info->tensor_shape(), src_info->num_channels(), src_info->data_type(), src_info->fixed_point_position()));
+
+    // _max and _sum share the input shape except that dimension 0 is reduced to a single element
+    TensorShape reduced_shape = src_info->tensor_shape();
+    reduced_shape.set(0, 1);
+    TensorInfo reduced_info(reduced_shape, src_info->num_channels(), src_info->data_type(), src_info->fixed_point_position());
+    _max.allocator()->init(reduced_info);
+    _sum.allocator()->init(reduced_info);
+
+    // Chain the kernels through the intermediate tensors
+    _max_kernel.configure(input, &_max);
+    _shift_exp_sum_kernel.configure(input, &_max, &_tmp, &_sum);
+    _norm_kernel.configure(&_tmp, &_sum, output);
+
+    // Intermediate tensors are allocated only after every kernel using them has been configured
+    _tmp.allocator()->allocate();
+    _max.allocator()->allocate();
+    _sum.allocator()->allocate();
+}
+
+/** Execute the softmax: max kernel, shift/exp/sum kernel, then normalization kernel. */
+void GCSoftmaxLayer::run()
+{
+    auto &scheduler = GCScheduler::get();
+
+    // The first two kernels pass false as second argument; the last one uses the default
+    scheduler.enqueue(_max_kernel, false);
+    scheduler.enqueue(_shift_exp_sum_kernel, false);
+    scheduler.enqueue(_norm_kernel);
+}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCTranspose.cpp b/src/runtime/GLES_COMPUTE/functions/GCTranspose.cpp
new file mode 100644
index 0000000000..c2dc122e64
--- /dev/null
+++ b/src/runtime/GLES_COMPUTE/functions/GCTranspose.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCTranspose.h"
+
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h"
+#include "support/ToolchainSupport.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+/** Create a transpose kernel, configure it and store it as this function's kernel.
+ *
+ * @param[in]  input  Tensor passed to the kernel as source.
+ * @param[out] output Tensor passed to the kernel as destination.
+ */
+void GCTranspose::configure(const IGCTensor *input, IGCTensor *output)
+{
+    auto transpose_kernel = arm_compute::support::cpp14::make_unique<GCTransposeKernel>();
+    transpose_kernel->configure(input, output);
+
+    // The function object takes ownership of the configured kernel
+    _kernel = std::move(transpose_kernel);
+}