1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
|
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#if (defined(__aarch64__)) || (defined(__x86_64__)) // disable test failing on FireFly/Armv7
#include "ClWorkloadFactoryHelper.hpp"
#include <test/TensorHelpers.hpp>
#include <backendsCommon/TensorHandle.hpp>
#include <backendsCommon/WorkloadFactory.hpp>
#include <cl/ClContextControl.hpp>
#include <cl/ClWorkloadFactory.hpp>
#include <cl/OpenClTimer.hpp>
#include <backendsCommon/test/TensorCopyUtils.hpp>
#include <backendsCommon/test/WorkloadTestUtils.hpp>
#include <arm_compute/runtime/CL/CLScheduler.h>
#include <doctest/doctest.h>
#include <iostream>
using namespace armnn;
// Per-test-case fixture owning a ClContextControl, so that OpenCL is set up
// before the body of every test case that uses this fixture runs.
// NOTE: the last constructor argument is `true` because profiling has to be
// enabled in ClContextControl for OpenClTimer to be able to report execution
// times.
struct OpenClFixture
{
    OpenClFixture()
        : m_ClContextControl(nullptr, nullptr, true)
    {
    }

    ~OpenClFixture() = default;

    ClContextControl m_ClContextControl;
};
// Executes one CL batch-normalization workload between OpenClTimer::Start()
// and OpenClTimer::Stop(), then verifies the timer's name and that it reports
// exactly one measurement carrying the expected kernel label and a positive
// value.
TEST_CASE_FIXTURE(OpenClFixture, "OpenClTimerBatchNorm")
{
    auto memoryManager = ClWorkloadFactoryHelper::GetMemoryManager();
    ClWorkloadFactory workloadFactory = ClWorkloadFactoryHelper::GetFactory(memoryManager);

    const unsigned int width    = 2;
    const unsigned int height   = 3;
    const unsigned int channels = 2;
    const unsigned int num      = 1;

    TensorInfo inputTensorInfo( {num, channels, height, width}, DataType::Float32);
    TensorInfo outputTensorInfo({num, channels, height, width}, DataType::Float32);
    // Shape shared by the four per-channel parameter tensors below.
    TensorInfo tensorInfo({channels}, DataType::Float32);

    // num * channels * height * width = 12 input values.
    std::vector<float> input =
    {
         1.f, 4.f,
         4.f, 2.f,
         1.f, 6.f,

         1.f, 1.f,
         4.f, 1.f,
        -2.f, 4.f
    };

    // these values are per-channel of the input
    std::vector<float> mean     = { 3.f, -2.f };
    std::vector<float> variance = { 4.f,  9.f };
    std::vector<float> beta     = { 3.f,  2.f };
    std::vector<float> gamma    = { 2.f,  1.f };

    // The calls below are wrapped in the project's deprecation-warning guards.
    ARMNN_NO_DEPRECATE_WARN_BEGIN
    std::unique_ptr<ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
    ARMNN_NO_DEPRECATE_WARN_END

    BatchNormalizationQueueDescriptor data;
    WorkloadInfo info;

    ScopedTensorHandle meanTensor(tensorInfo);
    ScopedTensorHandle varianceTensor(tensorInfo);
    ScopedTensorHandle betaTensor(tensorInfo);
    ScopedTensorHandle gammaTensor(tensorInfo);

    AllocateAndCopyDataToITensorHandle(&meanTensor, mean.data());
    AllocateAndCopyDataToITensorHandle(&varianceTensor, variance.data());
    AllocateAndCopyDataToITensorHandle(&betaTensor, beta.data());
    AllocateAndCopyDataToITensorHandle(&gammaTensor, gamma.data());

    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    data.m_Mean             = &meanTensor;
    data.m_Variance         = &varianceTensor;
    data.m_Beta             = &betaTensor;
    data.m_Gamma            = &gammaTensor;
    data.m_Parameters.m_Eps = 0.0f;

    // for each channel:
    // subtract mean, divide by standard deviation (with an epsilon to avoid div by 0)
    // multiply by gamma and add beta
    std::unique_ptr<IWorkload> workload = workloadFactory.CreateBatchNormalization(data, info);
    // Stop here if no workload was produced rather than dereferencing a null
    // pointer in Execute() below.
    REQUIRE((workload != nullptr));

    inputHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), input.data());

    OpenClTimer openClTimer;
    CHECK_EQ(openClTimer.GetName(), "OpenClKernelTimer");

    //Start the timer
    openClTimer.Start();

    //Execute the workload
    workload->Execute();

    //Stop the timer
    openClTimer.Stop();

    // Query the timer once and reuse the result for every check.
    const auto measurements = openClTimer.GetMeasurements();

    // REQUIRE (not CHECK): the checks below call front(), which must not be
    // reached when the container is empty. CHECK would only record the failure
    // and carry on.
    REQUIRE_EQ(measurements.size(), 1);

    CHECK_EQ(measurements.front().m_Name,
             "OpenClKernelTimer/0: batchnormalization_layer_nchw GWS[1,3,2]");
    CHECK(measurements.front().m_Value > 0);
}
#endif //aarch64 or x86_64
|