diff options
author | George Gekov <george.gekov@arm.com> | 2021-08-16 11:32:10 +0100 |
---|---|---|
committer | Jim Flynn <jim.flynn@arm.com> | 2022-02-05 19:49:06 +0000 |
commit | 23c26277086c78704a17f0dae86da947816320c0 (patch) | |
tree | 88b02fd1fae3130256d059251788a7ef68d2831f /samples/SpeechRecognition/test | |
parent | 922b912fd2d462bac0809bac5669310ad1506310 (diff) | |
download | armnn-23c26277086c78704a17f0dae86da947816320c0.tar.gz |
MLECO-2079 Adding the C++ KWS example
Signed-off-by: Eanna O Cathain <eanna.ocathain@arm.com>
Change-Id: I81899bbfaada32f478c2e2fc6441eabb94d8d0fc
Diffstat (limited to 'samples/SpeechRecognition/test')
-rw-r--r-- | samples/SpeechRecognition/test/AudioCaptureTest.cpp | 61 | ||||
-rw-r--r-- | samples/SpeechRecognition/test/MFCCTest.cpp | 20 | ||||
-rw-r--r-- | samples/SpeechRecognition/test/PreprocessTest.cpp | 9 |
3 files changed, 16 insertions, 74 deletions
diff --git a/samples/SpeechRecognition/test/AudioCaptureTest.cpp b/samples/SpeechRecognition/test/AudioCaptureTest.cpp deleted file mode 100644 index 94b4e7cb7a..0000000000 --- a/samples/SpeechRecognition/test/AudioCaptureTest.cpp +++ /dev/null @@ -1,61 +0,0 @@ -// -// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#define CATCH_CONFIG_MAIN -#include <catch.hpp> -#include <limits> - -#include "AudioCapture.hpp" - -TEST_CASE("Test capture of audio file") -{ - std::string testResources = TEST_RESOURCE_DIR; - REQUIRE(testResources != ""); - std::string file = testResources + "/" + "myVoiceIsMyPassportVerifyMe04.wav"; - asr::AudioCapture capture; - std::vector<float> audioData = capture.LoadAudioFile(file); - capture.InitSlidingWindow(audioData.data(), audioData.size(), 47712, 16000); - - std::vector<float> firstAudioBlock = capture.Next(); - float actual1 = firstAudioBlock.at(0); - float actual2 = firstAudioBlock.at(47000); - CHECK(std::to_string(actual1) == "0.000352"); - CHECK(std::to_string(actual2) == "-0.056441"); - CHECK(firstAudioBlock.size() == 47712); - - CHECK(capture.HasNext() == true); - - std::vector<float> secondAudioBlock = capture.Next(); - float actual3 = secondAudioBlock.at(0); - float actual4 = secondAudioBlock.at(47000); - CHECK(std::to_string(actual3) == "0.102077"); - CHECK(std::to_string(actual4) == "0.000194"); - CHECK(capture.HasNext() == true); - - std::vector<float> thirdAudioBlock = capture.Next(); - float actual5 = thirdAudioBlock.at(0); - float actual6 = thirdAudioBlock.at(33500); - float actual7 = thirdAudioBlock.at(33600); - CHECK(std::to_string(actual5) == "-0.076416"); - CHECK(std::to_string(actual6) == "-0.000275"); - CHECK(std::to_string(actual7) == "0.000000"); - CHECK(capture.HasNext() == false); -} - -TEST_CASE("Test sliding window of audio capture") -{ - std::string testResources = TEST_RESOURCE_DIR; - REQUIRE(testResources != ""); - std::string file = testResources + "/" + "myVoiceIsMyPassportVerifyMe04.wav"; - asr::AudioCapture capture; - std::vector<float> audioData = capture.LoadAudioFile(file); - capture.InitSlidingWindow(audioData.data(), audioData.size(), 47712, 16000); - capture.Next(); - capture.Next(); - - CHECK(capture.HasNext() == true); - capture.Next(); - CHECK(capture.HasNext() == false); -} diff --git a/samples/SpeechRecognition/test/MFCCTest.cpp b/samples/SpeechRecognition/test/MFCCTest.cpp index 2a552643d5..62a92fd5ba 100644 --- a/samples/SpeechRecognition/test/MFCCTest.cpp +++ b/samples/SpeechRecognition/test/MFCCTest.cpp @@ -6,9 +6,10 @@ #include <catch.hpp> #include <limits> -#include "MFCC.hpp" +#include "Wav2LetterMFCC.hpp" -const std::vector<float> testWav = std::vector<float>{ +const std::vector<float> testWav = std::vector<float> +{ -3.0f, 0.0f, 1.0f, -1.0f, 2.0f, 3.0f, -2.0f, 2.0f, 1.0f, -2.0f, 0.0f, 3.0f, -1.0f, 8.0f, 3.0f, 2.0f, -1.0f, -1.0f, 2.0f, 7.0f, 3.0f, 5.0f, 6.0f, 6.0f, @@ -84,15 +85,16 @@ TEST_CASE("Test MFCC") std::vector<float> fullAudioData; - for (auto f : testWav) - { - fullAudioData.emplace_back( f / (1<<15)); - } - + for (auto f : testWav) + { + fullAudioData.emplace_back( f / (1<<15)); + } - MfccParams mfccParams(sampFreq, 128, 0, 8000, numMfccFeats, frameLenSamples, false, 1); + MfccParams mfccParams(sampFreq, 128, 0, 8000, numMfccFeats, + frameLenSamples, false, 1); - MFCC mfccInst = MFCC(mfccParams); + Wav2LetterMFCC mfccInst = Wav2LetterMFCC(mfccParams); + mfccInst.Init(); auto mfccOutput = mfccInst.MfccCompute(fullAudioData); std::vector<float> expected = { -834.96564f, 21.02699f, 18.62856f, 7.3412f, 18.90791f, -5.36034f, 6.52351f, diff --git a/samples/SpeechRecognition/test/PreprocessTest.cpp b/samples/SpeechRecognition/test/PreprocessTest.cpp index 2b98831fda..f1127470fd 100644 --- a/samples/SpeechRecognition/test/PreprocessTest.cpp +++ b/samples/SpeechRecognition/test/PreprocessTest.cpp @@ -6,8 +6,8 @@ #include <catch.hpp> #include <limits> -#include "Preprocess.hpp" #include "DataStructures.hpp" +#include "Wav2LetterPreprocessor.hpp" void PopulateTestWavVector(std::vector<int16_t>& vec) { @@ -51,9 +51,10 @@ TEST_CASE("Preprocessing calculation INT8") /* Populate with dummy input */ PopulateTestWavVector(testWav1); - MfccParams mfccParams(sampFreq, 128, 0, 8000, numMfccFeats, frameLenSamples, false, numMfccVectors); + MfccParams mfccParams(sampFreq, 128, 0, 8000, numMfccFeats, + frameLenSamples, false, numMfccVectors); - MFCC mfccInst = MFCC(mfccParams); + std::unique_ptr<Wav2LetterMFCC> mfccInst = std::make_unique<Wav2LetterMFCC>(mfccParams); std::vector<float> fullAudioData; @@ -65,7 +66,7 @@ TEST_CASE("Preprocessing calculation INT8") } } - Preprocess prep(frameLenSamples, windowStride, mfccInst); + Wav2LetterPreprocessor prep(frameLenSamples, windowStride, std::move(mfccInst)); std::vector<int8_t> outputBuffer(outputBufferSize); |