From 23c26277086c78704a17f0dae86da947816320c0 Mon Sep 17 00:00:00 2001 From: George Gekov Date: Mon, 16 Aug 2021 11:32:10 +0100 Subject: MLECO-2079 Adding the C++ KWS example Signed-off-by: Eanna O Cathain Change-Id: I81899bbfaada32f478c2e2fc6441eabb94d8d0fc --- .../SpeechRecognition/test/AudioCaptureTest.cpp | 61 ---------------------- samples/SpeechRecognition/test/MFCCTest.cpp | 20 +++---- samples/SpeechRecognition/test/PreprocessTest.cpp | 9 ++-- 3 files changed, 16 insertions(+), 74 deletions(-) delete mode 100644 samples/SpeechRecognition/test/AudioCaptureTest.cpp (limited to 'samples/SpeechRecognition/test') diff --git a/samples/SpeechRecognition/test/AudioCaptureTest.cpp b/samples/SpeechRecognition/test/AudioCaptureTest.cpp deleted file mode 100644 index 94b4e7cb7a..0000000000 --- a/samples/SpeechRecognition/test/AudioCaptureTest.cpp +++ /dev/null @@ -1,61 +0,0 @@ -// -// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#define CATCH_CONFIG_MAIN -#include -#include - -#include "AudioCapture.hpp" - -TEST_CASE("Test capture of audio file") -{ - std::string testResources = TEST_RESOURCE_DIR; - REQUIRE(testResources != ""); - std::string file = testResources + "/" + "myVoiceIsMyPassportVerifyMe04.wav"; - asr::AudioCapture capture; - std::vector audioData = capture.LoadAudioFile(file); - capture.InitSlidingWindow(audioData.data(), audioData.size(), 47712, 16000); - - std::vector firstAudioBlock = capture.Next(); - float actual1 = firstAudioBlock.at(0); - float actual2 = firstAudioBlock.at(47000); - CHECK(std::to_string(actual1) == "0.000352"); - CHECK(std::to_string(actual2) == "-0.056441"); - CHECK(firstAudioBlock.size() == 47712); - - CHECK(capture.HasNext() == true); - - std::vector secondAudioBlock = capture.Next(); - float actual3 = secondAudioBlock.at(0); - float actual4 = secondAudioBlock.at(47000); - CHECK(std::to_string(actual3) == "0.102077"); - CHECK(std::to_string(actual4) == "0.000194"); - CHECK(capture.HasNext() == true); - - std::vector thirdAudioBlock = capture.Next(); - float actual5 = thirdAudioBlock.at(0); - float actual6 = thirdAudioBlock.at(33500); - float actual7 = thirdAudioBlock.at(33600); - CHECK(std::to_string(actual5) == "-0.076416"); - CHECK(std::to_string(actual6) == "-0.000275"); - CHECK(std::to_string(actual7) == "0.000000"); - CHECK(capture.HasNext() == false); -} - -TEST_CASE("Test sliding window of audio capture") -{ - std::string testResources = TEST_RESOURCE_DIR; - REQUIRE(testResources != ""); - std::string file = testResources + "/" + "myVoiceIsMyPassportVerifyMe04.wav"; - asr::AudioCapture capture; - std::vector audioData = capture.LoadAudioFile(file); - capture.InitSlidingWindow(audioData.data(), audioData.size(), 47712, 16000); - capture.Next(); - capture.Next(); - - CHECK(capture.HasNext() == true); - capture.Next(); - CHECK(capture.HasNext() == false); -} diff --git a/samples/SpeechRecognition/test/MFCCTest.cpp b/samples/SpeechRecognition/test/MFCCTest.cpp index 2a552643d5..62a92fd5ba 100644 --- a/samples/SpeechRecognition/test/MFCCTest.cpp +++ b/samples/SpeechRecognition/test/MFCCTest.cpp @@ -6,9 +6,10 @@ #include #include -#include "MFCC.hpp" +#include "Wav2LetterMFCC.hpp" -const std::vector testWav = std::vector{ +const std::vector testWav = std::vector +{ -3.0f, 0.0f, 1.0f, -1.0f, 2.0f, 3.0f, -2.0f, 2.0f, 1.0f, -2.0f, 0.0f, 3.0f, -1.0f, 8.0f, 3.0f, 2.0f, -1.0f, -1.0f, 2.0f, 7.0f, 3.0f, 5.0f, 6.0f, 6.0f, @@ -84,15 +85,16 @@ TEST_CASE("Test MFCC") std::vector fullAudioData; - for (auto f : testWav) - { - fullAudioData.emplace_back( f / (1<<15)); - } - + for (auto f : testWav) + { + fullAudioData.emplace_back( f / (1<<15)); + } - MfccParams mfccParams(sampFreq, 128, 0, 8000, numMfccFeats, frameLenSamples, false, 1); + MfccParams mfccParams(sampFreq, 128, 0, 8000, numMfccFeats, + frameLenSamples, false, 1); - MFCC mfccInst = MFCC(mfccParams); + Wav2LetterMFCC mfccInst = Wav2LetterMFCC(mfccParams); + mfccInst.Init(); auto mfccOutput = mfccInst.MfccCompute(fullAudioData); std::vector expected = { -834.96564f, 21.02699f, 18.62856f, 7.3412f, 18.90791f, -5.36034f, 6.52351f, diff --git a/samples/SpeechRecognition/test/PreprocessTest.cpp b/samples/SpeechRecognition/test/PreprocessTest.cpp index 2b98831fda..f1127470fd 100644 --- a/samples/SpeechRecognition/test/PreprocessTest.cpp +++ b/samples/SpeechRecognition/test/PreprocessTest.cpp @@ -6,8 +6,8 @@ #include #include -#include "Preprocess.hpp" #include "DataStructures.hpp" +#include "Wav2LetterPreprocessor.hpp" void PopulateTestWavVector(std::vector& vec) { @@ -51,9 +51,10 @@ TEST_CASE("Preprocessing calculation INT8") /* Populate with dummy input */ PopulateTestWavVector(testWav1); - MfccParams mfccParams(sampFreq, 128, 0, 8000, numMfccFeats, frameLenSamples, false, numMfccVectors); + MfccParams mfccParams(sampFreq, 128, 0, 8000, numMfccFeats, + frameLenSamples, false, numMfccVectors); - MFCC mfccInst = MFCC(mfccParams); + std::unique_ptr mfccInst = std::make_unique(mfccParams); std::vector fullAudioData; @@ -65,7 +66,7 @@ TEST_CASE("Preprocessing calculation INT8") } } - Preprocess prep(frameLenSamples, windowStride, mfccInst); + Wav2LetterPreprocessor prep(frameLenSamples, windowStride, std::move(mfccInst)); std::vector outputBuffer(outputBufferSize); -- cgit v1.2.1