about | summary | refs | log | tree | commit | diff
path: root/samples/SpeechRecognition/test
diff options
context:
space:
mode:
Diffstat (limited to 'samples/SpeechRecognition/test')
-rw-r--r-- samples/SpeechRecognition/test/AudioCaptureTest.cpp 61
-rw-r--r-- samples/SpeechRecognition/test/MFCCTest.cpp 20
-rw-r--r-- samples/SpeechRecognition/test/PreprocessTest.cpp 9
3 files changed, 16 insertions, 74 deletions
diff --git a/samples/SpeechRecognition/test/AudioCaptureTest.cpp b/samples/SpeechRecognition/test/AudioCaptureTest.cpp
deleted file mode 100644
index 94b4e7cb7a..0000000000
--- a/samples/SpeechRecognition/test/AudioCaptureTest.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-//
-// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#define CATCH_CONFIG_MAIN
-#include <catch.hpp>
-#include <limits>
-
-#include "AudioCapture.hpp"
-
-TEST_CASE("Test capture of audio file")
-{
- std::string testResources = TEST_RESOURCE_DIR;
- REQUIRE(testResources != "");
- std::string file = testResources + "/" + "myVoiceIsMyPassportVerifyMe04.wav";
- asr::AudioCapture capture;
- std::vector<float> audioData = capture.LoadAudioFile(file);
- capture.InitSlidingWindow(audioData.data(), audioData.size(), 47712, 16000);
-
- std::vector<float> firstAudioBlock = capture.Next();
- float actual1 = firstAudioBlock.at(0);
- float actual2 = firstAudioBlock.at(47000);
- CHECK(std::to_string(actual1) == "0.000352");
- CHECK(std::to_string(actual2) == "-0.056441");
- CHECK(firstAudioBlock.size() == 47712);
-
- CHECK(capture.HasNext() == true);
-
- std::vector<float> secondAudioBlock = capture.Next();
- float actual3 = secondAudioBlock.at(0);
- float actual4 = secondAudioBlock.at(47000);
- CHECK(std::to_string(actual3) == "0.102077");
- CHECK(std::to_string(actual4) == "0.000194");
- CHECK(capture.HasNext() == true);
-
- std::vector<float> thirdAudioBlock = capture.Next();
- float actual5 = thirdAudioBlock.at(0);
- float actual6 = thirdAudioBlock.at(33500);
- float actual7 = thirdAudioBlock.at(33600);
- CHECK(std::to_string(actual5) == "-0.076416");
- CHECK(std::to_string(actual6) == "-0.000275");
- CHECK(std::to_string(actual7) == "0.000000");
- CHECK(capture.HasNext() == false);
-}
-
-TEST_CASE("Test sliding window of audio capture")
-{
- std::string testResources = TEST_RESOURCE_DIR;
- REQUIRE(testResources != "");
- std::string file = testResources + "/" + "myVoiceIsMyPassportVerifyMe04.wav";
- asr::AudioCapture capture;
- std::vector<float> audioData = capture.LoadAudioFile(file);
- capture.InitSlidingWindow(audioData.data(), audioData.size(), 47712, 16000);
- capture.Next();
- capture.Next();
-
- CHECK(capture.HasNext() == true);
- capture.Next();
- CHECK(capture.HasNext() == false);
-}
diff --git a/samples/SpeechRecognition/test/MFCCTest.cpp b/samples/SpeechRecognition/test/MFCCTest.cpp
index 2a552643d5..62a92fd5ba 100644
--- a/samples/SpeechRecognition/test/MFCCTest.cpp
+++ b/samples/SpeechRecognition/test/MFCCTest.cpp
@@ -6,9 +6,10 @@
#include <catch.hpp>
#include <limits>
-#include "MFCC.hpp"
+#include "Wav2LetterMFCC.hpp"
-const std::vector<float> testWav = std::vector<float>{
+const std::vector<float> testWav = std::vector<float>
+{
-3.0f, 0.0f, 1.0f, -1.0f, 2.0f, 3.0f, -2.0f, 2.0f,
1.0f, -2.0f, 0.0f, 3.0f, -1.0f, 8.0f, 3.0f, 2.0f,
-1.0f, -1.0f, 2.0f, 7.0f, 3.0f, 5.0f, 6.0f, 6.0f,
@@ -84,15 +85,16 @@ TEST_CASE("Test MFCC")
std::vector<float> fullAudioData;
- for (auto f : testWav)
- {
- fullAudioData.emplace_back( f / (1<<15));
- }
-
+ for (auto f : testWav)
+ {
+ fullAudioData.emplace_back( f / (1<<15));
+ }
- MfccParams mfccParams(sampFreq, 128, 0, 8000, numMfccFeats, frameLenSamples, false, 1);
+ MfccParams mfccParams(sampFreq, 128, 0, 8000, numMfccFeats,
+ frameLenSamples, false, 1);
- MFCC mfccInst = MFCC(mfccParams);
+ Wav2LetterMFCC mfccInst = Wav2LetterMFCC(mfccParams);
+ mfccInst.Init();
auto mfccOutput = mfccInst.MfccCompute(fullAudioData);
std::vector<float> expected = { -834.96564f, 21.02699f, 18.62856f, 7.3412f, 18.90791f, -5.36034f, 6.52351f,
diff --git a/samples/SpeechRecognition/test/PreprocessTest.cpp b/samples/SpeechRecognition/test/PreprocessTest.cpp
index 2b98831fda..f1127470fd 100644
--- a/samples/SpeechRecognition/test/PreprocessTest.cpp
+++ b/samples/SpeechRecognition/test/PreprocessTest.cpp
@@ -6,8 +6,8 @@
#include <catch.hpp>
#include <limits>
-#include "Preprocess.hpp"
#include "DataStructures.hpp"
+#include "Wav2LetterPreprocessor.hpp"
void PopulateTestWavVector(std::vector<int16_t>& vec)
{
@@ -51,9 +51,10 @@ TEST_CASE("Preprocessing calculation INT8")
/* Populate with dummy input */
PopulateTestWavVector(testWav1);
- MfccParams mfccParams(sampFreq, 128, 0, 8000, numMfccFeats, frameLenSamples, false, numMfccVectors);
+ MfccParams mfccParams(sampFreq, 128, 0, 8000, numMfccFeats,
+ frameLenSamples, false, numMfccVectors);
- MFCC mfccInst = MFCC(mfccParams);
+ std::unique_ptr<Wav2LetterMFCC> mfccInst = std::make_unique<Wav2LetterMFCC>(mfccParams);
std::vector<float> fullAudioData;
@@ -65,7 +66,7 @@ TEST_CASE("Preprocessing calculation INT8")
}
}
- Preprocess prep(frameLenSamples, windowStride, mfccInst);
+ Wav2LetterPreprocessor prep(frameLenSamples, windowStride, std::move(mfccInst));
std::vector<int8_t> outputBuffer(outputBufferSize);