// Example program: captures microphone audio with Qt Multimedia and transcribes it with the Vosk speech recognition C API.
#include <iostream>

#include <QAudioInput>
#include <QBuffer>
#include <QCoreApplication>
#include <QFile>
#include <QStringList>

#include <vosk_api.h>
class SpeechRecognizer : public QObject { Q_OBJECT public: SpeechRecognizer(const QString &modelPath, QObject *parent = nullptr) : QObject(parent) { model = vosk_model_new(modelPath.toStdString().c_str()); recognizer = vosk_recognizer_new(model, 16000.0); }
~SpeechRecognizer() { vosk_recognizer_free(recognizer); vosk_model_free(model); }
void startRecognition() { QAudioFormat format; format.setSampleRate(16000); format.setChannelCount(1); format.setSampleSize(16); format.setCodec("audio/pcm"); format.setByteOrder(QAudioFormat::LittleEndian); format.setSampleType(QAudioFormat::SignedInt);
audioInput = new QAudioInput(format, this); audioBuffer.open(QIODevice::WriteOnly | QIODevice::Truncate); audioInput->start(&audioBuffer);
connect(audioInput, &QAudioInput::stateChanged, this, &SpeechRecognizer::onStateChanged); }
private slots: void onStateChanged(QAudio::State state) { if (state == QAudio::IdleState) { audioInput->stop(); audioBuffer.close(); processAudio(); } }
void processAudio() { QByteArray audioData = audioBuffer.buffer(); int length = audioData.size(); const char *data = audioData.data();
if (vosk_recognizer_accept_waveform(recognizer, data, length)) { std::cout << vosk_recognizer_result(recognizer) << std::endl; } else { std::cout << vosk_recognizer_partial_result(recognizer) << std::endl; } }
private: VoskModel *model; VoskRecognizer *recognizer; QAudioInput *audioInput; QBuffer audioBuffer; };
int main(int argc, char *argv[]) { QCoreApplication app(argc, argv);
QString modelPath = "/path/to/vosk-model"; SpeechRecognizer recognizer(modelPath); recognizer.startRecognition();
return app.exec(); }
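// NOTE(review): SpeechRecognizer declares Q_OBJECT inside a source file, so the build presumably needs the moc output included here (e.g. `#include "main.moc"` if this file is main.cpp) — confirm against the build setup.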