mirror of
https://github.com/Bijit-Mondal/VoiceAgent.git
synced 2026-03-02 18:36:39 +00:00
- Added ConversationManager for managing conversation history with configurable limits. - Implemented InputQueue for serial processing of input items. - Created SpeechManager for handling text-to-speech generation and streaming. - Developed StreamProcessor for processing LLM streams and forwarding events. - Added TranscriptionManager for audio transcription using AI SDK. - Introduced WebSocketManager for managing WebSocket connections and messaging. - Updated VoiceAgent to support new architecture and improved socket handling. - Refactored index files to export new core components.
106 lines
4.2 KiB
JavaScript
106 lines
4.2 KiB
JavaScript
"use strict";
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
exports.TranscriptionManager = void 0;
|
|
const events_1 = require("events");
|
|
const ai_1 = require("ai");
|
|
const types_1 = require("../types");
|
|
/**
|
|
* Handles audio transcription using the AI SDK transcription model
|
|
* and validation of incoming audio data.
|
|
*/
|
|
class TranscriptionManager extends events_1.EventEmitter {
|
|
transcriptionModel;
|
|
maxAudioInputSize;
|
|
/** Callback to send messages over the WebSocket */
|
|
sendMessage = () => { };
|
|
constructor(options = {}) {
|
|
super();
|
|
this.transcriptionModel = options.transcriptionModel;
|
|
this.maxAudioInputSize =
|
|
options.maxAudioInputSize ?? types_1.DEFAULT_MAX_AUDIO_SIZE;
|
|
}
|
|
get hasTranscriptionModel() {
|
|
return !!this.transcriptionModel;
|
|
}
|
|
/**
|
|
* Transcribe audio data to text.
|
|
*/
|
|
async transcribeAudio(audioData) {
|
|
if (!this.transcriptionModel) {
|
|
throw new Error("Transcription model not configured");
|
|
}
|
|
console.log(`Sending ${audioData.byteLength} bytes to Whisper for transcription`);
|
|
try {
|
|
const result = await (0, ai_1.experimental_transcribe)({
|
|
model: this.transcriptionModel,
|
|
audio: audioData,
|
|
});
|
|
console.log(`Whisper transcription result: "${result.text}", language: ${result.language || "unknown"}`);
|
|
this.emit("transcription", {
|
|
text: result.text,
|
|
language: result.language,
|
|
});
|
|
// Send transcription to client for immediate feedback
|
|
this.sendMessage({
|
|
type: "transcription_result",
|
|
text: result.text,
|
|
language: result.language,
|
|
});
|
|
return result.text;
|
|
}
|
|
catch (error) {
|
|
console.error("Whisper transcription failed:", error);
|
|
throw error;
|
|
}
|
|
}
|
|
/**
|
|
* Process incoming base64-encoded audio: validate, decode, transcribe.
|
|
* Returns the transcribed text, or null if invalid / empty.
|
|
*/
|
|
async processAudioInput(base64Audio, format) {
|
|
if (!this.transcriptionModel) {
|
|
const error = new Error("Transcription model not configured for audio input");
|
|
this.emit("error", error);
|
|
this.sendMessage({ type: "error", error: error.message });
|
|
return null;
|
|
}
|
|
try {
|
|
const audioBuffer = Buffer.from(base64Audio, "base64");
|
|
// Validate audio size
|
|
if (audioBuffer.length > this.maxAudioInputSize) {
|
|
const sizeMB = (audioBuffer.length / (1024 * 1024)).toFixed(1);
|
|
const maxMB = (this.maxAudioInputSize / (1024 * 1024)).toFixed(1);
|
|
this.emit("error", new Error(`Audio input too large (${sizeMB} MB). Maximum allowed: ${maxMB} MB`));
|
|
return null;
|
|
}
|
|
if (audioBuffer.length === 0) {
|
|
this.emit("warning", "Received empty audio data");
|
|
return null;
|
|
}
|
|
this.emit("audio_received", { size: audioBuffer.length, format });
|
|
console.log(`Processing audio input: ${audioBuffer.length} bytes, format: ${format || "unknown"}`);
|
|
const transcribedText = await this.transcribeAudio(audioBuffer);
|
|
console.log(`Transcribed text: "${transcribedText}"`);
|
|
if (!transcribedText.trim()) {
|
|
this.emit("warning", "Transcription returned empty text");
|
|
this.sendMessage({
|
|
type: "transcription_error",
|
|
error: "Whisper returned empty text",
|
|
});
|
|
return null;
|
|
}
|
|
return transcribedText;
|
|
}
|
|
catch (error) {
|
|
console.error("Failed to process audio input:", error);
|
|
this.emit("error", error);
|
|
this.sendMessage({
|
|
type: "transcription_error",
|
|
error: `Transcription failed: ${error.message || String(error)}`,
|
|
});
|
|
return null;
|
|
}
|
|
}
|
|
}
|
|
exports.TranscriptionManager = TranscriptionManager;
|
|
//# sourceMappingURL=TranscriptionManager.js.map
|