feat: Introduce new core components for conversation and speech management

- Added ConversationManager for managing conversation history with configurable limits.
- Implemented InputQueue for serial processing of input items.
- Created SpeechManager for handling text-to-speech generation and streaming.
- Developed StreamProcessor for processing LLM streams and forwarding events.
- Added TranscriptionManager for audio transcription using AI SDK.
- Introduced WebSocketManager for managing WebSocket connections and messaging.
- Updated VoiceAgent to support new architecture and improved socket handling.
- Refactored index files to export new core components.
This commit is contained in:
Bijit Mondal
2026-02-23 16:15:49 +05:30
parent 4dd30b89c0
commit 5e7eb469ae
71 changed files with 5175 additions and 19 deletions

106
dist/core/TranscriptionManager.js vendored Normal file
View File

@@ -0,0 +1,106 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.TranscriptionManager = void 0;
const events_1 = require("events");
const ai_1 = require("ai");
const types_1 = require("../types");
/**
* Handles audio transcription using the AI SDK transcription model
* and validation of incoming audio data.
*/
/**
 * Handles audio transcription using the AI SDK transcription model
 * and validation of incoming audio data.
 *
 * Emits: "transcription", "audio_received", "warning", "error".
 */
class TranscriptionManager extends events_1.EventEmitter {
    // AI SDK transcription model instance (undefined when not configured).
    transcriptionModel;
    // Maximum accepted decoded audio payload, in bytes.
    maxAudioInputSize;
    /** Callback used to push messages to the connected WebSocket client. */
    sendMessage = () => { };
    /**
     * @param {object} [options]
     * @param {object} [options.transcriptionModel] - AI SDK transcription model.
     * @param {number} [options.maxAudioInputSize] - Max audio size in bytes;
     *   falls back to DEFAULT_MAX_AUDIO_SIZE when nullish.
     */
    constructor(options = {}) {
        super();
        const { transcriptionModel, maxAudioInputSize } = options;
        this.transcriptionModel = transcriptionModel;
        this.maxAudioInputSize = maxAudioInputSize ?? types_1.DEFAULT_MAX_AUDIO_SIZE;
    }
    /** @returns {boolean} Whether a transcription model has been configured. */
    get hasTranscriptionModel() {
        return !!this.transcriptionModel;
    }
    /**
     * Transcribe audio data to text.
     *
     * @param {ArrayBuffer|Buffer} audioData - Raw audio bytes.
     * @returns {Promise<string>} The transcribed text.
     * @throws If no model is configured, or if the AI SDK call fails
     *   (the error is logged and rethrown unchanged).
     */
    async transcribeAudio(audioData) {
        if (!this.transcriptionModel) {
            throw new Error("Transcription model not configured");
        }
        console.log(`Sending ${audioData.byteLength} bytes to Whisper for transcription`);
        try {
            const { text, language } = await (0, ai_1.experimental_transcribe)({
                model: this.transcriptionModel,
                audio: audioData,
            });
            console.log(`Whisper transcription result: "${text}", language: ${language || "unknown"}`);
            this.emit("transcription", { text, language });
            // Echo the transcript back to the client for immediate feedback.
            this.sendMessage({ type: "transcription_result", text, language });
            return text;
        }
        catch (error) {
            console.error("Whisper transcription failed:", error);
            throw error;
        }
    }
    /**
     * Process incoming base64-encoded audio: validate, decode, transcribe.
     *
     * @param {string} base64Audio - Base64-encoded audio payload.
     * @param {string} [format] - Optional audio format hint (logging only).
     * @returns {Promise<string|null>} Transcribed text, or null when the
     *   payload is invalid, empty, oversized, or transcription fails.
     */
    async processAudioInput(base64Audio, format) {
        if (!this.transcriptionModel) {
            const error = new Error("Transcription model not configured for audio input");
            this.emit("error", error);
            this.sendMessage({ type: "error", error: error.message });
            return null;
        }
        try {
            const audioBuffer = Buffer.from(base64Audio, "base64");
            const byteCount = audioBuffer.length;
            // Reject payloads above the configured ceiling (reported in MB).
            if (byteCount > this.maxAudioInputSize) {
                const toMB = (bytes) => (bytes / (1024 * 1024)).toFixed(1);
                this.emit("error", new Error(`Audio input too large (${toMB(byteCount)} MB). Maximum allowed: ${toMB(this.maxAudioInputSize)} MB`));
                return null;
            }
            if (byteCount === 0) {
                this.emit("warning", "Received empty audio data");
                return null;
            }
            this.emit("audio_received", { size: byteCount, format });
            console.log(`Processing audio input: ${byteCount} bytes, format: ${format || "unknown"}`);
            const transcript = await this.transcribeAudio(audioBuffer);
            console.log(`Transcribed text: "${transcript}"`);
            // Whitespace-only transcripts are treated as a transcription miss.
            if (transcript.trim() === "") {
                this.emit("warning", "Transcription returned empty text");
                this.sendMessage({
                    type: "transcription_error",
                    error: "Whisper returned empty text",
                });
                return null;
            }
            return transcript;
        }
        catch (error) {
            console.error("Failed to process audio input:", error);
            this.emit("error", error);
            this.sendMessage({
                type: "transcription_error",
                error: `Transcription failed: ${error.message || String(error)}`,
            });
            return null;
        }
    }
}
exports.TranscriptionManager = TranscriptionManager;
//# sourceMappingURL=TranscriptionManager.js.map