mirror of
https://github.com/Bijit-Mondal/VoiceAgent.git
synced 2026-03-02 18:36:39 +00:00
feat: Introduce new core components for conversation and speech management
- Added ConversationManager for managing conversation history with configurable limits. - Implemented InputQueue for serial processing of input items. - Created SpeechManager for handling text-to-speech generation and streaming. - Developed StreamProcessor for processing LLM streams and forwarding events. - Added TranscriptionManager for audio transcription using AI SDK. - Introduced WebSocketManager for managing WebSocket connections and messaging. - Updated VoiceAgent to support new architecture and improved socket handling. - Refactored index files to export new core components.
This commit is contained in:
106
dist/core/TranscriptionManager.js
vendored
Normal file
106
dist/core/TranscriptionManager.js
vendored
Normal file
@@ -0,0 +1,106 @@
|
||||
"use strict";
|
||||
Object.defineProperty(exports, "__esModule", { value: true });
|
||||
exports.TranscriptionManager = void 0;
|
||||
const events_1 = require("events");
|
||||
const ai_1 = require("ai");
|
||||
const types_1 = require("../types");
|
||||
/**
|
||||
* Handles audio transcription using the AI SDK transcription model
|
||||
* and validation of incoming audio data.
|
||||
*/
|
||||
class TranscriptionManager extends events_1.EventEmitter {
    /** AI SDK transcription model; when absent, audio input is rejected. */
    transcriptionModel;
    /** Upper bound, in bytes, on the size of a decoded audio payload. */
    maxAudioInputSize;
    /** Callback to send messages over the WebSocket */
    sendMessage = () => { };
    /**
     * @param {object} [options] - Configuration.
     * @param {object} [options.transcriptionModel] - AI SDK transcription model.
     * @param {number} [options.maxAudioInputSize] - Max decoded audio size in
     *   bytes; falls back to DEFAULT_MAX_AUDIO_SIZE when not given.
     */
    constructor(options = {}) {
        super();
        const { transcriptionModel, maxAudioInputSize } = options;
        this.transcriptionModel = transcriptionModel;
        this.maxAudioInputSize = maxAudioInputSize ?? types_1.DEFAULT_MAX_AUDIO_SIZE;
    }
    /** Whether a transcription model has been configured. */
    get hasTranscriptionModel() {
        return Boolean(this.transcriptionModel);
    }
    /**
     * Transcribe audio data to text.
     *
     * Emits "transcription" and forwards a "transcription_result" message to
     * the client on success; logs and rethrows any failure.
     *
     * @param {Buffer} audioData - Raw audio bytes to transcribe.
     * @returns {Promise<string>} The transcribed text.
     * @throws {Error} When no model is configured, or when transcription fails.
     */
    async transcribeAudio(audioData) {
        if (!this.transcriptionModel) {
            throw new Error("Transcription model not configured");
        }
        console.log(`Sending ${audioData.byteLength} bytes to Whisper for transcription`);
        try {
            const { text, language } = await (0, ai_1.experimental_transcribe)({
                model: this.transcriptionModel,
                audio: audioData,
            });
            console.log(`Whisper transcription result: "${text}", language: ${language || "unknown"}`);
            this.emit("transcription", { text, language });
            // Send transcription to client for immediate feedback
            this.sendMessage({ type: "transcription_result", text, language });
            return text;
        }
        catch (err) {
            console.error("Whisper transcription failed:", err);
            throw err;
        }
    }
    /**
     * Process incoming base64-encoded audio: validate, decode, transcribe.
     * Returns the transcribed text, or null if invalid / empty.
     *
     * @param {string} base64Audio - Base64-encoded audio payload.
     * @param {string} [format] - Optional audio format hint (logging only).
     * @returns {Promise<string|null>} Transcribed text, or null on any
     *   validation or transcription failure (errors are emitted, not thrown).
     */
    async processAudioInput(base64Audio, format) {
        if (!this.transcriptionModel) {
            const err = new Error("Transcription model not configured for audio input");
            this.emit("error", err);
            this.sendMessage({ type: "error", error: err.message });
            return null;
        }
        try {
            const decoded = Buffer.from(base64Audio, "base64");
            // Validate audio size
            if (decoded.length > this.maxAudioInputSize) {
                const toMB = (bytes) => (bytes / (1024 * 1024)).toFixed(1);
                this.emit("error", new Error(`Audio input too large (${toMB(decoded.length)} MB). Maximum allowed: ${toMB(this.maxAudioInputSize)} MB`));
                return null;
            }
            if (decoded.length === 0) {
                this.emit("warning", "Received empty audio data");
                return null;
            }
            this.emit("audio_received", { size: decoded.length, format });
            console.log(`Processing audio input: ${decoded.length} bytes, format: ${format || "unknown"}`);
            const transcribedText = await this.transcribeAudio(decoded);
            console.log(`Transcribed text: "${transcribedText}"`);
            if (transcribedText.trim() === "") {
                this.emit("warning", "Transcription returned empty text");
                this.sendMessage({
                    type: "transcription_error",
                    error: "Whisper returned empty text",
                });
                return null;
            }
            return transcribedText;
        }
        catch (err) {
            console.error("Failed to process audio input:", err);
            this.emit("error", err);
            this.sendMessage({
                type: "transcription_error",
                error: `Transcription failed: ${err.message || String(err)}`,
            });
            return null;
        }
    }
}
|
||||
exports.TranscriptionManager = TranscriptionManager;
|
||||
//# sourceMappingURL=TranscriptionManager.js.map
|
||||
Reference in New Issue
Block a user