feat: Introduce new core components for conversation and speech management

- Added ConversationManager for managing conversation history with configurable limits.
- Implemented InputQueue for serial processing of input items.
- Created SpeechManager for handling text-to-speech generation and streaming.
- Developed StreamProcessor for processing LLM streams and forwarding events.
- Added TranscriptionManager for audio transcription using AI SDK.
- Introduced WebSocketManager for managing WebSocket connections and messaging.
- Updated VoiceAgent to support the new architecture, with improved socket handling.
- Refactored index files to export new core components.
This commit is contained in:
Bijit Mondal
2026-02-23 16:15:49 +05:30
parent 4dd30b89c0
commit 5e7eb469ae
71 changed files with 5175 additions and 19 deletions

25
dist/VoiceAgent.js vendored
View File

@@ -5,6 +5,25 @@ const ws_1 = require("ws");
const events_1 = require("events");
const ai_1 = require("ai");
const types_1 = require("./types");
/**
* A single-session voice agent that manages one WebSocket connection at a time.
*
* **Important:** Each `VoiceAgent` instance holds its own conversation history,
* input queue, speech state, and WebSocket. It is designed for **one user per
* instance**. To support multiple concurrent users, create a separate
* `VoiceAgent` for each connection:
*
* ```ts
* wss.on("connection", (socket) => {
* const agent = new VoiceAgent({ model, ... });
* agent.handleSocket(socket);
* agent.on("disconnected", () => agent.destroy());
* });
* ```
*
* Sharing a single instance across multiple users will cause conversation
* history cross-contamination, interleaved audio, and unpredictable behavior.
*/
class VoiceAgent extends events_1.EventEmitter {
socket;
tools = {};
@@ -50,7 +69,7 @@ class VoiceAgent extends events_1.EventEmitter {
this.endpoint = options.endpoint;
this.voice = options.voice || "alloy";
this.speechInstructions = options.speechInstructions;
this.outputFormat = options.outputFormat || "mp3";
this.outputFormat = options.outputFormat || "opus";
this.maxAudioInputSize = options.maxAudioInputSize ?? types_1.DEFAULT_MAX_AUDIO_SIZE;
if (options.tools) {
this.tools = { ...options.tools };
@@ -576,6 +595,10 @@ class VoiceAgent extends events_1.EventEmitter {
* Attach an existing WebSocket (server-side usage).
* Use this when a WS server accepts a connection and you want the
* agent to handle messages on that socket.
*
* **Note:** Calling this while a socket is already attached will cleanly
* tear down the previous connection first. Each `VoiceAgent` instance
* supports only one socket at a time — create a new agent per user.
*/
handleSocket(socket) {
this.ensureNotDestroyed();