feat: add dist directory with compiled files and type definitions

- Created dist/index.js and dist/index.d.ts for main entry points. - Added source maps for index.js and index.d.ts. - Introduced dist/utils/StreamBuffer.js and StreamBuffer.d.ts with source maps. - Updated package.json to point main and types to dist files. - Included additional files in package.json for distribution. - Added peerDependencies and updated devDependencies.
2026-03-02 18:36:39 +00:00 · 2026-02-14 14:39:23 +05:30
parent 637d57fb41
commit ce10d521f3
15 changed files with 1385 additions and 10 deletions
--- a/dist/VoiceAgent.d.ts
+++ b/dist/VoiceAgent.d.ts
@@ -0,0 +1,242 @@
+import { WebSocket } from "ws";
+import { EventEmitter } from "events";
+import { streamText, LanguageModel, type Tool, type ModelMessage, type TranscriptionModel, type SpeechModel } from "ai";
+/**
+ * Configuration for streaming speech behavior
+ */
+interface StreamingSpeechConfig {
+    /** Minimum characters before generating speech for a chunk */
+    minChunkSize: number;
+    /** Maximum characters per chunk (will split at sentence boundary before this) */
+    maxChunkSize: number;
+    /** Whether to enable parallel TTS generation */
+    parallelGeneration: boolean;
+    /** Maximum number of parallel TTS requests */
+    maxParallelRequests: number;
+}
+/**
+ * Configuration for conversation history memory management
+ */
+interface HistoryConfig {
+    /** Maximum number of messages to keep in history. When exceeded, oldest messages are trimmed. Set to 0 for unlimited. */
+    maxMessages: number;
+    /** Maximum total character count across all messages. When exceeded, oldest messages are trimmed. Set to 0 for unlimited. */
+    maxTotalChars: number;
+}
+export interface VoiceAgentOptions {
+    model: LanguageModel;
+    transcriptionModel?: TranscriptionModel;
+    speechModel?: SpeechModel;
+    instructions?: string;
+    stopWhen?: NonNullable<Parameters<typeof streamText>[0]["stopWhen"]>;
+    tools?: Record<string, Tool>;
+    endpoint?: string;
+    voice?: string;
+    speechInstructions?: string;
+    outputFormat?: string;
+    /** Configuration for streaming speech generation */
+    streamingSpeech?: Partial<StreamingSpeechConfig>;
+    /** Configuration for conversation history memory limits */
+    history?: Partial<HistoryConfig>;
+    /** Maximum audio input size in bytes (default: 10 MB) */
+    maxAudioInputSize?: number;
+}
+export declare class VoiceAgent extends EventEmitter {
+    private socket?;
+    private tools;
+    private model;
+    private transcriptionModel?;
+    private speechModel?;
+    private instructions;
+    private stopWhen;
+    private endpoint?;
+    private isConnected;
+    private conversationHistory;
+    private voice;
+    private speechInstructions?;
+    private outputFormat;
+    private isProcessing;
+    private isDestroyed;
+    private inputQueue;
+    private processingQueue;
+    private currentStreamAbortController?;
+    private historyConfig;
+    private maxAudioInputSize;
+    private streamingSpeechConfig;
+    private currentSpeechAbortController?;
+    private speechChunkQueue;
+    private nextChunkId;
+    private isSpeaking;
+    private pendingTextBuffer;
+    private speechQueueDonePromise?;
+    private speechQueueDoneResolve?;
+    constructor(options: VoiceAgentOptions);
+    /**
+     * Ensure the agent has not been destroyed. Throws if it has.
+     */
+    private ensureNotDestroyed;
+    private setupListeners;
+    /**
+     * Clean up all in-flight state when the connection drops.
+     */
+    private cleanupOnDisconnect;
+    registerTools(tools: Record<string, Tool>): void;
+    /**
+     * Transcribe audio data to text using the configured transcription model
+     */
+    transcribeAudio(audioData: Buffer | Uint8Array): Promise<string>;
+    /**
+     * Generate speech from text using the configured speech model
+     * @param abortSignal Optional signal to cancel the speech generation
+     */
+    generateSpeechFromText(text: string, abortSignal?: AbortSignal): Promise<Uint8Array>;
+    /**
+     * Interrupt ongoing speech generation and playback (barge-in support).
+     * This only interrupts TTS — the LLM stream is left running.
+     */
+    interruptSpeech(reason?: string): void;
+    /**
+     * Interrupt both the current LLM stream and ongoing speech.
+     * Use this for barge-in scenarios where the entire response should be cancelled.
+     */
+    interruptCurrentResponse(reason?: string): void;
+    /**
+     * Extract complete sentences from text buffer
+     * Returns [extractedSentences, remainingBuffer]
+     */
+    private extractSentences;
+    /**
+     * Trim conversation history to stay within configured limits.
+     * Removes oldest messages (always in pairs to preserve user/assistant turns).
+     */
+    private trimHistory;
+    /**
+     * Queue a text chunk for speech generation
+     */
+    private queueSpeechChunk;
+    /**
+     * Generate audio for a single chunk
+     */
+    private generateChunkAudio;
+    /**
+     * Process the speech queue and send audio chunks in order
+     */
+    private processSpeechQueue;
+    /**
+     * Process text delta for streaming speech.
+     * Call this as text chunks arrive from LLM.
+     */
+    private processTextForStreamingSpeech;
+    /**
+     * Flush any remaining text in the buffer to speech
+     * Call this when stream ends
+     */
+    private flushStreamingSpeech;
+    /**
+     * Process incoming audio data: transcribe and generate response
+     */
+    private processAudioInput;
+    connect(url?: string): Promise<void>;
+    /**
+     * Attach an existing WebSocket (server-side usage).
+     * Use this when a WS server accepts a connection and you want the
+     * agent to handle messages on that socket.
+     */
+    handleSocket(socket: WebSocket): void;
+    /**
+     * Send text input for processing (bypasses transcription).
+     * Requests are queued and processed serially to prevent race conditions.
+     */
+    sendText(text: string): Promise<string>;
+    /**
+     * Send audio data to be transcribed and processed
+     * @param audioData Base64 encoded audio data
+     */
+    sendAudio(audioData: string): Promise<void>;
+    /**
+     * Send raw audio buffer to be transcribed and processed
+     */
+    sendAudioBuffer(audioBuffer: Buffer | Uint8Array): Promise<void>;
+    /**
+     * Enqueue a text input for serial processing.
+     * This ensures only one processUserInput runs at a time, preventing
+     * race conditions on conversationHistory, fullText accumulation, etc.
+     */
+    private enqueueInput;
+    /**
+     * Drain the input queue, processing one request at a time.
+     */
+    private drainInputQueue;
+    /**
+     * Process user input with streaming text generation.
+     * Handles the full pipeline: text -> LLM (streaming) -> TTS -> WebSocket.
+     *
+     * This method is designed to be called serially via drainInputQueue().
+     */
+    private processUserInput;
+    /**
+     * Generate speech for full text at once (non-streaming fallback)
+     * Useful when you want to bypass streaming speech for short responses
+     */
+    generateAndSendSpeechFull(text: string): Promise<void>;
+    /**
+     * Send a message via WebSocket if connected.
+     * Gracefully handles send failures (e.g., socket closing mid-send).
+     */
+    private sendWebSocketMessage;
+    /**
+     * Start listening for voice input
+     */
+    startListening(): void;
+    /**
+     * Stop listening for voice input
+     */
+    stopListening(): void;
+    /**
+     * Clear conversation history
+     */
+    clearHistory(): void;
+    /**
+     * Get current conversation history
+     */
+    getHistory(): ModelMessage[];
+    /**
+     * Set conversation history (useful for restoring sessions)
+     */
+    setHistory(history: ModelMessage[]): void;
+    /**
+     * Internal helper to close and clean up the current socket.
+     */
+    private disconnectSocket;
+    /**
+     * Disconnect from WebSocket and stop all in-flight work.
+     */
+    disconnect(): void;
+    /**
+     * Permanently destroy the agent, releasing all resources.
+     * After calling this, the agent cannot be reused.
+     */
+    destroy(): void;
+    /**
+     * Check if agent is connected to WebSocket
+     */
+    get connected(): boolean;
+    /**
+     * Check if agent is currently processing a request
+     */
+    get processing(): boolean;
+    /**
+     * Check if agent is currently speaking (generating/playing audio)
+     */
+    get speaking(): boolean;
+    /**
+     * Get the number of pending speech chunks in the queue
+     */
+    get pendingSpeechChunks(): number;
+    /**
+     * Check if agent has been permanently destroyed
+     */
+    get destroyed(): boolean;
+}
+export {};
+//# sourceMappingURL=VoiceAgent.d.ts.map