feat: add dist directory with compiled files and type definitions

- Created dist/index.js and dist/index.d.ts for main entry points.
- Added source maps for index.js and index.d.ts.
- Introduced dist/utils/StreamBuffer.js and StreamBuffer.d.ts with source maps.
- Updated package.json to point main and types to dist files.
- Included additional files in package.json for distribution.
- Added peerDependencies and updated devDependencies.
This commit is contained in:
Bijit Mondal
2026-02-14 14:39:23 +05:30
parent 637d57fb41
commit ce10d521f3
15 changed files with 1385 additions and 10 deletions

2
.gitignore vendored
View File

@@ -4,4 +4,4 @@ node_modules
.marscode
dist
# dist

View File

@@ -1,5 +1,7 @@
# voice-agent-ai-sdk
[![npm version](https://badge.fury.io/js/voice-agent-ai-sdk.svg)](https://www.npmjs.com/package/voice-agent-ai-sdk)
Streaming voice/text agent SDK built on [AI SDK](https://sdk.vercel.ai/) with optional WebSocket transport.
## Features

242
dist/VoiceAgent.d.ts vendored Normal file
View File

@@ -0,0 +1,242 @@
import { WebSocket } from "ws";
import { EventEmitter } from "events";
import { streamText, LanguageModel, type Tool, type ModelMessage, type TranscriptionModel, type SpeechModel } from "ai";
/**
* Configuration for streaming speech behavior.
*
* Declared without `export`, so the name is local to this declaration file.
* It is consumed through `VoiceAgentOptions.streamingSpeech`, which accepts a
* `Partial<StreamingSpeechConfig>` (every field may be omitted by callers).
*/
interface StreamingSpeechConfig {
/** Minimum characters before generating speech for a chunk */
minChunkSize: number;
/** Maximum characters per chunk (will split at sentence boundary before this) */
maxChunkSize: number;
/** Whether to enable parallel TTS generation */
parallelGeneration: boolean;
/**
* Maximum number of parallel TTS requests.
* NOTE(review): presumably only relevant when `parallelGeneration` is true; confirm in the implementation.
*/
maxParallelRequests: number;
}
/**
* Configuration for conversation history memory management.
*
* Declared without `export`, so the name is local to this declaration file.
* It is consumed through `VoiceAgentOptions.history`, which accepts a
* `Partial<HistoryConfig>` (every field may be omitted by callers).
*/
interface HistoryConfig {
/** Maximum number of messages to keep in history. When exceeded, oldest messages are trimmed. Set to 0 for unlimited. */
maxMessages: number;
/** Maximum total character count across all messages. When exceeded, oldest messages are trimmed. Set to 0 for unlimited. */
maxTotalChars: number;
}
/**
* Options accepted by the `VoiceAgent` constructor.
* Only `model` is required; every other field is optional.
*/
export interface VoiceAgentOptions {
/** Language model (AI SDK `LanguageModel`) used by the agent. */
model: LanguageModel;
/** Transcription model (AI SDK `TranscriptionModel`) used to turn audio into text. */
transcriptionModel?: TranscriptionModel;
/** Speech model (AI SDK `SpeechModel`) used to generate speech from text. */
speechModel?: SpeechModel;
/** Instructions for the agent. NOTE(review): presumably used as the system prompt; confirm in the implementation. */
instructions?: string;
/** Stop condition; typed as the non-nullable `stopWhen` option of the first parameter of AI SDK `streamText`. */
stopWhen?: NonNullable<Parameters<typeof streamText>[0]["stopWhen"]>;
/** Tools keyed by name. */
tools?: Record<string, Tool>;
/** NOTE(review): presumably the WebSocket URL used by `connect()` when no URL argument is given; confirm. */
endpoint?: string;
/** NOTE(review): presumably the voice identifier passed to the speech model; confirm. */
voice?: string;
/** NOTE(review): presumably extra instructions passed to the speech model; confirm. */
speechInstructions?: string;
/** NOTE(review): presumably the audio output format requested from the speech model; accepted values are not visible here. */
outputFormat?: string;
/** Configuration for streaming speech generation */
streamingSpeech?: Partial<StreamingSpeechConfig>;
/** Configuration for conversation history memory limits */
history?: Partial<HistoryConfig>;
/** Maximum audio input size in bytes (default: 10 MB) */
maxAudioInputSize?: number;
}
/**
* Streaming voice/text agent exposed as an `EventEmitter`.
*
* The public surface covers transcription (`transcribeAudio`), speech synthesis
* (`generateSpeechFromText`, `generateAndSendSpeechFull`), text/audio input
* (`sendText`, `sendAudio`, `sendAudioBuffer`), barge-in (`interruptSpeech`,
* `interruptCurrentResponse`), WebSocket attachment (`connect`, `handleSocket`,
* `disconnect`), conversation history accessors and lifecycle (`destroy`).
*
* This is a declaration file: private members are listed by name only, so their
* types and the names of emitted events are not visible here.
*/
export declare class VoiceAgent extends EventEmitter {
private socket?;
private tools;
private model;
private transcriptionModel?;
private speechModel?;
private instructions;
private stopWhen;
private endpoint?;
private isConnected;
private conversationHistory;
private voice;
private speechInstructions?;
private outputFormat;
private isProcessing;
private isDestroyed;
private inputQueue;
private processingQueue;
private currentStreamAbortController?;
private historyConfig;
private maxAudioInputSize;
private streamingSpeechConfig;
private currentSpeechAbortController?;
private speechChunkQueue;
private nextChunkId;
private isSpeaking;
private pendingTextBuffer;
private speechQueueDonePromise?;
private speechQueueDoneResolve?;
/**
* Create an agent from the given options.
* @param options Agent configuration; see `VoiceAgentOptions`.
*/
constructor(options: VoiceAgentOptions);
/**
* Ensure the agent has not been destroyed. Throws if it has.
*/
private ensureNotDestroyed;
private setupListeners;
/**
* Clean up all in-flight state when the connection drops.
*/
private cleanupOnDisconnect;
/**
* Register tools with the agent.
* NOTE(review): whether these are merged with or replace previously configured
* tools is not visible from this declaration; confirm in the implementation.
* @param tools Tools keyed by name.
*/
registerTools(tools: Record<string, Tool>): void;
/**
* Transcribe audio data to text using the configured transcription model.
* @param audioData Raw audio bytes.
* @returns Promise resolving to the transcribed text.
*/
transcribeAudio(audioData: Buffer | Uint8Array): Promise<string>;
/**
* Generate speech from text using the configured speech model.
* @param text Text to synthesize.
* @param abortSignal Optional signal to cancel the speech generation
* @returns Promise resolving to the generated audio bytes.
*/
generateSpeechFromText(text: string, abortSignal?: AbortSignal): Promise<Uint8Array>;
/**
* Interrupt ongoing speech generation and playback (barge-in support).
* This only interrupts TTS — the LLM stream is left running.
* @param reason Optional reason for the interruption.
*/
interruptSpeech(reason?: string): void;
/**
* Interrupt both the current LLM stream and ongoing speech.
* Use this for barge-in scenarios where the entire response should be cancelled.
* @param reason Optional reason for the interruption.
*/
interruptCurrentResponse(reason?: string): void;
/**
* Extract complete sentences from the text buffer.
* Returns [extractedSentences, remainingBuffer].
*/
private extractSentences;
/**
* Trim conversation history to stay within configured limits.
* Removes oldest messages (always in pairs to preserve user/assistant turns).
*/
private trimHistory;
/**
* Queue a text chunk for speech generation.
*/
private queueSpeechChunk;
/**
* Generate audio for a single chunk.
*/
private generateChunkAudio;
/**
* Process the speech queue and send audio chunks in order.
*/
private processSpeechQueue;
/**
* Process text delta for streaming speech.
* Call this as text chunks arrive from LLM.
*/
private processTextForStreamingSpeech;
/**
* Flush any remaining text in the buffer to speech.
* Call this when the stream ends.
*/
private flushStreamingSpeech;
/**
* Process incoming audio data: transcribe and generate response.
*/
private processAudioInput;
/**
* Connect the agent to a WebSocket.
* @param url Optional URL. NOTE(review): presumably falls back to the `endpoint`
* option when omitted; confirm in the implementation.
* @returns Promise with no value. NOTE(review): presumably settles once the
* socket is open; confirm.
*/
connect(url?: string): Promise<void>;
/**
* Attach an existing WebSocket (server-side usage).
* Use this when a WS server accepts a connection and you want the
* agent to handle messages on that socket.
*/
handleSocket(socket: WebSocket): void;
/**
* Send text input for processing (bypasses transcription).
* Requests are queued and processed serially to prevent race conditions.
* @returns Promise resolving to a string. NOTE(review): presumably the full
* response text; confirm in the implementation.
*/
sendText(text: string): Promise<string>;
/**
* Send audio data to be transcribed and processed.
* @param audioData Base64 encoded audio data
*/
sendAudio(audioData: string): Promise<void>;
/**
* Send raw audio buffer to be transcribed and processed.
* @param audioBuffer Raw (not Base64 encoded) audio bytes.
*/
sendAudioBuffer(audioBuffer: Buffer | Uint8Array): Promise<void>;
/**
* Enqueue a text input for serial processing.
* This ensures only one processUserInput runs at a time, preventing
* race conditions on conversationHistory, fullText accumulation, etc.
*/
private enqueueInput;
/**
* Drain the input queue, processing one request at a time.
*/
private drainInputQueue;
/**
* Process user input with streaming text generation.
* Handles the full pipeline: text -> LLM (streaming) -> TTS -> WebSocket.
*
* This method is designed to be called serially via drainInputQueue().
*/
private processUserInput;
/**
* Generate speech for the full text at once (non-streaming fallback).
* Useful when you want to bypass streaming speech for short responses.
*/
generateAndSendSpeechFull(text: string): Promise<void>;
/**
* Send a message via WebSocket if connected.
* Gracefully handles send failures (e.g., socket closing mid-send).
*/
private sendWebSocketMessage;
/**
* Start listening for voice input.
*/
startListening(): void;
/**
* Stop listening for voice input.
*/
stopListening(): void;
/**
* Clear conversation history.
*/
clearHistory(): void;
/**
* Get current conversation history.
* NOTE(review): whether this returns a copy or the internal array is not visible here; confirm before mutating the result.
*/
getHistory(): ModelMessage[];
/**
* Set conversation history (useful for restoring sessions).
*/
setHistory(history: ModelMessage[]): void;
/**
* Internal helper to close and clean up the current socket.
*/
private disconnectSocket;
/**
* Disconnect from WebSocket and stop all in-flight work.
*/
disconnect(): void;
/**
* Permanently destroy the agent, releasing all resources.
* After calling this, the agent cannot be reused.
*/
destroy(): void;
/**
* Check if agent is connected to WebSocket.
*/
get connected(): boolean;
/**
* Check if agent is currently processing a request.
*/
get processing(): boolean;
/**
* Check if agent is currently speaking (generating/playing audio).
*/
get speaking(): boolean;
/**
* Get the number of pending speech chunks in the queue.
*/
get pendingSpeechChunks(): number;
/**
* Check if agent has been permanently destroyed.
*/
get destroyed(): boolean;
}
export {};
//# sourceMappingURL=VoiceAgent.d.ts.map

1
dist/VoiceAgent.d.ts.map vendored Normal file
View File

@@ -0,0 +1 @@
{"version":3,"file":"VoiceAgent.d.ts","sourceRoot":"","sources":["../src/VoiceAgent.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AAC/B,OAAO,EAAE,YAAY,EAAE,MAAM,QAAQ,CAAC;AACtC,OAAO,EACL,UAAU,EACV,aAAa,EAEb,KAAK,IAAI,EACT,KAAK,YAAY,EAGjB,KAAK,kBAAkB,EACvB,KAAK,WAAW,EACjB,MAAM,IAAI,CAAC;AAWZ;;GAEG;AACH,UAAU,qBAAqB;IAC7B,8DAA8D;IAC9D,YAAY,EAAE,MAAM,CAAC;IACrB,iFAAiF;IACjF,YAAY,EAAE,MAAM,CAAC;IACrB,gDAAgD;IAChD,kBAAkB,EAAE,OAAO,CAAC;IAC5B,8CAA8C;IAC9C,mBAAmB,EAAE,MAAM,CAAC;CAC7B;AAED;;GAEG;AACH,UAAU,aAAa;IACrB,yHAAyH;IACzH,WAAW,EAAE,MAAM,CAAC;IACpB,6HAA6H;IAC7H,aAAa,EAAE,MAAM,CAAC;CACvB;AAKD,MAAM,WAAW,iBAAiB;IAChC,KAAK,EAAE,aAAa,CAAC;IACrB,kBAAkB,CAAC,EAAE,kBAAkB,CAAC;IACxC,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,WAAW,CAAC,UAAU,CAAC,OAAO,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC;IACrE,KAAK,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IAC7B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,oDAAoD;IACpD,eAAe,CAAC,EAAE,OAAO,CAAC,qBAAqB,CAAC,CAAC;IACjD,2DAA2D;IAC3D,OAAO,CAAC,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC;IACjC,yDAAyD;IACzD,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,qBAAa,UAAW,SAAQ,YAAY;IAC1C,OAAO,CAAC,MAAM,CAAC,CAAY;IAC3B,OAAO,CAAC,KAAK,CAA4B;IACzC,OAAO,CAAC,KAAK,CAAgB;IAC7B,OAAO,CAAC,kBAAkB,CAAC,CAAqB;IAChD,OAAO,CAAC,WAAW,CAAC,CAAc;IAClC,OAAO,CAAC,YAAY,CAAS;IAC7B,OAAO,CAAC,QAAQ,CAA4D;IAC5E,OAAO,CAAC,QAAQ,CAAC,CAAS;IAC1B,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,mBAAmB,CAAsB;IACjD,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,kBAAkB,CAAC,CAAS;IACpC,OAAO,CAAC,YAAY,CAAS;IAC7B,OAAO,CAAC,YAAY,CAAS;IAC7B,OAAO,CAAC,WAAW,CAAS;IAG5B,OAAO,CAAC,UAAU,CAA2F;IAC7G,OAAO,CAAC,eAAe,CAAS;IAGhC,OAAO,CAAC,4BAA4B,CAAC,CAAkB;IAGvD,OAAO,CAAC,aAAa,CAAgB;IACrC,OAAO,CAAC,iBAAiB,CAAS;IAGlC,OAAO,CAAC,qBAAqB,CAAwB;IACrD,OAAO,CAAC,4BAA4B,CAAC,CAAkB;IACvD,OAAO,CAAC,gBAAgB,CAAqB;IAC7C,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,UAAU,CAAS;IAC3B,OAAO,CAAC,iBAAiB,CAAM;IAG/B,OAAO,CAAC,sBAAsB,CAAC
,CAAgB;IAC/C,OAAO,CAAC,sBAAsB,CAAC,CAAa;gBAEhC,OAAO,EAAE,iBAAiB;IAkCtC;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAM1B,OAAO,CAAC,cAAc;IAuDtB;;OAEG;IACH,OAAO,CAAC,mBAAmB;IA8BpB,aAAa,CAAC,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC;IAIhD;;OAEG;IACU,eAAe,CAAC,SAAS,EAAE,MAAM,GAAG,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC;IAuC7E;;;OAGG;IACU,sBAAsB,CACjC,IAAI,EAAE,MAAM,EACZ,WAAW,CAAC,EAAE,WAAW,GACxB,OAAO,CAAC,UAAU,CAAC;IAiBtB;;;OAGG;IACI,eAAe,CAAC,MAAM,GAAE,MAAsB,GAAG,IAAI;IAgC5D;;;OAGG;IACI,wBAAwB,CAAC,MAAM,GAAE,MAAsB,GAAG,IAAI;IAUrE;;;OAGG;IACH,OAAO,CAAC,gBAAgB;IA8CxB;;;OAGG;IACH,OAAO,CAAC,WAAW;IAmCnB;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAsCxB;;OAEG;YACW,kBAAkB;IAwBhC;;OAEG;YACW,kBAAkB;IA+FhC;;;OAGG;IACH,OAAO,CAAC,6BAA6B;IAarC;;;OAGG;IACH,OAAO,CAAC,oBAAoB;IAO5B;;OAEG;YACW,iBAAiB;IAiDlB,OAAO,CAAC,GAAG,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IA8BjD;;;;OAIG;IACI,YAAY,CAAC,MAAM,EAAE,SAAS,GAAG,IAAI;IAc5C;;;OAGG;IACU,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAQpD;;;OAGG;IACU,SAAS,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAKxD;;OAEG;IACU,eAAe,CAAC,WAAW,EAAE,MAAM,GAAG,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC;IAM7E;;;;OAIG;IACH,OAAO,CAAC,YAAY;IAOpB;;OAEG;YACW,eAAe;IAmB7B;;;;;OAKG;YACW,gBAAgB;IAuT9B;;;OAGG;IACU,yBAAyB,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IA8BnE;;;OAGG;IACH,OAAO,CAAC,oBAAoB;IA2B5B;;OAEG;IACH,cAAc;IAKd;;OAEG;IACH,aAAa;IAKb;;OAEG;IACH,YAAY;IAKZ;;OAEG;IACH,UAAU,IAAI,YAAY,EAAE;IAI5B;;OAEG;IACH,UAAU,CAAC,OAAO,EAAE,YAAY,EAAE;IAIlC;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAmBxB;;OAEG;IACH,UAAU;IAIV;;;OAGG;IACH,OAAO;IAQP;;OAEG;IACH,IAAI,SAAS,IAAI,OAAO,CAEvB;IAED;;OAEG;IACH,IAAI,UAAU,IAAI,OAAO,CAExB;IAED;;OAEG;IACH,IAAI,QAAQ,IAAI,OAAO,CAEtB;IAED;;OAEG;IACH,IAAI,mBAAmB,IAAI,MAAM,CAEhC;IAED;;OAEG;IACH,IAAI,SAAS,IAAI,OAAO,CAEvB;CACF"}

1093
dist/VoiceAgent.js vendored Normal file

File diff suppressed because it is too large Load Diff

1
dist/VoiceAgent.js.map vendored Normal file

File diff suppressed because one or more lines are too long

2
dist/index.d.ts vendored Normal file
View File

@@ -0,0 +1,2 @@
export { VoiceAgent, type VoiceAgentOptions } from "./VoiceAgent";
//# sourceMappingURL=index.d.ts.map

1
dist/index.d.ts.map vendored Normal file
View File

@@ -0,0 +1 @@
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,KAAK,iBAAiB,EAAE,MAAM,cAAc,CAAC"}

6
dist/index.js vendored Normal file
View File

@@ -0,0 +1,6 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.VoiceAgent = void 0;
var VoiceAgent_1 = require("./VoiceAgent");
Object.defineProperty(exports, "VoiceAgent", { enumerable: true, get: function () { return VoiceAgent_1.VoiceAgent; } });
//# sourceMappingURL=index.js.map

1
dist/index.js.map vendored Normal file
View File

@@ -0,0 +1 @@
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;AAAA,2CAAkE;AAAzD,wGAAA,UAAU,OAAA"}

1
dist/utils/StreamBuffer.d.ts vendored Normal file
View File

@@ -0,0 +1 @@
//# sourceMappingURL=StreamBuffer.d.ts.map

1
dist/utils/StreamBuffer.d.ts.map vendored Normal file
View File

@@ -0,0 +1 @@
{"version":3,"file":"StreamBuffer.d.ts","sourceRoot":"","sources":["../../src/utils/StreamBuffer.ts"],"names":[],"mappings":""}

2
dist/utils/StreamBuffer.js vendored Normal file
View File

@@ -0,0 +1,2 @@
"use strict";
//# sourceMappingURL=StreamBuffer.js.map

1
dist/utils/StreamBuffer.js.map vendored Normal file
View File

@@ -0,0 +1 @@
{"version":3,"file":"StreamBuffer.js","sourceRoot":"","sources":["../../src/utils/StreamBuffer.ts"],"names":[],"mappings":""}

View File

@@ -2,7 +2,13 @@
"name": "voice-agent-ai-sdk",
"version": "0.1.0",
"description": "Voice AI Agent with ai-sdk",
"main": "src/index.ts",
"main": "dist/index.js",
"types": "dist/index.d.ts",
"files": [
"dist",
"README.md",
"LICENSE"
],
"scripts": {
"build": "tsc",
"dev": "tsc -w",
@@ -16,23 +22,38 @@
"websocket",
"ai",
"agent",
"tools"
"tools",
"tts",
"speech",
"ai-sdk",
"streaming"
],
"author": "Bijit Mondal",
"license": "MIT",
"repository": {
"type": "git",
"url": "git+https://github.com/Bijit-Mondal/voiceAgent.git"
},
"bugs": {
"url": "https://github.com/Bijit-Mondal/voiceAgent/issues"
},
"homepage": "https://github.com/Bijit-Mondal/voiceAgent#readme",
"packageManager": "pnpm@10.27.0",
"devDependencies": {
"@ai-sdk/openai": "^3.0.28",
"@types/node": "^25.2.3",
"@types/ws": "^8.18.1",
"tsx": "^4.20.5",
"typescript": "^5.9.3"
"peerDependencies": {
"ai": "^6.0.0"
},
"dependencies": {
"ai": "^6.0.85",
"dotenv": "^17.2.3",
"ws": "^8.19.0",
"zod": "^4.3.6",
"zod-to-json-schema": "^3.25.1"
},
"devDependencies": {
"@ai-sdk/openai": "^3.0.28",
"@types/node": "^25.2.3",
"@types/ws": "^8.18.1",
"ai": "^6.0.85",
"tsx": "^4.20.5",
"typescript": "^5.9.3"
}
}