This commit is contained in:
Bijit Mondal
2026-02-13 13:57:10 +05:30
parent 393a616fe1
commit 77bac597e4
10 changed files with 854 additions and 0 deletions

7
.gitignore vendored Normal file
View File

@@ -0,0 +1,7 @@
.env
node_modules
.marscode
dist

60
README.md Normal file
View File

@@ -0,0 +1,60 @@
# voice-agent-ai-sdk
Minimal voice/text agent SDK built on AI SDK with optional WebSocket transport.
## Current status
- Text flow works via `sendText()` (no WebSocket required).
- WebSocket flow works when `connect()` is used with a running WS endpoint.
- Voice streaming is not implemented yet.
## Prerequisites
- Node.js 20+
- pnpm
- OpenAI API key
## Setup
1. Install dependencies:

   ```bash
   pnpm install
   ```

2. Configure environment variables in `.env`:

   ```env
   OPENAI_API_KEY=your_openai_api_key
   VOICE_WS_ENDPOINT=ws://localhost:8080
   ```
## Run (text-only check)
This validates the model and tool calls without requiring a WebSocket connection:

```bash
pnpm demo
```

Expected logs include `text` events and, optionally, `tool_start` events.
## Run (WebSocket check)
1. Start the local WS server:

   ```bash
   pnpm ws:server
   ```

2. In another terminal, run the demo:

   ```bash
   pnpm demo
   ```
The demo will:
- run `sendText()` first (text-only sanity check), then
- connect to `VOICE_WS_ENDPOINT` if provided.
## Scripts
- `pnpm build` — build TypeScript
- `pnpm dev` — watch and rebuild TypeScript
- `pnpm demo` — run the demo client
- `pnpm ws:server` — run the local test WebSocket server
## Notes
- If `VOICE_WS_ENDPOINT` is empty, WebSocket connect is skipped.
- The sample WS server sends a mock `transcript` message for end-to-end testing.

58
example/demo.ts Normal file
View File

@@ -0,0 +1,58 @@
import "dotenv/config";
import { VoiceAgent } from "../src";
import { tool } from "ai";
import { z } from "zod";
import { openai } from "@ai-sdk/openai";
// 1. Define tools with the standard AI SDK `tool()` helper.
/**
 * Mock weather tool: echoes the requested location together with a random
 * integer temperature between 62 and 82 (inclusive).
 */
const weatherTool = tool({
  description: 'Get the weather in a location',
  inputSchema: z.object({
    location: z.string().describe('The location to get the weather for'),
  }),
  execute: async ({ location }) => {
    // Integer offset in [-10, 10] applied to a base of 72.
    const offset = Math.floor(Math.random() * 21) - 10;
    return { location, temperature: 72 + offset };
  },
});
// 2. Create the agent. `endpoint` may be undefined; the WebSocket is only
//    opened later, when `connect()` is called.
const agent = new VoiceAgent({
  model: openai('gpt-4o'),
  endpoint: process.env.VOICE_WS_ENDPOINT,
  instructions: "You are a helpful voice assistant. Use tools when needed.",
  // AI SDK tools are passed through as-is, keyed by the name the model sees.
  tools: { getWeather: weatherTool },
});
// 3. Wire up event handlers.
agent
  .on("connected", () => {
    console.log("Connected to WebSocket");
  })
  // Incoming audio from the AI (play this to the user). Intentionally a no-op here.
  .on("audio", (_base64Audio: string) => {
    // process.stdout.write(Buffer.from(_base64Audio, 'base64'));
  })
  // Conversation log: one line per user input / assistant reply.
  .on("text", ({ role, text }: { role: string; text: string }) => {
    console.log(`${role}: ${text}`);
  })
  // Tool activity log.
  .on("tool_start", ({ name }: { name: string }) => {
    console.log(`[System] Calling ${name}...`);
  });
// 4. Start (wrapped in an async function since top-level await is unavailable).
/**
 * Runs the demo: a text-only round trip first, then an optional WebSocket
 * connection when `VOICE_WS_ENDPOINT` is set. Failures are logged, not rethrown.
 */
async function main(): Promise<void> {
  try {
    // For now: text-only sanity check, no voice pipeline required.
    await agent.sendText("What is the weather in Berlin?");

    // Optional: connect only when an endpoint is provided.
    const endpoint = process.env.VOICE_WS_ENDPOINT;
    if (!endpoint) {
      return;
    }
    await agent.connect(endpoint);
    console.log("Agent connected successfully");
  } catch (error) {
    console.error("Agent run failed:", error);
  }
}

void main();
// 5. Simulate sending audio (in a real app, stream microphone data here)
// agent.sendAudio("Base64EncodedPCM16AudioData...");

43
example/ws-server.ts Normal file
View File

@@ -0,0 +1,43 @@
import "dotenv/config";
import { WebSocketServer } from "ws";
// Resolve where the test server listens: VOICE_WS_ENDPOINT if set, otherwise
// ws://localhost:8080. A URL without an explicit port falls back to 8080.
const endpoint = process.env.VOICE_WS_ENDPOINT || "ws://localhost:8080";
const { port: rawPort, hostname } = new URL(endpoint);
const port = Number(rawPort || 8080);
const host = hostname || "localhost";
// Start the test WebSocket server and report once it is accepting connections.
const wss = new WebSocketServer({ host, port });

wss.on("listening", function onListening() {
  console.log(`[ws-server] listening on ${endpoint}`);
});
// Per-client handling: greet each client with a mock transcript and log
// everything it sends back.
wss.on("connection", (socket) => {
  console.log("[ws-server] client connected");

  // Send a sample transcript to test the text pipeline end-to-end.
  // The timer handle is kept so it can be cancelled if the client leaves early.
  const transcriptTimer = setTimeout(() => {
    // Skip the send if the socket is no longer open.
    if (socket.readyState !== socket.OPEN) return;
    socket.send(
      JSON.stringify({
        type: "transcript",
        text: "What is the weather in Berlin?",
      }),
    );
  }, 500);

  socket.on("message", (data) => {
    const raw = data.toString();
    try {
      // Parsed payload is only logged, so it is kept as `unknown`.
      const msg: unknown = JSON.parse(raw);
      console.log("[ws-server] <-", msg);
    } catch {
      console.log("[ws-server] <- raw", raw);
    }
  });

  // Without an "error" listener, a socket-level error would be thrown as an
  // unhandled "error" event and take the whole test server down.
  socket.on("error", (error) => {
    console.error("[ws-server] socket error:", error);
  });

  socket.on("close", () => {
    clearTimeout(transcriptTimer);
    console.log("[ws-server] client disconnected");
  });
});

37
package.json Normal file
View File

@@ -0,0 +1,37 @@
{
"name": "voice-agent-ai-sdk",
"version": "0.0.1",
"description": "Voice AI Agent with ai-sdk",
"main": "src/index.ts",
"scripts": {
"build": "tsc",
"dev": "tsc -w",
"demo": "tsx example/demo.ts",
"ws:server": "tsx example/ws-server.ts",
"prepublishOnly": "pnpm build"
},
"keywords": [
"voice",
"websocket",
"ai",
"agent",
"tools"
],
"author": "Bijit Mondal",
"license": "MIT",
"packageManager": "pnpm@10.27.0",
"devDependencies": {
"@ai-sdk/openai": "^3.0.28",
"@types/node": "^25.2.3",
"@types/ws": "^8.18.1",
"tsx": "^4.20.5",
"typescript": "^5.9.3"
},
"dependencies": {
"ai": "^6.0.85",
"dotenv": "^17.2.3",
"ws": "^8.19.0",
"zod": "^4.3.6",
"zod-to-json-schema": "^3.25.1"
}
}

493
pnpm-lock.yaml generated Normal file
View File

@@ -0,0 +1,493 @@
lockfileVersion: '9.0'
settings:
autoInstallPeers: true
excludeLinksFromLockfile: false
importers:
.:
dependencies:
ai:
specifier: ^6.0.85
version: 6.0.85(zod@4.3.6)
dotenv:
specifier: ^17.2.3
version: 17.3.1
ws:
specifier: ^8.19.0
version: 8.19.0
zod:
specifier: ^4.3.6
version: 4.3.6
zod-to-json-schema:
specifier: ^3.25.1
version: 3.25.1(zod@4.3.6)
devDependencies:
'@ai-sdk/openai':
specifier: ^3.0.28
version: 3.0.28(zod@4.3.6)
'@types/node':
specifier: ^25.2.3
version: 25.2.3
'@types/ws':
specifier: ^8.18.1
version: 8.18.1
tsx:
specifier: ^4.20.5
version: 4.21.0
typescript:
specifier: ^5.9.3
version: 5.9.3
packages:
'@ai-sdk/gateway@3.0.45':
resolution: {integrity: sha512-ZB6kHV+D8mLCRnkpWotLCV/rZK4NiODxx4Kv7JdT9QmQknbG/scbE4iyoT4JLFdULA8Y/IVbMvyE0Nwq3Dceqw==}
engines: {node: '>=18'}
peerDependencies:
zod: ^3.25.76 || ^4.1.8
'@ai-sdk/openai@3.0.28':
resolution: {integrity: sha512-m2Dm6fwUzMksqnPrd5f/WZ4cZ9GTZHpzsVO6jxKQwwc84gFHzAFZmUCG0C5mV7XlPOw4mwaiYV3HfLiEfphvvA==}
engines: {node: '>=18'}
peerDependencies:
zod: ^3.25.76 || ^4.1.8
'@ai-sdk/provider-utils@4.0.15':
resolution: {integrity: sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w==}
engines: {node: '>=18'}
peerDependencies:
zod: ^3.25.76 || ^4.1.8
'@ai-sdk/provider@3.0.8':
resolution: {integrity: sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ==}
engines: {node: '>=18'}
'@esbuild/aix-ppc64@0.27.3':
resolution: {integrity: sha512-9fJMTNFTWZMh5qwrBItuziu834eOCUcEqymSH7pY+zoMVEZg3gcPuBNxH1EvfVYe9h0x/Ptw8KBzv7qxb7l8dg==}
engines: {node: '>=18'}
cpu: [ppc64]
os: [aix]
'@esbuild/android-arm64@0.27.3':
resolution: {integrity: sha512-YdghPYUmj/FX2SYKJ0OZxf+iaKgMsKHVPF1MAq/P8WirnSpCStzKJFjOjzsW0QQ7oIAiccHdcqjbHmJxRb/dmg==}
engines: {node: '>=18'}
cpu: [arm64]
os: [android]
'@esbuild/android-arm@0.27.3':
resolution: {integrity: sha512-i5D1hPY7GIQmXlXhs2w8AWHhenb00+GxjxRncS2ZM7YNVGNfaMxgzSGuO8o8SJzRc/oZwU2bcScvVERk03QhzA==}
engines: {node: '>=18'}
cpu: [arm]
os: [android]
'@esbuild/android-x64@0.27.3':
resolution: {integrity: sha512-IN/0BNTkHtk8lkOM8JWAYFg4ORxBkZQf9zXiEOfERX/CzxW3Vg1ewAhU7QSWQpVIzTW+b8Xy+lGzdYXV6UZObQ==}
engines: {node: '>=18'}
cpu: [x64]
os: [android]
'@esbuild/darwin-arm64@0.27.3':
resolution: {integrity: sha512-Re491k7ByTVRy0t3EKWajdLIr0gz2kKKfzafkth4Q8A5n1xTHrkqZgLLjFEHVD+AXdUGgQMq+Godfq45mGpCKg==}
engines: {node: '>=18'}
cpu: [arm64]
os: [darwin]
'@esbuild/darwin-x64@0.27.3':
resolution: {integrity: sha512-vHk/hA7/1AckjGzRqi6wbo+jaShzRowYip6rt6q7VYEDX4LEy1pZfDpdxCBnGtl+A5zq8iXDcyuxwtv3hNtHFg==}
engines: {node: '>=18'}
cpu: [x64]
os: [darwin]
'@esbuild/freebsd-arm64@0.27.3':
resolution: {integrity: sha512-ipTYM2fjt3kQAYOvo6vcxJx3nBYAzPjgTCk7QEgZG8AUO3ydUhvelmhrbOheMnGOlaSFUoHXB6un+A7q4ygY9w==}
engines: {node: '>=18'}
cpu: [arm64]
os: [freebsd]
'@esbuild/freebsd-x64@0.27.3':
resolution: {integrity: sha512-dDk0X87T7mI6U3K9VjWtHOXqwAMJBNN2r7bejDsc+j03SEjtD9HrOl8gVFByeM0aJksoUuUVU9TBaZa2rgj0oA==}
engines: {node: '>=18'}
cpu: [x64]
os: [freebsd]
'@esbuild/linux-arm64@0.27.3':
resolution: {integrity: sha512-sZOuFz/xWnZ4KH3YfFrKCf1WyPZHakVzTiqji3WDc0BCl2kBwiJLCXpzLzUBLgmp4veFZdvN5ChW4Eq/8Fc2Fg==}
engines: {node: '>=18'}
cpu: [arm64]
os: [linux]
'@esbuild/linux-arm@0.27.3':
resolution: {integrity: sha512-s6nPv2QkSupJwLYyfS+gwdirm0ukyTFNl3KTgZEAiJDd+iHZcbTPPcWCcRYH+WlNbwChgH2QkE9NSlNrMT8Gfw==}
engines: {node: '>=18'}
cpu: [arm]
os: [linux]
'@esbuild/linux-ia32@0.27.3':
resolution: {integrity: sha512-yGlQYjdxtLdh0a3jHjuwOrxQjOZYD/C9PfdbgJJF3TIZWnm/tMd/RcNiLngiu4iwcBAOezdnSLAwQDPqTmtTYg==}
engines: {node: '>=18'}
cpu: [ia32]
os: [linux]
'@esbuild/linux-loong64@0.27.3':
resolution: {integrity: sha512-WO60Sn8ly3gtzhyjATDgieJNet/KqsDlX5nRC5Y3oTFcS1l0KWba+SEa9Ja1GfDqSF1z6hif/SkpQJbL63cgOA==}
engines: {node: '>=18'}
cpu: [loong64]
os: [linux]
'@esbuild/linux-mips64el@0.27.3':
resolution: {integrity: sha512-APsymYA6sGcZ4pD6k+UxbDjOFSvPWyZhjaiPyl/f79xKxwTnrn5QUnXR5prvetuaSMsb4jgeHewIDCIWljrSxw==}
engines: {node: '>=18'}
cpu: [mips64el]
os: [linux]
'@esbuild/linux-ppc64@0.27.3':
resolution: {integrity: sha512-eizBnTeBefojtDb9nSh4vvVQ3V9Qf9Df01PfawPcRzJH4gFSgrObw+LveUyDoKU3kxi5+9RJTCWlj4FjYXVPEA==}
engines: {node: '>=18'}
cpu: [ppc64]
os: [linux]
'@esbuild/linux-riscv64@0.27.3':
resolution: {integrity: sha512-3Emwh0r5wmfm3ssTWRQSyVhbOHvqegUDRd0WhmXKX2mkHJe1SFCMJhagUleMq+Uci34wLSipf8Lagt4LlpRFWQ==}
engines: {node: '>=18'}
cpu: [riscv64]
os: [linux]
'@esbuild/linux-s390x@0.27.3':
resolution: {integrity: sha512-pBHUx9LzXWBc7MFIEEL0yD/ZVtNgLytvx60gES28GcWMqil8ElCYR4kvbV2BDqsHOvVDRrOxGySBM9Fcv744hw==}
engines: {node: '>=18'}
cpu: [s390x]
os: [linux]
'@esbuild/linux-x64@0.27.3':
resolution: {integrity: sha512-Czi8yzXUWIQYAtL/2y6vogER8pvcsOsk5cpwL4Gk5nJqH5UZiVByIY8Eorm5R13gq+DQKYg0+JyQoytLQas4dA==}
engines: {node: '>=18'}
cpu: [x64]
os: [linux]
'@esbuild/netbsd-arm64@0.27.3':
resolution: {integrity: sha512-sDpk0RgmTCR/5HguIZa9n9u+HVKf40fbEUt+iTzSnCaGvY9kFP0YKBWZtJaraonFnqef5SlJ8/TiPAxzyS+UoA==}
engines: {node: '>=18'}
cpu: [arm64]
os: [netbsd]
'@esbuild/netbsd-x64@0.27.3':
resolution: {integrity: sha512-P14lFKJl/DdaE00LItAukUdZO5iqNH7+PjoBm+fLQjtxfcfFE20Xf5CrLsmZdq5LFFZzb5JMZ9grUwvtVYzjiA==}
engines: {node: '>=18'}
cpu: [x64]
os: [netbsd]
'@esbuild/openbsd-arm64@0.27.3':
resolution: {integrity: sha512-AIcMP77AvirGbRl/UZFTq5hjXK+2wC7qFRGoHSDrZ5v5b8DK/GYpXW3CPRL53NkvDqb9D+alBiC/dV0Fb7eJcw==}
engines: {node: '>=18'}
cpu: [arm64]
os: [openbsd]
'@esbuild/openbsd-x64@0.27.3':
resolution: {integrity: sha512-DnW2sRrBzA+YnE70LKqnM3P+z8vehfJWHXECbwBmH/CU51z6FiqTQTHFenPlHmo3a8UgpLyH3PT+87OViOh1AQ==}
engines: {node: '>=18'}
cpu: [x64]
os: [openbsd]
'@esbuild/openharmony-arm64@0.27.3':
resolution: {integrity: sha512-NinAEgr/etERPTsZJ7aEZQvvg/A6IsZG/LgZy+81wON2huV7SrK3e63dU0XhyZP4RKGyTm7aOgmQk0bGp0fy2g==}
engines: {node: '>=18'}
cpu: [arm64]
os: [openharmony]
'@esbuild/sunos-x64@0.27.3':
resolution: {integrity: sha512-PanZ+nEz+eWoBJ8/f8HKxTTD172SKwdXebZ0ndd953gt1HRBbhMsaNqjTyYLGLPdoWHy4zLU7bDVJztF5f3BHA==}
engines: {node: '>=18'}
cpu: [x64]
os: [sunos]
'@esbuild/win32-arm64@0.27.3':
resolution: {integrity: sha512-B2t59lWWYrbRDw/tjiWOuzSsFh1Y/E95ofKz7rIVYSQkUYBjfSgf6oeYPNWHToFRr2zx52JKApIcAS/D5TUBnA==}
engines: {node: '>=18'}
cpu: [arm64]
os: [win32]
'@esbuild/win32-ia32@0.27.3':
resolution: {integrity: sha512-QLKSFeXNS8+tHW7tZpMtjlNb7HKau0QDpwm49u0vUp9y1WOF+PEzkU84y9GqYaAVW8aH8f3GcBck26jh54cX4Q==}
engines: {node: '>=18'}
cpu: [ia32]
os: [win32]
'@esbuild/win32-x64@0.27.3':
resolution: {integrity: sha512-4uJGhsxuptu3OcpVAzli+/gWusVGwZZHTlS63hh++ehExkVT8SgiEf7/uC/PclrPPkLhZqGgCTjd0VWLo6xMqA==}
engines: {node: '>=18'}
cpu: [x64]
os: [win32]
'@opentelemetry/api@1.9.0':
resolution: {integrity: sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==}
engines: {node: '>=8.0.0'}
'@standard-schema/spec@1.1.0':
resolution: {integrity: sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w==}
'@types/node@25.2.3':
resolution: {integrity: sha512-m0jEgYlYz+mDJZ2+F4v8D1AyQb+QzsNqRuI7xg1VQX/KlKS0qT9r1Mo16yo5F/MtifXFgaofIFsdFMox2SxIbQ==}
'@types/ws@8.18.1':
resolution: {integrity: sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg==}
'@vercel/oidc@3.1.0':
resolution: {integrity: sha512-Fw28YZpRnA3cAHHDlkt7xQHiJ0fcL+NRcIqsocZQUSmbzeIKRpwttJjik5ZGanXP+vlA4SbTg+AbA3bP363l+w==}
engines: {node: '>= 20'}
ai@6.0.85:
resolution: {integrity: sha512-2bP7M+OcNQGSIH8I3jdujUadxj4tAwuHBvLhpmDSlcjRXXry3zNGEajjjRraOjObHMO/Yqa37PJWhPVHIHt2TQ==}
engines: {node: '>=18'}
peerDependencies:
zod: ^3.25.76 || ^4.1.8
dotenv@17.3.1:
resolution: {integrity: sha512-IO8C/dzEb6O3F9/twg6ZLXz164a2fhTnEWb95H23Dm4OuN+92NmEAlTrupP9VW6Jm3sO26tQlqyvyi4CsnY9GA==}
engines: {node: '>=12'}
esbuild@0.27.3:
resolution: {integrity: sha512-8VwMnyGCONIs6cWue2IdpHxHnAjzxnw2Zr7MkVxB2vjmQ2ivqGFb4LEG3SMnv0Gb2F/G/2yA8zUaiL1gywDCCg==}
engines: {node: '>=18'}
hasBin: true
eventsource-parser@3.0.6:
resolution: {integrity: sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg==}
engines: {node: '>=18.0.0'}
fsevents@2.3.3:
resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==}
engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0}
os: [darwin]
get-tsconfig@4.13.6:
resolution: {integrity: sha512-shZT/QMiSHc/YBLxxOkMtgSid5HFoauqCE3/exfsEcwg1WkeqjG+V40yBbBrsD+jW2HDXcs28xOfcbm2jI8Ddw==}
json-schema@0.4.0:
resolution: {integrity: sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA==}
resolve-pkg-maps@1.0.0:
resolution: {integrity: sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==}
tsx@4.21.0:
resolution: {integrity: sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==}
engines: {node: '>=18.0.0'}
hasBin: true
typescript@5.9.3:
resolution: {integrity: sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==}
engines: {node: '>=14.17'}
hasBin: true
undici-types@7.16.0:
resolution: {integrity: sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==}
ws@8.19.0:
resolution: {integrity: sha512-blAT2mjOEIi0ZzruJfIhb3nps74PRWTCz1IjglWEEpQl5XS/UNama6u2/rjFkDDouqr4L67ry+1aGIALViWjDg==}
engines: {node: '>=10.0.0'}
peerDependencies:
bufferutil: ^4.0.1
utf-8-validate: '>=5.0.2'
peerDependenciesMeta:
bufferutil:
optional: true
utf-8-validate:
optional: true
zod-to-json-schema@3.25.1:
resolution: {integrity: sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA==}
peerDependencies:
zod: ^3.25 || ^4
zod@4.3.6:
resolution: {integrity: sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==}
snapshots:
'@ai-sdk/gateway@3.0.45(zod@4.3.6)':
dependencies:
'@ai-sdk/provider': 3.0.8
'@ai-sdk/provider-utils': 4.0.15(zod@4.3.6)
'@vercel/oidc': 3.1.0
zod: 4.3.6
'@ai-sdk/openai@3.0.28(zod@4.3.6)':
dependencies:
'@ai-sdk/provider': 3.0.8
'@ai-sdk/provider-utils': 4.0.15(zod@4.3.6)
zod: 4.3.6
'@ai-sdk/provider-utils@4.0.15(zod@4.3.6)':
dependencies:
'@ai-sdk/provider': 3.0.8
'@standard-schema/spec': 1.1.0
eventsource-parser: 3.0.6
zod: 4.3.6
'@ai-sdk/provider@3.0.8':
dependencies:
json-schema: 0.4.0
'@esbuild/aix-ppc64@0.27.3':
optional: true
'@esbuild/android-arm64@0.27.3':
optional: true
'@esbuild/android-arm@0.27.3':
optional: true
'@esbuild/android-x64@0.27.3':
optional: true
'@esbuild/darwin-arm64@0.27.3':
optional: true
'@esbuild/darwin-x64@0.27.3':
optional: true
'@esbuild/freebsd-arm64@0.27.3':
optional: true
'@esbuild/freebsd-x64@0.27.3':
optional: true
'@esbuild/linux-arm64@0.27.3':
optional: true
'@esbuild/linux-arm@0.27.3':
optional: true
'@esbuild/linux-ia32@0.27.3':
optional: true
'@esbuild/linux-loong64@0.27.3':
optional: true
'@esbuild/linux-mips64el@0.27.3':
optional: true
'@esbuild/linux-ppc64@0.27.3':
optional: true
'@esbuild/linux-riscv64@0.27.3':
optional: true
'@esbuild/linux-s390x@0.27.3':
optional: true
'@esbuild/linux-x64@0.27.3':
optional: true
'@esbuild/netbsd-arm64@0.27.3':
optional: true
'@esbuild/netbsd-x64@0.27.3':
optional: true
'@esbuild/openbsd-arm64@0.27.3':
optional: true
'@esbuild/openbsd-x64@0.27.3':
optional: true
'@esbuild/openharmony-arm64@0.27.3':
optional: true
'@esbuild/sunos-x64@0.27.3':
optional: true
'@esbuild/win32-arm64@0.27.3':
optional: true
'@esbuild/win32-ia32@0.27.3':
optional: true
'@esbuild/win32-x64@0.27.3':
optional: true
'@opentelemetry/api@1.9.0': {}
'@standard-schema/spec@1.1.0': {}
'@types/node@25.2.3':
dependencies:
undici-types: 7.16.0
'@types/ws@8.18.1':
dependencies:
'@types/node': 25.2.3
'@vercel/oidc@3.1.0': {}
ai@6.0.85(zod@4.3.6):
dependencies:
'@ai-sdk/gateway': 3.0.45(zod@4.3.6)
'@ai-sdk/provider': 3.0.8
'@ai-sdk/provider-utils': 4.0.15(zod@4.3.6)
'@opentelemetry/api': 1.9.0
zod: 4.3.6
dotenv@17.3.1: {}
esbuild@0.27.3:
optionalDependencies:
'@esbuild/aix-ppc64': 0.27.3
'@esbuild/android-arm': 0.27.3
'@esbuild/android-arm64': 0.27.3
'@esbuild/android-x64': 0.27.3
'@esbuild/darwin-arm64': 0.27.3
'@esbuild/darwin-x64': 0.27.3
'@esbuild/freebsd-arm64': 0.27.3
'@esbuild/freebsd-x64': 0.27.3
'@esbuild/linux-arm': 0.27.3
'@esbuild/linux-arm64': 0.27.3
'@esbuild/linux-ia32': 0.27.3
'@esbuild/linux-loong64': 0.27.3
'@esbuild/linux-mips64el': 0.27.3
'@esbuild/linux-ppc64': 0.27.3
'@esbuild/linux-riscv64': 0.27.3
'@esbuild/linux-s390x': 0.27.3
'@esbuild/linux-x64': 0.27.3
'@esbuild/netbsd-arm64': 0.27.3
'@esbuild/netbsd-x64': 0.27.3
'@esbuild/openbsd-arm64': 0.27.3
'@esbuild/openbsd-x64': 0.27.3
'@esbuild/openharmony-arm64': 0.27.3
'@esbuild/sunos-x64': 0.27.3
'@esbuild/win32-arm64': 0.27.3
'@esbuild/win32-ia32': 0.27.3
'@esbuild/win32-x64': 0.27.3
eventsource-parser@3.0.6: {}
fsevents@2.3.3:
optional: true
get-tsconfig@4.13.6:
dependencies:
resolve-pkg-maps: 1.0.0
json-schema@0.4.0: {}
resolve-pkg-maps@1.0.0: {}
tsx@4.21.0:
dependencies:
esbuild: 0.27.3
get-tsconfig: 4.13.6
optionalDependencies:
fsevents: 2.3.3
typescript@5.9.3: {}
undici-types@7.16.0: {}
ws@8.19.0: {}
zod-to-json-schema@3.25.1(zod@4.3.6):
dependencies:
zod: 4.3.6
zod@4.3.6: {}

136
src/VoiceAgent.ts Normal file
View File

@@ -0,0 +1,136 @@
import { WebSocket } from "ws";
import { EventEmitter } from "events";
import { generateText, LanguageModel, stepCountIs, type Tool } from "ai";
/** Construction options for {@link VoiceAgent}. */
export interface VoiceAgentOptions {
  /** AI SDK model used for generation (e.g., `openai('gpt-4o')`). */
  model: LanguageModel;
  /** AI SDK tools made available to the model, keyed by tool name. */
  tools?: Record<string, Tool>;
  /** System prompt. Defaults to "You are a helpful voice assistant." when omitted or empty. */
  instructions?: string;
  /** Stop condition forwarded to `generateText`. Defaults to `stepCountIs(5)`. */
  stopWhen?: NonNullable<Parameters<typeof generateText>[0]["stopWhen"]>;
  /** WebSocket URL used by `connect()` when it is called without an explicit URL. */
  endpoint?: string;
}
/**
 * Text/voice agent that answers user input with an AI SDK model and can
 * optionally exchange JSON messages with a WebSocket endpoint.
 *
 * Events:
 * - `connected` / `disconnected` — WebSocket lifecycle.
 * - `text` — `{ role, text }` for each user input and each assistant reply.
 * - `tool_start` — `{ name }` for every tool call made while answering
 *   (emitted once generation has finished, covering all steps).
 * - `audio` — the `data` field of an incoming `audio` message.
 * - `error` — a socket error raised after the connection was established.
 *   Only emitted when a listener is registered; otherwise it is logged.
 */
export class VoiceAgent extends EventEmitter {
  private socket?: WebSocket;
  private tools: Record<string, Tool> = {};
  private readonly model: LanguageModel;
  private readonly instructions: string;
  private readonly stopWhen: NonNullable<Parameters<typeof generateText>[0]["stopWhen"]>;
  private readonly endpoint?: string;
  private isConnected = false;

  /**
   * @param options Model, prompt, tools and optional default WebSocket endpoint.
   */
  constructor(options: VoiceAgentOptions) {
    super();
    this.model = options.model;
    this.instructions =
      options.instructions || "You are a helpful voice assistant.";
    this.stopWhen = options.stopWhen || stepCountIs(5);
    this.endpoint = options.endpoint;
    if (options.tools) {
      // Copy so later registerTools() calls never mutate the caller's object.
      this.tools = { ...options.tools };
    }
  }

  /**
   * Attaches message/close handlers to `socket`. Handlers ignore events from a
   * socket that a later `connect()` call has replaced.
   */
  private setupListeners(socket: WebSocket): void {
    socket.on("message", async (data) => {
      if (socket !== this.socket) return;
      try {
        const message: unknown = JSON.parse(data.toString());
        if (typeof message !== "object" || message === null) {
          throw new TypeError("Expected a JSON object message");
        }
        const { type, text, data: payload } = message as {
          type?: unknown;
          text?: unknown;
          data?: unknown;
        };

        if (type === "transcript") {
          // Transcribed text from the client/STT: must be a string before it
          // is handed to the model.
          if (typeof text !== "string") {
            throw new TypeError("transcript message requires a string `text`");
          }
          await this.processUserInput(text);
        } else if (type === "audio") {
          this.emit("audio", payload);
        }
      } catch (err) {
        console.error("Failed to process message:", err);
      }
    });

    socket.on("close", () => {
      if (socket !== this.socket) return;
      this.markDisconnected();
    });
  }

  /** Records the disconnected state and notifies listeners. */
  private markDisconnected(): void {
    console.log("Disconnected");
    this.isConnected = false;
    this.emit("disconnected");
  }

  /**
   * Closes the current socket (if any) so a new connection can replace it.
   * The old socket keeps its listeners; they become no-ops because it is no
   * longer `this.socket`, and its `error` listener still absorbs any error
   * raised while it shuts down.
   */
  private closeCurrentSocket(): void {
    const previous = this.socket;
    if (!previous) return;
    const wasConnected = this.isConnected;
    this.socket = undefined;
    previous.close();
    if (wasConnected) {
      this.markDisconnected();
    }
  }

  /** Surfaces a post-connection socket error without crashing the process. */
  private reportSocketError(error: Error): void {
    // Emitting "error" with no listener would throw, so fall back to logging.
    if (this.listenerCount("error") > 0) {
      this.emit("error", error);
    } else {
      console.error("WebSocket error:", error);
    }
  }

  /** Sends `payload` as JSON when the socket is open; otherwise does nothing. */
  private sendJson(payload: unknown): void {
    const socket = this.socket;
    if (!socket || !this.isConnected || socket.readyState !== WebSocket.OPEN) {
      return;
    }
    socket.send(JSON.stringify(payload));
  }

  /**
   * Adds tools to the agent; entries with an existing name are replaced.
   *
   * @param tools AI SDK tools keyed by tool name.
   */
  public registerTools(tools: Record<string, Tool>) {
    this.tools = { ...this.tools, ...tools };
  }

  /**
   * Opens the WebSocket connection, replacing any existing one.
   *
   * @param url Target URL. Falls back to the configured `endpoint`, then to
   *   `ws://localhost:8080`.
   * @returns A promise that resolves once the socket is open and rejects if
   *   the socket errors before opening.
   */
  public async connect(url?: string): Promise<void> {
    return new Promise((resolve, reject) => {
      try {
        // Use provided URL, configured endpoint, or default URL
        const wsUrl = url || this.endpoint || "ws://localhost:8080";

        this.closeCurrentSocket();
        const socket = new WebSocket(wsUrl);
        this.socket = socket;
        this.setupListeners(socket);

        let opened = false;
        socket.once("open", () => {
          opened = true;
          this.isConnected = true;
          this.emit("connected");
          resolve();
        });

        // Persistent listener: before "open" it fails the connect() promise,
        // afterwards it reports the error instead of leaving it unhandled.
        socket.on("error", (error) => {
          if (!opened) {
            reject(error);
            return;
          }
          if (socket === this.socket) {
            this.reportSocketError(error);
          }
        });
      } catch (error) {
        reject(error);
      }
    });
  }

  /**
   * Processes `text` as user input. Works without a WebSocket connection.
   *
   * @param text The user's message.
   * @returns A promise that rejects if generation fails.
   */
  public async sendText(text: string): Promise<void> {
    await this.processUserInput(text);
  }

  /**
   * Sends an `audio` message over the socket; a no-op when not connected.
   *
   * @param audioData Audio payload, forwarded verbatim in the `data` field.
   */
  public sendAudio(audioData: string): void {
    this.sendJson({
      type: "audio",
      data: audioData,
    });
  }

  /**
   * Runs one user turn: emits the user text, generates a reply, emits tool
   * and assistant events, and forwards the response over the socket when
   * connected.
   */
  private async processUserInput(text: string): Promise<void> {
    // Emit text event for incoming user input
    this.emit("text", { role: "user", text });

    const result = await generateText({
      model: this.model,
      system: this.instructions,
      prompt: text,
      tools: this.tools,
      stopWhen: this.stopWhen,
    });

    // `result.toolCalls` / `result.toolResults` only describe the last step,
    // which in a multi-step run is normally the final text answer. Collect
    // them from every step so earlier tool activity is not lost.
    const toolCalls = result.steps.flatMap((step) => step.toolCalls);
    const toolResults = result.steps.flatMap((step) => step.toolResults);

    for (const toolCall of toolCalls) {
      this.emit("tool_start", { name: toolCall.toolName });
    }

    // Emit text event for assistant response
    this.emit("text", { role: "assistant", text: result.text });

    // Send the response back (either text to be TTSed or tool results)
    this.sendJson({
      type: "response",
      text: result.text,
      toolCalls,
      toolResults,
    });
  }

  /** Placeholder entry point: currently only logs a startup message. */
  startListening() {
    console.log("Starting voice agent...");
  }
}

1
src/index.ts Normal file
View File

@@ -0,0 +1 @@
export { VoiceAgent } from "./VoiceAgent";

View File

19
tsconfig.json Normal file
View File

@@ -0,0 +1,19 @@
{
"compilerOptions": {
"target": "ES2024",
"module": "commonjs",
"lib": ["ES2024"],
"types": ["node", "ws"],
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"strict": true,
"skipLibCheck": true,
"outDir": "./dist",
"rootDir": "./src",
"declaration": true,
"declarationMap": true,
"sourceMap": true
},
"include": ["src/**/*"]
}