diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ec7eb15 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +.env + +node_modules + +.marscode + +dist \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..526fc8f --- /dev/null +++ b/README.md @@ -0,0 +1,60 @@ +# voice-agent-ai-sdk + +Minimal voice/text agent SDK built on AI SDK with optional WebSocket transport. + +## Current status + +- Text flow works via `sendText()` (no WebSocket required). +- WebSocket flow works when `connect()` is used with a running WS endpoint. +- Voice streaming is not implemented yet. + +## Prerequisites + +- Node.js 20+ +- pnpm +- OpenAI API key + +## Setup + +1. Install dependencies: + + pnpm install + +2. Configure environment variables in `.env`: + + OPENAI_API_KEY=your_openai_api_key + VOICE_WS_ENDPOINT=ws://localhost:8080 + +## Run (text-only check) + +This validates model + tool calls without requiring WebSocket: + +pnpm demo + +Expected logs include `text` events and optional `tool_start`. + +## Run (WebSocket check) + +1. Start local WS server: + + pnpm ws:server + +2. In another terminal, run demo: + + pnpm demo + +The demo will: +- run `sendText()` first (text-only sanity check), then +- connect to `VOICE_WS_ENDPOINT` if provided. + +## Scripts + +- `pnpm build` – build TypeScript +- `pnpm dev` – watch TypeScript +- `pnpm demo` – run demo client +- `pnpm ws:server` – run local test WebSocket server + +## Notes + +- If `VOICE_WS_ENDPOINT` is empty, WebSocket connect is skipped. +- The sample WS server sends a mock `transcript` message for end-to-end testing. diff --git a/example/demo.ts b/example/demo.ts new file mode 100644 index 0000000..032fb64 --- /dev/null +++ b/example/demo.ts @@ -0,0 +1,58 @@ +import "dotenv/config"; +import { VoiceAgent } from "../src"; +import { tool } from "ai"; +import { z } from "zod"; +import { openai } from "@ai-sdk/openai"; + +// 1. 
Define Tools using standard AI SDK
+const weatherTool = tool({
+  description: 'Get the weather in a location',
+  inputSchema: z.object({
+    location: z.string().describe('The location to get the weather for'),
+  }),
+  // Mock forecast: 72 shifted by a random whole number in [-10, 10].
+  execute: async ({ location }) => {
+    const offset = Math.floor(Math.random() * 21) - 10;
+    return { location, temperature: 72 + offset };
+  },
+});
+
+// 2. Initialize Agent
+const endpoint = process.env.VOICE_WS_ENDPOINT;
+const agent = new VoiceAgent({
+  model: openai('gpt-4o'),
+  instructions: "You are a helpful voice assistant. Use tools when needed.",
+  endpoint,
+  tools: { getWeather: weatherTool }, // AI SDK tools are passed as-is
+});
+
+// 3. Handle Events
+agent.on("connected", () => {
+  console.log("Connected to WebSocket");
+});
+
+// Incoming audio from the AI (play this to the user)
+agent.on("audio", (base64Audio: string) => {
+  // process.stdout.write(Buffer.from(base64Audio, 'base64'));
+});
+
+// Logs
+agent.on("text", ({ role, text }: { role: string; text: string }) => {
+  console.log(`${role}: ${text}`);
+});
+agent.on("tool_start", (event: { name: string }) => {
+  console.log(`[System] Calling ${event.name}...`);
+});
+
+// 4. Start (named async entry point; its promise is handled below)
+async function main(): Promise<void> {
+  // Text-only sanity check first; the WebSocket leg below is optional.
+  await agent.sendText("What is the weather in Berlin?");
+  if (!endpoint) return;
+  await agent.connect(endpoint);
+  console.log("Agent connected successfully");
+}
+main().catch((error) => console.error("Agent run failed:", error));
+
+// 5. 
Simulate sending audio (in a real app, stream microphone data here) +// agent.sendAudio("Base64EncodedPCM16AudioData..."); \ No newline at end of file
diff --git a/example/ws-server.ts b/example/ws-server.ts
new file mode 100644
index 0000000..1984b0c
--- /dev/null
+++ b/example/ws-server.ts
@@ -0,0 +1,59 @@
+import "dotenv/config";
+import { WebSocketServer } from "ws";
+
+// Local test server for the demo client: binds to the host/port taken from
+// VOICE_WS_ENDPOINT and pushes one sample transcript to each new client.
+const endpoint = process.env.VOICE_WS_ENDPOINT || "ws://localhost:8080";
+const url = new URL(endpoint);
+const port = Number(url.port || 8080);
+const host = url.hostname || "localhost";
+
+const wss = new WebSocketServer({ port, host });
+
+wss.on("listening", () => {
+  console.log(`[ws-server] listening on ${endpoint}`);
+});
+
+// An EventEmitter "error" with no listener is thrown as an uncaught exception,
+// so report bind failures (e.g. port already in use) and mark the run as failed.
+wss.on("error", (error) => {
+  console.error("[ws-server] server error:", error);
+  process.exitCode = 1;
+});
+
+wss.on("connection", (socket) => {
+  console.log("[ws-server] client connected");
+
+  // Send a sample transcript to test text pipeline end-to-end.
+  const sampleTimer = setTimeout(() => {
+    // The client may already be gone after the delay; only write to an open socket.
+    if (socket.readyState !== socket.OPEN) return;
+    socket.send(
+      JSON.stringify({
+        type: "transcript",
+        text: "What is the weather in Berlin?",
+      }),
+    );
+  }, 500);
+
+  socket.on("message", (data) => {
+    const raw = data.toString();
+    try {
+      // Log parsed JSON when possible; fall back to the raw payload otherwise.
+      const msg: unknown = JSON.parse(raw);
+      console.log("[ws-server] <-", msg);
+    } catch {
+      console.log("[ws-server] <- raw", raw);
+    }
+  });
+
+  // Per-socket errors also need a listener, for the same reason as above.
+  socket.on("error", (error) => {
+    console.error("[ws-server] socket error:", error);
+  });
+
+  socket.on("close", () => {
+    clearTimeout(sampleTimer);
+    console.log("[ws-server] client disconnected");
+  });
+});
diff --git a/package.json b/package.json new file mode 100644 index 0000000..7ea31fc --- /dev/null +++ b/package.json @@ -0,0 +1,37 @@ +{ + "name": "voice-agent-ai-sdk", + "version": "0.0.1", + "description": "Voice AI Agent with ai-sdk", + "main": "src/index.ts", + "scripts": { + "build": "tsc", + "dev": "tsc -w", + "demo": "tsx example/demo.ts", + "ws:server": "tsx example/ws-server.ts", + "prepublishOnly": "pnpm build" + }, + "keywords": [ + "voice", + "websocket", + "ai", + "agent", + "tools" + ], + "author": "Bijit Mondal", + "license": "MIT", + "packageManager": "pnpm@10.27.0", + "devDependencies": { + "@ai-sdk/openai": "^3.0.28", + "@types/node": 
"^25.2.3", + "@types/ws": "^8.18.1", + "tsx": "^4.20.5", + "typescript": "^5.9.3" + }, + "dependencies": { + "ai": "^6.0.85", + "dotenv": "^17.2.3", + "ws": "^8.19.0", + "zod": "^4.3.6", + "zod-to-json-schema": "^3.25.1" + } +} \ No newline at end of file diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml new file mode 100644 index 0000000..c33a08e --- /dev/null +++ b/pnpm-lock.yaml @@ -0,0 +1,493 @@ +lockfileVersion: '9.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + +importers: + + .: + dependencies: + ai: + specifier: ^6.0.85 + version: 6.0.85(zod@4.3.6) + dotenv: + specifier: ^17.2.3 + version: 17.3.1 + ws: + specifier: ^8.19.0 + version: 8.19.0 + zod: + specifier: ^4.3.6 + version: 4.3.6 + zod-to-json-schema: + specifier: ^3.25.1 + version: 3.25.1(zod@4.3.6) + devDependencies: + '@ai-sdk/openai': + specifier: ^3.0.28 + version: 3.0.28(zod@4.3.6) + '@types/node': + specifier: ^25.2.3 + version: 25.2.3 + '@types/ws': + specifier: ^8.18.1 + version: 8.18.1 + tsx: + specifier: ^4.20.5 + version: 4.21.0 + typescript: + specifier: ^5.9.3 + version: 5.9.3 + +packages: + + '@ai-sdk/gateway@3.0.45': + resolution: {integrity: sha512-ZB6kHV+D8mLCRnkpWotLCV/rZK4NiODxx4Kv7JdT9QmQknbG/scbE4iyoT4JLFdULA8Y/IVbMvyE0Nwq3Dceqw==} + engines: {node: '>=18'} + peerDependencies: + zod: ^3.25.76 || ^4.1.8 + + '@ai-sdk/openai@3.0.28': + resolution: {integrity: sha512-m2Dm6fwUzMksqnPrd5f/WZ4cZ9GTZHpzsVO6jxKQwwc84gFHzAFZmUCG0C5mV7XlPOw4mwaiYV3HfLiEfphvvA==} + engines: {node: '>=18'} + peerDependencies: + zod: ^3.25.76 || ^4.1.8 + + '@ai-sdk/provider-utils@4.0.15': + resolution: {integrity: sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w==} + engines: {node: '>=18'} + peerDependencies: + zod: ^3.25.76 || ^4.1.8 + + '@ai-sdk/provider@3.0.8': + resolution: {integrity: sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ==} + engines: {node: '>=18'} + + '@esbuild/aix-ppc64@0.27.3': 
+ resolution: {integrity: sha512-9fJMTNFTWZMh5qwrBItuziu834eOCUcEqymSH7pY+zoMVEZg3gcPuBNxH1EvfVYe9h0x/Ptw8KBzv7qxb7l8dg==} + engines: {node: '>=18'} + cpu: [ppc64] + os: [aix] + + '@esbuild/android-arm64@0.27.3': + resolution: {integrity: sha512-YdghPYUmj/FX2SYKJ0OZxf+iaKgMsKHVPF1MAq/P8WirnSpCStzKJFjOjzsW0QQ7oIAiccHdcqjbHmJxRb/dmg==} + engines: {node: '>=18'} + cpu: [arm64] + os: [android] + + '@esbuild/android-arm@0.27.3': + resolution: {integrity: sha512-i5D1hPY7GIQmXlXhs2w8AWHhenb00+GxjxRncS2ZM7YNVGNfaMxgzSGuO8o8SJzRc/oZwU2bcScvVERk03QhzA==} + engines: {node: '>=18'} + cpu: [arm] + os: [android] + + '@esbuild/android-x64@0.27.3': + resolution: {integrity: sha512-IN/0BNTkHtk8lkOM8JWAYFg4ORxBkZQf9zXiEOfERX/CzxW3Vg1ewAhU7QSWQpVIzTW+b8Xy+lGzdYXV6UZObQ==} + engines: {node: '>=18'} + cpu: [x64] + os: [android] + + '@esbuild/darwin-arm64@0.27.3': + resolution: {integrity: sha512-Re491k7ByTVRy0t3EKWajdLIr0gz2kKKfzafkth4Q8A5n1xTHrkqZgLLjFEHVD+AXdUGgQMq+Godfq45mGpCKg==} + engines: {node: '>=18'} + cpu: [arm64] + os: [darwin] + + '@esbuild/darwin-x64@0.27.3': + resolution: {integrity: sha512-vHk/hA7/1AckjGzRqi6wbo+jaShzRowYip6rt6q7VYEDX4LEy1pZfDpdxCBnGtl+A5zq8iXDcyuxwtv3hNtHFg==} + engines: {node: '>=18'} + cpu: [x64] + os: [darwin] + + '@esbuild/freebsd-arm64@0.27.3': + resolution: {integrity: sha512-ipTYM2fjt3kQAYOvo6vcxJx3nBYAzPjgTCk7QEgZG8AUO3ydUhvelmhrbOheMnGOlaSFUoHXB6un+A7q4ygY9w==} + engines: {node: '>=18'} + cpu: [arm64] + os: [freebsd] + + '@esbuild/freebsd-x64@0.27.3': + resolution: {integrity: sha512-dDk0X87T7mI6U3K9VjWtHOXqwAMJBNN2r7bejDsc+j03SEjtD9HrOl8gVFByeM0aJksoUuUVU9TBaZa2rgj0oA==} + engines: {node: '>=18'} + cpu: [x64] + os: [freebsd] + + '@esbuild/linux-arm64@0.27.3': + resolution: {integrity: sha512-sZOuFz/xWnZ4KH3YfFrKCf1WyPZHakVzTiqji3WDc0BCl2kBwiJLCXpzLzUBLgmp4veFZdvN5ChW4Eq/8Fc2Fg==} + engines: {node: '>=18'} + cpu: [arm64] + os: [linux] + + '@esbuild/linux-arm@0.27.3': + resolution: {integrity: 
sha512-s6nPv2QkSupJwLYyfS+gwdirm0ukyTFNl3KTgZEAiJDd+iHZcbTPPcWCcRYH+WlNbwChgH2QkE9NSlNrMT8Gfw==} + engines: {node: '>=18'} + cpu: [arm] + os: [linux] + + '@esbuild/linux-ia32@0.27.3': + resolution: {integrity: sha512-yGlQYjdxtLdh0a3jHjuwOrxQjOZYD/C9PfdbgJJF3TIZWnm/tMd/RcNiLngiu4iwcBAOezdnSLAwQDPqTmtTYg==} + engines: {node: '>=18'} + cpu: [ia32] + os: [linux] + + '@esbuild/linux-loong64@0.27.3': + resolution: {integrity: sha512-WO60Sn8ly3gtzhyjATDgieJNet/KqsDlX5nRC5Y3oTFcS1l0KWba+SEa9Ja1GfDqSF1z6hif/SkpQJbL63cgOA==} + engines: {node: '>=18'} + cpu: [loong64] + os: [linux] + + '@esbuild/linux-mips64el@0.27.3': + resolution: {integrity: sha512-APsymYA6sGcZ4pD6k+UxbDjOFSvPWyZhjaiPyl/f79xKxwTnrn5QUnXR5prvetuaSMsb4jgeHewIDCIWljrSxw==} + engines: {node: '>=18'} + cpu: [mips64el] + os: [linux] + + '@esbuild/linux-ppc64@0.27.3': + resolution: {integrity: sha512-eizBnTeBefojtDb9nSh4vvVQ3V9Qf9Df01PfawPcRzJH4gFSgrObw+LveUyDoKU3kxi5+9RJTCWlj4FjYXVPEA==} + engines: {node: '>=18'} + cpu: [ppc64] + os: [linux] + + '@esbuild/linux-riscv64@0.27.3': + resolution: {integrity: sha512-3Emwh0r5wmfm3ssTWRQSyVhbOHvqegUDRd0WhmXKX2mkHJe1SFCMJhagUleMq+Uci34wLSipf8Lagt4LlpRFWQ==} + engines: {node: '>=18'} + cpu: [riscv64] + os: [linux] + + '@esbuild/linux-s390x@0.27.3': + resolution: {integrity: sha512-pBHUx9LzXWBc7MFIEEL0yD/ZVtNgLytvx60gES28GcWMqil8ElCYR4kvbV2BDqsHOvVDRrOxGySBM9Fcv744hw==} + engines: {node: '>=18'} + cpu: [s390x] + os: [linux] + + '@esbuild/linux-x64@0.27.3': + resolution: {integrity: sha512-Czi8yzXUWIQYAtL/2y6vogER8pvcsOsk5cpwL4Gk5nJqH5UZiVByIY8Eorm5R13gq+DQKYg0+JyQoytLQas4dA==} + engines: {node: '>=18'} + cpu: [x64] + os: [linux] + + '@esbuild/netbsd-arm64@0.27.3': + resolution: {integrity: sha512-sDpk0RgmTCR/5HguIZa9n9u+HVKf40fbEUt+iTzSnCaGvY9kFP0YKBWZtJaraonFnqef5SlJ8/TiPAxzyS+UoA==} + engines: {node: '>=18'} + cpu: [arm64] + os: [netbsd] + + '@esbuild/netbsd-x64@0.27.3': + resolution: {integrity: 
sha512-P14lFKJl/DdaE00LItAukUdZO5iqNH7+PjoBm+fLQjtxfcfFE20Xf5CrLsmZdq5LFFZzb5JMZ9grUwvtVYzjiA==} + engines: {node: '>=18'} + cpu: [x64] + os: [netbsd] + + '@esbuild/openbsd-arm64@0.27.3': + resolution: {integrity: sha512-AIcMP77AvirGbRl/UZFTq5hjXK+2wC7qFRGoHSDrZ5v5b8DK/GYpXW3CPRL53NkvDqb9D+alBiC/dV0Fb7eJcw==} + engines: {node: '>=18'} + cpu: [arm64] + os: [openbsd] + + '@esbuild/openbsd-x64@0.27.3': + resolution: {integrity: sha512-DnW2sRrBzA+YnE70LKqnM3P+z8vehfJWHXECbwBmH/CU51z6FiqTQTHFenPlHmo3a8UgpLyH3PT+87OViOh1AQ==} + engines: {node: '>=18'} + cpu: [x64] + os: [openbsd] + + '@esbuild/openharmony-arm64@0.27.3': + resolution: {integrity: sha512-NinAEgr/etERPTsZJ7aEZQvvg/A6IsZG/LgZy+81wON2huV7SrK3e63dU0XhyZP4RKGyTm7aOgmQk0bGp0fy2g==} + engines: {node: '>=18'} + cpu: [arm64] + os: [openharmony] + + '@esbuild/sunos-x64@0.27.3': + resolution: {integrity: sha512-PanZ+nEz+eWoBJ8/f8HKxTTD172SKwdXebZ0ndd953gt1HRBbhMsaNqjTyYLGLPdoWHy4zLU7bDVJztF5f3BHA==} + engines: {node: '>=18'} + cpu: [x64] + os: [sunos] + + '@esbuild/win32-arm64@0.27.3': + resolution: {integrity: sha512-B2t59lWWYrbRDw/tjiWOuzSsFh1Y/E95ofKz7rIVYSQkUYBjfSgf6oeYPNWHToFRr2zx52JKApIcAS/D5TUBnA==} + engines: {node: '>=18'} + cpu: [arm64] + os: [win32] + + '@esbuild/win32-ia32@0.27.3': + resolution: {integrity: sha512-QLKSFeXNS8+tHW7tZpMtjlNb7HKau0QDpwm49u0vUp9y1WOF+PEzkU84y9GqYaAVW8aH8f3GcBck26jh54cX4Q==} + engines: {node: '>=18'} + cpu: [ia32] + os: [win32] + + '@esbuild/win32-x64@0.27.3': + resolution: {integrity: sha512-4uJGhsxuptu3OcpVAzli+/gWusVGwZZHTlS63hh++ehExkVT8SgiEf7/uC/PclrPPkLhZqGgCTjd0VWLo6xMqA==} + engines: {node: '>=18'} + cpu: [x64] + os: [win32] + + '@opentelemetry/api@1.9.0': + resolution: {integrity: sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==} + engines: {node: '>=8.0.0'} + + '@standard-schema/spec@1.1.0': + resolution: {integrity: sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w==} + + 
'@types/node@25.2.3': + resolution: {integrity: sha512-m0jEgYlYz+mDJZ2+F4v8D1AyQb+QzsNqRuI7xg1VQX/KlKS0qT9r1Mo16yo5F/MtifXFgaofIFsdFMox2SxIbQ==} + + '@types/ws@8.18.1': + resolution: {integrity: sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg==} + + '@vercel/oidc@3.1.0': + resolution: {integrity: sha512-Fw28YZpRnA3cAHHDlkt7xQHiJ0fcL+NRcIqsocZQUSmbzeIKRpwttJjik5ZGanXP+vlA4SbTg+AbA3bP363l+w==} + engines: {node: '>= 20'} + + ai@6.0.85: + resolution: {integrity: sha512-2bP7M+OcNQGSIH8I3jdujUadxj4tAwuHBvLhpmDSlcjRXXry3zNGEajjjRraOjObHMO/Yqa37PJWhPVHIHt2TQ==} + engines: {node: '>=18'} + peerDependencies: + zod: ^3.25.76 || ^4.1.8 + + dotenv@17.3.1: + resolution: {integrity: sha512-IO8C/dzEb6O3F9/twg6ZLXz164a2fhTnEWb95H23Dm4OuN+92NmEAlTrupP9VW6Jm3sO26tQlqyvyi4CsnY9GA==} + engines: {node: '>=12'} + + esbuild@0.27.3: + resolution: {integrity: sha512-8VwMnyGCONIs6cWue2IdpHxHnAjzxnw2Zr7MkVxB2vjmQ2ivqGFb4LEG3SMnv0Gb2F/G/2yA8zUaiL1gywDCCg==} + engines: {node: '>=18'} + hasBin: true + + eventsource-parser@3.0.6: + resolution: {integrity: sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg==} + engines: {node: '>=18.0.0'} + + fsevents@2.3.3: + resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==} + engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} + os: [darwin] + + get-tsconfig@4.13.6: + resolution: {integrity: sha512-shZT/QMiSHc/YBLxxOkMtgSid5HFoauqCE3/exfsEcwg1WkeqjG+V40yBbBrsD+jW2HDXcs28xOfcbm2jI8Ddw==} + + json-schema@0.4.0: + resolution: {integrity: sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA==} + + resolve-pkg-maps@1.0.0: + resolution: {integrity: sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==} + + tsx@4.21.0: + resolution: {integrity: 
sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==} + engines: {node: '>=18.0.0'} + hasBin: true + + typescript@5.9.3: + resolution: {integrity: sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==} + engines: {node: '>=14.17'} + hasBin: true + + undici-types@7.16.0: + resolution: {integrity: sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==} + + ws@8.19.0: + resolution: {integrity: sha512-blAT2mjOEIi0ZzruJfIhb3nps74PRWTCz1IjglWEEpQl5XS/UNama6u2/rjFkDDouqr4L67ry+1aGIALViWjDg==} + engines: {node: '>=10.0.0'} + peerDependencies: + bufferutil: ^4.0.1 + utf-8-validate: '>=5.0.2' + peerDependenciesMeta: + bufferutil: + optional: true + utf-8-validate: + optional: true + + zod-to-json-schema@3.25.1: + resolution: {integrity: sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA==} + peerDependencies: + zod: ^3.25 || ^4 + + zod@4.3.6: + resolution: {integrity: sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==} + +snapshots: + + '@ai-sdk/gateway@3.0.45(zod@4.3.6)': + dependencies: + '@ai-sdk/provider': 3.0.8 + '@ai-sdk/provider-utils': 4.0.15(zod@4.3.6) + '@vercel/oidc': 3.1.0 + zod: 4.3.6 + + '@ai-sdk/openai@3.0.28(zod@4.3.6)': + dependencies: + '@ai-sdk/provider': 3.0.8 + '@ai-sdk/provider-utils': 4.0.15(zod@4.3.6) + zod: 4.3.6 + + '@ai-sdk/provider-utils@4.0.15(zod@4.3.6)': + dependencies: + '@ai-sdk/provider': 3.0.8 + '@standard-schema/spec': 1.1.0 + eventsource-parser: 3.0.6 + zod: 4.3.6 + + '@ai-sdk/provider@3.0.8': + dependencies: + json-schema: 0.4.0 + + '@esbuild/aix-ppc64@0.27.3': + optional: true + + '@esbuild/android-arm64@0.27.3': + optional: true + + '@esbuild/android-arm@0.27.3': + optional: true + + '@esbuild/android-x64@0.27.3': + optional: true + + '@esbuild/darwin-arm64@0.27.3': + optional: true + + '@esbuild/darwin-x64@0.27.3': + 
optional: true + + '@esbuild/freebsd-arm64@0.27.3': + optional: true + + '@esbuild/freebsd-x64@0.27.3': + optional: true + + '@esbuild/linux-arm64@0.27.3': + optional: true + + '@esbuild/linux-arm@0.27.3': + optional: true + + '@esbuild/linux-ia32@0.27.3': + optional: true + + '@esbuild/linux-loong64@0.27.3': + optional: true + + '@esbuild/linux-mips64el@0.27.3': + optional: true + + '@esbuild/linux-ppc64@0.27.3': + optional: true + + '@esbuild/linux-riscv64@0.27.3': + optional: true + + '@esbuild/linux-s390x@0.27.3': + optional: true + + '@esbuild/linux-x64@0.27.3': + optional: true + + '@esbuild/netbsd-arm64@0.27.3': + optional: true + + '@esbuild/netbsd-x64@0.27.3': + optional: true + + '@esbuild/openbsd-arm64@0.27.3': + optional: true + + '@esbuild/openbsd-x64@0.27.3': + optional: true + + '@esbuild/openharmony-arm64@0.27.3': + optional: true + + '@esbuild/sunos-x64@0.27.3': + optional: true + + '@esbuild/win32-arm64@0.27.3': + optional: true + + '@esbuild/win32-ia32@0.27.3': + optional: true + + '@esbuild/win32-x64@0.27.3': + optional: true + + '@opentelemetry/api@1.9.0': {} + + '@standard-schema/spec@1.1.0': {} + + '@types/node@25.2.3': + dependencies: + undici-types: 7.16.0 + + '@types/ws@8.18.1': + dependencies: + '@types/node': 25.2.3 + + '@vercel/oidc@3.1.0': {} + + ai@6.0.85(zod@4.3.6): + dependencies: + '@ai-sdk/gateway': 3.0.45(zod@4.3.6) + '@ai-sdk/provider': 3.0.8 + '@ai-sdk/provider-utils': 4.0.15(zod@4.3.6) + '@opentelemetry/api': 1.9.0 + zod: 4.3.6 + + dotenv@17.3.1: {} + + esbuild@0.27.3: + optionalDependencies: + '@esbuild/aix-ppc64': 0.27.3 + '@esbuild/android-arm': 0.27.3 + '@esbuild/android-arm64': 0.27.3 + '@esbuild/android-x64': 0.27.3 + '@esbuild/darwin-arm64': 0.27.3 + '@esbuild/darwin-x64': 0.27.3 + '@esbuild/freebsd-arm64': 0.27.3 + '@esbuild/freebsd-x64': 0.27.3 + '@esbuild/linux-arm': 0.27.3 + '@esbuild/linux-arm64': 0.27.3 + '@esbuild/linux-ia32': 0.27.3 + '@esbuild/linux-loong64': 0.27.3 + '@esbuild/linux-mips64el': 0.27.3 + 
'@esbuild/linux-ppc64': 0.27.3 + '@esbuild/linux-riscv64': 0.27.3 + '@esbuild/linux-s390x': 0.27.3 + '@esbuild/linux-x64': 0.27.3 + '@esbuild/netbsd-arm64': 0.27.3 + '@esbuild/netbsd-x64': 0.27.3 + '@esbuild/openbsd-arm64': 0.27.3 + '@esbuild/openbsd-x64': 0.27.3 + '@esbuild/openharmony-arm64': 0.27.3 + '@esbuild/sunos-x64': 0.27.3 + '@esbuild/win32-arm64': 0.27.3 + '@esbuild/win32-ia32': 0.27.3 + '@esbuild/win32-x64': 0.27.3 + + eventsource-parser@3.0.6: {} + + fsevents@2.3.3: + optional: true + + get-tsconfig@4.13.6: + dependencies: + resolve-pkg-maps: 1.0.0 + + json-schema@0.4.0: {} + + resolve-pkg-maps@1.0.0: {} + + tsx@4.21.0: + dependencies: + esbuild: 0.27.3 + get-tsconfig: 4.13.6 + optionalDependencies: + fsevents: 2.3.3 + + typescript@5.9.3: {} + + undici-types@7.16.0: {} + + ws@8.19.0: {} + + zod-to-json-schema@3.25.1(zod@4.3.6): + dependencies: + zod: 4.3.6 + + zod@4.3.6: {}
diff --git a/src/VoiceAgent.ts b/src/VoiceAgent.ts
new file mode 100644
index 0000000..e0ec61b
--- /dev/null
+++ b/src/VoiceAgent.ts
@@ -0,0 +1,201 @@
+import { WebSocket } from "ws";
+import { EventEmitter } from "events";
+import { generateText, LanguageModel, stepCountIs, type Tool } from "ai";
+
+/** Options accepted by the {@link VoiceAgent} constructor. */
+export interface VoiceAgentOptions {
+  /** AI SDK model (e.g., openai('gpt-4o')). */
+  model: LanguageModel;
+  /** System prompt; a generic voice-assistant prompt is used when omitted. */
+  instructions?: string;
+  /** Stop condition forwarded to `generateText`; defaults to `stepCountIs(5)`. */
+  stopWhen?: NonNullable<Parameters<typeof generateText>[0]["stopWhen"]>;
+  /** Tools offered to the model, keyed by tool name. */
+  tools?: Record<string, Tool>;
+  /** WebSocket URL used by `connect()` when it is called without a URL. */
+  endpoint?: string;
+}
+
+/**
+ * Text/voice agent that answers user input with `generateText` and can relay
+ * the exchange over an optional WebSocket connection.
+ *
+ * Events emitted: `connected`, `disconnected`, `audio` (payload of an incoming
+ * `audio` message), `text` (`{ role, text }`), `tool_start` (`{ name }`) and
+ * `error` (socket errors after connecting, only when a listener is registered).
+ */
+export class VoiceAgent extends EventEmitter {
+  private socket?: WebSocket;
+  private tools: Record<string, Tool> = {};
+  private model: LanguageModel;
+  private instructions: string;
+  private stopWhen: NonNullable<Parameters<typeof generateText>[0]["stopWhen"]>;
+  private endpoint?: string;
+  private isConnected = false;
+
+  constructor(options: VoiceAgentOptions) {
+    super();
+    this.model = options.model;
+    this.instructions =
+      options.instructions || "You are a helpful voice assistant.";
+    this.stopWhen = options.stopWhen || stepCountIs(5);
+    this.endpoint = options.endpoint;
+    if (options.tools) {
+      // Copy so later registerTools() calls never mutate the caller's object.
+      this.tools = { ...options.tools };
+    }
+  }
+
+  /**
+   * Attaches the long-lived handlers to `socket`. Each handler ignores events
+   * from a socket that a later `connect()` call has replaced.
+   */
+  private setupListeners(socket: WebSocket) {
+    socket.on("message", async (data) => {
+      if (this.socket !== socket) return;
+      try {
+        const message: unknown = JSON.parse(data.toString());
+        if (typeof message !== "object" || message === null) return;
+        const { type, text, data: payload } = message as Record<string, unknown>;
+
+        // Transcribed text from the client/STT; non-string text is ignored.
+        if (type === "transcript" && typeof text === "string") {
+          await this.processUserInput(text);
+        }
+        // Audio data is passed through to listeners untouched.
+        if (type === "audio") {
+          this.emit("audio", payload);
+        }
+      } catch (err) {
+        console.error("Failed to process message:", err);
+      }
+    });
+
+    socket.on("close", () => {
+      if (this.socket !== socket) return;
+      console.log("Disconnected");
+      this.isConnected = false;
+      this.emit("disconnected");
+    });
+
+    // A socket "error" with no listener would be thrown as an uncaught
+    // exception. Errors before "open" are reported by rejecting connect().
+    socket.on("error", (error) => {
+      if (this.socket !== socket || !this.isConnected) return;
+      if (this.listenerCount("error") > 0) {
+        this.emit("error", error);
+      } else {
+        console.error("WebSocket error:", error);
+      }
+    });
+  }
+
+  /** Adds `tools` to the registered set; an existing name is overwritten. */
+  public registerTools(tools: Record<string, Tool>) {
+    this.tools = { ...this.tools, ...tools };
+  }
+
+  /**
+   * Opens the WebSocket connection and resolves once it is open.
+   * Any socket from an earlier call is closed and replaced.
+   *
+   * @param url Target URL; falls back to the configured endpoint, then to
+   *   "ws://localhost:8080".
+   * @returns A promise that rejects if the URL is invalid or the socket
+   *   errors before opening.
+   */
+  public async connect(url?: string): Promise<void> {
+    // `||` on purpose: an empty string should fall through to the next choice.
+    const wsUrl = url || this.endpoint || "ws://localhost:8080";
+
+    const previous = this.socket;
+    if (previous) {
+      this.socket = undefined;
+      if (this.isConnected) {
+        this.isConnected = false;
+        this.emit("disconnected");
+      }
+      previous.close();
+    }
+
+    return new Promise<void>((resolve, reject) => {
+      // A throw from the constructor (malformed URL) rejects this promise.
+      const socket = new WebSocket(wsUrl);
+      this.socket = socket;
+      this.setupListeners(socket);
+
+      socket.once("open", () => {
+        this.isConnected = true;
+        this.emit("connected");
+        resolve();
+      });
+      socket.once("error", reject);
+    });
+  }
+
+  /** Runs `text` through the model as user input; no WebSocket is required. */
+  public async sendText(text: string): Promise<void> {
+    await this.processUserInput(text);
+  }
+
+  /**
+   * Sends `audioData` to the server as an `audio` message.
+   * The call is a no-op while the agent is not connected.
+   */
+  public sendAudio(audioData: string): void {
+    if (this.socket && this.isConnected) {
+      this.socket.send(
+        JSON.stringify({
+          type: "audio",
+          data: audioData,
+        }),
+      );
+    }
+  }
+
+  /**
+   * Generates the assistant reply for `text`, emits the related events and,
+   * when connected, sends the reply over the socket as a `response` message.
+   */
+  private async processUserInput(text: string) {
+    // Emit text event for incoming user input
+    this.emit("text", { role: "user", text });
+
+    const result = await generateText({
+      model: this.model,
+      system: this.instructions,
+      prompt: text,
+      tools: this.tools,
+      stopWhen: this.stopWhen,
+    });
+
+    // `result.toolCalls` / `result.toolResults` describe the final step only,
+    // which in a multi-step run is normally the plain-text answer. Collect
+    // them from every step so tool activity is actually reported.
+    const toolCalls = result.steps.flatMap((step) => step.toolCalls);
+    const toolResults = result.steps.flatMap((step) => step.toolResults);
+
+    for (const toolCall of toolCalls) {
+      this.emit("tool_start", { name: toolCall.toolName });
+    }
+
+    // Emit text event for assistant response
+    this.emit("text", { role: "assistant", text: result.text });
+
+    // Send the response back (either text to be TTSed or tool results)
+    if (this.socket && this.isConnected) {
+      this.socket.send(
+        JSON.stringify({
+          type: "response",
+          text: result.text,
+          toolCalls,
+          toolResults,
+        }),
+      );
+    }
+  }
+
+  /** Placeholder: currently only logs a message. */
+  startListening() {
+    console.log("Starting voice agent...");
+  }
+}
diff --git a/src/index.ts b/src/index.ts new file mode 100644 index 0000000..e4c28c3 --- /dev/null +++ b/src/index.ts @@ -0,0 +1 @@ +export { VoiceAgent } from "./VoiceAgent"; diff --git a/src/utils/StreamBuffer.ts b/src/utils/StreamBuffer.ts new file mode 100644 index 0000000..e69de29 diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..70d7524 --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,19 @@ +{ + "compilerOptions": { + "target": "ES2024", + "module": "commonjs", + "lib": ["ES2024"], + "types": ["node", "ws"], + "esModuleInterop": true, + "forceConsistentCasingInFileNames": true, + "strict": true, + "skipLibCheck": true, + "outDir": "./dist", + "rootDir": "./src", + "declaration": true, + "declarationMap": true, + "sourceMap": true + }, + "include": ["src/**/*"] +} +