feat: Introduce new core components for conversation and speech management

- Added ConversationManager for managing conversation history with configurable limits.
- Implemented InputQueue for serial processing of input items.
- Created SpeechManager for handling text-to-speech generation and streaming.
- Developed StreamProcessor for processing LLM streams and forwarding events.
- Added TranscriptionManager for audio transcription using AI SDK.
- Introduced WebSocketManager for managing WebSocket connections and messaging.
- Updated VoiceAgent to support new architecture and improved socket handling.
- Refactored index files to export new core components.
2026-03-02 18:36:39 +00:00 · 2026-02-23 16:15:49 +05:30
parent 4dd30b89c0
commit 5e7eb469ae
71 changed files with 5175 additions and 19 deletions
--- a/example/frames/frame_00000_2026-02-23T10-41-46-424Z.webp
+++ b/example/frames/frame_00000_2026-02-23T10-41-46-424Z.webp
--- a/example/frames/frame_00001_2026-02-23T10-41-50-271Z.webp
+++ b/example/frames/frame_00001_2026-02-23T10-41-50-271Z.webp
--- a/example/frames/frame_00002_2026-02-23T10-41-51-387Z.webp
+++ b/example/frames/frame_00002_2026-02-23T10-41-51-387Z.webp
--- a/example/frames/frame_00003_2026-02-23T10-41-56-374Z.webp
+++ b/example/frames/frame_00003_2026-02-23T10-41-56-374Z.webp
--- a/example/frames/frame_00004_2026-02-23T10-42-01-379Z.webp
+++ b/example/frames/frame_00004_2026-02-23T10-42-01-379Z.webp
--- a/example/frames/frame_00005_2026-02-23T10-42-06-375Z.webp
+++ b/example/frames/frame_00005_2026-02-23T10-42-06-375Z.webp
--- a/example/frames/frame_00006_2026-02-23T10-42-07-405Z.webp
+++ b/example/frames/frame_00006_2026-02-23T10-42-07-405Z.webp
--- a/example/frames/frame_00007_2026-02-23T10-42-11-278Z.webp
+++ b/example/frames/frame_00007_2026-02-23T10-42-11-278Z.webp
--- a/example/frames/frame_00008_2026-02-23T10-42-11-381Z.webp
+++ b/example/frames/frame_00008_2026-02-23T10-42-11-381Z.webp
--- a/example/frames/frame_00009_2026-02-23T10-42-16-395Z.webp
+++ b/example/frames/frame_00009_2026-02-23T10-42-16-395Z.webp
--- a/example/frames/frame_00010_2026-02-23T10-42-21-373Z.webp
+++ b/example/frames/frame_00010_2026-02-23T10-42-21-373Z.webp
--- a/example/frames/frame_00011_2026-02-23T10-42-26-364Z.webp
+++ b/example/frames/frame_00011_2026-02-23T10-42-26-364Z.webp
--- a/example/ws-server-video.ts
+++ b/example/ws-server-video.ts
@@ -1,7 +1,7 @@
 // ws-server-video.ts
 import "dotenv/config";
 import { WebSocketServer } from "ws";
-import { VideoAgent } from "../src/VideoAgent";   // adjust path
+import { VideoAgent } from "../src/VideoAgent.new";   // adjust path
 import { tool } from "ai";
 import { z } from "zod";
 import { openai } from "@ai-sdk/openai";
@@ -91,7 +91,7 @@ wss.on("connection", (socket) => {
 You can SEE what the user is showing via webcam.
 Describe what you see when it helps answer the question.
 Keep spoken answers concise and natural.`,
-        voice: "alloy",
+        voice: "echo",
        streamingSpeech: {
            minChunkSize: 25,
            maxChunkSize: 140,