mirror of
https://github.com/Bijit-Mondal/VoiceAgent.git
synced 2026-03-02 18:36:39 +00:00
Refactor VoiceAgent: Extract types and default configurations into separate types.ts file; remove unused StreamBuffer file
This commit is contained in:
1692
src/VideoAgent.ts
Normal file
1692
src/VideoAgent.ts
Normal file
File diff suppressed because it is too large
Load Diff
@@ -11,42 +11,14 @@ import {
|
||||
type TranscriptionModel,
|
||||
type SpeechModel,
|
||||
} from "ai";
|
||||
|
||||
/**
 * A unit of text queued for text-to-speech conversion.
 */
interface SpeechChunk {
  /** Numeric identifier of this chunk. */
  id: number;
  /** The text that is to be converted to speech. */
  text: string;
  /**
   * Optional promise for the chunk's audio, resolving to raw bytes or `null`.
   * NOTE(review): when it resolves to `null` is not visible here — confirm
   * against the code that creates the promise.
   */
  audioPromise?: Promise<Uint8Array | null>;
}
|
||||
|
||||
/**
 * Configuration for streaming speech behavior: how text is split into
 * chunks and how many text-to-speech requests may be issued at once.
 */
interface StreamingSpeechConfig {
  /** Minimum characters before generating speech for a chunk */
  minChunkSize: number;
  /** Maximum characters per chunk (will split at sentence boundary before this) */
  maxChunkSize: number;
  /** Whether to enable parallel TTS generation */
  parallelGeneration: boolean;
  /** Maximum number of parallel TTS requests */
  maxParallelRequests: number;
}
|
||||
|
||||
/**
 * Configuration for conversation history memory management.
 * Both limits treat `0` as "unlimited".
 */
interface HistoryConfig {
  /** Maximum number of messages to keep in history. When exceeded, oldest messages are trimmed. Set to 0 for unlimited. */
  maxMessages: number;
  /** Maximum total character count across all messages. When exceeded, oldest messages are trimmed. Set to 0 for unlimited. */
  maxTotalChars: number;
}
|
||||
|
||||
/** Default maximum audio input size: 10 MiB, i.e. 10 * 1024 * 1024 = 10,485,760 bytes. */
const DEFAULT_MAX_AUDIO_SIZE = 10 * 1024 * 1024;
|
||||
import {
|
||||
type SpeechChunk,
|
||||
type StreamingSpeechConfig,
|
||||
type HistoryConfig,
|
||||
DEFAULT_STREAMING_SPEECH_CONFIG,
|
||||
DEFAULT_HISTORY_CONFIG,
|
||||
DEFAULT_MAX_AUDIO_SIZE,
|
||||
} from "./types";
|
||||
|
||||
export interface VoiceAgentOptions {
|
||||
model: LanguageModel; // AI SDK Model for chat (e.g., openai('gpt-4o'))
|
||||
@@ -126,17 +98,13 @@ export class VoiceAgent extends EventEmitter {
|
||||
|
||||
// Initialize streaming speech config with defaults
|
||||
this.streamingSpeechConfig = {
|
||||
minChunkSize: 50,
|
||||
maxChunkSize: 200,
|
||||
parallelGeneration: true,
|
||||
maxParallelRequests: 3,
|
||||
...DEFAULT_STREAMING_SPEECH_CONFIG,
|
||||
...options.streamingSpeech,
|
||||
};
|
||||
|
||||
// Initialize history config with defaults
|
||||
this.historyConfig = {
|
||||
maxMessages: 100,
|
||||
maxTotalChars: 0, // unlimited by default
|
||||
...DEFAULT_HISTORY_CONFIG,
|
||||
...options.history,
|
||||
};
|
||||
}
|
||||
|
||||
21
src/index.ts
21
src/index.ts
@@ -1 +1,22 @@
|
||||
// Agents
|
||||
export { VoiceAgent, type VoiceAgentOptions } from "./VoiceAgent";
|
||||
export {
|
||||
VideoAgent,
|
||||
type VideoAgentOptions,
|
||||
type VideoFrame,
|
||||
type AudioData,
|
||||
type VideoAgentConfig,
|
||||
type FrameContext,
|
||||
type FrameTriggerReason,
|
||||
} from "./VideoAgent";
|
||||
|
||||
// Shared types
|
||||
export {
|
||||
type SpeechChunk,
|
||||
type StreamingSpeechConfig,
|
||||
type HistoryConfig,
|
||||
type StopWhenCondition,
|
||||
DEFAULT_STREAMING_SPEECH_CONFIG,
|
||||
DEFAULT_HISTORY_CONFIG,
|
||||
DEFAULT_MAX_AUDIO_SIZE,
|
||||
} from "./types";
|
||||
|
||||
60
src/types.ts
Normal file
60
src/types.ts
Normal file
@@ -0,0 +1,60 @@
|
||||
import type { streamText } from "ai";
|
||||
|
||||
/**
 * Represents a chunk of text to be converted to speech.
 */
export interface SpeechChunk {
  /** Numeric identifier of this chunk. */
  id: number;
  /** The text that is to be converted to speech. */
  text: string;
  /**
   * Optional promise for the chunk's audio, resolving to raw bytes or `null`.
   * NOTE(review): when it resolves to `null` is not visible here — confirm
   * against the code that creates the promise.
   */
  audioPromise?: Promise<Uint8Array | null>;
}
|
||||
|
||||
/**
 * Configuration for streaming speech behavior: how text is split into
 * chunks and how many text-to-speech requests may be issued at once.
 */
export interface StreamingSpeechConfig {
  /** Minimum characters before generating speech for a chunk */
  minChunkSize: number;
  /** Maximum characters per chunk (will split at sentence boundary before this) */
  maxChunkSize: number;
  /** Whether to enable parallel TTS generation */
  parallelGeneration: boolean;
  /** Maximum number of parallel TTS requests */
  maxParallelRequests: number;
}
|
||||
|
||||
/**
 * Configuration for conversation history memory management.
 * Both limits treat `0` as "unlimited".
 */
export interface HistoryConfig {
  /** Maximum number of messages to keep in history. When exceeded, oldest messages are trimmed. Set to 0 for unlimited. */
  maxMessages: number;
  /** Maximum total character count across all messages. When exceeded, oldest messages are trimmed. Set to 0 for unlimited. */
  maxTotalChars: number;
}
|
||||
|
||||
/**
 * Default streaming speech configuration.
 *
 * The VoiceAgent constructor spreads these values first and then
 * `options.streamingSpeech`, so any field supplied by the caller overrides
 * the default listed here.
 */
export const DEFAULT_STREAMING_SPEECH_CONFIG: StreamingSpeechConfig = {
  minChunkSize: 50, // characters
  maxChunkSize: 200, // characters
  parallelGeneration: true,
  maxParallelRequests: 3,
};
|
||||
|
||||
/**
 * Default history configuration.
 *
 * The VoiceAgent constructor spreads these values first and then
 * `options.history`, so any field supplied by the caller overrides the
 * default listed here.
 */
export const DEFAULT_HISTORY_CONFIG: HistoryConfig = {
  maxMessages: 100,
  maxTotalChars: 0, // unlimited by default
};
|
||||
|
||||
/** Default maximum audio input size: 10 MiB, i.e. 10 * 1024 * 1024 = 10,485,760 bytes. */
export const DEFAULT_MAX_AUDIO_SIZE = 10 * 1024 * 1024;
|
||||
|
||||
/**
 * Type of the `stopWhen` option accepted by `streamText`, with `null` and
 * `undefined` excluded.
 *
 * It is derived from the first parameter of `streamText` rather than
 * redeclared, so it follows whatever the imported `streamText` signature
 * declares.
 */
export type StopWhenCondition = NonNullable<
  Parameters<typeof streamText>[0]["stopWhen"]
>;
|
||||
Reference in New Issue
Block a user