Files
boris/src/hooks/useTextToSpeech.ts

244 lines
7.4 KiB
TypeScript

import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
// Web Speech API types
type SpeechSynthesisVoice = {
name: string
voiceURI: string
lang: string
localService: boolean
default: boolean
}
export interface UseTTSOptions {
defaultLang?: string
defaultRate?: number
defaultPitch?: number
defaultVolume?: number
}
export interface UseTTS {
supported: boolean
speaking: boolean
paused: boolean
voices: SpeechSynthesisVoice[]
voice: SpeechSynthesisVoice | null
rate: number
pitch: number
volume: number
setVoice: (v: SpeechSynthesisVoice | null) => void
setRate: (r: number) => void
setPitch: (p: number) => void
setVolume: (v: number) => void
speak: (text: string, langOverride?: string) => void
pause: () => void
resume: () => void
stop: () => void
}
export function useTextToSpeech(options: UseTTSOptions = {}): UseTTS {
const synth = typeof window !== 'undefined' ? window.speechSynthesis : undefined
const supported = !!synth
const [voices, setVoices] = useState<SpeechSynthesisVoice[]>([])
const [voice, setVoice] = useState<SpeechSynthesisVoice | null>(null)
const [speaking, setSpeaking] = useState(false)
const [paused, setPaused] = useState(false)
const [rate, setRate] = useState(options.defaultRate ?? 2.1)
const [pitch, setPitch] = useState(options.defaultPitch ?? 1)
const [volume, setVolume] = useState(options.defaultVolume ?? 1)
const defaultLang = options.defaultLang || (typeof navigator !== 'undefined' ? navigator.language : 'en')
const utteranceRef = useRef<SpeechSynthesisUtterance | null>(null)
const spokenTextRef = useRef<string>('')
const charIndexRef = useRef<number>(0)
// Update rate when defaultRate option changes
useEffect(() => {
if (options.defaultRate !== undefined) {
console.debug('[tts] defaultRate changed ->', options.defaultRate)
setRate(options.defaultRate)
}
}, [options.defaultRate])
// Load voices (async in many browsers)
useEffect(() => {
if (!supported) return
const load = () => {
const v = synth!.getVoices()
setVoices(v)
if (!voice && v.length) {
const byLang = v.find(x => x.lang?.toLowerCase().startsWith(defaultLang.toLowerCase()))
setVoice(byLang || v[0] || null)
console.debug('[tts] voices loaded', { total: v.length, picked: (byLang || v[0] || null)?.lang })
}
}
load()
const handleVoicesChanged = () => load()
synth!.addEventListener('voiceschanged', handleVoicesChanged)
return () => {
synth!.removeEventListener('voiceschanged', handleVoicesChanged)
}
}, [supported, defaultLang, voice, synth])
const createUtterance = (text: string): SpeechSynthesisUtterance => {
const SpeechSynthesisUtteranceConstructor = (window as Window & typeof globalThis).SpeechSynthesisUtterance
const u = new SpeechSynthesisUtteranceConstructor(text) as SpeechSynthesisUtterance
u.lang = voice?.lang || defaultLang
if (voice) u.voice = voice
u.rate = rate
u.pitch = pitch
u.volume = volume
const self = u
u.onstart = () => {
if (utteranceRef.current !== self) return
console.debug('[tts] onstart')
setSpeaking(true)
setPaused(false)
}
u.onpause = () => {
if (utteranceRef.current !== self) return
console.debug('[tts] onpause')
setPaused(true)
}
u.onresume = () => {
if (utteranceRef.current !== self) return
console.debug('[tts] onresume')
setPaused(false)
}
u.onend = () => {
if (utteranceRef.current !== self) return
console.debug('[tts] onend')
setSpeaking(false)
setPaused(false)
utteranceRef.current = null
}
u.onerror = () => {
if (utteranceRef.current !== self) return
console.debug('[tts] onerror')
setSpeaking(false)
setPaused(false)
utteranceRef.current = null
}
u.onboundary = (ev: SpeechSynthesisEvent) => {
if (utteranceRef.current !== self) return
if (typeof ev.charIndex === 'number') {
const newIndex = ev.charIndex
if (newIndex > charIndexRef.current) {
charIndexRef.current = newIndex
}
}
}
return u
}
const stop = useCallback(() => {
if (!supported) return
console.debug('[tts] stop')
synth!.cancel()
setSpeaking(false)
setPaused(false)
utteranceRef.current = null
charIndexRef.current = 0
spokenTextRef.current = ''
}, [supported, synth])
const speak = useCallback((text: string, langOverride?: string) => {
if (!supported || !text?.trim()) return
console.debug('[tts] speak', { len: text.length, rate })
synth!.cancel()
spokenTextRef.current = text
charIndexRef.current = 0
const u = createUtterance(text)
if (langOverride) u.lang = langOverride
utteranceRef.current = u
synth!.speak(u)
}, [supported, synth, voice, rate, pitch, volume, defaultLang])
const pause = useCallback(() => {
if (!supported) return
if (synth!.speaking && !synth!.paused) {
console.debug('[tts] pause')
synth!.pause()
setPaused(true)
}
}, [supported, synth])
const resume = useCallback(() => {
if (!supported) return
if (synth!.speaking && synth!.paused) {
console.debug('[tts] resume')
synth!.resume()
setPaused(false)
}
}, [supported, synth])
// Update rate in real-time: while speaking, restart from last boundary with new rate.
useEffect(() => {
if (!supported) return
if (!utteranceRef.current) return
console.debug('[tts] rate change', { rate, speaking: synth!.speaking, paused: synth!.paused, charIndex: charIndexRef.current })
if (synth!.speaking && !synth!.paused) {
const fullText = spokenTextRef.current
const startIndex = Math.max(0, Math.min(charIndexRef.current, fullText.length - 1))
const remainingText = fullText.slice(startIndex)
console.debug('[tts] restart at new rate', { startIndex, remainingLen: remainingText.length })
synth!.cancel()
const u = createUtterance(remainingText)
utteranceRef.current = u
synth!.speak(u)
return
}
if (utteranceRef.current) {
utteranceRef.current.rate = rate
}
}, [rate, supported, synth])
const updateRate = useCallback((newRate: number) => {
setRate(newRate)
if (!supported) return
if (!utteranceRef.current) return
if (synth!.speaking && !synth!.paused) {
const fullText = spokenTextRef.current
const startIndex = Math.max(0, Math.min(charIndexRef.current, fullText.length - 1))
const remainingText = fullText.slice(startIndex)
console.debug('[tts] updateRate -> restart', { newRate, startIndex, remainingLen: remainingText.length })
synth!.cancel()
const u = createUtterance(remainingText)
// ensure the new rate is applied immediately on the new utterance
u.rate = newRate
utteranceRef.current = u
synth!.speak(u)
} else if (utteranceRef.current) {
console.debug('[tts] updateRate -> set on utterance', { newRate })
utteranceRef.current.rate = newRate
}
}, [supported, synth, createUtterance])
// stop TTS when unmounting
useEffect(() => stop, [stop])
return useMemo(() => ({
supported,
speaking,
paused,
voices,
voice,
rate,
setRate: updateRate,
pitch, setPitch,
volume, setVolume,
setVoice,
speak, pause, resume, stop
}), [supported, speaking, paused, voices, voice, rate, updateRate, pitch, volume, setVoice, speak, pause, resume, stop])
}