Merge pull request #22 from dergigi/tts

feat: Add comprehensive Text-to-Speech (TTS) functionality
This commit is contained in:
Gigi
2025-10-20 23:03:22 +02:00
committed by GitHub
9 changed files with 466 additions and 2 deletions

View File

@@ -11,6 +11,7 @@
- **Distractionfree view**: Clean typography, optional hero image, summary, and published date.
- **Reading time**: Displays estimated reading time for text or duration for supported videos.
- **Progress**: Reading progress indicator with completion state.
- **TexttoSpeech**: Listen to articles with browsernative TTS; play/pause/stop controls with adjustable speed (0.81.6x).
- **Menus**: Quick actions to open, share, or copy links (for both Nostr and web content).
- **Performance**: Lightweight fetching and caching for speed; skeleton loaders to avoid empty flashes.

21
package-lock.json generated
View File

@@ -1,12 +1,12 @@
{
"name": "boris",
"version": "0.9.0",
"version": "0.9.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "boris",
"version": "0.9.0",
"version": "0.9.1",
"dependencies": {
"@fortawesome/fontawesome-svg-core": "^7.1.0",
"@fortawesome/free-regular-svg-icons": "^7.1.0",
@@ -35,6 +35,7 @@
"rehype-prism-plus": "^2.0.1",
"rehype-raw": "^7.0.0",
"remark-gfm": "^4.0.1",
"tinyld": "^1.3.4",
"use-pull-to-refresh": "^2.4.1"
},
"devDependencies": {
@@ -11215,6 +11216,22 @@
"url": "https://github.com/sponsors/jonschlinkert"
}
},
"node_modules/tinyld": {
"version": "1.3.4",
"resolved": "https://registry.npmjs.org/tinyld/-/tinyld-1.3.4.tgz",
"integrity": "sha512-u26CNoaInA4XpDU+8s/6Cq8xHc2T5M4fXB3ICfXPokUQoLzmPgSZU02TAkFwFMJCWTjk53gtkS8pETTreZwCqw==",
"license": "MIT",
"bin": {
"tinyld": "bin/tinyld.js",
"tinyld-heavy": "bin/tinyld-heavy.js",
"tinyld-light": "bin/tinyld-light.js"
},
"engines": {
"node": ">= 12.10.0",
"npm": ">= 6.12.0",
"yarn": ">= 1.20.0"
}
},
"node_modules/to-regex-range": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz",

View File

@@ -38,6 +38,7 @@
"rehype-prism-plus": "^2.0.1",
"rehype-raw": "^7.0.0",
"remark-gfm": "^4.0.1",
"tinyld": "^1.3.4",
"use-pull-to-refresh": "^2.4.1"
},
"devDependencies": {

View File

@@ -46,6 +46,7 @@ import {
loadReadingPosition,
saveReadingPosition
} from '../services/readingPositionService'
import TTSControls from './TTSControls'
interface ContentPanelProps {
loading: boolean
@@ -321,6 +322,25 @@ const ContentPanel: React.FC<ContentPanelProps> = ({
const hasHighlights = relevantHighlights.length > 0
// Extract plain text for TTS
const baseHtml = useMemo(() => {
if (markdown) return renderedMarkdownHtml && finalHtml ? finalHtml : ''
return finalHtml || html || ''
}, [markdown, renderedMarkdownHtml, finalHtml, html])
const articleText = useMemo(() => {
const parts: string[] = []
if (title) parts.push(title)
if (summary) parts.push(summary)
if (baseHtml) {
const div = document.createElement('div')
div.innerHTML = baseHtml
const txt = (div.textContent || '').replace(/\s+/g, ' ').trim()
if (txt) parts.push(txt)
}
return parts.join('. ')
}, [title, summary, baseHtml])
// Determine if we're on a nostr-native article (/a/) or external URL (/r/)
const isNostrArticle = selectedUrl && selectedUrl.startsWith('nostr:')
const isExternalVideo = !isNostrArticle && !!selectedUrl && ['youtube', 'video'].includes(classifyUrl(selectedUrl).type)
@@ -759,6 +779,11 @@ const ContentPanel: React.FC<ContentPanelProps> = ({
highlights={relevantHighlights}
highlightVisibility={highlightVisibility}
/>
{isTextContent && articleText && (
<div style={{ padding: '0 0.75rem 0.5rem 0.75rem' }}>
<TTSControls text={articleText} defaultLang={navigator?.language} settings={settings} />
</div>
)}
{isExternalVideo ? (
<>
<div className="reader-video">

View File

@@ -12,6 +12,7 @@ import LayoutBehaviorSettings from './Settings/LayoutBehaviorSettings'
import ZapSettings from './Settings/ZapSettings'
import RelaySettings from './Settings/RelaySettings'
import PWASettings from './Settings/PWASettings'
import TTSSettings from './Settings/TTSSettings'
import { useRelayStatus } from '../hooks/useRelayStatus'
import VersionFooter from './VersionFooter'
@@ -45,6 +46,10 @@ const DEFAULT_SETTINGS: UserSettings = {
syncReadingPosition: true,
autoMarkAsReadOnCompletion: false,
hideBookmarksWithoutCreationDate: true,
ttsUseSystemLanguage: false,
ttsDetectContentLanguage: true,
ttsLanguageMode: 'content',
ttsDefaultSpeed: 2.1,
}
interface SettingsProps {
@@ -175,6 +180,7 @@ const Settings: React.FC<SettingsProps> = ({ settings, onSave, onClose, relayPoo
<MediaDisplaySettings settings={localSettings} onUpdate={handleUpdate} />
<ExploreSettings settings={localSettings} onUpdate={handleUpdate} />
<ZapSettings settings={localSettings} onUpdate={handleUpdate} />
<TTSSettings settings={localSettings} onUpdate={handleUpdate} />
<LayoutBehaviorSettings settings={localSettings} onUpdate={handleUpdate} />
<PWASettings settings={localSettings} onUpdate={handleUpdate} onClose={onClose} />
<RelaySettings relayStatuses={relayStatuses} onClose={onClose} />

View File

@@ -0,0 +1,58 @@
import React from 'react'
import { FontAwesomeIcon } from '@fortawesome/react-fontawesome'
import { faGauge } from '@fortawesome/free-solid-svg-icons'
import { UserSettings } from '../../services/settingsService'
interface TTSSettingsProps {
settings: UserSettings
onUpdate: (updates: Partial<UserSettings>) => void
}
const SPEED_OPTIONS = [0.8, 1, 1.2, 1.4, 1.6, 1.8, 2, 2.1, 2.4, 2.8, 3]
const TTSSettings: React.FC<TTSSettingsProps> = ({ settings, onUpdate }) => {
const currentSpeed = settings.ttsDefaultSpeed || 2.1
const handleCycleSpeed = () => {
const currentIndex = SPEED_OPTIONS.indexOf(currentSpeed)
const nextIndex = (currentIndex + 1) % SPEED_OPTIONS.length
onUpdate({ ttsDefaultSpeed: SPEED_OPTIONS[nextIndex] })
}
return (
<div className="settings-section">
<h3 className="section-title">Text-to-Speech</h3>
<div className="setting-group setting-inline">
<label>Default Playback Speed</label>
<div className="setting-buttons">
<button
type="button"
className="article-menu-btn"
onClick={handleCycleSpeed}
title="Cycle speed"
>
<FontAwesomeIcon icon={faGauge} />
<span>{currentSpeed}x</span>
</button>
</div>
</div>
<div className="setting-group setting-inline">
<label>Speaker language</label>
<div className="setting-control">
<select
value={settings.ttsLanguageMode || 'content'}
onChange={e => onUpdate({ ttsLanguageMode: (e.target.value as 'system' | 'content'), ttsUseSystemLanguage: e.target.value === 'system', ttsDetectContentLanguage: e.target.value !== 'system' })}
style={{ background: 'var(--color-bg-elevated)', color: 'var(--color-text)', border: '1px solid var(--color-border)', borderRadius: 6, padding: '0.25rem 0.5rem' }}
>
<option value="system">System Language</option>
<option value="content">Content (auto-detect)</option>
</select>
</div>
</div>
</div>
)
}
export default TTSSettings

View File

@@ -0,0 +1,101 @@
import React, { useMemo } from 'react'
import { useTextToSpeech } from '../hooks/useTextToSpeech'
import { FontAwesomeIcon } from '@fortawesome/react-fontawesome'
import { faPlay, faPause, faGauge } from '@fortawesome/free-solid-svg-icons'
import { UserSettings } from '../services/settingsService'
import { detect } from 'tinyld'
interface Props {
text: string
defaultLang?: string
className?: string
settings?: UserSettings
}
const SPEED_OPTIONS = [0.8, 1, 1.2, 1.4, 1.6, 1.8, 2, 2.1, 2.4, 2.8, 3]
const TTSControls: React.FC<Props> = ({ text, defaultLang, className, settings }) => {
const {
supported, speaking, paused,
speak, pause, resume,
rate, setRate
} = useTextToSpeech({ defaultLang, defaultRate: settings?.ttsDefaultSpeed })
const canPlay = supported && text?.trim().length > 0
const resolvedSystemLang = useMemo(() => {
const mode = settings?.ttsLanguageMode
if ((mode ? mode === 'system' : settings?.ttsUseSystemLanguage) === true) {
return navigator?.language?.split('-')[0]
}
return undefined
}, [settings?.ttsLanguageMode, settings?.ttsUseSystemLanguage])
const detectContentLang = useMemo(() => {
const mode = settings?.ttsLanguageMode
if (mode) return mode === 'content'
return settings?.ttsDetectContentLanguage !== false
}, [settings?.ttsLanguageMode, settings?.ttsDetectContentLanguage])
const handlePlayPause = () => {
if (!canPlay) return
if (!speaking) {
let langOverride: string | undefined
if (detectContentLang && text) {
try {
const lang = detect(text)
if (typeof lang === 'string' && lang.length >= 2) langOverride = lang.slice(0, 2)
} catch (err) {
console.debug('[tts][detect] failed', err)
}
}
if (!langOverride && resolvedSystemLang) {
langOverride = resolvedSystemLang
}
speak(text, langOverride)
} else if (paused) {
resume()
} else {
pause()
}
}
const handleCycleSpeed = () => {
const currentIndex = SPEED_OPTIONS.indexOf(rate)
const nextIndex = (currentIndex + 1) % SPEED_OPTIONS.length
const next = SPEED_OPTIONS[nextIndex]
console.debug('[tts][ui] cycle speed', { from: rate, to: next, speaking, paused })
setRate(next)
}
const playLabel = !speaking ? 'Listen' : (paused ? 'Resume' : 'Pause')
if (!supported) return null
return (
<div className={className || 'tts-controls'} style={{ display: 'flex', gap: '0.5rem', alignItems: 'center', flexWrap: 'wrap', justifyContent: 'flex-end' }}>
<button
type="button"
className="article-menu-btn"
onClick={handlePlayPause}
title={playLabel}
disabled={!canPlay}
>
<FontAwesomeIcon icon={!speaking ? faPlay : (paused ? faPlay : faPause)} />
</button>
<button
type="button"
className="article-menu-btn"
onClick={handleCycleSpeed}
title="Cycle speed"
>
<FontAwesomeIcon icon={faGauge} />
<span>{rate}x</span>
</button>
</div>
)
}
export default TTSControls

View File

@@ -0,0 +1,249 @@
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
// Web Speech API types
type SpeechSynthesisVoice = {
name: string
voiceURI: string
lang: string
localService: boolean
default: boolean
}
export interface UseTTSOptions {
defaultLang?: string
defaultRate?: number
defaultPitch?: number
defaultVolume?: number
}
export interface UseTTS {
supported: boolean
speaking: boolean
paused: boolean
voices: SpeechSynthesisVoice[]
voice: SpeechSynthesisVoice | null
rate: number
pitch: number
volume: number
setVoice: (v: SpeechSynthesisVoice | null) => void
setRate: (r: number) => void
setPitch: (p: number) => void
setVolume: (v: number) => void
speak: (text: string, langOverride?: string) => void
pause: () => void
resume: () => void
stop: () => void
}
export function useTextToSpeech(options: UseTTSOptions = {}): UseTTS {
const synth = typeof window !== 'undefined' ? window.speechSynthesis : undefined
const supported = !!synth
const [voices, setVoices] = useState<SpeechSynthesisVoice[]>([])
const [voice, setVoice] = useState<SpeechSynthesisVoice | null>(null)
const [speaking, setSpeaking] = useState(false)
const [paused, setPaused] = useState(false)
const [rate, setRate] = useState(options.defaultRate ?? 2.1)
const [pitch, setPitch] = useState(options.defaultPitch ?? 1)
const [volume, setVolume] = useState(options.defaultVolume ?? 1)
const defaultLang = options.defaultLang || (typeof navigator !== 'undefined' ? navigator.language : 'en')
const utteranceRef = useRef<SpeechSynthesisUtterance | null>(null)
const spokenTextRef = useRef<string>('')
const charIndexRef = useRef<number>(0)
// Update rate when defaultRate option changes
useEffect(() => {
if (options.defaultRate !== undefined) {
console.debug('[tts] defaultRate changed ->', options.defaultRate)
setRate(options.defaultRate)
}
}, [options.defaultRate])
// Load voices (async in many browsers)
useEffect(() => {
if (!supported) return
const load = () => {
const v = synth!.getVoices()
setVoices(v)
if (!voice && v.length) {
const byLang = v.find(x => x.lang?.toLowerCase().startsWith(defaultLang.toLowerCase()))
setVoice(byLang || v[0] || null)
console.debug('[tts] voices loaded', { total: v.length, picked: (byLang || v[0] || null)?.lang })
}
}
load()
const handleVoicesChanged = () => load()
synth!.addEventListener('voiceschanged', handleVoicesChanged)
return () => {
synth!.removeEventListener('voiceschanged', handleVoicesChanged)
}
}, [supported, defaultLang, voice, synth])
const createUtterance = useCallback((text: string): SpeechSynthesisUtterance => {
const SpeechSynthesisUtteranceConstructor = (window as Window & typeof globalThis).SpeechSynthesisUtterance
const u = new SpeechSynthesisUtteranceConstructor(text) as SpeechSynthesisUtterance
u.lang = voice?.lang || defaultLang
if (voice) u.voice = voice
u.rate = rate
u.pitch = pitch
u.volume = volume
const self = u
u.onstart = () => {
if (utteranceRef.current !== self) return
console.debug('[tts] onstart')
setSpeaking(true)
setPaused(false)
}
u.onpause = () => {
if (utteranceRef.current !== self) return
console.debug('[tts] onpause')
setPaused(true)
}
u.onresume = () => {
if (utteranceRef.current !== self) return
console.debug('[tts] onresume')
setPaused(false)
}
u.onend = () => {
if (utteranceRef.current !== self) return
console.debug('[tts] onend')
setSpeaking(false)
setPaused(false)
utteranceRef.current = null
}
u.onerror = () => {
if (utteranceRef.current !== self) return
console.debug('[tts] onerror')
setSpeaking(false)
setPaused(false)
utteranceRef.current = null
}
u.onboundary = (ev: SpeechSynthesisEvent) => {
if (utteranceRef.current !== self) return
if (typeof ev.charIndex === 'number') {
const newIndex = ev.charIndex
if (newIndex > charIndexRef.current) {
charIndexRef.current = newIndex
}
}
}
return u
}, [voice, defaultLang, rate, pitch, volume])
const stop = useCallback(() => {
if (!supported) return
console.debug('[tts] stop')
synth!.cancel()
setSpeaking(false)
setPaused(false)
utteranceRef.current = null
charIndexRef.current = 0
spokenTextRef.current = ''
}, [supported, synth])
const speak = useCallback((text: string, langOverride?: string) => {
if (!supported || !text?.trim()) return
console.debug('[tts] speak', { len: text.length, rate })
synth!.cancel()
spokenTextRef.current = text
charIndexRef.current = 0
const u = createUtterance(text)
if (langOverride) {
u.lang = langOverride
// try to pick a voice that matches the override
const available = voices
const match = available.find(v => v.lang?.toLowerCase().startsWith(langOverride.toLowerCase()))
if (match) u.voice = match
}
utteranceRef.current = u
synth!.speak(u)
}, [supported, synth, createUtterance, rate, voices])
const pause = useCallback(() => {
if (!supported) return
if (synth!.speaking && !synth!.paused) {
console.debug('[tts] pause')
synth!.pause()
setPaused(true)
}
}, [supported, synth])
const resume = useCallback(() => {
if (!supported) return
if (synth!.speaking && synth!.paused) {
console.debug('[tts] resume')
synth!.resume()
setPaused(false)
}
}, [supported, synth])
// Update rate in real-time: while speaking, restart from last boundary with new rate.
useEffect(() => {
if (!supported) return
if (!utteranceRef.current) return
console.debug('[tts] rate change', { rate, speaking: synth!.speaking, paused: synth!.paused, charIndex: charIndexRef.current })
if (synth!.speaking && !synth!.paused) {
const fullText = spokenTextRef.current
const startIndex = Math.max(0, Math.min(charIndexRef.current, fullText.length - 1))
const remainingText = fullText.slice(startIndex)
console.debug('[tts] restart at new rate', { startIndex, remainingLen: remainingText.length })
synth!.cancel()
const u = createUtterance(remainingText)
utteranceRef.current = u
synth!.speak(u)
return
}
if (utteranceRef.current) {
utteranceRef.current.rate = rate
}
}, [rate, supported, synth, createUtterance])
const updateRate = useCallback((newRate: number) => {
setRate(newRate)
if (!supported) return
if (!utteranceRef.current) return
if (synth!.speaking && !synth!.paused) {
const fullText = spokenTextRef.current
const startIndex = Math.max(0, Math.min(charIndexRef.current, fullText.length - 1))
const remainingText = fullText.slice(startIndex)
console.debug('[tts] updateRate -> restart', { newRate, startIndex, remainingLen: remainingText.length })
synth!.cancel()
const u = createUtterance(remainingText)
// ensure the new rate is applied immediately on the new utterance
u.rate = newRate
utteranceRef.current = u
synth!.speak(u)
} else if (utteranceRef.current) {
console.debug('[tts] updateRate -> set on utterance', { newRate })
utteranceRef.current.rate = newRate
}
}, [supported, synth, createUtterance])
// stop TTS when unmounting
useEffect(() => stop, [stop])
return useMemo(() => ({
supported,
speaking,
paused,
voices,
voice,
rate,
setRate: updateRate,
pitch, setPitch,
volume, setVolume,
setVoice,
speak, pause, resume, stop
}), [supported, speaking, paused, voices, voice, rate, updateRate, pitch, volume, setVoice, speak, pause, resume, stop])
}

View File

@@ -65,6 +65,12 @@ export interface UserSettings {
autoMarkAsReadOnCompletion?: boolean // default: false (opt-in)
// Bookmark filtering
hideBookmarksWithoutCreationDate?: boolean // default: false
// TTS language selection
ttsUseSystemLanguage?: boolean // default: false
ttsDetectContentLanguage?: boolean // default: true
ttsLanguageMode?: 'system' | 'content' // default: 'content'
// Text-to-Speech settings
ttsDefaultSpeed?: number // default: 2.1
}
export async function loadSettings(