Merge pull request #22 from dergigi/tts

feat: Add comprehensive Text-to-Speech (TTS) functionality
2026-02-06 15:44:20 +01:00 · 2025-10-20 23:03:22 +02:00
parent 95f6949ab7 a551234a29
commit 1ac7fb26b2
9 changed files with 466 additions and 2 deletions
--- a/FEATURES.md
+++ b/FEATURES.md
@@ -11,6 +11,7 @@
 - **Distraction‑free view**: Clean typography, optional hero image, summary, and published date.
 - **Reading time**: Displays estimated reading time for text or duration for supported videos.
 - **Progress**: Reading progress indicator with completion state.
+- **Text‑to‑Speech**: Listen to articles with browser‑native TTS; play/pause controls with adjustable speed (0.8–3x).
 - **Menus**: Quick actions to open, share, or copy links (for both Nostr and web content).
 - **Performance**: Lightweight fetching and caching for speed; skeleton loaders to avoid empty flashes.

--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "boris",
-  "version": "0.9.0",
+  "version": "0.9.1",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "boris",
-      "version": "0.9.0",
+      "version": "0.9.1",
      "dependencies": {
        "@fortawesome/fontawesome-svg-core": "^7.1.0",
        "@fortawesome/free-regular-svg-icons": "^7.1.0",
@@ -35,6 +35,7 @@
        "rehype-prism-plus": "^2.0.1",
        "rehype-raw": "^7.0.0",
        "remark-gfm": "^4.0.1",
+        "tinyld": "^1.3.4",
        "use-pull-to-refresh": "^2.4.1"
      },
      "devDependencies": {
@@ -11215,6 +11216,22 @@
        "url": "https://github.com/sponsors/jonschlinkert"
      }
    },
+    "node_modules/tinyld": {
+      "version": "1.3.4",
+      "resolved": "https://registry.npmjs.org/tinyld/-/tinyld-1.3.4.tgz",
+      "integrity": "sha512-u26CNoaInA4XpDU+8s/6Cq8xHc2T5M4fXB3ICfXPokUQoLzmPgSZU02TAkFwFMJCWTjk53gtkS8pETTreZwCqw==",
+      "license": "MIT",
+      "bin": {
+        "tinyld": "bin/tinyld.js",
+        "tinyld-heavy": "bin/tinyld-heavy.js",
+        "tinyld-light": "bin/tinyld-light.js"
+      },
+      "engines": {
+        "node": ">= 12.10.0",
+        "npm": ">= 6.12.0",
+        "yarn": ">= 1.20.0"
+      }
+    },
    "node_modules/to-regex-range": {
      "version": "5.0.1",
      "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz",
--- a/package.json
+++ b/package.json
@@ -38,6 +38,7 @@
    "rehype-prism-plus": "^2.0.1",
    "rehype-raw": "^7.0.0",
    "remark-gfm": "^4.0.1",
+    "tinyld": "^1.3.4",
    "use-pull-to-refresh": "^2.4.1"
  },
  "devDependencies": {
--- a/src/components/ContentPanel.tsx
+++ b/src/components/ContentPanel.tsx
@@ -46,6 +46,7 @@ import {
  loadReadingPosition, 
  saveReadingPosition 
 } from '../services/readingPositionService'
+import TTSControls from './TTSControls'

 interface ContentPanelProps {
  loading: boolean
@@ -321,6 +322,25 @@ const ContentPanel: React.FC<ContentPanelProps> = ({

  const hasHighlights = relevantHighlights.length > 0

+  // Extract plain text for TTS
+  const baseHtml = useMemo(() => {
+    if (markdown) return renderedMarkdownHtml && finalHtml ? finalHtml : ''
+    return finalHtml || html || ''
+  }, [markdown, renderedMarkdownHtml, finalHtml, html])
+
+  const articleText = useMemo(() => {
+    const parts: string[] = []
+    if (title) parts.push(title)
+    if (summary) parts.push(summary)
+    if (baseHtml) {
+      const div = document.createElement('div')
+      div.innerHTML = baseHtml
+      const txt = (div.textContent || '').replace(/\s+/g, ' ').trim()
+      if (txt) parts.push(txt)
+    }
+    return parts.join('. ')
+  }, [title, summary, baseHtml])
+
  // Determine if we're on a nostr-native article (/a/) or external URL (/r/)
  const isNostrArticle = selectedUrl && selectedUrl.startsWith('nostr:')
  const isExternalVideo = !isNostrArticle && !!selectedUrl && ['youtube', 'video'].includes(classifyUrl(selectedUrl).type)
@@ -759,6 +779,11 @@ const ContentPanel: React.FC<ContentPanelProps> = ({
        highlights={relevantHighlights}
        highlightVisibility={highlightVisibility}
      />
+      {isTextContent && articleText && (
+        <div style={{ padding: '0 0.75rem 0.5rem 0.75rem' }}>
+          <TTSControls text={articleText} defaultLang={navigator?.language} settings={settings} />
+        </div>
+      )}
      {isExternalVideo ? (
        <>
          <div className="reader-video">
--- a/src/components/Settings.tsx
+++ b/src/components/Settings.tsx
@@ -12,6 +12,7 @@ import LayoutBehaviorSettings from './Settings/LayoutBehaviorSettings'
 import ZapSettings from './Settings/ZapSettings'
 import RelaySettings from './Settings/RelaySettings'
 import PWASettings from './Settings/PWASettings'
+import TTSSettings from './Settings/TTSSettings'
 import { useRelayStatus } from '../hooks/useRelayStatus'
 import VersionFooter from './VersionFooter'

@@ -45,6 +46,10 @@ const DEFAULT_SETTINGS: UserSettings = {
  syncReadingPosition: true,
  autoMarkAsReadOnCompletion: false,
  hideBookmarksWithoutCreationDate: true,
+  ttsUseSystemLanguage: false,
+  ttsDetectContentLanguage: true,
+  ttsLanguageMode: 'content',
+  ttsDefaultSpeed: 2.1,
 }

 interface SettingsProps {
@@ -175,6 +180,7 @@ const Settings: React.FC<SettingsProps> = ({ settings, onSave, onClose, relayPoo
        <MediaDisplaySettings settings={localSettings} onUpdate={handleUpdate} />
        <ExploreSettings settings={localSettings} onUpdate={handleUpdate} />
        <ZapSettings settings={localSettings} onUpdate={handleUpdate} />
+        <TTSSettings settings={localSettings} onUpdate={handleUpdate} />
        <LayoutBehaviorSettings settings={localSettings} onUpdate={handleUpdate} />
        <PWASettings settings={localSettings} onUpdate={handleUpdate} onClose={onClose} />
        <RelaySettings relayStatuses={relayStatuses} onClose={onClose} />
--- a/src/components/Settings/TTSSettings.tsx
+++ b/src/components/Settings/TTSSettings.tsx
@@ -0,0 +1,58 @@
+import React from 'react'
+import { FontAwesomeIcon } from '@fortawesome/react-fontawesome'
+import { faGauge } from '@fortawesome/free-solid-svg-icons'
+import { UserSettings } from '../../services/settingsService'
+
+/** Props accepted by the TTSSettings section. */
+interface TTSSettingsProps {
+  /** Settings object the section reads its current TTS values from. */
+  settings: UserSettings
+  /** Callback invoked with only the settings fields that changed. */
+  onUpdate: (updates: Partial<UserSettings>) => void
+}
+
+// Playback-speed multipliers the speed button steps through, in cycling order.
+const SPEED_OPTIONS = [0.8, 1, 1.2, 1.4, 1.6, 1.8, 2, 2.1, 2.4, 2.8, 3]
+
+/**
+ * Settings section for Text-to-Speech: a button that cycles the default
+ * playback speed and a dropdown that selects where the speaker language
+ * comes from (system language or content auto-detection).
+ */
+const TTSSettings: React.FC<TTSSettingsProps> = ({ settings, onUpdate }) => {
+  const currentSpeed = settings.ttsDefaultSpeed || 2.1
+
+  // Advance to the entry after the current one, wrapping to the first after the last.
+  // A speed that is not in the list has index -1 and therefore lands on the first entry.
+  const handleCycleSpeed = () => {
+    const position = SPEED_OPTIONS.indexOf(currentSpeed) + 1
+    onUpdate({ ttsDefaultSpeed: SPEED_OPTIONS[position % SPEED_OPTIONS.length] })
+  }
+
+  // Store the chosen mode and keep the two boolean flags in step with it.
+  const handleLanguageModeChange = (event: React.ChangeEvent<HTMLSelectElement>) => {
+    const selected = event.target.value
+    onUpdate({
+      ttsLanguageMode: selected as 'system' | 'content',
+      ttsUseSystemLanguage: selected === 'system',
+      ttsDetectContentLanguage: selected !== 'system'
+    })
+  }
+
+  const selectStyle: React.CSSProperties = {
+    background: 'var(--color-bg-elevated)',
+    color: 'var(--color-text)',
+    border: '1px solid var(--color-border)',
+    borderRadius: 6,
+    padding: '0.25rem 0.5rem'
+  }
+
+  return (
+    <div className="settings-section">
+      <h3 className="section-title">Text-to-Speech</h3>
+
+      <div className="setting-group setting-inline">
+        <label>Default Playback Speed</label>
+        <div className="setting-buttons">
+          <button
+            type="button"
+            className="article-menu-btn"
+            onClick={handleCycleSpeed}
+            title="Cycle speed"
+          >
+            <FontAwesomeIcon icon={faGauge} />
+            <span>{currentSpeed}x</span>
+          </button>
+        </div>
+      </div>
+
+      <div className="setting-group setting-inline">
+        <label>Speaker language</label>
+        <div className="setting-control">
+          <select
+            value={settings.ttsLanguageMode || 'content'}
+            onChange={handleLanguageModeChange}
+            style={selectStyle}
+          >
+            <option value="system">System Language</option>
+            <option value="content">Content (auto-detect)</option>
+          </select>
+        </div>
+      </div>
+    </div>
+  )
+}
+
+export default TTSSettings
--- a/src/components/TTSControls.tsx
+++ b/src/components/TTSControls.tsx
@@ -0,0 +1,101 @@
+import React, { useMemo } from 'react'
+import { useTextToSpeech } from '../hooks/useTextToSpeech'
+import { FontAwesomeIcon } from '@fortawesome/react-fontawesome'
+import { faPlay, faPause, faGauge } from '@fortawesome/free-solid-svg-icons'
+import { UserSettings } from '../services/settingsService'
+import { detect } from 'tinyld'
+
+/** Props accepted by TTSControls. */
+interface Props {
+  /** Text handed to the speech hook when playback starts. */
+  text: string
+  /** Language forwarded to useTextToSpeech as its defaultLang option. */
+  defaultLang?: string
+  /** CSS class for the wrapper element; 'tts-controls' is used when omitted. */
+  className?: string
+  /** User settings; supplies the default speed and the language-selection mode. */
+  settings?: UserSettings
+}
+
+// Playback-speed multipliers the speed button steps through, in cycling order.
+const SPEED_OPTIONS = [0.8, 1, 1.2, 1.4, 1.6, 1.8, 2, 2.1, 2.4, 2.8, 3]
+
+/**
+ * Play/pause button and speed-cycling button for reading `text` aloud.
+ *
+ * The language handed to the speech hook is the detected content language
+ * when detection is enabled and succeeds, otherwise the system language when
+ * that mode is selected, otherwise nothing. Renders nothing when the speech
+ * hook reports that synthesis is unsupported.
+ */
+const TTSControls: React.FC<Props> = ({ text, defaultLang, className, settings }) => {
+  const tts = useTextToSpeech({ defaultLang, defaultRate: settings?.ttsDefaultSpeed })
+  const { supported, speaking, paused, rate } = tts
+
+  const canPlay = supported && text?.trim().length > 0
+
+  // Two-letter system language, only when the settings ask for the system language.
+  const resolvedSystemLang = useMemo(() => {
+    const mode = settings?.ttsLanguageMode
+    const wantsSystem = mode ? mode === 'system' : settings?.ttsUseSystemLanguage === true
+    return wantsSystem ? navigator?.language?.split('-')[0] : undefined
+  }, [settings?.ttsLanguageMode, settings?.ttsUseSystemLanguage])
+
+  // Whether to detect the language from the text; the mode wins over the boolean flag.
+  const detectContentLang = useMemo(() => {
+    const mode = settings?.ttsLanguageMode
+    return mode ? mode === 'content' : settings?.ttsDetectContentLanguage !== false
+  }, [settings?.ttsLanguageMode, settings?.ttsDetectContentLanguage])
+
+  // Two-letter language code detected from the text, or undefined when
+  // detection is disabled, fails, or yields fewer than two characters.
+  const detectLanguage = (): string | undefined => {
+    if (!detectContentLang || !text) return undefined
+    try {
+      const lang = detect(text)
+      return typeof lang === 'string' && lang.length >= 2 ? lang.slice(0, 2) : undefined
+    } catch (err) {
+      console.debug('[tts][detect] failed', err)
+      return undefined
+    }
+  }
+
+  const handlePlayPause = () => {
+    if (!canPlay) return
+
+    if (speaking) {
+      if (paused) {
+        tts.resume()
+      } else {
+        tts.pause()
+      }
+      return
+    }
+
+    tts.speak(text, detectLanguage() || resolvedSystemLang || undefined)
+  }
+
+  const handleCycleSpeed = () => {
+    const next = SPEED_OPTIONS[(SPEED_OPTIONS.indexOf(rate) + 1) % SPEED_OPTIONS.length]
+    console.debug('[tts][ui] cycle speed', { from: rate, to: next, speaking, paused })
+    tts.setRate(next)
+  }
+
+  if (!supported) return null
+
+  const activelySpeaking = speaking && !paused
+  let playLabel = 'Listen'
+  if (speaking) {
+    playLabel = paused ? 'Resume' : 'Pause'
+  }
+
+  const wrapperStyle: React.CSSProperties = {
+    display: 'flex',
+    gap: '0.5rem',
+    alignItems: 'center',
+    flexWrap: 'wrap',
+    justifyContent: 'flex-end'
+  }
+
+  return (
+    <div className={className || 'tts-controls'} style={wrapperStyle}>
+      <button
+        type="button"
+        className="article-menu-btn"
+        onClick={handlePlayPause}
+        title={playLabel}
+        disabled={!canPlay}
+      >
+        <FontAwesomeIcon icon={activelySpeaking ? faPause : faPlay} />
+      </button>
+      <button
+        type="button"
+        className="article-menu-btn"
+        onClick={handleCycleSpeed}
+        title="Cycle speed"
+      >
+        <FontAwesomeIcon icon={faGauge} />
+        <span>{rate}x</span>
+      </button>
+    </div>
+  )
+}
+
+export default TTSControls
+
--- a/src/hooks/useTextToSpeech.ts
+++ b/src/hooks/useTextToSpeech.ts
@@ -0,0 +1,249 @@
+import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
+
+// Web Speech API types
+/**
+ * Module-local shape of a voice entry as stored from `speechSynthesis.getVoices()`.
+ * NOTE(review): this declares the same name as the DOM lib's global
+ * `SpeechSynthesisVoice` and shadows it inside this module — confirm that is intended.
+ */
+type SpeechSynthesisVoice = {
+  name: string
+  voiceURI: string
+  /** Language tag; the hook matches it by prefix against the requested language. */
+  lang: string
+  localService: boolean
+  default: boolean
+}
+
+/** Optional initial values for useTextToSpeech. */
+export interface UseTTSOptions {
+  /** Preferred language; when empty the hook uses `navigator.language`, or 'en' if navigator is unavailable. */
+  defaultLang?: string
+  /** Initial speech rate (2.1 when omitted); later changes to this option are copied into the hook's rate. */
+  defaultRate?: number
+  /** Initial pitch (1 when omitted). */
+  defaultPitch?: number
+  /** Initial volume (1 when omitted). */
+  defaultVolume?: number
+}
+
+/** State and controls returned by useTextToSpeech. */
+export interface UseTTS {
+  /** True when `window.speechSynthesis` is available. */
+  supported: boolean
+  /** Set when an utterance starts; cleared when it ends, errors, or stop() is called. */
+  speaking: boolean
+  /** True between a pause and the following resume; cleared on start, end, error and stop. */
+  paused: boolean
+  /** Voices most recently read from the speech engine. */
+  voices: SpeechSynthesisVoice[]
+  /** Voice applied to new utterances, or null when none has been chosen. */
+  voice: SpeechSynthesisVoice | null
+  /** Rate applied to new utterances. */
+  rate: number
+  /** Pitch applied to new utterances. */
+  pitch: number
+  /** Volume applied to new utterances. */
+  volume: number
+  setVoice: (v: SpeechSynthesisVoice | null) => void
+  /** Changes the rate; while speech is playing it is restarted from the last reported boundary. */
+  setRate: (r: number) => void
+  setPitch: (p: number) => void
+  setVolume: (v: number) => void
+  /**
+   * Cancels any current speech and speaks `text`. A non-empty `langOverride`
+   * becomes the utterance language and selects a voice whose language starts
+   * with it, when one is available.
+   */
+  speak: (text: string, langOverride?: string) => void
+  /** Pauses speech; does nothing unless the engine is speaking and not paused. */
+  pause: () => void
+  /** Resumes speech; does nothing unless the engine is speaking and paused. */
+  resume: () => void
+  /** Cancels speech and resets the speaking/paused state and playback position. */
+  stop: () => void
+}
+
+/**
+ * React hook that drives the browser's speech synthesis (`window.speechSynthesis`).
+ *
+ * Holds React state for support, speaking/paused status, the voice list and the
+ * rate/pitch/volume values, and exposes speak/pause/resume/stop controls.
+ * When the rate (or another utterance parameter) changes while speech is
+ * playing, the current utterance is cancelled and speech continues once from the
+ * last boundary the engine reported, keeping the language passed to `speak`.
+ * Speech is stopped when the component using the hook unmounts.
+ *
+ * @param options Initial language, rate, pitch and volume.
+ * @returns Memoized state values and control functions.
+ */
+export function useTextToSpeech(options: UseTTSOptions = {}): UseTTS {
+  const synth = typeof window !== 'undefined' ? window.speechSynthesis : undefined
+  const supported = !!synth
+  const [voices, setVoices] = useState<SpeechSynthesisVoice[]>([])
+  const [voice, setVoice] = useState<SpeechSynthesisVoice | null>(null)
+  const [speaking, setSpeaking] = useState(false)
+  const [paused, setPaused] = useState(false)
+  const [rate, setRate] = useState(options.defaultRate ?? 2.1)
+  const [pitch, setPitch] = useState(options.defaultPitch ?? 1)
+  const [volume, setVolume] = useState(options.defaultVolume ?? 1)
+  const defaultLang = options.defaultLang || (typeof navigator !== 'undefined' ? navigator.language : 'en')
+
+  // Utterance currently owned by this hook; events from any other utterance are ignored.
+  const utteranceRef = useRef<SpeechSynthesisUtterance | null>(null)
+  // Full text passed to the latest speak() call.
+  const spokenTextRef = useRef<string>('')
+  // Furthest boundary reached so far, as an index into spokenTextRef.current.
+  const charIndexRef = useRef<number>(0)
+  // Language requested by the latest speak() call; re-applied when speech is restarted.
+  const langOverrideRef = useRef<string | undefined>(undefined)
+  // Mirror of `voices`, readable from callbacks without making them depend on the list.
+  const voicesRef = useRef<SpeechSynthesisVoice[]>([])
+
+  // Update rate when defaultRate option changes
+  useEffect(() => {
+    if (options.defaultRate !== undefined) {
+      console.debug('[tts] defaultRate changed ->', options.defaultRate)
+      setRate(options.defaultRate)
+    }
+  }, [options.defaultRate])
+
+  // Load voices (async in many browsers)
+  useEffect(() => {
+    if (!supported) return
+    const load = () => {
+      const v = synth!.getVoices()
+      voicesRef.current = v
+      setVoices(v)
+      if (!voice && v.length) {
+        const byLang = v.find(x => x.lang?.toLowerCase().startsWith(defaultLang.toLowerCase()))
+        setVoice(byLang || v[0] || null)
+        console.debug('[tts] voices loaded', { total: v.length, picked: (byLang || v[0] || null)?.lang })
+      }
+    }
+    load()
+    const handleVoicesChanged = () => load()
+    synth!.addEventListener('voiceschanged', handleVoicesChanged)
+    return () => {
+      synth!.removeEventListener('voiceschanged', handleVoicesChanged)
+    }
+  }, [supported, defaultLang, voice, synth])
+
+  // Builds an utterance for `text` with the current voice/rate/pitch/volume.
+  // `startOffset` is the position of `text` inside the full spoken text, so that
+  // boundary positions can be tracked relative to the full text across restarts.
+  // A non-empty `langOverride` replaces the language and, when a voice whose
+  // language starts with it is known, the voice as well.
+  const createUtterance = useCallback((text: string, startOffset = 0, langOverride?: string): SpeechSynthesisUtterance => {
+    const SpeechSynthesisUtteranceConstructor = (window as Window & typeof globalThis).SpeechSynthesisUtterance
+    const u = new SpeechSynthesisUtteranceConstructor(text) as SpeechSynthesisUtterance
+    u.lang = voice?.lang || defaultLang
+    if (voice) u.voice = voice
+    if (langOverride) {
+      u.lang = langOverride
+      const match = voicesRef.current.find(v => v.lang?.toLowerCase().startsWith(langOverride.toLowerCase()))
+      if (match) u.voice = match
+    }
+    u.rate = rate
+    u.pitch = pitch
+    u.volume = volume
+
+    const self = u
+
+    u.onstart = () => {
+      if (utteranceRef.current !== self) return
+      console.debug('[tts] onstart')
+      setSpeaking(true)
+      setPaused(false)
+    }
+    u.onpause = () => {
+      if (utteranceRef.current !== self) return
+      console.debug('[tts] onpause')
+      setPaused(true)
+    }
+    u.onresume = () => {
+      if (utteranceRef.current !== self) return
+      console.debug('[tts] onresume')
+      setPaused(false)
+    }
+    u.onend = () => {
+      if (utteranceRef.current !== self) return
+      console.debug('[tts] onend')
+      setSpeaking(false)
+      setPaused(false)
+      utteranceRef.current = null
+    }
+    u.onerror = () => {
+      if (utteranceRef.current !== self) return
+      console.debug('[tts] onerror')
+      setSpeaking(false)
+      setPaused(false)
+      utteranceRef.current = null
+    }
+    u.onboundary = (ev: SpeechSynthesisEvent) => {
+      if (utteranceRef.current !== self) return
+      if (typeof ev.charIndex === 'number') {
+        // charIndex counts from the start of this utterance's text; shift it by
+        // the utterance's offset so the stored position refers to the full text.
+        const absoluteIndex = startOffset + ev.charIndex
+        if (absoluteIndex > charIndexRef.current) {
+          charIndexRef.current = absoluteIndex
+        }
+      }
+    }
+
+    return u
+  }, [voice, defaultLang, rate, pitch, volume])
+
+  const stop = useCallback(() => {
+    if (!supported) return
+    console.debug('[tts] stop')
+    synth!.cancel()
+    setSpeaking(false)
+    setPaused(false)
+    utteranceRef.current = null
+    charIndexRef.current = 0
+    spokenTextRef.current = ''
+    langOverrideRef.current = undefined
+  }, [supported, synth])
+
+  const speak = useCallback((text: string, langOverride?: string) => {
+    if (!supported || !text?.trim()) return
+    console.debug('[tts] speak', { len: text.length, rate })
+    synth!.cancel()
+    spokenTextRef.current = text
+    charIndexRef.current = 0
+    // Remember the requested language so a later restart keeps it.
+    langOverrideRef.current = langOverride
+
+    const u = createUtterance(text, 0, langOverride)
+    utteranceRef.current = u
+    synth!.speak(u)
+  }, [supported, synth, createUtterance, rate])
+
+  const pause = useCallback(() => {
+    if (!supported) return
+    if (synth!.speaking && !synth!.paused) {
+      console.debug('[tts] pause')
+      synth!.pause()
+      setPaused(true)
+    }
+  }, [supported, synth])
+
+  const resume = useCallback(() => {
+    if (!supported) return
+    if (synth!.speaking && synth!.paused) {
+      console.debug('[tts] resume')
+      synth!.resume()
+      setPaused(false)
+    }
+  }, [supported, synth])
+
+  // Apply a changed rate (or other utterance parameter) in real time. This is the
+  // only place that restarts speech, so each change causes exactly one restart.
+  useEffect(() => {
+    if (!supported) return
+    const current = utteranceRef.current
+    if (!current) return
+
+    console.debug('[tts] rate change', { rate, speaking: synth!.speaking, paused: synth!.paused, charIndex: charIndexRef.current })
+
+    if (synth!.speaking && !synth!.paused) {
+      const fullText = spokenTextRef.current
+      const startIndex = Math.max(0, Math.min(charIndexRef.current, fullText.length - 1))
+      const remainingText = fullText.slice(startIndex)
+
+      console.debug('[tts] restart at new rate', { startIndex, remainingLen: remainingText.length })
+      synth!.cancel()
+      // Continue from the last boundary, keeping the language chosen for this text.
+      const u = createUtterance(remainingText, startIndex, langOverrideRef.current)
+      utteranceRef.current = u
+      synth!.speak(u)
+      return
+    }
+
+    // Not actively speaking (e.g. paused): record the new rate on the pending utterance.
+    current.rate = rate
+  }, [rate, supported, synth, createUtterance])
+
+  // Public rate setter. Only updates state; the effect above applies the new
+  // rate to ongoing speech after the re-render.
+  const updateRate = useCallback((newRate: number) => {
+    console.debug('[tts] updateRate', { newRate })
+    setRate(newRate)
+  }, [])
+
+  // stop TTS when unmounting
+  useEffect(() => stop, [stop])
+
+  return useMemo(() => ({
+    supported,
+    speaking,
+    paused,
+    voices,
+    voice,
+    rate,
+    setRate: updateRate,
+    pitch, setPitch,
+    volume, setVolume,
+    setVoice,
+    speak, pause, resume, stop
+  }), [supported, speaking, paused, voices, voice, rate, updateRate, pitch, volume, setVoice, speak, pause, resume, stop])
+}
+
--- a/src/services/settingsService.ts
+++ b/src/services/settingsService.ts
@@ -65,6 +65,12 @@ export interface UserSettings {
  autoMarkAsReadOnCompletion?: boolean // default: false (opt-in)
  // Bookmark filtering
  hideBookmarksWithoutCreationDate?: boolean // default: false
+  // TTS language selection
+  ttsUseSystemLanguage?: boolean // default: false
+  ttsDetectContentLanguage?: boolean // default: true
+  ttsLanguageMode?: 'system' | 'content' // default: 'content'
+  // Text-to-Speech settings
+  ttsDefaultSpeed?: number // default: 2.1
 }

 export async function loadSettings(