mirror of https://github.com/aljazceru/transcription-api.git (synced 2025-12-17 07:14:24 +01:00)

Commit: transcription api

@@ -40,3 +40,11 @@ path = "src/live_transcribe.rs"
[[bin]]
name = "realtime-playback"
path = "src/realtime_playback.rs"
+
+[[bin]]
+name = "system-audio"
+path = "src/system_audio_transcribe.rs"
+
+[[bin]]
+name = "stdin-transcribe"
+path = "src/stdin_transcribe.rs"

@@ -76,7 +76,7 @@ async fn main() -> Result<()> {
    // Also open the file for streaming to transcription service
    // We need to read the raw audio data for transcription
    let mut wav_reader = WavReader::open(&file_path)?;
-   let wav_spec = wav_reader.spec();
+   let _wav_spec = wav_reader.spec();

    // Collect all samples for streaming
    let samples: Vec<i16> = wav_reader.samples::<i16>()
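
The underscore prefix is Rust's idiom for a binding that is deliberately kept but never read: the right-hand side still executes, while the unused_variables lint stays quiet. A minimal standalone illustration, not from this repo:

fn main() {
    let _spec = expensive_probe(); // leading underscore: value computed, lint silenced
    println!("probe ran without an unused-variable warning");
}

fn expensive_probe() -> u32 {
    42 // stand-in for wav_reader.spec()
}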

examples/rust-client/src/stdin_transcribe.rs (new file, 195 lines)
@@ -0,0 +1,195 @@
use anyhow::Result;
use clap::Parser;
use futures_util::StreamExt;
use std::io::{self, Read, Write};
use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream;
use tracing::{error, info};

// Import generated protobuf types
pub mod transcription {
    tonic::include_proto!("transcription");
}

use transcription::{
    transcription_service_client::TranscriptionServiceClient, AudioChunk, AudioConfig,
};

#[derive(Parser, Debug)]
#[command(author, version, about = "Transcribe audio from stdin (for piping from parec)", long_about = None)]
struct Args {
    /// gRPC server address
    #[arg(short, long, default_value = "http://localhost:50051")]
    server: String,

    /// Language code (e.g., "en", "es", "auto")
    #[arg(short, long, default_value = "en")]
    language: String,

    /// Task: transcribe or translate
    #[arg(short, long, default_value = "transcribe")]
    task: String,

    /// Model to use
    #[arg(short, long, default_value = "base")]
    model: String,

    /// Show timestamps
    #[arg(short = 'T', long)]
    timestamps: bool,

    /// Chunk size in seconds (for buffering)
    #[arg(short, long, default_value = "3.0")]
    chunk_seconds: f32,

    /// Disable VAD (Voice Activity Detection) - useful for music/system audio
    #[arg(long)]
    no_vad: bool,
}

#[tokio::main]
async fn main() -> Result<()> {
    tracing_subscriber::fmt::init();
    let args = Args::parse();

    info!("Connecting to transcription service at {}", args.server);
    let mut client = TranscriptionServiceClient::connect(args.server.clone()).await?;

    // Create channel for audio chunks
    let (tx, rx) = mpsc::channel::<AudioChunk>(100);

    // Spawn a thread to read from stdin and send chunks
    let tx_clone = tx.clone();
    let chunk_seconds = args.chunk_seconds;
    std::thread::spawn(move || {
        if let Err(e) = read_stdin_and_send(tx_clone, chunk_seconds) {
            error!("Error reading stdin: {}", e);
        }
    });

    // Create the first chunk with configuration
    let config = AudioConfig {
        language: args.language.clone(),
        task: args.task.clone(),
        model: args.model.clone(),
        sample_rate: 16000,
        vad_enabled: !args.no_vad, // Disable VAD if --no-vad flag is used
    };

    // Send a configuration chunk first
    let config_chunk = AudioChunk {
        audio_data: vec![],
        session_id: "stdin-transcribe".to_string(),
        config: Some(config),
    };

    // Create stream from receiver, prefixed with the config chunk
    let stream = ReceiverStream::new(rx);
    let stream = futures_util::stream::iter(vec![config_chunk]).chain(stream);

    // Start streaming transcription
    let request = tonic::Request::new(stream);
    let mut response = client.stream_transcribe(request).await?.into_inner();

    println!("\n🎧 Transcribing audio from stdin...");
    println!("Press Ctrl+C to stop\n");
    println!("{}", "─".repeat(80));

    let mut current_line = String::new();

    // Process transcription responses
    while let Some(result) = response.message().await? {
        if !result.text.is_empty() {
            if args.timestamps {
                if result.is_final {
                    println!("[{:.1}s] {}", result.start_time, result.text);
                    current_line.clear();
                } else {
                    print!("\r[{:.1}s] {:<80}", result.start_time, result.text);
                    io::stdout().flush()?;
                    current_line = result.text.clone();
                }
            } else if result.is_final {
                println!("{}", result.text);
                current_line.clear();
            } else {
                print!("\r{:<80}", result.text);
                io::stdout().flush()?;
                current_line = result.text.clone();
            }
        }
    }

    // Clear any remaining interim text
    if !current_line.is_empty() {
        println!();
    }

    Ok(())
}

fn read_stdin_and_send(tx: mpsc::Sender<AudioChunk>, chunk_seconds: f32) -> Result<()> {
    let stdin = io::stdin();
    let mut handle = stdin.lock();

    // Calculate chunk size in bytes (16kHz, 16-bit mono)
    let samples_per_chunk = (16000.0 * chunk_seconds) as usize;
    let bytes_per_chunk = samples_per_chunk * 2; // 16-bit = 2 bytes

    let mut buffer = vec![0u8; bytes_per_chunk];

    info!(
        "Reading audio from stdin (chunk size: {} bytes, {} seconds)",
        bytes_per_chunk, chunk_seconds
    );

    loop {
        // Read a full chunk from stdin (read() may return short counts)
        let mut total_read = 0;
        while total_read < bytes_per_chunk {
            match handle.read(&mut buffer[total_read..]) {
                Ok(0) => {
                    // EOF reached: send any remaining data, then stop
                    if total_read > 0 {
                        let audio_chunk = AudioChunk {
                            audio_data: buffer[..total_read].to_vec(),
                            session_id: String::new(),
                            config: None,
                        };
                        let _ = tx.blocking_send(audio_chunk);
                    }
                    info!("End of stdin reached");
                    return Ok(());
                }
                Ok(n) => {
                    total_read += n;
                }
                Err(e) if e.kind() == io::ErrorKind::Interrupted => {
                    // Retry on interrupt
                    continue;
                }
                Err(e) => {
                    error!("Error reading stdin: {}", e);
                    return Err(e.into());
                }
            }
        }

        // Send the chunk
        let audio_chunk = AudioChunk {
            audio_data: buffer.clone(),
            session_id: String::new(),
            config: None,
        };

        if tx.blocking_send(audio_chunk).is_err() {
            // Receiver dropped, exit
            break;
        }
    }

    Ok(())
}
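
tonic::include_proto!("transcription") pulls in code generated at build time by tonic-build, so the client crate needs a build script pointing at the service's .proto file. A minimal build.rs sketch, assuming the proto lives at proto/transcription.proto (the path is an assumption; this diff does not show it):

// build.rs (sketch; adjust the proto path to wherever transcription.proto actually lives)
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Generates the Rust module that tonic::include_proto!("transcription") includes
    tonic_build::compile_protos("proto/transcription.proto")?;
    Ok(())
}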

@@ -7,12 +7,10 @@ use anyhow::Result;
use clap::Parser;
use futures_util::StreamExt;
use hound::WavReader;
use std::fs::File;
use std::time::Duration;
use tokio::sync::mpsc;
use tokio::time;
use tokio_stream::wrappers::ReceiverStream;
use tonic::transport::Channel;
use tracing::info;

// Import generated protobuf types

examples/rust-client/src/system_audio_transcribe.rs (new file, 250 lines)
@@ -0,0 +1,250 @@
use anyhow::Result;
use clap::Parser;
use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
use futures_util::StreamExt;
use std::sync::{Arc, Mutex};
use tokio::sync::mpsc as tokio_mpsc;
use tokio_stream::wrappers::ReceiverStream;
use tracing::{error, info, warn};

// Import generated protobuf types
pub mod transcription {
    tonic::include_proto!("transcription");
}

use transcription::{
    transcription_service_client::TranscriptionServiceClient, AudioChunk, AudioConfig,
};

#[derive(Parser, Debug)]
#[command(author, version, about = "Capture and transcribe system audio", long_about = None)]
struct Args {
    /// gRPC server address
    #[arg(short, long, default_value = "http://localhost:50051")]
    server: String,

    /// Language code (e.g., "en", "es", "auto")
    #[arg(short, long, default_value = "en")]
    language: String,

    /// Task: transcribe or translate
    #[arg(short, long, default_value = "transcribe")]
    task: String,

    /// Model to use
    #[arg(short, long, default_value = "base")]
    model: String,

    /// List available audio devices
    #[arg(long)]
    list_devices: bool,

    /// Audio device name or index to use (e.g., "pulse.monitor" for PulseAudio monitor)
    #[arg(short, long)]
    device: Option<String>,
}

#[tokio::main]
async fn main() -> Result<()> {
    tracing_subscriber::fmt::init();
    let args = Args::parse();

    // List devices if requested
    if args.list_devices {
        list_audio_devices()?;
        return Ok(());
    }

    info!("Connecting to transcription service at {}", args.server);
    let mut client = TranscriptionServiceClient::connect(args.server.clone()).await?;

    // Create channel for audio chunks
    let (tx, rx) = tokio_mpsc::channel::<AudioChunk>(100);

    // Start audio capture in a separate thread
    let device_name = args.device.clone();
    std::thread::spawn(move || {
        if let Err(e) = capture_system_audio(tx, device_name) {
            error!("Audio capture error: {}", e);
        }
    });

    // Create the first chunk with configuration
    let config = AudioConfig {
        language: args.language.clone(),
        task: args.task.clone(),
        model: args.model.clone(),
        sample_rate: 16000,
        vad_enabled: true, // Enable VAD to filter silence
    };

    // Send a configuration chunk first
    let config_chunk = AudioChunk {
        audio_data: vec![],
        session_id: "system-audio".to_string(),
        config: Some(config),
    };

    // Create stream from receiver, prefixed with the config chunk
    let stream_vec = vec![config_chunk];
    let stream = ReceiverStream::new(rx);
    let stream = futures_util::stream::iter(stream_vec).chain(stream);

    // Start streaming transcription
    let request = tonic::Request::new(stream);
    let mut response = client.stream_transcribe(request).await?.into_inner();

    println!("\n🎧 Capturing system audio for transcription...");
    println!("Press Ctrl+C to stop\n");
    println!("{}", "─".repeat(80));

    // Process transcription responses
    while let Some(result) = response.message().await? {
        if !result.text.is_empty() {
            if result.is_final {
                println!("[FINAL] {}", result.text);
            } else {
                print!("\r[INTERIM] {:<80}", result.text);
                use std::io::{self, Write};
                io::stdout().flush()?;
            }
        }
    }

    Ok(())
}

/// List all available audio devices
fn list_audio_devices() -> Result<()> {
    let host = cpal::default_host();

    println!("\n📊 Available Audio Devices:");
    println!("{}", "─".repeat(80));

    // List input devices
    println!("\n🎤 Input Devices:");
    for (idx, device) in host.input_devices()?.enumerate() {
        let name = device.name()?;
        let is_monitor = name.contains("monitor") || name.contains("Monitor")
            || name.contains("loopback") || name.contains("Loopback")
            || name.contains("stereo mix") || name.contains("Stereo Mix");

        if is_monitor {
            println!("  [{}] {} 🔊 (System Audio)", idx, name);
        } else {
            println!("  [{}] {}", idx, name);
        }
    }

    // Show default device
    if let Some(device) = host.default_input_device() {
        println!("\n⭐ Default Input: {}", device.name()?);
    }

    println!("\n💡 Tips for capturing system audio:");
    println!("   Linux:   Look for devices with 'monitor' in the name (PulseAudio/PipeWire)");
    println!("   Windows: Install VB-Cable or enable 'Stereo Mix' in sound settings");
    println!("   macOS:   Install BlackHole or Loopback for system audio capture");

    Ok(())
}

/// Capture audio from system (or specified device)
fn capture_system_audio(tx: tokio_mpsc::Sender<AudioChunk>, device_name: Option<String>) -> Result<()> {
    let host = cpal::default_host();

    // Find the appropriate audio device
    let device = if let Some(name) = device_name {
        // Try to find device by name
        let mut found_device = None;
        for input_device in host.input_devices()? {
            if input_device.name()?.contains(&name) {
                found_device = Some(input_device);
                break;
            }
        }
        found_device.ok_or_else(|| {
            anyhow::anyhow!("Device '{}' not found. Use --list-devices to see available devices.", name)
        })?
    } else {
        // Try to find a monitor/loopback device automatically
        let mut monitor_device = None;
        for input_device in host.input_devices()? {
            let name = input_device.name()?;
            if name.contains("monitor") || name.contains("Monitor")
                || name.contains("loopback") || name.contains("Loopback")
                || name.contains("stereo mix") || name.contains("Stereo Mix")
            {
                info!("Found system audio device: {}", name);
                monitor_device = Some(input_device);
                break;
            }
        }

        if let Some(device) = monitor_device {
            device
        } else {
            warn!("No system audio device found. Using default input device.");
            warn!("To capture system audio, you may need to:");
            warn!("  - Linux: Enable PulseAudio monitor");
            warn!("  - Windows: Enable Stereo Mix or install VB-Cable");
            warn!("  - macOS: Install BlackHole or Loopback");
            host.default_input_device()
                .ok_or_else(|| anyhow::anyhow!("No input device available"))?
        }
    };

    info!("Using audio device: {}", device.name()?);

    // Configure audio capture for 16kHz mono PCM16
    let config = cpal::StreamConfig {
        channels: 1,
        sample_rate: cpal::SampleRate(16000),
        buffer_size: cpal::BufferSize::Default,
    };

    // Buffer to accumulate audio samples
    let buffer = Arc::new(Mutex::new(Vec::new()));
    let buffer_clone = buffer.clone();

    // Create audio stream
    let stream = device.build_input_stream(
        &config,
        move |data: &[i16], _: &cpal::InputCallbackInfo| {
            let mut buf = buffer_clone.lock().unwrap();
            buf.extend_from_slice(data);

            // Send chunks of ~3 seconds (48000 samples at 16kHz) for better accuracy
            while buf.len() >= 48000 {
                let chunk: Vec<i16> = buf.drain(..48000).collect();

                // Convert i16 samples to little-endian bytes
                let bytes: Vec<u8> = chunk.iter()
                    .flat_map(|&sample| sample.to_le_bytes())
                    .collect();

                let audio_chunk = AudioChunk {
                    audio_data: bytes,
                    session_id: String::new(),
                    config: None,
                };

                // Send without blocking the real-time audio callback; try_send
                // simply drops the chunk if the channel is full or closed.
                // (tokio::spawn cannot be used here: the cpal callback thread
                // runs outside any Tokio runtime and would panic.)
                let _ = tx.try_send(audio_chunk);
            }
        },
        move |err| {
            error!("Audio stream error: {}", err);
        },
        None,
    )?;

    // Start the stream
    stream.play()?;
    info!("Audio capture started");

    // Keep the stream (and this thread) alive
    loop {
        std::thread::sleep(std::time::Duration::from_secs(1));
    }
}
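
The flat_map over to_le_bytes in the capture callback fixes the wire format of AudioChunk.audio_data: 16-bit little-endian PCM. A standalone round-trip check of that encoding, illustrative only and not part of the commit:

fn main() {
    let samples: Vec<i16> = vec![0, 1, -1, i16::MAX, i16::MIN];
    // Encode exactly as the capture callback does: 16-bit little-endian PCM
    let bytes: Vec<u8> = samples.iter().flat_map(|&s| s.to_le_bytes()).collect();
    // Decode the way a PCM16 consumer would read it back
    let decoded: Vec<i16> = bytes
        .chunks_exact(2)
        .map(|b| i16::from_le_bytes([b[0], b[1]]))
        .collect();
    assert_eq!(samples, decoded);
    println!("round trip ok: {} samples -> {} bytes", samples.len(), bytes.len());
}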

examples/rust-client/transcribe_video_call.sh (new executable file, 139 lines)
@@ -0,0 +1,139 @@
#!/bin/bash

# Enhanced script for transcribing video calls on Ubuntu with PipeWire
# Uses parec (PulseAudio compatibility) to capture system audio

set -e

echo "🎥 Video Call Transcription Service"
echo "===================================="
echo ""

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Check dependencies
check_dependency() {
    if ! command -v "$1" &> /dev/null; then
        echo -e "${RED}❌ $1 not found.${NC}"
        echo "Please install: sudo apt-get install $2"
        return 1
    fi
    return 0
}

echo "Checking dependencies..."
check_dependency "parec" "pulseaudio-utils" || exit 1
check_dependency "sox" "sox" || echo -e "${YELLOW}⚠️  sox not installed (optional but recommended)${NC}"

# Function to find the monitor source for system audio
find_monitor_source() {
    # List all sources and find monitors (what you hear)
    local monitors=$(pactl list sources short 2>/dev/null | grep -i "monitor" | awk '{print $2}')

    if [ -z "$monitors" ]; then
        # Try pacmd if pactl doesn't work
        monitors=$(pacmd list-sources 2>/dev/null | grep "name:" | grep "monitor" | sed 's/.*<\(.*\)>.*/\1/')
    fi

    if [ -z "$monitors" ]; then
        # Fallback: try to construct the monitor name from the default sink
        local default_sink=$(pactl info 2>/dev/null | grep "Default Sink" | cut -d: -f2 | xargs)
        if [ -n "$default_sink" ]; then
            monitors="${default_sink}.monitor"
        fi
    fi

    echo "$monitors" | head -1
}

# List available sources
if [ "$1" == "--list" ]; then
    echo -e "${GREEN}📊 Available Audio Sources:${NC}"
    echo ""
    pactl list sources short 2>/dev/null || pacmd list-sources 2>/dev/null | grep "name:"
    echo ""
    echo -e "${GREEN}💡 Monitor sources (system audio):${NC}"
    pactl list sources short 2>/dev/null | grep -i "monitor" || echo "No monitor sources found"
    exit 0
fi

# Help message
if [ "$1" == "--help" ] || [ "$1" == "-h" ]; then
    echo "Usage: $0 [OPTIONS]"
    echo ""
    echo "Options:"
    echo "  --list            List all available audio sources"
    echo "  --source SOURCE   Use a specific audio source"
    echo "  --microphone      Capture microphone instead of system audio"
    echo "  --combined        Capture both microphone and system audio"
    echo "  --help, -h        Show this help message"
    echo ""
    echo "Examples:"
    echo "  $0                # Auto-detect and transcribe system audio"
    echo "  $0 --microphone   # Transcribe from microphone"
    echo "  $0 --combined     # Transcribe both mic and system audio"
    echo ""
    exit 0
fi

# Determine what to capture
if [ "$1" == "--microphone" ]; then
    echo -e "${GREEN}🎤 Using microphone input${NC}"
    # Hand off to the existing live-transcribe binary for microphone capture
    exec cargo run --bin live-transcribe
elif [ "$1" == "--combined" ]; then
    echo -e "${YELLOW}🎤+🔊 Combined audio capture not yet implemented${NC}"
    echo "For now, please run two separate instances:"
    echo "  1. $0               (for system audio)"
    echo "  2. $0 --microphone  (for mic)"
    exit 1
elif [ "$1" == "--source" ] && [ -n "$2" ]; then
    SOURCE="$2"
    echo -e "${GREEN}📡 Using specified source: $SOURCE${NC}"
else
    # Auto-detect monitor source
    SOURCE=$(find_monitor_source)
    if [ -z "$SOURCE" ]; then
        echo -e "${RED}❌ Could not find system audio monitor source${NC}"
        echo ""
        echo "This might happen if:"
        echo "  1. No audio is currently playing"
        echo "  2. PipeWire/PulseAudio is not running"
        echo ""
        echo "Try:"
        echo "  1. Play some audio (music/video)"
        echo "  2. Run: $0 --list"
        echo "  3. Use a specific source: $0 --source <source_name>"
        exit 1
    fi
    echo -e "${GREEN}📡 Found system audio source: $SOURCE${NC}"
fi

echo ""
echo -e "${GREEN}🎬 Starting video call transcription...${NC}"
echo -e "${YELLOW}Press Ctrl+C to stop${NC}"
echo ""
echo "💡 Tips for best results:"
echo "   • Join your video call first"
echo "   • Use headphones to avoid echo"
echo "   • Close other audio sources (music, videos)"
echo "   • Speak clearly for better transcription"
echo ""
echo "────────────────────────────────────────────────────────────────────"
echo ""

# Start audio capture and transcription
echo -e "${GREEN}Starting audio capture from: $SOURCE${NC}"
echo -e "${GREEN}Starting transcription service...${NC}"
echo ""

# Use the new stdin-transcribe binary, which accepts piped audio:
# parec captures system audio and pipes raw s16le PCM straight into it.
# --no-vad disables Voice Activity Detection for system audio (YouTube, music, etc.)
parec --format=s16le --rate=16000 --channels=1 --device="$SOURCE" 2>/dev/null | \
    cargo run --bin stdin-transcribe -- --language en --chunk-seconds 2.5 --no-vad
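
The parec flags must agree with what stdin-transcribe assumes: raw s16le PCM at 16 kHz mono is 2 bytes per sample, so --chunk-seconds 2.5 means 80,000-byte reads per chunk. A standalone sketch of that sizing math (the helper name is illustrative, not from the repo):

// Mirrors the chunk sizing in read_stdin_and_send (hypothetical helper for illustration)
fn chunk_bytes(sample_rate: u32, seconds: f32, bytes_per_sample: usize) -> usize {
    (sample_rate as f32 * seconds) as usize * bytes_per_sample
}

fn main() {
    // parec --format=s16le --rate=16000 --channels=1, read in 2.5 s chunks
    let n = chunk_bytes(16_000, 2.5, 2);
    assert_eq!(n, 80_000);
    println!("2.5 s of 16 kHz mono PCM16 = {n} bytes per chunk");
}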