// Mirror of https://github.com/aljazceru/transcription-api.git
// (synced 2025-12-17 07:14:24 +01:00).
// Commit "initial commit" adds proto/transcription.proto
// (normal file, 91 lines; diff hunk @@ -0,0 +1,91 @@).
|
||||
// Schema for the transcription service and its request/response messages.
syntax = "proto3";

package transcription;
|
||||
|
||||
// TranscriptionService provides real-time speech-to-text capabilities.
service TranscriptionService {
  // Bidirectional streaming: the client sends a stream of AudioChunk
  // messages and receives a stream of TranscriptionResult messages.
  rpc StreamTranscribe(stream AudioChunk) returns (stream TranscriptionResult);

  // Unary call that transcribes a single audio file.
  rpc TranscribeFile(AudioFile) returns (TranscriptionResponse);

  // Returns the available models and languages.
  rpc GetCapabilities(Empty) returns (Capabilities);

  // Health check.
  rpc HealthCheck(Empty) returns (HealthStatus);
}
|
||||
|
||||
// A single chunk of audio used for streaming transcription.
message AudioChunk {
  // PCM16 audio data (16-bit, 16kHz, mono).
  bytes audio_data = 1;

  // Optional session ID for tracking.
  string session_id = 2;

  // Optional configuration; only needed in the first chunk of a stream.
  AudioConfig config = 3;
}
|
||||
|
||||
// Audio configuration attached to an AudioChunk or an AudioFile.
message AudioConfig {
  // Language code (e.g., "en", "es", "auto").
  string language = 1;

  // Task to perform: "transcribe" or "translate".
  string task = 2;

  // Model size: "tiny", "base", "small", "medium", "large-v3".
  string model = 3;

  // Sample rate (default: 16000).
  // NOTE(review): proto3 has no custom field defaults, so an unset value
  // arrives as 0; the 16000 default presumably has to be applied by the
  // receiver -- confirm against the server implementation.
  int32 sample_rate = 4;

  // Whether Voice Activity Detection is enabled.
  bool vad_enabled = 5;
}
|
||||
|
||||
// A transcription result emitted while streaming.
message TranscriptionResult {
  // Transcribed text.
  string text = 1;

  // Start time in seconds.
  float start_time = 2;

  // End time in seconds.
  float end_time = 3;

  // True when this is a final result.
  bool is_final = 4;

  // Confidence score in the range 0-1.
  float confidence = 5;

  // Detected language.
  string language = 6;

  // Session ID for tracking.
  string session_id = 7;

  // Server timestamp in milliseconds.
  // NOTE(review): the reference epoch is not stated here -- confirm.
  int64 timestamp_ms = 8;
}
|
||||
|
||||
// A complete audio file submitted for transcription.
message AudioFile {
  // Complete audio file data.
  bytes audio_data = 1;

  // Format of audio_data: "wav", "mp3", "webm", "raw_pcm16".
  string format = 2;

  // Audio configuration.
  AudioConfig config = 3;
}
|
||||
|
||||
// Response for a file transcription.
message TranscriptionResponse {
  // Individual segments of the transcription.
  repeated TranscriptionSegment segments = 1;

  // Complete transcription.
  string full_text = 2;

  // Detected language.
  // NOTE(review): presumably a language code in the same form as
  // AudioConfig.language -- confirm.
  string detected_language = 3;

  // Duration in seconds.
  // NOTE(review): presumably the duration of the submitted audio rather
  // than the processing time -- confirm.
  float duration_seconds = 4;
}
|
||||
|
||||
// One segment of a transcription.
message TranscriptionSegment {
  // Text of this segment.
  string text = 1;

  // Start time of the segment.
  // NOTE(review): presumably seconds, as in TranscriptionResult -- confirm.
  float start_time = 2;

  // End time of the segment.
  // NOTE(review): presumably seconds, as in TranscriptionResult -- confirm.
  float end_time = 3;

  // Confidence for this segment.
  // NOTE(review): presumably a 0-1 score, as in TranscriptionResult --
  // confirm.
  float confidence = 4;
}
|
||||
|
||||
// Capabilities reported by the service.
message Capabilities {
  // Models that are available.
  repeated string available_models = 1;

  // Languages that are supported.
  repeated string supported_languages = 2;

  // Formats that are supported.
  // NOTE(review): presumably the values accepted by AudioFile.format --
  // confirm.
  repeated string supported_formats = 3;

  // Maximum audio length, in seconds.
  int32 max_audio_length_seconds = 4;

  // Whether streaming is supported.
  bool streaming_supported = 5;

  // Whether VAD is supported.
  bool vad_supported = 6;
}
|
||||
|
||||
// Health status reported by the service.
message HealthStatus {
  // Whether the service is healthy.
  bool healthy = 1;

  // Status string.
  // NOTE(review): the set of possible values is not defined here -- confirm.
  string status = 2;

  // NOTE(review): presumably the name of the currently loaded model --
  // confirm.
  string model_loaded = 3;

  // Uptime, in seconds.
  int64 uptime_seconds = 4;

  // Number of active sessions.
  int32 active_sessions = 5;
}
|
||||
|
||||
// Empty message for requests that take no parameters.
message Empty {}
|
||||
Reference in New Issue
Block a user