# Voxtype Configuration
#
# Location: ~/.config/voxtype/config.toml
# All settings can be overridden via CLI flags

# Transcription engine: "whisper" (default) or "parakeet"
# Whisper: whisper.cpp via whisper-rs (most compatible)
# Parakeet: NVIDIA FastConformer via ONNX Runtime (requires a build with
#           --features parakeet; see the [parakeet] section below)
# engine = "whisper"

# State file for external integrations (Waybar, polybar, etc.)
# Accepted values:
#   "auto"     - default location ($XDG_RUNTIME_DIR/voxtype/state)
#   a path     - custom location
#   "disabled" - turn the state file off
# The daemon writes the current state ("idle", "recording", or "transcribing")
# to this file whenever it changes.
# Required for the `voxtype record toggle` and `voxtype status` commands.
state_file = "auto"

[hotkey]
# Built-in hotkey using evdev (Linux input subsystem)
#
# Most users should leave this disabled and use compositor keybindings instead:
#   - Hyprland: bind/bindr in hyprland.conf
#   - Sway: bindsym --no-repeat/--release in config
#   - River: riverctl map / map -release
#
# Enable this if you're on X11, using a compositor without key-release support,
# or prefer a dedicated key like ScrollLock. Requires 'input' group membership.
enabled = false

# Hotkey that controls recording (only used when enabled = true)
# How it is used depends on `mode` below: held down in push_to_talk mode,
# pressed once to start and again to stop in toggle mode.
# Common choices: SCROLLLOCK, PAUSE, RIGHTALT, F13-F24
# Use `evtest` to find key names for your keyboard
key = "SCROLLLOCK"

# Optional modifier keys that must also be held
# Example: modifiers = ["LEFTCTRL", "LEFTALT"]
modifiers = []

# Activation mode: "push_to_talk" or "toggle"
# - push_to_talk: Hold hotkey to record, release to transcribe (default)
# - toggle: Press hotkey once to start recording, press again to stop
# mode = "push_to_talk"

[audio]
# Audio input device ("default" uses system default)
# List devices with: pactl list sources short
device = "default"

# Sample rate in Hz (whisper expects 16000)
sample_rate = 16000

# Maximum recording duration in seconds (safety limit)
max_duration_secs = 60

# [audio.feedback]
# To use these settings, uncomment the [audio.feedback] header above along with
# the keys below; without the header they would be read as [audio] keys.
#
# Enable audio feedback sounds (beeps when recording starts/stops)
# enabled = true
#
# Sound theme: "default", "subtle", "mechanical", or path to custom theme directory
# theme = "default"
#
# Volume level (0.0 to 1.0)
# volume = 0.7

[whisper]
# Execution mode: "local" (default), "remote", or "cli"
# - local: Use whisper.cpp locally via FFI
# - remote: Send audio to OpenAI-compatible API endpoint
# - cli: Run whisper-cli as a subprocess (see "CLI mode settings" below)
# mode = "local"

# Model to use for transcription (local mode)
# Options: tiny, tiny.en, base, base.en, small, small.en, medium, medium.en,
#          large-v3, large-v3-turbo
# .en models are English-only but faster and more accurate for English
# Or provide absolute path to a custom .bin model file
model = "base.en"

# Language for transcription
# Use "en" for English, "auto" for auto-detection
# See: https://github.com/openai/whisper#available-models-and-languages
language = "en"

# Translate non-English speech to English
translate = false

# Number of CPU threads for inference (omit for auto-detection)
# threads = 4

# Model loading strategy
# - true: load the model when recording starts and unload it after transcription
# - false: keep the model in memory for faster response times (default)
on_demand_loading = false

# GPU memory isolation mode
# Enable on laptops with hybrid graphics to let dGPU sleep between transcriptions.
# gpu_isolation = true

# Context window optimization for short recordings (disabled by default)
# Speeds up transcription for clips under 22.5 seconds. It is left off
# because some models (especially large-v3/turbo) may experience repetition
# loops. Enable if you want faster transcription and don't experience issues.
# context_window_optimization = true

# --- Remote mode settings (used when mode = "remote") ---
# remote_endpoint = "http://192.168.1.100:8080"  # Required
# remote_model = "whisper-1"                      # Model to request from server
# remote_api_key = "sk-..."                       # Or use VOXTYPE_WHISPER_API_KEY env var
# remote_timeout_secs = 30

# --- CLI mode settings (used when mode = "cli") ---
# Uses whisper-cli subprocess instead of whisper-rs FFI bindings.
# Fallback for systems where whisper-rs crashes (e.g., glibc 2.42+ on Ubuntu 25.10).
# Requires whisper-cli from whisper.cpp: https://github.com/ggerganov/whisper.cpp
# whisper_cli_path = "/usr/local/bin/whisper-cli"  # Optional, searches PATH if not set

# [parakeet]
# Parakeet configuration (only used when engine = "parakeet")
# Requires: cargo build --features parakeet
# Uncomment the [parakeet] header above together with any keys you set below.
#
# Model name (from ~/.local/share/voxtype/models/) or absolute path
# model = "parakeet-tdt-0.6b-v3"
#
# Model type: "tdt" (recommended, proper punctuation) or "ctc" (faster, char-level)
# Auto-detected from model directory if not specified
# model_type = "tdt"
#
# on_demand_loading = false

[output]
# Primary output mode: "type", "clipboard", or "paste"
# - type: Simulates keyboard input at cursor position (requires wtype or ydotool)
# - clipboard: Copies text to clipboard (requires wl-copy)
# - paste: Copies to clipboard then simulates paste keystroke
#          (requires wl-copy, plus either wtype or ydotool)
mode = "type"

# Fall back to clipboard if typing fails
fallback_to_clipboard = true

# Delay between typed characters in milliseconds
# 0 = fastest possible, increase if characters are dropped
type_delay_ms = 0

# Delay before typing starts in milliseconds
# Allows virtual keyboard to initialize. Some users report this helps prevent
# the first character from being dropped on text insertion. Try 100-200ms.
# Note: When using compositor integration (via `voxtype setup compositor`),
# best results come from not binding Escape in the submap. Some users have
# had success with Escape bound by increasing this delay, but the most
# consistent fix is to use F12 or another key instead.
pre_type_delay_ms = 0

# Keystroke for paste mode (when mode = "paste")
# Default is "ctrl+v". Change for environments with different paste shortcuts.
# Examples:
#   paste_keys = "ctrl+v"        # Standard (default)
#   paste_keys = "shift+insert"  # Hyprland/Omarchy universal paste
#   paste_keys = "ctrl+shift+v"  # Some terminal emulators
# paste_keys = "ctrl+v"

# Compositor integration hooks
# Use `voxtype setup compositor hyprland|sway|river` for automatic setup.
#
# pre_recording_command: Runs when recording starts. Switch to a submap/mode
#   where F12 can cancel recording/transcription.
# pre_output_command: Runs before typing output. Switch to a submap/mode that
#   blocks modifier keys from triggering compositor shortcuts during typing.
# post_output_command: Runs after typing output. Reset to normal mode.
#
# Example (Hyprland):
# pre_recording_command = "hyprctl dispatch submap voxtype_recording"
# pre_output_command = "hyprctl dispatch submap voxtype_suppress"
# post_output_command = "hyprctl dispatch submap reset"

[output.notification]
# Notifications shown at each stage of a dictation; each can be toggled separately.

# Show notification when recording starts (hotkey pressed)
on_recording_start = false

# Show notification when recording stops (transcription beginning)
on_recording_stop = false

# Show notification with transcribed text after transcription completes
on_transcription = true

# [output.post_process]
# Uncomment the [output.post_process] header above together with the keys below.
#
# Pipe transcribed text through an external command for cleanup before output.
# The command receives text on stdin and outputs processed text on stdout.
# Useful for LLM-based text cleanup, grammar correction, filler word removal.
# On any failure (timeout, error), falls back to original transcription.
#
# command = "ollama run llama3.2:1b 'Clean up this dictation. Fix grammar, remove filler words. Output only the cleaned text:'"
# timeout_ms = 30000  # 30 second timeout (generous for LLM)

# [text]
# Text processing options (word replacements, spoken punctuation)
# Uncomment the [text] header above together with any keys you enable below.
#
# Enable spoken punctuation conversion (e.g., say "period" to get ".")
# spoken_punctuation = false
#
# Custom word replacements (case-insensitive)
# replacements = { "vox type" = "voxtype" }

[status]
# Status display icons for Waybar/tray integrations
#
# Icon theme (or path to custom theme file):
#   Font-based (require specific fonts):
#     - "emoji"     - Default emoji icons (🎙️ 🎤 ⏳)
#     - "nerd-font" - Nerd Font icons (requires Nerd Font)
#     - "material"  - Material Design Icons (requires MDI font)
#     - "phosphor"  - Phosphor Icons (requires Phosphor font)
#     - "codicons"  - VS Code icons (requires Codicons font)
#     - "omarchy"   - Omarchy distro icons
#   Universal (no special fonts needed):
#     - "minimal"   - Simple Unicode (○ ● ◐ ×)
#     - "dots"      - Geometric shapes (◯ ⬤ ◔ ◌)
#     - "arrows"    - Media player style (▶ ● ↻ ■)
#     - "text"      - Plain text ([MIC] [REC] [...] [OFF])
icon_theme = "emoji"

# Per-state icon overrides (optional; these take precedence over the theme)
# Uncomment the [status.icons] header below together with the keys you override.
# [status.icons]
# idle = "🎙️"
# recording = "🎤"
# transcribing = "⏳"
# stopped = ""