#! /usr/bin/env bash
#
# The authors of this file have waived all copyright and
# related or neighboring rights to the extent permitted by
# law as described by the CC0 1.0 Universal Public Domain
# Dedication. You should have received a copy of the full
# dedication along with this file, typically as a file
# named <CC0-1.0.txt>. If not, it may be available at
# <https://creativecommons.org/publicdomain/zero/1.0/>.
#
#
# This is Qtts, a text to speech CLI.
#
# See .
#
#
# TODO: This doc is old. Remove it after it's completely obsolete.
#
# Usage: qtts [<text>...]
#
# Converts text to speech.
#
# If at least one <text> is given, the input is the space-separated
# concatenation of the <text>s. Otherwise, the input is read from
# standard input.
#
# By default, this script will try to find and use the best text to
# speech model you have available. To use a specific model, set the
# QTTS_MODEL environment variable to one of the models detailed below.
#
# By default, the speech is played aloud using your sound card. To save
# it to an audio file instead, set the QTTS_OUTPUT environment variable
# to a file path. The output file format will be inferred from the file
# extension (.wav, .ogg, .m4a, etc).
#
# Unless otherwise stated, all models are run locally inside a Docker
# container with internet access disabled. However, the first time you
# use such a model, it may need to run a one-time build process that
# accesses the internet. This one-time build process will never leak
# your input, though.
#
# The supported models are as follows:
#
#    chatterbox
#
#       This is .
#
#    dotnet
#
#       This is the SpeechSynthesizer class from .NET, run
#       directly on your host machine using PowerShell.
#
#       It's unlikely that this model will ever access the
#       internet, but it's still just a .NET call on your host
#       machine. Use at your own risk.
#
#       If you're on Windows and don't have QTTS_MODEL set, this
#       model will always be tried as a last resort.
#
#    kokoro
#
#       This is .
#
#-----------------------------------------------------------------------
# Set up initial error handling
#-----------------------------------------------------------------------

# Abort on any unhandled command failure (-e) and on any expansion of
# an unset variable (-u). If the shell rejects the options, exit.
set -e -u || exit

#-----------------------------------------------------------------------
# Set up qsh
#-----------------------------------------------------------------------
#
# This section was generated using qsh.
# See .
#
# The authors of this section have waived all copyright and
# related or neighboring rights to the extent permitted by
# law as described by the CC0 1.0 Universal Public Domain
# Dedication. You should have received a copy of the full
# dedication along with this file, typically as a file
# named <CC0-1.0.txt>. If not, it may be available at
# <https://creativecommons.org/publicdomain/zero/1.0/>.
#

#
# qsh_barf [<text>...]
#
# Prints "$0: Error: <text> <text> ..." followed by a period to
# standard error and exits. With no arguments the text is "Unknown
# error". The exit status is $qsh_exit_status if that variable is set,
# otherwise 1.
#
# This function is called by the Bash version check below, so it runs
# before anything has confirmed which shell is in use.
#

qsh_barf() {
  case $# in (0)
    set "Unknown error"
  esac
  qsh_barf_message="$0: Error:"
  for qsh_barf_text; do
    qsh_barf_message="$qsh_barf_message $qsh_barf_text"
  done
  qsh_barf_message=$qsh_barf_message.
  printf '%s\n' "$qsh_barf_message" >&2
  exit "${qsh_exit_status-1}"
}

#-----------------------------------------------------------------------
# Set up Bash
#-----------------------------------------------------------------------

# $BASH_VERSINFO without a subscript is element 0, the major version.
# Reject it when it is empty or unset (not Bash), contains a non-digit,
# starts with 0, or is a single digit from 0 to 4.
case ${BASH_VERSINFO-} in ('' | *[!0-9]* | 0* | [0-4])
  qsh_barf \
    "This script requires Bash version 5.0 or later." \
    "You have version ${BASH_VERSION-unknown}" \
  ;
esac

# A pipeline fails if any of its stages fails.
set -o pipefail

# Command substitutions inherit errexit instead of clearing it.
shopt -s \
  inherit_errexit \
;

#-----------------------------------------------------------------------

# Force the C locale for this script and everything it runs, and make
# sure word splitting uses the default IFS behavior.
declare -r -x LC_ALL=C
unset IFS

#-----------------------------------------------------------------------
# Create a temporary directory
#-----------------------------------------------------------------------
#
# After this section, $tmpdir is a readonly absolute path to a
# directory with mode 700. The EXIT trap removes it when the script
# exits.
#

if tmpdir=$(mktemp -d 2>/dev/null); then
  # Make the path absolute if mktemp returned a relative one.
  if [[ ${tmpdir:0:1} != / ]]; then
    if [[ ${PWD: -1:1} == / ]]; then
      tmpdir=$PWD$tmpdir
    else
      tmpdir=$PWD/$tmpdir
    fi
  fi
  # Never leave the path ending in a slash.
  if [[ ${tmpdir: -1:1} == / ]]; then
    tmpdir+=.
  fi
else
  # mktemp is missing or failed. Build a name by hand instead:
  # ${TMPDIR:-/tmp}/tmp. followed by 10 random consonants.
  tmpdir=${TMPDIR:-/tmp}
  if [[ ${tmpdir:0:1} != / ]]; then
    if [[ ${PWD: -1:1} == / ]]; then
      tmpdir=$PWD$tmpdir
    else
      tmpdir=$PWD/$tmpdir
    fi
  fi
  mkdir -p "$tmpdir"
  if [[ ${tmpdir: -1:1} != / ]]; then
    tmpdir+=/
  fi
  tmpdir+=tmp.
  x=bcdfghjklmnpqrstvwxyz
  for ((i = 0; i < 10; ++i)); do
    tmpdir+=${x:RANDOM % ${#x}:1}
  done
  # No -p here, so this fails (and errexit aborts) if the name is
  # already taken.
  mkdir "$tmpdir"
fi
chmod 700 "$tmpdir"
readonly tmpdir

# Remove the temporary directory on exit. The removal is best-effort,
# and the script's original exit status is preserved.
trap '
  readonly exit_status=$?
  rm -f -r "$tmpdir" || :
  exit $exit_status
' EXIT

#-----------------------------------------------------------------------
# Parse the command-line arguments
#-----------------------------------------------------------------------
#
# Options set QTTS_MODEL, QTTS_OUTPUT, and QTTS_VOICE. Every other
# argument is text: the text arguments are joined with single spaces
# into $input_lines. After a "--" argument, everything is text.
#
# Only the "--foo=bar" spelling is handled directly. The "--foo bar",
# "-f bar", and "-fbar" spellings are rewritten to "--foo=bar", pushed
# back onto the front of the positional parameters, and picked up by
# the next loop iteration.
#

# Maps each short option to its long spelling.
declare -A -r short_to_long=(
  [-m]=--model
  [-o]=--output
  [-v]=--voice
)

parse_options=1
unset input_lines

while (($# > 0)); do

  if ((parse_options)); then

    #-------------------------------------------------------------------
    # Options terminator
    #-------------------------------------------------------------------

    case $1 in (--)
      parse_options=0
      shift
      continue

    #-------------------------------------------------------------------
    # --model=<model>, -m <model>
    #-------------------------------------------------------------------

    ;; (--model=*)
      QTTS_MODEL=${1#*=}
      shift
      continue

    #-------------------------------------------------------------------
    # --output=<file>, -o <file>
    #-------------------------------------------------------------------

    ;; (--output=*)
      QTTS_OUTPUT=${1#*=}
      shift
      continue

    #-------------------------------------------------------------------
    # --voice=<voice>, -v <voice>
    #-------------------------------------------------------------------

    ;; (--voice=*)
      QTTS_VOICE=${1#*=}
      shift
      continue

    #-------------------------------------------------------------------
    # --foo bar to --foo=bar adjustments
    #-------------------------------------------------------------------

    ;; ( \
      --model | \
      --output | \
      --voice \
    )
      if (($# < 2)); then
        printf '%s\n' "$0: $1 requires an argument" >&2
        exit 1
      fi
      x=$1=$2
      shift 2
      set -- "$x" "$@"
      continue

    #-------------------------------------------------------------------
    # -f bar to --foo=bar adjustments
    #-------------------------------------------------------------------

    ;; (-[mov])
      if (($# < 2)); then
        printf '%s\n' "$0: $1 requires an argument" >&2
        exit 1
      fi
      x=${short_to_long[$1]}=$2
      shift 2
      set -- "$x" "$@"
      continue

    #-------------------------------------------------------------------
    # -fbar to --foo=bar adjustments
    #-------------------------------------------------------------------

    ;; (-[mov]?*)
      x=${short_to_long[${1:0:2}]}=${1:2}
      shift
      set -- "$x" "$@"
      continue

    #-------------------------------------------------------------------
    # Long options incorrectly given arguments
    #-------------------------------------------------------------------

    ;; ( \
      --=* \
    )
      x=${1%%=*}
      printf '%s\n' "$0: $x forbids an argument" >&2
      exit 1

    #-------------------------------------------------------------------
    # Unknown long options
    #-------------------------------------------------------------------

    ;; (--*)
      x=${1%%=*}
      printf '%s\n' "$0: Unknown option: ${x@Q}" >&2
      exit 1

    #-------------------------------------------------------------------
    # Unknown short options
    #-------------------------------------------------------------------

    ;; (-?*)
      x=${1:0:2}
      printf '%s\n' "$0: Unknown option: ${x@Q}" >&2
      exit 1

    #-------------------------------------------------------------------

    esac

  fi

  # Not an option (or options are switched off): append it to the text.
  if [[ ! ${input_lines+x} ]]; then
    input_lines=$1
  else
    input_lines+=" $1"
  fi
  shift

done

# No text arguments at all (not even an empty one): read the text from
# standard input.
if [[ ! ${input_lines+x} ]]; then
  input_lines=$(cat)
fi

#
# Some models pause on newlines or can't handle long input very well.
# To fix this, we join each run of nonempty lines into a single line and
# parse the resulting lines into an array to feed into the model one at
# a time. We also remove comment leaders like # and //.
# IFS=$'\n' input_lines=($( eval " awk"' \ '\'' { if ($0) { if (/^ *# /) { sub(/^ *# */, ""); } else if (/^ *\/\/ /) { sub(/^ *\/\/ */, ""); } if (buf) { buf = buf " "; } buf = buf $0; } else if (buf) { print buf; buf = ""; } } END { if (buf) { print buf; } } '\'' \ <<<"$input_lines" \ ;' )) readonly input_lines unset IFS #----------------------------------------------------------------------- if [[ ! ${QTTS_HAVE_DOCKER+x} ]]; then QTTS_HAVE_DOCKER=0 if command -v docker &>/dev/null; then QTTS_HAVE_DOCKER=1 fi fi readonly QTTS_HAVE_DOCKER if [[ ! ${QTTS_DOCKER_GPUS+x} ]]; then QTTS_DOCKER_GPUS= if [[ $QTTS_HAVE_DOCKER == 1 ]]; then if docker run --rm --gpus all hello-world &>/dev/null; then QTTS_DOCKER_GPUS='--gpus all' fi fi fi readonly QTTS_DOCKER_GPUS #----------------------------------------------------------------------- run_dockerized_model() { local -r model=$1 local -n image=QTTS_${model@U}_IMAGE if [[ ! ${image+x} ]]; then image=docker.io/quinngrier/qtts-$model fi docker image inspect "$image" >/dev/null && : if (($? != 0)); then docker pull "$image" >&2 fi pid= trap ' if [[ $pid ]]; then kill $pid || : fi ' SIGINT if [[ ! ${QTTS_OUTPUT+x} ]]; then ( cd "$tmpdir" for ((i = 0; i < ${#input_lines[@]}; ++i)); do while [[ ! -f $i.raw ]]; do sleep 0.2 done ffplay \ -loglevel fatal \ -nodisp \ -autoexit \ -f f32le \ -ar 24000 \ -ch_layout mono \ -i $i.raw \ >/dev/null \ ; done ) & pid=$! 
fi pushd "$tmpdir" >/dev/null for ((i = 0; i < ${#input_lines[@]}; ++i)); do docker run \ $QTTS_DOCKER_GPUS \ --network none \ --rm \ --stop-timeout 0 \ -i \ ${QTTS_VOICE+-e QTTS_VOICE="$QTTS_VOICE"} \ "$image" \ python /main.py \ <<<"${input_lines[i]}" \ >$i.raw.tmp \ ; mv -f $i.raw.tmp $i.raw done popd >/dev/null if [[ ${QTTS_OUTPUT+x} ]]; then srcs= for ((i = 0; i < ${#input_lines[@]}; ++i)); do srcs+=" $i.raw" done pushd "$tmpdir" >/dev/null dst=dst.${QTTS_OUTPUT##*/} cat $srcs | ffmpeg \ -loglevel fatal \ -f f32le \ -ar 24000 \ -ch_layout mono \ -i - \ "$dst" \ ; popd >/dev/null mv -f "$tmpdir/$dst" "$QTTS_OUTPUT" else wait $pid fi }; readonly -f run_dockerized_model #----------------------------------------------------------------------- # chatterbox #----------------------------------------------------------------------- if [[ ${QTTS_MODEL-} == chatterbox ]]; then if [[ ! $QTTS_DOCKER_GPUS ]]; then printf '%s\n' "$0: QTTS_MODEL=chatterbox requires QTTS_DOCKER_GPUS." >&2 exit 1 fi x=$(docker images -q qtts-chatterbox) if [[ ! 
$x ]]; then docker build -t qtts-chatterbox.tmp - <<<' FROM python:3.9 RUN pip install chatterbox-tts==0.1.1 RUN printf '\''%s\n'\'' \ '\''from chatterbox.tts import ChatterboxTTS'\'' \ '\''from contextlib import redirect_stdout'\'' \ '\''import os'\'' \ '\''import sys'\'' \ '\''text = sys.stdin.read()'\'' \ '\''with open(os.devnull, "w") as f, redirect_stdout(f):'\'' \ '\'' model = ChatterboxTTS.from_pretrained(device="cuda")'\'' \ '\'' speech = model.generate(text, cfg_weight=0.3)'\'' \ '\''speech = speech.cpu().numpy().tobytes()'\'' \ '\''sys.stdout.buffer.write(speech)'\'' \ >/run.py \ ; ' >&2 docker rm -f qtts-chatterbox.tmp docker run \ $QTTS_DOCKER_GPUS \ --name qtts-chatterbox.tmp \ --stop-timeout 0 \ -i \ qtts-chatterbox.tmp \ python /run.py \ <<<"hello" \ >/dev/null \ ; docker commit qtts-chatterbox.tmp qtts-chatterbox docker rm qtts-chatterbox.tmp docker rmi qtts-chatterbox.tmp fi pid= trap ' if [[ $pid ]]; then kill $pid || : fi ' SIGINT if [[ ! ${QTTS_OUTPUT+x} ]]; then ( cd "$tmpdir" for ((i = 0; i < ${#input_lines[@]}; ++i)); do while [[ ! -f $i.raw ]]; do sleep 0.2 done ffplay \ -loglevel fatal \ -nodisp \ -autoexit \ -f f32le \ -ar 24000 \ -ch_layout mono \ -i $i.raw \ >/dev/null \ ; done ) & pid=$! 
fi pushd "$tmpdir" >/dev/null for ((i = 0; i < ${#input_lines[@]}; ++i)); do docker run \ $QTTS_DOCKER_GPUS \ --network none \ --rm \ --stop-timeout 0 \ -i \ qtts-chatterbox \ python /run.py \ <<<"${input_lines[i]}" \ >$i.raw.tmp \ 2>/dev/null \ ; mv -f $i.raw.tmp $i.raw done popd >/dev/null if [[ ${QTTS_OUTPUT+x} ]]; then srcs= for ((i = 0; i < ${#input_lines[@]}; ++i)); do srcs+=" $i.raw" done pushd "$tmpdir" >/dev/null dst=dst.${QTTS_OUTPUT##*/} cat $srcs | ffmpeg \ -loglevel fatal \ -f f32le \ -ar 24000 \ -ch_layout mono \ -i - \ "$dst" \ ; popd >/dev/null mv -f "$tmpdir/$dst" "$QTTS_OUTPUT" else wait $pid fi exit fi #----------------------------------------------------------------------- # kitten #----------------------------------------------------------------------- if [[ ${QTTS_MODEL-} == kitten ]]; then run_dockerized_model kitten exit fi #----------------------------------------------------------------------- # kokoro #----------------------------------------------------------------------- if [[ ${QTTS_MODEL-} == kokoro || \ ( ! ${QTTS_MODEL+x} && $QTTS_HAVE_DOCKER == 1 ) ]]; then run_dockerized_model kokoro exit fi #----------------------------------------------------------------------- # .NET #----------------------------------------------------------------------- if [[ ${QTTS_MODEL-} == dotnet || \ ( ! ${QTTS_MODEL+x} && ${WINDIR+x} ) ]]; then if [[ ${QTTS_OUTPUT+x} ]]; then printf '%s\n' "QTTS_OUTPUT is not supported for this model yet." >&2 exit 1 fi x=$( awk ' BEGIN { q = "'\''"; } { if (NR > 1) { printf "%s", " + \"`r`n\" + " } gsub(/'\''/, q q); printf "%s", q $0 q; } ' <<<"${input_lines[@]}" ) unset pid trap ' case ${pid-} in *?) kill -s INT $pid || : esac ' INT PowerShell -Command - <&2 exit 1