Skip to content

Instantly share code, notes, and snippets.

@henri
Last active December 8, 2025 04:16
Show Gist options
  • Select an option

  • Save henri/da3c9b133cba1622cd0ae9aaad8645fe to your computer and use it in GitHub Desktop.

Select an option

Save henri/da3c9b133cba1622cd0ae9aaad8645fe to your computer and use it in GitHub Desktop.
elevenlabs.io and localai.io API scripts
#!/bin/bash
#
# (C)Copyright 2025
# Henri Shustak
# Released Under the MIT licence
#
# Script which takes text via standard in and generates an audio file and then plays that audio file.
# Text to speech is processed via the LocalAI API. You will require an API key to get a valid response.
# You will also need to set up LocalAI and specify the host and port number (within the API_URL variable).
# Learn more about LocalAI : https://localai.io/
#
# Can be altered to work with other local / remote models as needed.
#
# Speak the clipboard : echo $(pbpaste) | ~/path/txt-2-speech-local.bash
# https://github.com/henri/handy-alias/blob/master/linux_alias.bash
#
# Usage : echo "hello, this is the text to speach system talking to you." | ~/bin/txt-2-speach-local.bash
#
# version 1.0 initial implementation
# version 1.1 added mpv option
#
# --- configuration ----------------------------------------------------------
API_KEY="replace-with-your-api-key"
API_URL="http://127.0.0.1:8080/tts"
MODEL="kokoro"

# on mac os you can install sox (cross platform) and just use the play command from that package.

# select_audio_player
# Sets PLAY_AUDIO_COMMAND to the first available player, in order of
# preference: mpv, parecord, then sox's play as the fallback.
# `command -v` is a shell builtin, so the lookup works even where the
# external `which` utility is not installed, and nothing is executed as a
# side effect of the test.
select_audio_player() {
  if command -v mpv >/dev/null 2>&1 ; then
    PLAY_AUDIO_COMMAND="mpv --no-config"
  elif command -v parecord >/dev/null 2>&1 ; then
    PLAY_AUDIO_COMMAND="parecord --play"
  else
    PLAY_AUDIO_COMMAND="play"
  fi
}
select_audio_player
# --- scratch files ----------------------------------------------------------
# Output names carry a timestamp so earlier recordings are kept between runs.
DATETIME=$(date +"%Y-%m-%d_%H-%M-%S")
# -p : an already existing directory is not an error, a real failure is reported.
mkdir -p /tmp/text-2-speach || { echo "ERROR! : unable to create /tmp/text-2-speach" >&2 ; exit 1 ; }
OUTPUT_FILE="/tmp/text-2-speach/audio_${DATETIME}.mp3"
STANDARD_SPEED_FILE="/tmp/text-2-speach/audio_standard${DATETIME}.mp3"

# --- input ------------------------------------------------------------------
# read all text from stdin
TEXT=$(cat)
# Fold every run of blanks / tabs / newlines into one space so the text ends up
# on a single line. `read -a` splits on whitespace WITHOUT pathname expansion,
# so a lone "*" or "?" in the text stays as typed instead of being replaced by
# file names from the current directory.
read -r -d '' -a text_words <<< "$TEXT"
TEXT="${text_words[*]}"
# remove non-ascii characters (LC_ALL=C makes tr work on plain bytes)
TEXT=$(printf '%s' "$TEXT" | LC_ALL=C tr -cd '\0-\177')
# show what is about to be spoken and wait for confirmation from the terminal
# (stdin is the text pipe, so the prompt has to read from /dev/tty).
printf '%s\n' "$TEXT"
read -r -p "press enter to convert to speach..." </dev/tty
# specify your text
# TEXT="Your text to convert to speech"
# json_escape STRING
# Prints STRING escaped so it can sit between the double quotes of a JSON
# string literal. Without this a quote or backslash in the spoken text
# produces a malformed request body.
json_escape() {
  local raw=$1
  raw=${raw//\\/\\\\}        # backslash first, so later escapes are not doubled
  raw=${raw//\"/\\\"}
  raw=${raw//$'\n'/\\n}
  raw=${raw//$'\r'/\\r}
  raw=${raw//$'\t'/\\t}
  raw=${raw//[[:cntrl:]]/}   # JSON forbids any other raw control character
  printf '%s' "$raw"
}

# build the request body from escaped values
REQUEST_BODY=$(printf '{"input": "%s", "model": "%s", "output_format": "mp3_44100_128", "voice_settings": {"stability": 0.75, "similarity_boost": 0.75}}' \
  "$(json_escape "$TEXT")" "$(json_escape "$MODEL")")

# --silent hides the progress meter while --show-error still prints failures;
# --fail turns an HTTP error status into a non-zero exit instead of saving the
# error response as if it were audio.
curl --silent --show-error --fail -L -X POST "${API_URL}" \
  -H "xi-api-key: $API_KEY" \
  -H "Content-Type: application/json" \
  -H "Accept: audio/mpeg" \
  -d "$REQUEST_BODY" \
  --output "$OUTPUT_FILE" >/dev/null
curl_result=${?}
if [[ ${curl_result} != 0 ]] ; then
  echo "error processing text to speach." >&2
  exit 1
fi

echo ""
echo "Audio Saved : $OUTPUT_FILE"
echo ""
read -r -p "press enter to play..." </dev/tty

# slow down playback (if your model is not supporting such an option)
# the original download is kept as STANDARD_SPEED_FILE and the slowed copy
# takes over the OUTPUT_FILE name.
mv -- "$OUTPUT_FILE" "$STANDARD_SPEED_FILE"
if ! ffmpeg -nostdin -i "$STANDARD_SPEED_FILE" -filter:a "atempo=0.87" "$OUTPUT_FILE" 2>/dev/null ; then
  # ffmpeg missing or failed : fall back to the unmodified audio
  echo "warning: unable to slow down the audio with ffmpeg, playing at standard speed." >&2
  cp -f -- "$STANDARD_SPEED_FILE" "$OUTPUT_FILE"
fi

# PLAY_AUDIO_COMMAND is left unquoted on purpose : it holds the command
# together with its options and must be split into words.
$PLAY_AUDIO_COMMAND "$OUTPUT_FILE"
#!/bin/bash
#
# (C)Copyright 2025
# Henri Shustak
# Released Under the MIT licence
#
# Script which takes text via standard in and generates an audio file and then plays that audio file.
# Text to speech is processed via the elevenlabs.io API. You will require an API key to get a valid response.
#
# Usage : echo "hello, this is the text to speach system talking to you." | ~/bin/txt-2-speech-english.bash
#
# version 1.0 initial implementation
# version 1.1 some additional error handling
#
# --- configuration ----------------------------------------------------------
API_KEY="replace-with-your-api-key"
# on mac os you can install sox (cross platform) and just use the play command from that package.
VOICE_ID="wAGzRVkxKEs8La0lmdrE"
# NOTE(review): the curl request further down spells out
# https://api.elevenlabs.io/... itself and never reads API_URL - confirm which
# host is intended before relying on this value.
API_URL="https://api.11labs.io/v1/text-to-speech/$VOICE_ID"

# select_audio_player
# Sets PLAY_AUDIO_COMMAND to "parecord --play" when parecord is installed and
# to sox's "play" otherwise.
# The decision is made on the exit status of `command -v`; comparing the
# (always empty, because redirected) output of `which` against "0" could never
# succeed, so parecord was never picked.
select_audio_player() {
  if command -v parecord >/dev/null 2>&1 ; then
    PLAY_AUDIO_COMMAND="parecord --play"
  else
    PLAY_AUDIO_COMMAND="play"
  fi
}
select_audio_player
# --- scratch files ----------------------------------------------------------
# Output names carry a timestamp so earlier recordings are kept between runs.
DATETIME=$(date +"%Y-%m-%d_%H-%M-%S")
# -p : an already existing directory is not an error, a real failure is reported.
mkdir -p /tmp/text-2-speach || { echo "ERROR! : unable to create /tmp/text-2-speach" >&2 ; exit 1 ; }
OUTPUT_FILE="/tmp/text-2-speach/audio_${DATETIME}.mp3"

# --- input ------------------------------------------------------------------
# read all text from stdin
TEXT=$(cat)
# Fold every run of blanks / tabs / newlines into one space so the text ends up
# on a single line. `read -a` splits on whitespace WITHOUT pathname expansion,
# so a lone "*" or "?" in the text stays as typed instead of being replaced by
# file names from the current directory.
read -r -d '' -a text_words <<< "$TEXT"
TEXT="${text_words[*]}"
# remove non-ascii characters (LC_ALL=C makes tr work on plain bytes)
TEXT=$(printf '%s' "$TEXT" | LC_ALL=C tr -cd '\0-\177')
# confirm what we are reading.
# (stdin is the text pipe, so the prompt has to read from /dev/tty).
printf '%s\n' "$TEXT"
read -r -p "press enter to convert to speach..." </dev/tty
# specify your text
# TEXT="Your text to convert to speech"
# json_escape STRING
# Prints STRING escaped so it can sit between the double quotes of a JSON
# string literal. Without this a quote or backslash in the spoken text
# produces a malformed request body.
json_escape() {
  local raw=$1
  raw=${raw//\\/\\\\}        # backslash first, so later escapes are not doubled
  raw=${raw//\"/\\\"}
  raw=${raw//$'\n'/\\n}
  raw=${raw//$'\r'/\\r}
  raw=${raw//$'\t'/\\t}
  raw=${raw//[[:cntrl:]]/}   # JSON forbids any other raw control character
  printf '%s' "$raw"
}

# build the request body from the escaped text
REQUEST_BODY=$(printf '{"text": "%s", "output_format": "mp3_44100_128", "voice_settings": {"stability": 0.75, "similarity_boost": 0.75}}' \
  "$(json_escape "$TEXT")")

# --silent hides the progress meter while --show-error still prints failures;
# --fail turns an HTTP error status (for example a rejected API key) into a
# non-zero exit instead of saving the error response as if it were audio.
curl --silent --show-error --fail -L -X POST \
  "https://api.elevenlabs.io/v1/text-to-speech/$VOICE_ID" \
  -H "xi-api-key: $API_KEY" \
  -H "Content-Type: application/json" \
  -H "Accept: audio/mpeg" \
  -d "$REQUEST_BODY" \
  --output "$OUTPUT_FILE" >/dev/null
curl_result=${?}
if [[ ${curl_result} != 0 ]] ; then
  echo "error processing text to speach." >&2
  # 255 is the status the former `exit -1` produced; kept for callers.
  exit 255
fi

echo ""
echo "Audio Saved : $OUTPUT_FILE"
echo ""
read -r -p "press enter to play..." </dev/tty

# PLAY_AUDIO_COMMAND is left unquoted on purpose : it holds the command
# together with its options and must be split into words.
$PLAY_AUDIO_COMMAND "$OUTPUT_FILE"
#!/bin/bash
#
# (C)Copyright 2025
# Henri Shustak
# Released Under the MIT licence
#
# Script which takes text via standard in and generates an audio file and then plays that audio file.
# Text to speech is processed via the elevenlabs.io API. You will require an API key to get a valid response.
#
# Usage : echo "hello, this is the text to speach system talking to you." | ~/bin/txt-2-speech-japanese.bash
#
# version 1.0 initial implementation
# version 1.1 some additional error handling
# version 1.2 support added for japanese characters
#
# --- configuration ----------------------------------------------------------
API_KEY="replace-with-your-api-key"
# on mac os you can install sox (cross platform) and just use the play command from that package.
# these are not that great if you know of a better one please leave a comment
#VOICE_ID="Mv8AjrYZCBkdsmDHNwcB"
VOICE_ID="DtsPFCrhbCbbJkwZsb3d"
# NOTE(review): the curl request further down spells out
# https://api.elevenlabs.io/... itself and never reads API_URL - confirm which
# host is intended before relying on this value.
API_URL="https://api.11labs.io/v1/text-to-speech/$VOICE_ID"

# select_audio_player
# Sets PLAY_AUDIO_COMMAND to "parecord --play" when parecord is installed and
# to sox's "play" otherwise.
# The decision is made on the exit status of `command -v`; comparing the
# (always empty, because redirected) output of `which` against "0" could never
# succeed, so parecord was never picked.
select_audio_player() {
  if command -v parecord >/dev/null 2>&1 ; then
    PLAY_AUDIO_COMMAND="parecord --play"
  else
    PLAY_AUDIO_COMMAND="play"
  fi
}
select_audio_player
# --- scratch files ----------------------------------------------------------
# Output names carry a timestamp so earlier recordings are kept between runs.
DATETIME=$(date +"%Y-%m-%d_%H-%M-%S")
# -p : an already existing directory is not an error, a real failure is reported.
mkdir -p /tmp/text-2-speach || { echo "ERROR! : unable to create /tmp/text-2-speach" >&2 ; exit 1 ; }
OUTPUT_FILE="/tmp/text-2-speach/audio_${DATETIME}.mp3"

# --- input ------------------------------------------------------------------
# read all text from stdin
TEXT=$(cat)
# check if perl is installed
# (`command -v` is a builtin; both of its output streams are silenced here.
#  157 is the status the former `exit -99` produced; kept for callers.)
command -v perl >/dev/null 2>&1 || { echo "ERROR! : Perl is not detected on this system and is required." ; exit 157 ; }
# Fold every run of blanks / tabs / newlines into one space so the text ends up
# on a single line. `read -a` splits on whitespace WITHOUT pathname expansion,
# so a lone "*" or "?" in the text stays as typed instead of being replaced by
# file names from the current directory.
read -r -d '' -a text_words <<< "$TEXT"
TEXT="${text_words[*]}"
# remove non-ascii and non-japanese characters (requires perl)
# kept ranges : ASCII, hiragana, katakana and the CJK unified ideographs block
TEXT=$( printf '%s\n' "$TEXT" | perl -CSD -pe 's/[^\x00-\x7F\x{3040}-\x{309F}\x{30A0}-\x{30FF}\x{4E00}-\x{9FFF}]//g' )
# confirm what we are reading.
# (stdin is the text pipe, so the prompt has to read from /dev/tty).
printf '%s\n' "$TEXT"
read -r -p "press enter to convert to speach..." </dev/tty
# specify your text
# TEXT="Your text to convert to speech"
# json_escape STRING
# Prints STRING escaped so it can sit between the double quotes of a JSON
# string literal. Without this a quote or backslash in the spoken text
# produces a malformed request body.
json_escape() {
  local raw=$1
  raw=${raw//\\/\\\\}        # backslash first, so later escapes are not doubled
  raw=${raw//\"/\\\"}
  raw=${raw//$'\n'/\\n}
  raw=${raw//$'\r'/\\r}
  raw=${raw//$'\t'/\\t}
  raw=${raw//[[:cntrl:]]/}   # JSON forbids any other raw control character
  printf '%s' "$raw"
}

# build the request body from the escaped text
REQUEST_BODY=$(printf '{"text": "%s", "output_format": "mp3_44100_128", "voice_settings": {"stability": 0.75, "similarity_boost": 0.75}}' \
  "$(json_escape "$TEXT")")

# --silent hides the progress meter while --show-error still prints failures;
# --fail turns an HTTP error status (for example a rejected API key) into a
# non-zero exit instead of saving the error response as if it were audio.
curl --silent --show-error --fail -L -X POST \
  "https://api.elevenlabs.io/v1/text-to-speech/$VOICE_ID" \
  -H "xi-api-key: $API_KEY" \
  -H "Content-Type: application/json" \
  -H "Accept: audio/mpeg" \
  -d "$REQUEST_BODY" \
  --output "$OUTPUT_FILE" >/dev/null
curl_result=${?}
if [[ ${curl_result} != 0 ]] ; then
  echo "error processing text to speach." >&2
  # 255 is the status the former `exit -1` produced; kept for callers.
  exit 255
fi

echo ""
echo "Audio Saved : $OUTPUT_FILE"
echo ""
read -r -p "press enter to play..." </dev/tty

# PLAY_AUDIO_COMMAND is left unquoted on purpose : it holds the command
# together with its options and must be split into words.
$PLAY_AUDIO_COMMAND "$OUTPUT_FILE"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment