Last active
December 8, 2025 04:16
-
-
Save henri/da3c9b133cba1622cd0ae9aaad8645fe to your computer and use it in GitHub Desktop.
elevenlabs.io and localai.io API scripts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# (C)Copyright 2025
# Henri Shustak
# Released Under the MIT licence
#
# Reads text from standard input, generates an audio file from it and then
# plays that audio file.
# Text to speech is processed via the LocalAI API. You will require an API key
# to get a valid response. You will also need to set up LocalAI and specify
# its host and port number (within the API_URL variable).
# Learn more about LocalAI : https://localai.io/
#
# Can be altered to work with other local / remote models as needed.
#
# Speak the clipboard : pbpaste | ~/path/txt-2-speech-local.bash
# https://github.com/henri/handy-alias/blob/master/linux_alias.bash
#
# Usage : echo "hello, this is the text to speech system talking to you." | ~/bin/txt-2-speech-local.bash
#
# Exit status : 0 on success, 1 when the text is empty, the output directory
#               cannot be created or the API request fails.
#
# version 1.0 initial implementation
# version 1.1 added mpv option
#

API_KEY="replace-with-your-api-key"
API_URL="http://127.0.0.1:8080/tts"
MODEL="kokoro"
AUDIO_DIR="/tmp/text-2-speach"

# Print a message on stderr and terminate the script.
# Arguments: $1 - exit status, remaining arguments - message
die() {
  local status=$1
  shift
  printf '%s\n' "$*" >&2
  exit "$status"
}

# Escape a string so it can be embedded between double quotes in JSON.
# Only backslash and double quote need handling here because control
# characters are removed from the text before this is called.
# Arguments: $1 - raw string
# Outputs:   escaped string on stdout
json_escape() {
  local s=$1
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  printf '%s' "$s"
}

# Build the JSON request body.
# Arguments: $1 - text to speak, $2 - model name
# Outputs:   JSON document on stdout
build_payload() {
  printf '{
  "input": "%s",
  "model": "%s",
  "output_format": "mp3_44100_128",
  "voice_settings": {
    "stability": 0.75,
    "similarity_boost": 0.75
  }
}' "$(json_escape "$1")" "$(json_escape "$2")"
}

# on mac os you can install sox (cross platform) and just use the play command from that package.
# see if mpv or parecord are available on this system otherwise default to play
# (the command is kept in an array so its options stay separate words)
if command -v mpv >/dev/null 2>&1; then
  PLAY_AUDIO_COMMAND=(mpv --no-config)
elif command -v parecord >/dev/null 2>&1; then
  PLAY_AUDIO_COMMAND=(parecord --play)
else
  PLAY_AUDIO_COMMAND=(play)
fi

DATETIME=$(date +"%Y-%m-%d_%H-%M-%S")
mkdir -p "$AUDIO_DIR" || die 1 "unable to create ${AUDIO_DIR}"
OUTPUT_FILE="${AUDIO_DIR}/audio_${DATETIME}.mp3"
STANDARD_SPEED_FILE="${AUDIO_DIR}/audio_standard${DATETIME}.mp3"

# read all text from stdin
TEXT=$(cat)

# Keep only printable ASCII and whitespace, then collapse every run of
# whitespace into one space. printf with a quoted expansion is used so the
# text is never glob-expanded by the shell (an unquoted echo would replace a
# lone "*" with the names of the files in the current directory).
TEXT=$(printf '%s' "$TEXT" \
  | LC_ALL=C tr -cd '\011-\015\040-\176' \
  | LC_ALL=C tr -s '[:space:]' ' ')
TEXT=${TEXT# }
TEXT=${TEXT% }
[[ -n "$TEXT" ]] || die 1 "no text was received on standard input."

# confirm what we are reading.
printf '%s\n' "$TEXT"
# stdin was the text pipe, so the confirmation has to come from the terminal
read -r -p "press enter to convert to speach..." </dev/tty

PAYLOAD=$(build_payload "$TEXT" "$MODEL")

# --fail turns an HTTP error into a non-zero exit status instead of saving
# the error document as if it were audio; --silent --show-error hides the
# progress meter but still reports why a request failed.
if ! curl --silent --show-error --fail -L -X POST "${API_URL}" \
    -H "xi-api-key: ${API_KEY}" \
    -H "Content-Type: application/json" \
    -H "Accept: audio/mpeg" \
    -d "$PAYLOAD" \
    --output "$OUTPUT_FILE"; then
  die 1 "error processing text to speach."
fi

echo ""
echo "Audio Saved : $OUTPUT_FILE"
echo ""
read -r -p "press enter to play..." </dev/tty

# slow down playback (if your model is not supporting such an option)
# The original audio is kept as STANDARD_SPEED_FILE. When ffmpeg is missing
# or fails, the original is moved back so there is still something to play.
if mv -f -- "$OUTPUT_FILE" "$STANDARD_SPEED_FILE"; then
  if ! ffmpeg -nostdin -y -loglevel error -i "$STANDARD_SPEED_FILE" \
      -filter:a "atempo=0.87" "$OUTPUT_FILE"; then
    echo "warning: unable to slow the audio down, playing at standard speed." >&2
    mv -f -- "$STANDARD_SPEED_FILE" "$OUTPUT_FILE"
  fi
fi

"${PLAY_AUDIO_COMMAND[@]}" "$OUTPUT_FILE"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# (C)Copyright 2025
# Henri Shustak
# Released Under the MIT licence
#
# Reads text from standard input, generates an audio file from it and then
# plays that audio file.
# Text to speech is processed via the elevenlabs.io API. You will require an
# API key to get a valid response.
#
# Usage : echo "hello, this is the text to speech system talking to you." | ~/bin/txt-2-speech-english.bash
#
# Exit status : 0 on success, 255 when the API request fails, 1 when the text
#               is empty or the output directory cannot be created.
#
# version 1.0 initial implementation
# version 1.1 some additional error handling
#

API_KEY="replace-with-your-api-key"
VOICE_ID="wAGzRVkxKEs8La0lmdrE"
# This is the endpoint the request is sent to.
API_URL="https://api.elevenlabs.io/v1/text-to-speech/${VOICE_ID}"
AUDIO_DIR="/tmp/text-2-speach"

# Print a message on stderr and terminate the script.
# Arguments: $1 - exit status, remaining arguments - message
die() {
  local status=$1
  shift
  printf '%s\n' "$*" >&2
  exit "$status"
}

# Escape a string so it can be embedded between double quotes in JSON.
# Only backslash and double quote need handling here because control
# characters are removed from the text before this is called.
# Arguments: $1 - raw string
# Outputs:   escaped string on stdout
json_escape() {
  local s=$1
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  printf '%s' "$s"
}

# Build the JSON request body.
# Arguments: $1 - text to speak
# Outputs:   JSON document on stdout
build_payload() {
  printf '{
  "text": "%s",
  "output_format": "mp3_44100_128",
  "voice_settings": {
    "stability": 0.75,
    "similarity_boost": 0.75
  }
}' "$(json_escape "$1")"
}

# on mac os you can install sox (cross platform) and just use the play command from that package.
# see if parecord is available on this system
# (the exit status of "command -v" is tested; comparing its silenced output
# with a string can never match)
if command -v parecord >/dev/null 2>&1; then
  PLAY_AUDIO_COMMAND=(parecord --play)
else
  PLAY_AUDIO_COMMAND=(play)
fi

DATETIME=$(date +"%Y-%m-%d_%H-%M-%S")
mkdir -p "$AUDIO_DIR" || die 1 "unable to create ${AUDIO_DIR}"
OUTPUT_FILE="${AUDIO_DIR}/audio_${DATETIME}.mp3"

# read all text from stdin
TEXT=$(cat)

# Keep only printable ASCII and whitespace, then collapse every run of
# whitespace into one space. printf with a quoted expansion is used so the
# text is never glob-expanded by the shell (an unquoted echo would replace a
# lone "*" with the names of the files in the current directory).
TEXT=$(printf '%s' "$TEXT" \
  | LC_ALL=C tr -cd '\011-\015\040-\176' \
  | LC_ALL=C tr -s '[:space:]' ' ')
TEXT=${TEXT# }
TEXT=${TEXT% }
[[ -n "$TEXT" ]] || die 1 "no text was received on standard input."

# confirm what we are reading.
printf '%s\n' "$TEXT"
# stdin was the text pipe, so the confirmation has to come from the terminal
read -r -p "press enter to convert to speach..." </dev/tty

PAYLOAD=$(build_payload "$TEXT")

# --fail turns an HTTP error (for example a rejected API key) into a non-zero
# exit status instead of saving the error document as if it were audio;
# --silent --show-error hides the progress meter but still reports why a
# request failed.
curl --silent --show-error --fail -L -X POST "${API_URL}" \
  -H "xi-api-key: ${API_KEY}" \
  -H "Content-Type: application/json" \
  -H "Accept: audio/mpeg" \
  -d "$PAYLOAD" \
  --output "$OUTPUT_FILE"
curl_result=${?}
if [[ ${curl_result} != 0 ]] ; then
  # 255 is the status the previous "exit -1" produced
  die 255 "error processing text to speach."
fi

echo ""
echo "Audio Saved : $OUTPUT_FILE"
echo ""
read -r -p "press enter to play..." </dev/tty
"${PLAY_AUDIO_COMMAND[@]}" "$OUTPUT_FILE"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# (C)Copyright 2025
# Henri Shustak
# Released Under the MIT licence
#
# Reads text from standard input, generates an audio file from it and then
# plays that audio file.
# Text to speech is processed via the elevenlabs.io API. You will require an
# API key to get a valid response.
#
# Usage : echo "hello, this is the text to speech system talking to you." | ~/bin/txt-2-speech-japanese.bash
#
# Exit status : 0 on success, 157 when perl is missing, 255 when the API
#               request fails, 1 when the text is empty or the output
#               directory cannot be created.
#
# version 1.0 initial implementation
# version 1.1 some additional error handling
# version 1.2 support added for japanese characters
#

API_KEY="replace-with-your-api-key"
# these are not that great if you know of a better one please leave a comment
#VOICE_ID="Mv8AjrYZCBkdsmDHNwcB"
VOICE_ID="DtsPFCrhbCbbJkwZsb3d"
# This is the endpoint the request is sent to.
API_URL="https://api.elevenlabs.io/v1/text-to-speech/${VOICE_ID}"
AUDIO_DIR="/tmp/text-2-speach"

# Print a message on stderr and terminate the script.
# Arguments: $1 - exit status, remaining arguments - message
die() {
  local status=$1
  shift
  printf '%s\n' "$*" >&2
  exit "$status"
}

# Escape a string so it can be embedded between double quotes in JSON.
# Only backslash and double quote need handling here because control
# characters are removed from the text before this is called.
# Arguments: $1 - raw string
# Outputs:   escaped string on stdout
json_escape() {
  local s=$1
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  printf '%s' "$s"
}

# Build the JSON request body.
# Arguments: $1 - text to speak
# Outputs:   JSON document on stdout
build_payload() {
  printf '{
  "text": "%s",
  "output_format": "mp3_44100_128",
  "voice_settings": {
    "stability": 0.75,
    "similarity_boost": 0.75
  }
}' "$(json_escape "$1")"
}

# check if perl is installed (done before stdin is consumed so a missing
# dependency is reported straight away; 157 is the status the previous
# "exit -99" produced)
command -v perl >/dev/null 2>&1 \
  || die 157 "ERROR! : Perl is not detected on this system and is required."

# on mac os you can install sox (cross platform) and just use the play command from that package.
# see if parecord is available on this system
# (the exit status of "command -v" is tested; comparing its silenced output
# with a string can never match)
if command -v parecord >/dev/null 2>&1; then
  PLAY_AUDIO_COMMAND=(parecord --play)
else
  PLAY_AUDIO_COMMAND=(play)
fi

DATETIME=$(date +"%Y-%m-%d_%H-%M-%S")
mkdir -p "$AUDIO_DIR" || die 1 "unable to create ${AUDIO_DIR}"
OUTPUT_FILE="${AUDIO_DIR}/audio_${DATETIME}.mp3"

# read all text from stdin
TEXT=$(cat)

# remove non-ascii and non-japanese characters (requires perl), drop the
# remaining ASCII control characters and collapse every run of whitespace
# into one space. printf with a quoted expansion is used so the text is never
# glob-expanded by the shell. tr runs in the C locale so it works on single
# bytes and leaves the multi-byte UTF-8 sequences untouched.
TEXT=$(printf '%s' "$TEXT" \
  | perl -CSD -pe 's/[^\x00-\x7F\x{3040}-\x{309F}\x{30A0}-\x{30FF}\x{4E00}-\x{9FFF}]//g' \
  | LC_ALL=C tr -d '\000-\010\016-\037\177' \
  | LC_ALL=C tr -s '[:space:]' ' ')
TEXT=${TEXT# }
TEXT=${TEXT% }
[[ -n "$TEXT" ]] || die 1 "no text was received on standard input."

# confirm what we are reading.
printf '%s\n' "$TEXT"
# stdin was the text pipe, so the confirmation has to come from the terminal
read -r -p "press enter to convert to speach..." </dev/tty

PAYLOAD=$(build_payload "$TEXT")

# --fail turns an HTTP error (for example a rejected API key) into a non-zero
# exit status instead of saving the error document as if it were audio;
# --silent --show-error hides the progress meter but still reports why a
# request failed.
curl --silent --show-error --fail -L -X POST "${API_URL}" \
  -H "xi-api-key: ${API_KEY}" \
  -H "Content-Type: application/json" \
  -H "Accept: audio/mpeg" \
  -d "$PAYLOAD" \
  --output "$OUTPUT_FILE"
curl_result=${?}
if [[ ${curl_result} != 0 ]] ; then
  # 255 is the status the previous "exit -1" produced
  die 255 "error processing text to speach."
fi

echo ""
echo "Audio Saved : $OUTPUT_FILE"
echo ""
read -r -p "press enter to play..." </dev/tty
"${PLAY_AUDIO_COMMAND[@]}" "$OUTPUT_FILE"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment