henri/txt-2-speech-english-local.bash

## txt-2-speech-english-local.bash
#!/bin/bash
#
# (C)Copyright 2025
# Henri Shustak
# Released Under the MIT licence
#
# Script which takes text via standard input, generates an audio file and then plays that audio file.
# Text to speech is processed via the LocalAI API. You will require an API key to get a valid response.
# You will also need to set up LocalAI and specify its host and port number (within the API_URL variable).
# Learn more about LocalAI : https://localai.io/
#
# Can be altered to work with other local / remote models as needed.
#
# Speak the clipboard : echo $(pbpaste) | ~/path/txt-2-speech-english-local.bash
# https://github.com/henri/handy-alias/blob/master/linux_alias.bash
#
# Usage : echo "hello, this is the text to speech system talking to you." | ~/bin/txt-2-speech-english-local.bash
#
# version 1.0 initial implementation
# version 1.1 added mpv option
#

# --- configuration -----------------------------------------------------------
# API_KEY : value sent in the "xi-api-key" request header.
# API_URL : text-to-speech endpoint the request is POSTed to.
# MODEL   : model name placed in the request body.
API_KEY="replace-with-your-api-key"
API_URL="http://127.0.0.1:8080/tts"
MODEL="kokoro"

# on mac os you can install sox (cross platform) and just use the play command from that package.

# Pick the audio player : prefer mpv, then parecord, otherwise default to play.
# The exit status of `command -v` is tested directly; wrapping the lookup in
# $( ... ) would try to execute its (empty) output as a command.
if command -v mpv >/dev/null 2>&1 ; then
    PLAY_AUDIO_COMMAND="mpv --no-config"
elif command -v parecord >/dev/null 2>&1 ; then
    PLAY_AUDIO_COMMAND="parecord --play"
else
    PLAY_AUDIO_COMMAND="play"
fi


# Per-run file names carry the current date and time (to the second).
DATETIME=$(date +"%Y-%m-%d_%H-%M-%S")

# Create the working directory. With -p an already existing directory is not
# an error, so anything reported here is a real failure and we stop early
# rather than letting the download fail later with a less obvious message.
if ! mkdir -p /tmp/text-2-speach ; then
    echo "error : unable to create /tmp/text-2-speach" >&2
    exit 1
fi
# OUTPUT_FILE is the file that gets played; STANDARD_SPEED_FILE receives the
# unmodified download when the audio is slowed down.
OUTPUT_FILE="/tmp/text-2-speach/audio_${DATETIME}.mp3"
STANDARD_SPEED_FILE="/tmp/text-2-speach/audio_standard${DATETIME}.mp3"


# read all text from stdin
TEXT=$(cat)

# remove non-ascii characters
#
# flatten_to_ascii STRING
#   Prints STRING with every run of spaces, tabs and newlines collapsed to one
#   space (leading and trailing whitespace dropped) and every byte outside the
#   7-bit ASCII range removed.
#   The text is split with `read -a`, which splits on IFS but never performs
#   pathname expansion, so characters such as "*" and "?" in the text stay
#   literal instead of being replaced by file names from the current directory.
flatten_to_ascii() {
    local IFS=$' \t\n'
    local -a words=()
    # -d '' reads the whole string; read reports non-zero at end of input even
    # though the array has been filled, hence the "|| true".
    read -r -d '' -a words <<<"$1" || true
    # LC_ALL=C makes tr work on bytes regardless of the user's locale.
    printf '%s' "${words[*]}" | LC_ALL=C tr -cd '\0-\177'
}
TEXT=$(flatten_to_ascii "$TEXT")

# Show the text that is about to be sent and wait for confirmation. The prompt
# is answered on the terminal because stdin carried the text itself.
printf '%s\n' "$TEXT"
read -r -p "press enter to convert to speach..." </dev/tty

# specify your text
# TEXT="Your text to convert to speech"

# json_escape STRING
#   Prints STRING made safe for use between the double quotes of a JSON string :
#   backslash and double quote are escaped, newline / carriage return / tab are
#   written as \n \r \t and any other control character is dropped.
json_escape() {
    local s=$1
    s=${s//\\/\\\\}
    s=${s//\"/\\\"}
    s=${s//$'\n'/\\n}
    s=${s//$'\r'/\\r}
    s=${s//$'\t'/\\t}
    s=${s//[[:cntrl:]]/}
    printf '%s' "$s"
}

# Build the request body. Values are escaped first so that a quote or a
# backslash in the text can no longer break the JSON document.
printf -v payload '{
  "input": "%s",
  "model": "%s",
  "output_format": "mp3_44100_128",
  "voice_settings": {
    "stability": 0.75,
    "similarity_boost": 0.75
  }
}' "$(json_escape "$TEXT")" "$(json_escape "$MODEL")"

# --silent hides the progress meter while --show-error still reports problems;
# --fail turns an HTTP error status into a non-zero exit instead of saving the
# server's error response as the audio file.
if ! curl --silent --show-error --fail -L -X POST "${API_URL}" \
    -H "xi-api-key: $API_KEY" \
    -H "Content-Type: application/json" \
    -H "Accept: audio/mpeg" \
    -d "$payload" --output "$OUTPUT_FILE" ; then
    echo "error processing text to speach." >&2
    exit 1
fi

echo ""
echo "Audio Saved : $OUTPUT_FILE"
echo ""

read -r -p "press enter to play..." </dev/tty

# slow down playback (if your model is not supporting such an option)
# The download is moved to STANDARD_SPEED_FILE and ffmpeg writes a copy with
# the tempo reduced to 0.87 back to OUTPUT_FILE. Should either step fail (for
# example ffmpeg is not installed) the download is moved back so that there is
# still a file to play.
if ! { mv -i "$OUTPUT_FILE" "$STANDARD_SPEED_FILE" \
        && ffmpeg -i "$STANDARD_SPEED_FILE" -filter:a "atempo=0.87" "$OUTPUT_FILE" 2>/dev/null ; } ; then
    if [[ -f "$STANDARD_SPEED_FILE" ]] ; then
        echo "warning : unable to slow down the audio, playing at standard speed." >&2
        mv -f "$STANDARD_SPEED_FILE" "$OUTPUT_FILE"
    fi
fi

# PLAY_AUDIO_COMMAND holds the player and its options in a single string, so it
# is expanded unquoted on purpose to split it into separate words.
$PLAY_AUDIO_COMMAND "$OUTPUT_FILE"


## txt-2-speech-english.bash
#!/bin/bash
#
# (C)Copyright 2025
# Henri Shustak
# Released Under the MIT licence
#
# Script which takes text via standard input, generates an audio file and then plays that audio file.
# Text to speech is processed via the elevenlabs.io API. You will require an API key to get a valid response.
#
# Usage : echo "hello, this is the text to speech system talking to you." | ~/bin/txt-2-speech-english.bash
#
# version 1.0 initial implementation
# version 1.1 some additional error handling
#


# API_KEY : value sent in the "xi-api-key" request header.
API_KEY="replace-with-your-api-key"

# on mac os you can install sox (cross platform) and just use the play command from that package.

# VOICE_ID : voice identifier appended to the text-to-speech URL.
VOICE_ID="wAGzRVkxKEs8La0lmdrE"
# NOTE(review): API_URL is not referenced by the curl request in this script,
# which spells out https://api.elevenlabs.io/... itself - confirm which host is
# intended before relying on this variable.
API_URL="https://api.11labs.io/v1/text-to-speech/$VOICE_ID"

# see if parecord is available on this system
# The exit status of `command -v` is tested directly. Comparing the captured
# output of the lookup with "0" can never match, because that output is empty
# once it has been redirected to /dev/null.
if command -v parecord >/dev/null 2>&1 ; then
    PLAY_AUDIO_COMMAND="parecord --play"
else
    PLAY_AUDIO_COMMAND="play"
fi


# The output file name carries the current date and time (to the second).
DATETIME=$(date +"%Y-%m-%d_%H-%M-%S")

# Create the working directory. With -p an already existing directory is not
# an error, so anything reported here is a real failure and we stop early
# rather than letting the download fail later with a less obvious message.
if ! mkdir -p /tmp/text-2-speach ; then
    echo "error : unable to create /tmp/text-2-speach" >&2
    exit 1
fi
OUTPUT_FILE="/tmp/text-2-speach/audio_${DATETIME}.mp3"

# read all text from stdin
TEXT=$(cat)

# remove non-ascii characters
#
# flatten_to_ascii STRING
#   Prints STRING with every run of spaces, tabs and newlines collapsed to one
#   space (leading and trailing whitespace dropped) and every byte outside the
#   7-bit ASCII range removed.
#   The text is split with `read -a`, which splits on IFS but never performs
#   pathname expansion, so characters such as "*" and "?" in the text stay
#   literal instead of being replaced by file names from the current directory.
flatten_to_ascii() {
    local IFS=$' \t\n'
    local -a words=()
    # -d '' reads the whole string; read reports non-zero at end of input even
    # though the array has been filled, hence the "|| true".
    read -r -d '' -a words <<<"$1" || true
    # LC_ALL=C makes tr work on bytes regardless of the user's locale.
    printf '%s' "${words[*]}" | LC_ALL=C tr -cd '\0-\177'
}
TEXT=$(flatten_to_ascii "$TEXT")

# confirm what we are reading.
# The prompt is answered on the terminal because stdin carried the text itself.
printf '%s\n' "$TEXT"
read -r -p "press enter to convert to speach..." </dev/tty

# specify your text
# TEXT="Your text to convert to speech"

# json_escape STRING
#   Prints STRING made safe for use between the double quotes of a JSON string :
#   backslash and double quote are escaped, newline / carriage return / tab are
#   written as \n \r \t and any other control character is dropped.
json_escape() {
    local s=$1
    s=${s//\\/\\\\}
    s=${s//\"/\\\"}
    s=${s//$'\n'/\\n}
    s=${s//$'\r'/\\r}
    s=${s//$'\t'/\\t}
    s=${s//[[:cntrl:]]/}
    printf '%s' "$s"
}

# Build the request body. The text is escaped first so that a quote or a
# backslash in it can no longer break the JSON document.
printf -v payload '{
  "text": "%s",
  "output_format": "mp3_44100_128",
  "voice_settings": {
    "stability": 0.75,
    "similarity_boost": 0.75
  }
}' "$(json_escape "$TEXT")"

# --silent hides the progress meter while --show-error still reports problems;
# --fail turns an HTTP error status into a non-zero exit instead of saving the
# server's error response as the audio file.
curl --silent --show-error --fail -L -X POST \
    "https://api.elevenlabs.io/v1/text-to-speech/$VOICE_ID" \
    -H "xi-api-key: $API_KEY" \
    -H "Content-Type: application/json" \
    -H "Accept: audio/mpeg" \
    -d "$payload" --output "$OUTPUT_FILE"

curl_result=${?}
if [[ ${curl_result} != 0 ]] ; then
    echo "error processing text to speach." >&2
    # 255 is the status bash reports for the former "exit -1".
    exit 255
fi

# Report where the audio was written, with a blank line before and after.
printf '\n%s\n\n' "Audio Saved : $OUTPUT_FILE"


# Wait for the user; the prompt is answered on the terminal.
read -p "press enter to play..." < /dev/tty

# Hand the file to the selected player. The command string is expanded
# unquoted on purpose so that it splits into the program and its options.
${PLAY_AUDIO_COMMAND} "${OUTPUT_FILE}"


## txt-2-speech-japanese.bash
#!/bin/bash
#
# (C)Copyright 2025
# Henri Shustak
# Released Under the MIT licence
#
# Script which takes text via standard input, generates an audio file and then plays that audio file.
# Text to speech is processed via the elevenlabs.io API. You will require an API key to get a valid response.
#
# Usage : echo "hello, this is the text to speech system talking to you." | ~/bin/txt-2-speech-japanese.bash
#
# version 1.0 initial implementation
# version 1.1 some additional error handling
# version 1.2 support added for japanese characters
#


# API_KEY : value sent in the "xi-api-key" request header.
API_KEY="replace-with-your-api-key"

# on mac os you can install sox (cross platform) and just use the play command from that package.

# these are not that great if you know of a better one please leave a comment

# VOICE_ID : voice identifier appended to the text-to-speech URL.
#VOICE_ID="Mv8AjrYZCBkdsmDHNwcB"
VOICE_ID="DtsPFCrhbCbbJkwZsb3d"

# NOTE(review): API_URL is not referenced by the curl request in this script,
# which spells out https://api.elevenlabs.io/... itself - confirm which host is
# intended before relying on this variable.
API_URL="https://api.11labs.io/v1/text-to-speech/$VOICE_ID"

# see if parecord is available on this system
# The exit status of `command -v` is tested directly. Comparing the captured
# output of the lookup with "0" can never match, because that output is empty
# once it has been redirected to /dev/null.
if command -v parecord >/dev/null 2>&1 ; then
    PLAY_AUDIO_COMMAND="parecord --play"
else
    PLAY_AUDIO_COMMAND="play"
fi


# The output file name carries the current date and time (to the second).
DATETIME=$(date +"%Y-%m-%d_%H-%M-%S")

# Create the working directory. With -p an already existing directory is not
# an error, so anything reported here is a real failure and we stop early
# rather than letting the download fail later with a less obvious message.
if ! mkdir -p /tmp/text-2-speach ; then
    echo "error : unable to create /tmp/text-2-speach" >&2
    exit 1
fi
OUTPUT_FILE="/tmp/text-2-speach/audio_${DATETIME}.mp3"

# read all text from stdin
TEXT=$(cat)

# check if perl is installed
if ! command -v perl >/dev/null 2>&1 ; then
    echo "ERROR! : Perl is not detected on this system and is required." >&2
    # 157 is the status bash reports for the former "exit -99".
    exit 157
fi

# remove non-asci and non-japanese characters (requires perl)
TEXT=$( echo $TEXT | perl -CSD -pe 's/[^\x00-\x7F\x{3040}-\x{309F}\x{30A0}-\x{30FF}\x{4E00}-\x{9FFF}]//g' )

# confirm what we are reading.
# The prompt is answered on the terminal because stdin carried the text itself.
printf '%s\n' "$TEXT"

read -r -p "press enter to convert to speach..." </dev/tty

# specify your text
# TEXT="Your text to convert to speech"

# json_escape STRING
#   Prints STRING made safe for use between the double quotes of a JSON string :
#   backslash and double quote are escaped, newline / carriage return / tab are
#   written as \n \r \t and any other control character is dropped.
json_escape() {
    local s=$1
    s=${s//\\/\\\\}
    s=${s//\"/\\\"}
    s=${s//$'\n'/\\n}
    s=${s//$'\r'/\\r}
    s=${s//$'\t'/\\t}
    s=${s//[[:cntrl:]]/}
    printf '%s' "$s"
}

# Build the request body. The text is escaped first so that a quote or a
# backslash in it can no longer break the JSON document.
printf -v payload '{
  "text": "%s",
  "output_format": "mp3_44100_128",
  "voice_settings": {
    "stability": 0.75,
    "similarity_boost": 0.75
  }
}' "$(json_escape "$TEXT")"

# --silent hides the progress meter while --show-error still reports problems;
# --fail turns an HTTP error status into a non-zero exit instead of saving the
# server's error response as the audio file.
curl --silent --show-error --fail -L -X POST \
    "https://api.elevenlabs.io/v1/text-to-speech/$VOICE_ID" \
    -H "xi-api-key: $API_KEY" \
    -H "Content-Type: application/json" \
    -H "Accept: audio/mpeg" \
    -d "$payload" --output "$OUTPUT_FILE"

curl_result=${?}
if [[ ${curl_result} != 0 ]] ; then
    echo "error processing text to speach." >&2
    # 255 is the status bash reports for the former "exit -1".
    exit 255
fi

# Report where the audio was written, with a blank line before and after.
printf '\n%s\n\n' "Audio Saved : $OUTPUT_FILE"

#ls "$OUTPUT_FILE"

# Wait for the user; the prompt is answered on the terminal.
read -p "press enter to play..." < /dev/tty

#echo $PLAY_AUDIO_COMMAND

# Hand the file to the selected player. The command string is expanded
# unquoted on purpose so that it splits into the program and its options.
${PLAY_AUDIO_COMMAND} "${OUTPUT_FILE}"

#ls "$OUTPUT_FILE"
	#!/bin/bash
	#
	# (C)Copyright 2025
	# Henri Shustak
	# Released Under the MIT licence
	#
	# Script which takes text via standard input, generates an audio file and then plays that audio file.
	# Text to speech is processed via the LocalAI API. You will require an API key to get a valid response.
	# You will also need to set up LocalAI and specify its host and port number (within the API_URL variable).
	# Learn more about LocalAI : https://localai.io/
	#
	# Can be altered to work with other local / remote models as needed.
	#
	# Speak the clipboard : echo $(pbpaste) | ~/path/txt-2-speech-english-local.bash
	# https://github.com/henri/handy-alias/blob/master/linux_alias.bash
	#
	# Usage : echo "hello, this is the text to speech system talking to you." | ~/bin/txt-2-speech-english-local.bash
	#
	# version 1.0 initial implementation
	# version 1.1 added mpv option
	#

	# --- configuration -----------------------------------------------------------
	# API_KEY : value sent in the "xi-api-key" request header.
	# API_URL : text-to-speech endpoint the request is POSTed to.
	# MODEL   : model name placed in the request body.
	API_KEY="replace-with-your-api-key"
	API_URL="http://127.0.0.1:8080/tts"
	MODEL="kokoro"

	# on mac os you can install sox (cross platform) and just use the play command from that package.

	# Pick the audio player : prefer mpv, then parecord, otherwise default to play.
	# The exit status of `command -v` is tested directly; wrapping the lookup in
	# $( ... ) would try to execute its (empty) output as a command.
	if command -v mpv >/dev/null 2>&1 ; then
	    PLAY_AUDIO_COMMAND="mpv --no-config"
	elif command -v parecord >/dev/null 2>&1 ; then
	    PLAY_AUDIO_COMMAND="parecord --play"
	else
	    PLAY_AUDIO_COMMAND="play"
	fi


	# Per-run file names carry the current date and time (to the second).
	DATETIME=$(date +"%Y-%m-%d_%H-%M-%S")

	# Create the working directory. With -p an already existing directory is not
	# an error, so anything reported here is a real failure and we stop early
	# rather than letting the download fail later with a less obvious message.
	if ! mkdir -p /tmp/text-2-speach ; then
	    echo "error : unable to create /tmp/text-2-speach" >&2
	    exit 1
	fi
	# OUTPUT_FILE is the file that gets played; STANDARD_SPEED_FILE receives the
	# unmodified download when the audio is slowed down.
	OUTPUT_FILE="/tmp/text-2-speach/audio_${DATETIME}.mp3"
	STANDARD_SPEED_FILE="/tmp/text-2-speach/audio_standard${DATETIME}.mp3"


	# read all text from stdin
	TEXT=$(cat)

	# remove non-ascii characters
	#
	# flatten_to_ascii STRING
	#   Prints STRING with every run of spaces, tabs and newlines collapsed to one
	#   space (leading and trailing whitespace dropped) and every byte outside the
	#   7-bit ASCII range removed.
	#   The output really is piped through tr here; a backslash-escaped pipe would
	#   be handed to echo as an ordinary argument and tr would never run.
	#   The text is split with `read -a`, which splits on IFS but never performs
	#   pathname expansion, so characters such as "*" and "?" in the text stay
	#   literal instead of being replaced by file names from the current directory.
	flatten_to_ascii() {
	    local IFS=$' \t\n'
	    local -a words=()
	    # -d '' reads the whole string; read reports non-zero at end of input even
	    # though the array has been filled, hence the "|| true".
	    read -r -d '' -a words <<<"$1" || true
	    # LC_ALL=C makes tr work on bytes regardless of the user's locale.
	    printf '%s' "${words[*]}" | LC_ALL=C tr -cd '\0-\177'
	}
	TEXT=$(flatten_to_ascii "$TEXT")

	# Show the text that is about to be sent and wait for confirmation. The prompt
	# is answered on the terminal because stdin carried the text itself.
	printf '%s\n' "$TEXT"
	read -r -p "press enter to convert to speach..." </dev/tty

	# specify your text
	# TEXT="Your text to convert to speech"

	# json_escape STRING
	#   Prints STRING made safe for use between the double quotes of a JSON string :
	#   backslash and double quote are escaped, newline / carriage return / tab are
	#   written as \n \r \t and any other control character is dropped.
	json_escape() {
	    local s=$1
	    s=${s//\\/\\\\}
	    s=${s//\"/\\\"}
	    s=${s//$'\n'/\\n}
	    s=${s//$'\r'/\\r}
	    s=${s//$'\t'/\\t}
	    s=${s//[[:cntrl:]]/}
	    printf '%s' "$s"
	}

	# Build the request body. Values are escaped first so that a quote or a
	# backslash in the text can no longer break the JSON document.
	printf -v payload '{
	"input": "%s",
	"model": "%s",
	"output_format": "mp3_44100_128",
	"voice_settings": {
	"stability": 0.75,
	"similarity_boost": 0.75
	}
	}' "$(json_escape "$TEXT")" "$(json_escape "$MODEL")"

	# --silent hides the progress meter while --show-error still reports problems;
	# --fail turns an HTTP error status into a non-zero exit instead of saving the
	# server's error response as the audio file.
	if ! curl --silent --show-error --fail -L -X POST "${API_URL}" \
	    -H "xi-api-key: $API_KEY" \
	    -H "Content-Type: application/json" \
	    -H "Accept: audio/mpeg" \
	    -d "$payload" --output "$OUTPUT_FILE" ; then
	    echo "error processing text to speach." >&2
	    exit 1
	fi

	echo ""
	echo "Audio Saved : $OUTPUT_FILE"
	echo ""

	read -r -p "press enter to play..." </dev/tty

	# slow down playback (if your model is not supporting such an option)
	# The download is moved to STANDARD_SPEED_FILE and ffmpeg writes a copy with
	# the tempo reduced to 0.87 back to OUTPUT_FILE. Should either step fail (for
	# example ffmpeg is not installed) the download is moved back so that there is
	# still a file to play.
	if ! { mv -i "$OUTPUT_FILE" "$STANDARD_SPEED_FILE" \
	        && ffmpeg -i "$STANDARD_SPEED_FILE" -filter:a "atempo=0.87" "$OUTPUT_FILE" 2>/dev/null ; } ; then
	    if [[ -f "$STANDARD_SPEED_FILE" ]] ; then
	        echo "warning : unable to slow down the audio, playing at standard speed." >&2
	        mv -f "$STANDARD_SPEED_FILE" "$OUTPUT_FILE"
	    fi
	fi

	# PLAY_AUDIO_COMMAND holds the player and its options in a single string, so it
	# is expanded unquoted on purpose to split it into separate words.
	$PLAY_AUDIO_COMMAND "$OUTPUT_FILE"
No results found