stephane-klein/README.md

## README.md

      
    Raw
  

              README.md
            
          
    Installation instructions:
$ mkdir -p ~/.local/bin
$ curl -o ~/.local/bin/text_to_audio.py https://gist.githubusercontent.com/stephane-klein/1406e746f0253956062d4adff7a692bd/raw/8571cdd91cae8ebcd208435daacf431cfc1cd353/text_to_audio.py
$ chmod +x ~/.local/bin/text_to_audio.py

  
## text_to_audio.py
#!/usr/bin/env python3
"""
Script to encode text to URL format and download audio from Google Translate
Usage: text_to_audio.py "your text here" [-o output.mp3] [-l en-GB]
"""

import sys
import urllib.parse
import urllib.request
import argparse
import os
from datetime import datetime

def generate_filename(text, language="en-GB"):
    """
    Build the default output path for the audio file and create its directory.

    The path has the form ~/<dir>/YYYY-MM-DD_Text_With_Underscores.mp3, where
    <dir> is "english_audio", "french_audio" or "spanish_audio" for language
    codes starting with "en", "fr" or "es", and "<language>_audio" otherwise.

    Args:
        text: The text to convert to speech (used to derive the file name)
        language: Language code (used for directory name)

    Returns:
        Full path to the output file
    """
    # File names are commonly limited to 255 bytes; keep the text part well
    # below that so the date prefix and the ".mp3" suffix always fit.
    max_text_bytes = 200

    # Get current date in YYYY-MM-DD format
    date_str = datetime.now().strftime("%Y-%m-%d")

    # Clean the text for filename: spaces become underscores, and anything
    # that is not alphanumeric, "_" or "-" is dropped
    clean_text = "".join(
        c for c in text.replace(" ", "_") if c.isalnum() or c in ("_", "-")
    )
    # Truncate on a byte budget; errors="ignore" discards a multi-byte
    # character that the cut would otherwise split in half
    clean_text = clean_text.encode("utf-8")[:max_text_bytes].decode(
        "utf-8", errors="ignore"
    )
    # Text made only of spaces/punctuation would leave no usable name part
    if not clean_text.strip("_-"):
        clean_text = "audio"

    # Determine directory name based on language
    if language.startswith("en"):
        dir_name = "english_audio"
    elif language.startswith("fr"):
        dir_name = "french_audio"
    elif language.startswith("es"):
        dir_name = "spanish_audio"
    else:
        # The code is user input: keep only filename-safe characters so it
        # cannot smuggle path separators ("../") into the directory name
        safe_language = "".join(
            c for c in language if c.isalnum() or c in ("_", "-")
        )
        dir_name = f"{safe_language}_audio"

    # Build full path
    home_dir = os.path.expanduser("~")
    audio_dir = os.path.join(home_dir, dir_name)

    # Create directory if it doesn't exist
    os.makedirs(audio_dir, exist_ok=True)

    # Build filename
    filename = f"{date_str}_{clean_text}.mp3"
    return os.path.join(audio_dir, filename)

def download_audio(text, language="en-GB", output_file=None):
    """
    Download audio pronunciation from Google Translate

    Progress is printed to stdout. On any failure an error message is printed
    to stderr and the process exits with status 1.

    Args:
        text: The text to convert to speech
        language: Language code (en-GB for British, en for American)
        output_file: Output filename for the audio file (None for auto-generate)
    """
    # Give up instead of hanging forever when the server does not answer
    timeout_seconds = 30

    # Reject blank text up front: there is nothing to pronounce, and failing
    # here avoids creating an output directory and sending a useless request
    if not text or not text.strip():
        print("\n✗ Error downloading audio: text is empty", file=sys.stderr)
        sys.exit(1)

    # Generate filename if not provided
    if output_file is None:
        output_file = generate_filename(text, language)

    # Build the Google Translate TTS URL. urlencode() escapes every value
    # (with quote_plus), so neither the text nor the language code can
    # break or extend the query string.
    query = urllib.parse.urlencode({
        "ie": "UTF-8",
        "tl": language,
        "client": "tw-ob",
        "q": text,
    })
    url = f"https://translate.google.com/translate_tts?{query}"

    # Set headers to mimic a browser request
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Referer': 'https://translate.google.com/'
    }

    try:
        # Create request with headers
        request = urllib.request.Request(url, headers=headers)

        # Download the audio file
        print(f"Downloading audio for: '{text}'")
        print(f"Language: {language}")
        print(f"URL: {url}")
        print(f"Saving to: {output_file}")

        with urllib.request.urlopen(request, timeout=timeout_seconds) as response:
            audio_data = response.read()

        # An empty body is not playable audio; report it rather than
        # leaving a zero-byte .mp3 behind
        if not audio_data:
            raise ValueError("server returned an empty response")

        # Save to file
        with open(output_file, 'wb') as f:
            f.write(audio_data)

        print(f"\n✓ Successfully downloaded to '{output_file}'")
        print(f"File size: {len(audio_data)} bytes")

    except Exception as e:
        # Deliberately broad: this is the CLI boundary, where any network or
        # file error becomes a readable message and a non-zero exit status
        print(f"\n✗ Error downloading audio: {e}", file=sys.stderr)
        sys.exit(1)

def main(argv=None):
    """
    Command-line entry point: parse the arguments and download the audio.

    Args:
        argv: Argument list to parse, without the program name. None (the
            default) makes argparse read sys.argv[1:].
    """
    # Set up argument parser
    parser = argparse.ArgumentParser(
        description='Download audio pronunciation from Google Translate',
        # Raw formatter keeps the line breaks and indentation of the examples
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  text_to_audio.py "Reinforcement Learning from Human Feedback"
    → ~/english_audio/2025-12-01_Reinforcement_Learning_from_Human_Feedback.mp3

  text_to_audio.py "Hello world" -o custom.mp3
    → custom.mp3

  text_to_audio.py "Bonjour le monde" -l fr
    → ~/french_audio/2025-12-01_Bonjour_le_monde.mp3

  text_to_audio.py "Hello" -l en
    → ~/english_audio/2025-12-01_Hello.mp3 (American English)
        """
    )

    # nargs='+' lets the text be given unquoted as several words
    parser.add_argument('text', nargs='+', help='Text to convert to speech')
    parser.add_argument('-o', '--output', default=None,
                       help='Output filename (default: auto-generated in a per-language directory such as ~/english_audio/)')
    parser.add_argument('-l', '--language', default='en-GB',
                       help='Language code (default: en-GB for British English, use en for American)')

    # Parse arguments
    args = parser.parse_args(argv)

    # Join text arguments into a single string
    text = " ".join(args.text)

    # Download the audio
    download_audio(text, args.language, args.output)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
	#!/usr/bin/env python3
	"""
	Script to encode text to URL format and download audio from Google Translate
	Usage: text_to_audio.py "your text here" [-o output.mp3] [-l en-GB]
	"""

	import sys
	import urllib.parse
	import urllib.request
	import argparse
	import os
	from datetime import datetime

	def generate_filename(text, language="en-GB"):
	    """
	    Build the default output path for the audio file and create its directory.

	    The path has the form ~/<dir>/YYYY-MM-DD_Text_With_Underscores.mp3, where
	    <dir> is "english_audio", "french_audio" or "spanish_audio" for language
	    codes starting with "en", "fr" or "es", and "<language>_audio" otherwise.

	    Args:
	        text: The text to convert to speech (used to derive the file name)
	        language: Language code (used for directory name)

	    Returns:
	        Full path to the output file
	    """
	    # File names are commonly limited to 255 bytes; keep the text part well
	    # below that so the date prefix and the ".mp3" suffix always fit.
	    max_text_bytes = 200

	    # Get current date in YYYY-MM-DD format
	    date_str = datetime.now().strftime("%Y-%m-%d")

	    # Clean the text for filename: spaces become underscores, and anything
	    # that is not alphanumeric, "_" or "-" is dropped
	    clean_text = "".join(
	        c for c in text.replace(" ", "_") if c.isalnum() or c in ("_", "-")
	    )
	    # Truncate on a byte budget; errors="ignore" discards a multi-byte
	    # character that the cut would otherwise split in half
	    clean_text = clean_text.encode("utf-8")[:max_text_bytes].decode(
	        "utf-8", errors="ignore"
	    )
	    # Text made only of spaces/punctuation would leave no usable name part
	    if not clean_text.strip("_-"):
	        clean_text = "audio"

	    # Determine directory name based on language
	    if language.startswith("en"):
	        dir_name = "english_audio"
	    elif language.startswith("fr"):
	        dir_name = "french_audio"
	    elif language.startswith("es"):
	        dir_name = "spanish_audio"
	    else:
	        # The code is user input: keep only filename-safe characters so it
	        # cannot smuggle path separators ("../") into the directory name
	        safe_language = "".join(
	            c for c in language if c.isalnum() or c in ("_", "-")
	        )
	        dir_name = f"{safe_language}_audio"

	    # Build full path
	    home_dir = os.path.expanduser("~")
	    audio_dir = os.path.join(home_dir, dir_name)

	    # Create directory if it doesn't exist
	    os.makedirs(audio_dir, exist_ok=True)

	    # Build filename
	    filename = f"{date_str}_{clean_text}.mp3"
	    return os.path.join(audio_dir, filename)

	def download_audio(text, language="en-GB", output_file=None):
	    """
	    Download audio pronunciation from Google Translate

	    Progress is printed to stdout. On any failure an error message is printed
	    to stderr and the process exits with status 1.

	    Args:
	        text: The text to convert to speech
	        language: Language code (en-GB for British, en for American)
	        output_file: Output filename for the audio file (None for auto-generate)
	    """
	    # Give up instead of hanging forever when the server does not answer
	    timeout_seconds = 30

	    # Reject blank text up front: there is nothing to pronounce, and failing
	    # here avoids creating an output directory and sending a useless request
	    if not text or not text.strip():
	        print("\n✗ Error downloading audio: text is empty", file=sys.stderr)
	        sys.exit(1)

	    # Generate filename if not provided
	    if output_file is None:
	        output_file = generate_filename(text, language)

	    # Build the Google Translate TTS URL. urlencode() escapes every value
	    # (with quote_plus), so neither the text nor the language code can
	    # break or extend the query string.
	    query = urllib.parse.urlencode({
	        "ie": "UTF-8",
	        "tl": language,
	        "client": "tw-ob",
	        "q": text,
	    })
	    url = f"https://translate.google.com/translate_tts?{query}"

	    # Set headers to mimic a browser request
	    headers = {
	        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
	        'Referer': 'https://translate.google.com/'
	    }

	    try:
	        # Create request with headers
	        request = urllib.request.Request(url, headers=headers)

	        # Download the audio file
	        print(f"Downloading audio for: '{text}'")
	        print(f"Language: {language}")
	        print(f"URL: {url}")
	        print(f"Saving to: {output_file}")

	        with urllib.request.urlopen(request, timeout=timeout_seconds) as response:
	            audio_data = response.read()

	        # An empty body is not playable audio; report it rather than
	        # leaving a zero-byte .mp3 behind
	        if not audio_data:
	            raise ValueError("server returned an empty response")

	        # Save to file
	        with open(output_file, 'wb') as f:
	            f.write(audio_data)

	        print(f"\n✓ Successfully downloaded to '{output_file}'")
	        print(f"File size: {len(audio_data)} bytes")

	    except Exception as e:
	        # Deliberately broad: this is the CLI boundary, where any network or
	        # file error becomes a readable message and a non-zero exit status
	        print(f"\n✗ Error downloading audio: {e}", file=sys.stderr)
	        sys.exit(1)

	def main(argv=None):
	    """
	    Command-line entry point: parse the arguments and download the audio.

	    Args:
	        argv: Argument list to parse, without the program name. None (the
	            default) makes argparse read sys.argv[1:].
	    """
	    # Usage examples shown after the option list; built line by line so the
	    # indentation of each example is explicit
	    usage_examples = "\n".join([
	        "",
	        "Examples:",
	        '  text_to_audio.py "Reinforcement Learning from Human Feedback"',
	        "    → ~/english_audio/2025-12-01_Reinforcement_Learning_from_Human_Feedback.mp3",
	        "",
	        '  text_to_audio.py "Hello world" -o custom.mp3',
	        "    → custom.mp3",
	        "",
	        '  text_to_audio.py "Bonjour le monde" -l fr',
	        "    → ~/french_audio/2025-12-01_Bonjour_le_monde.mp3",
	        "",
	        '  text_to_audio.py "Hello" -l en',
	        "    → ~/english_audio/2025-12-01_Hello.mp3 (American English)",
	    ])

	    # Set up argument parser
	    parser = argparse.ArgumentParser(
	        description='Download audio pronunciation from Google Translate',
	        # Raw formatter keeps the line breaks and indentation of the examples
	        formatter_class=argparse.RawDescriptionHelpFormatter,
	        epilog=usage_examples,
	    )

	    # nargs='+' lets the text be given unquoted as several words
	    parser.add_argument('text', nargs='+', help='Text to convert to speech')
	    parser.add_argument('-o', '--output', default=None,
	                        help='Output filename (default: auto-generated in a per-language directory such as ~/english_audio/)')
	    parser.add_argument('-l', '--language', default='en-GB',
	                        help='Language code (default: en-GB for British English, use en for American)')

	    # Parse arguments
	    args = parser.parse_args(argv)

	    # Join text arguments into a single string
	    text = " ".join(args.text)

	    # Download the audio
	    download_audio(text, args.language, args.output)

	# Run the command-line interface only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()
No results found