# chalg/stocks_vs_10-yr_yield.py

## stocks_vs_10-yr_yield.py
# Data source: https://www.investing.com/rates-bonds/australia-10-year-bond-yield-historical-data
# You will likely need to sign up for a free Investing.com account to download the data.
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
from datetime import datetime
import numpy as np
from typing import List, Optional


def analyze_stocks_vs_interest_rates(
    tickers: List[str],
    start_date: str = '2021-07-01',
    end_date: Optional[str] = None,
    plot_type: str = 'individual',  # 'individual', 'combined', or 'both'
    csv_file_path: str = 'data/Australia 10-Year Bond Yield Historical Data.csv'
):
    """
    Correlate each ticker's closing price with the 10-year yield series.

    Parameters:
    -----------
    tickers : List[str]
        Yahoo Finance symbols to download (e.g., ['AFG.AX', 'PPM.AX'])
    start_date : str
        First date to request, in 'YYYY-MM-DD' format
    end_date : str, optional
        Last date to request, in 'YYYY-MM-DD' format. When None, the
        current timestamp is used.
    plot_type : str
        'individual' - one chart per ticker
        'combined' - every ticker on a single chart
        'both' - individual charts followed by the combined chart
        Any other value produces no charts.
    csv_file_path : str
        Location of the bond yield CSV handed to load_interest_rate_data

    Returns:
    --------
    dict: One entry per ticker that overlaps the yield series, holding the
        aligned frame ('data'), 'correlation', 'price_range',
        'price_volatility', 'data_points' and 'date_range'.
    """
    end_date = pd.Timestamp.now() if end_date is None else end_date

    # Yield series plus the labels describing which proxy was actually loaded
    treasury_data, proxy_name, proxy_ticker = load_interest_rate_data(
        csv_file_path, start_date, end_date
    )

    # Step 1: download price history, remembering only the symbols that returned rows
    print(f"Fetching data for tickers: {', '.join(tickers)}")
    downloaded = {}
    for symbol in tickers:
        try:
            frame = yf.download(symbol, start=start_date, end=end_date, multi_level_index=False)
            if frame.empty:
                print(f"✗ No data found for {symbol}")
            else:
                downloaded[symbol] = frame
                print(f"✓ Successfully loaded {symbol}")
        except Exception as exc:
            # One failing symbol must not abort the rest of the run
            print(f"✗ Error loading {symbol}: {exc}")

    # Step 2: align each price series with the yield series and summarise it
    results = {}
    for symbol, frame in downloaded.items():
        price_col = f'{symbol}_Close'
        aligned = pd.DataFrame({
            price_col: frame['Close'],
            'Interest_Rate': treasury_data['Close'],
        }).dropna()  # keep only dates present in both series

        if aligned.empty:
            print(f"Warning: No overlapping data for {symbol}")
            continue

        closes = aligned[price_col]
        results[symbol] = {
            'data': aligned,
            'correlation': closes.corr(aligned['Interest_Rate']),
            'price_range': (closes.min(), closes.max()),
            'price_volatility': closes.std(),
            'data_points': len(aligned),
            'date_range': (aligned.index[0], aligned.index[-1]),
        }

    # Step 3: charts requested through plot_type
    wants_individual = plot_type in ('individual', 'both')
    wants_combined = plot_type in ('combined', 'both')
    if wants_individual:
        plot_individual_charts(results, proxy_name, proxy_ticker, treasury_data)
    if wants_combined:
        plot_combined_chart(results, proxy_name, proxy_ticker)

    # Step 4: text report
    print_summary_statistics(results, proxy_name, proxy_ticker, treasury_data)

    return results


def load_interest_rate_data(csv_file_path: str, start_date: str, end_date: str):
    """
    Load the 10-year yield series used as the interest-rate proxy.

    The CSV at ``csv_file_path`` is read first; it must provide a 'Date'
    column in day/month/year form and a 'Price' column. If the file does
    not exist, the US 10-year Treasury yield ('^TNX') is downloaded from
    Yahoo Finance instead.

    Parameters:
    -----------
    csv_file_path : str
        Path to the Australian bond yield CSV file
    start_date : str
        First date to keep / request
    end_date : str
        Last date to keep / request (a pandas Timestamp is also accepted)

    Returns:
    --------
    tuple: (treasury_data, proxy_name, proxy_ticker) where treasury_data is
        a date-indexed DataFrame with a 'Close' column and the two strings
        describe which proxy was loaded.
    """
    # Only the read itself can signal a missing file, so keep the try narrow
    try:
        au_yield_df = pd.read_csv(csv_file_path)
    except FileNotFoundError:
        print("✗ CSV file not found. Falling back to US Treasury as proxy...")
        treasury_data = yf.download('^TNX', start=start_date, end=end_date, multi_level_index=False)
        proxy_name = 'US 10Y Treasury (fallback)'
        proxy_ticker = '^TNX'
        print(f"✓ Loaded {proxy_name}")
        return treasury_data, proxy_name, proxy_ticker

    # Index by date in chronological order
    au_yield_df['Date'] = pd.to_datetime(au_yield_df['Date'], format='%d/%m/%Y')
    au_yield_df = au_yield_df.set_index('Date').sort_index()

    # Non-numeric price cells become NaN here and are dropped when the
    # series is later aligned with stock prices
    treasury_data = pd.DataFrame({
        'Close': pd.to_numeric(au_yield_df['Price'], errors='coerce')
    })

    # Honour the requested window, as the fallback download does
    treasury_data = treasury_data.loc[start_date:end_date]

    proxy_name = 'Australian 10Y Bond Yield'
    proxy_ticker = 'AU 10Y (CSV)'
    print("✓ Successfully loaded Australian 10Y Bond Yield from CSV")
    if treasury_data.empty:
        print(f"  Warning: CSV has no rows between {start_date} and {end_date}")
    else:
        print(f"  Data range: {treasury_data.index[0]} to {treasury_data.index[-1]}")

    return treasury_data, proxy_name, proxy_ticker


def plot_individual_charts(results: dict, proxy_name: str, proxy_ticker: str, treasury_data: pd.DataFrame):
    """
    Draw one dual-axis chart per ticker: closing price against the yield.

    Parameters:
    -----------
    results : dict
        Per-ticker entries; each must provide 'data' (a frame with
        '<ticker>_Close' and 'Interest_Rate' columns) and 'correlation'.
    proxy_name : str
        Label for the interest-rate series.
    proxy_ticker : str
        Not used by this function; kept for a uniform call signature.
    treasury_data : pd.DataFrame
        Not used by this function; kept for a uniform call signature.
    """
    print("\n=== Creating Individual Charts ===")

    for ticker, result in results.items():
        data = result['data']
        correlation = result['correlation']

        fig, ax1 = plt.subplots(figsize=(10, 6))

        # Stock price on the left axis
        ax1.set_xlabel('Date')
        ax1.set_ylabel(ticker, color='tab:blue')
        ax1.plot(data.index, data[f'{ticker}_Close'], color='tab:blue',
                 label=f'{ticker} Stock Price', linewidth=2)
        ax1.tick_params(axis='y', labelcolor='tab:blue')

        # Interest rate on a twin right axis
        ax2 = ax1.twinx()
        ax2.set_ylabel(f'{proxy_name}', color='tab:red')
        ax2.plot(data.index, data['Interest_Rate'], color='tab:red',
                 label=proxy_name, linewidth=2)
        ax2.tick_params(axis='y', labelcolor='tab:red')

        # Title set on an explicit axes rather than pyplot's current axes
        ax1.set_title(f'{get_company_name(ticker)} vs {proxy_name}\nCorrelation: {correlation:.3f}',
                      fontsize=12, pad=20)
        ax1.grid(True, alpha=0.3)

        # One legend covering both axes; attached to ax2, which is drawn on top
        lines1, labels1 = ax1.get_legend_handles_labels()
        lines2, labels2 = ax2.get_legend_handles_labels()
        ax2.legend(lines1 + lines2, labels1 + labels2, loc='best', fontsize=9, framealpha=0.9)

        # Attribution
        ax1.text(0.95, 0.01, 'Source: Yahoo Finance | Investing.com',
                 verticalalignment='bottom', horizontalalignment='right',
                 transform=ax1.transAxes, color='grey', fontsize=6.5)

        fig.tight_layout()
        plt.show()
        # Release the figure so repeated runs do not accumulate open figures
        plt.close(fig)


def plot_combined_chart(results: dict, proxy_name: str, proxy_ticker: str):
    """
    Draw every ticker, rebased to 100, against the yield on a second y-axis.

    Parameters:
    -----------
    results : dict
        Per-ticker entries; each must provide 'data' (a frame with
        '<ticker>_Close' and 'Interest_Rate' columns) and 'correlation'.
        Nothing is drawn when it is empty.
    proxy_name : str
        Label for the interest-rate series; also used in the chart title.
    proxy_ticker : str
        Not used by this function; kept for a uniform call signature.
    """
    print("\n=== Creating Combined Chart ===")

    if not results:
        print("No data to plot")
        return

    fig, ax1 = plt.subplots(figsize=(12, 8))

    # Palette is cycled when there are more tickers than colours
    colors = ['tab:blue', 'tab:green', 'tab:orange', 'tab:purple', 'tab:brown', 'tab:pink']

    ax1.set_xlabel('Date')
    ax1.set_ylabel('Normalised Price (Base=100)', color='black')
    # Name the proxy actually in use instead of assuming the Australian series
    ax1.set_title(f'Stock Prices vs {proxy_name} (Normalised)', fontsize=14, pad=20)

    for i, (ticker, result) in enumerate(results.items()):
        closes = result['data'][f'{ticker}_Close']
        # Rebase so the first aligned observation equals 100
        normalised_price = (closes / closes.iloc[0]) * 100
        ax1.plot(closes.index, normalised_price, color=colors[i % len(colors)],
                 label=f'{ticker} (r={result["correlation"]:.3f})', linewidth=2)

    ax1.tick_params(axis='y', labelcolor='black')
    ax1.grid(True, alpha=0.3)

    # Second y-axis for the yield
    ax2 = ax1.twinx()
    ax2.set_ylabel(f'{proxy_name} (%)', color='tab:red')

    # Each ticker holds only the yield dates it overlaps, so merge all of
    # them (one value per date) to avoid truncating the line to one ticker
    interest_rate_data = pd.concat(
        [result['data']['Interest_Rate'] for result in results.values()]
    )
    interest_rate_data = interest_rate_data[~interest_rate_data.index.duplicated()].sort_index()
    ax2.plot(interest_rate_data.index, interest_rate_data, color='tab:red',
             label=proxy_name, linewidth=2, alpha=0.8)
    ax2.tick_params(axis='y', labelcolor='tab:red')

    # Single legend covering both axes
    lines1, labels1 = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax1.legend(lines1 + lines2, labels1 + labels2, loc='lower right', fontsize=9, framealpha=0.9)

    # Attribution
    ax1.text(0.05, 0.01, 'Source: Yahoo Finance | Investing.com',
             verticalalignment='bottom', horizontalalignment='left',
             transform=ax1.transAxes, color='grey', fontsize=6.5)

    fig.tight_layout()
    plt.show()
    # Release the figure so repeated runs do not accumulate open figures
    plt.close(fig)


def print_summary_statistics(results: dict, proxy_name: str, proxy_ticker: str, treasury_data: pd.DataFrame):
    """
    Print a text report of the correlation results.

    Parameters:
    -----------
    results : dict
        Per-ticker entries; each must provide 'data' (with an
        'Interest_Rate' column), 'correlation', 'price_range',
        'price_volatility' and 'data_points'.
    proxy_name : str
        Name of the interest-rate series, shown in the header.
    proxy_ticker : str
        Identifier of the interest-rate series, shown in the header.
    treasury_data : pd.DataFrame
        Not used by this function; kept for a uniform call signature.
    """
    print("\n" + "=" * 60)
    print("CORRELATION ANALYSIS SUMMARY")
    print("=" * 60)
    print(f"Proxy used: {proxy_name} ({proxy_ticker})")

    if not results:
        print("No valid data found for analysis")
        return

    # Every ticker's frame carries its own copy of the yield series. Merge
    # them and keep one observation per date, otherwise dates shared by
    # several tickers are counted once per ticker in the statistics.
    rates = pd.concat([result['data']['Interest_Rate'] for result in results.values()])
    rates = rates[~rates.index.duplicated()]

    print("\nInterest Rate Statistics:")
    print(f"  Range: {rates.min():.3f}% - {rates.max():.3f}%")
    print(f"  Volatility (std): {np.std(rates.to_numpy()):.3f}%")

    print("\nIndividual Stock Analysis:")
    print(f"{'Ticker':<8} {'Company':<25} {'Correlation':<12} {'Price Range':<20} {'Volatility':<12} {'Data Points':<12}")
    print("-" * 90)

    correlations = []
    for ticker, result in results.items():
        company_name = get_company_name(ticker)[:24]  # Truncate long names
        correlation = result['correlation']
        correlations.append(correlation)
        price_min, price_max = result['price_range']
        volatility = result['price_volatility']
        data_points = result['data_points']

        print(f"{ticker:<8} {company_name:<25} {correlation:>8.4f}    "
              f"${price_min:>6.2f} - ${price_max:<7.2f} {volatility:>8.2f}    {data_points:>8}")

    print("-" * 90)
    # An undefined (NaN) correlation would otherwise turn the aggregate
    # figures into NaN as well, so aggregate over the defined ones only
    defined = [value for value in correlations if not np.isnan(value)]
    if defined:
        print(f"Average Correlation: {np.mean(defined):>8.4f}")
        print(f"Correlation Range:   {min(defined):>8.4f} to {max(defined):<8.4f}")
    else:
        print("Average Correlation:      n/a")
        print("Correlation Range:        n/a")


def get_company_name(ticker: str) -> str:
    """Return the display name for a known ASX ticker, or the ticker itself otherwise."""
    known_names = {
        'AFG.AX': 'Australian Finance Group',
        'PPM.AX': 'Pepper Money Limited',
        'CKF.AX': 'Collins Foods Limited',
        'EDV.AX': 'Endeavour Group Limited',
        'NCK.AX': 'Nick Scali Limited',
        'PMV.AX': 'Premier Investments Limited',
        'DMP.AX': 'Domino\'s Pizza Enterprises',
        'TPW.AX': 'Temple & Webster Group',
    }
    try:
        return known_names[ticker]
    except KeyError:
        # Unmapped symbols are shown as-is
        return ticker


# Example usage
if __name__ == "__main__":
    # Demonstration runs: the same start date is used throughout so the
    # three outputs are comparable.
    tickers = ['AFG.AX', 'PPM.AX', 'CKF.AX', 'EDV.AX']
    demo_start = '2021-07-01'
    separator = "\n" + "="*60

    # Example 1: one chart per ticker
    print("Example 1: Individual plots")
    results1 = analyze_stocks_vs_interest_rates(tickers, demo_start, plot_type='individual')

    # Example 2: all four tickers on a single normalised chart
    print(separator)
    print("Example 2: Combined plot")
    results2 = analyze_stocks_vs_interest_rates(tickers, demo_start, plot_type='combined')

    # Example 3: both chart styles for a two-ticker subset
    print(separator)
    print("Example 3: Both individual and combined plots")
    results3 = analyze_stocks_vs_interest_rates(['AFG.AX', 'PPM.AX'], demo_start, plot_type='both')
	# Data source: https://www.investing.com/rates-bonds/australia-10-year-bond-yield-historical-data.
	# Likely need to sign up for a free account to access the data.
	import pandas as pd
	import matplotlib.pyplot as plt
	import yfinance as yf
	from datetime import datetime
	import numpy as np
	from typing import List, Optional


	def analyze_stocks_vs_interest_rates(
	    tickers: List[str],
	    start_date: str = '2021-07-01',
	    end_date: Optional[str] = None,
	    plot_type: str = 'individual',  # 'individual', 'combined', or 'both'
	    csv_file_path: str = 'data/Australia 10-Year Bond Yield Historical Data.csv'
	):
	    """
	    Analyze correlation between stock tickers and Australian 10-Year Bond Yield.

	    Parameters:
	    -----------
	    tickers : List[str]
	        List of stock tickers to analyze (e.g., ['AFG.AX', 'PPM.AX'])
	    start_date : str
	        Start date in 'YYYY-MM-DD' format
	    end_date : str, optional
	        End date in 'YYYY-MM-DD' format. If None, uses the current timestamp.
	    plot_type : str
	        'individual' - separate plots for each ticker
	        'combined' - all tickers on one plot
	        'both' - both individual and combined plots
	        Any other value produces no plots.
	    csv_file_path : str
	        Path to the Australian bond yield CSV file

	    Returns:
	    --------
	    dict: One entry per ticker that overlaps the yield series, holding the
	        aligned frame ('data'), 'correlation', 'price_range',
	        'price_volatility', 'data_points' and 'date_range'.
	    """
	    if end_date is None:
	        end_date = pd.Timestamp.now()

	    # Load interest rate data
	    treasury_data, proxy_name, proxy_ticker = load_interest_rate_data(csv_file_path, start_date, end_date)

	    results = {}
	    stock_data = {}

	    # Fetch data for all tickers; a failing symbol is reported and skipped
	    print(f"Fetching data for tickers: {', '.join(tickers)}")
	    for ticker in tickers:
	        try:
	            stock = yf.download(ticker, start=start_date, end=end_date, multi_level_index=False)
	            if not stock.empty:
	                stock_data[ticker] = stock
	                print(f"✓ Successfully loaded {ticker}")
	            else:
	                print(f"✗ No data found for {ticker}")
	        except Exception as e:
	            print(f"✗ Error loading {ticker}: {e}")

	    # Analyze each ticker
	    for ticker, stock in stock_data.items():
	        price_col = f'{ticker}_Close'

	        # Align the datasets on their date index
	        data = pd.DataFrame({
	            price_col: stock['Close'],
	            'Interest_Rate': treasury_data['Close']
	        })

	        # Keep only dates present in both series
	        data.dropna(inplace=True)

	        if len(data) == 0:
	            print(f"Warning: No overlapping data for {ticker}")
	            continue

	        correlation = data[price_col].corr(data['Interest_Rate'])

	        results[ticker] = {
	            'data': data,
	            'correlation': correlation,
	            'price_range': (data[price_col].min(), data[price_col].max()),
	            'price_volatility': data[price_col].std(),
	            'data_points': len(data),
	            'date_range': (data.index[0], data.index[-1])
	        }

	    # Generate plots based on plot_type
	    if plot_type in ['individual', 'both']:
	        plot_individual_charts(results, proxy_name, proxy_ticker, treasury_data)

	    if plot_type in ['combined', 'both']:
	        plot_combined_chart(results, proxy_name, proxy_ticker)

	    # Print summary statistics
	    print_summary_statistics(results, proxy_name, proxy_ticker, treasury_data)

	    return results


	def load_interest_rate_data(csv_file_path: str, start_date: str, end_date: str):
	    """
	    Load the 10-year yield series used as the interest-rate proxy.

	    The CSV at ``csv_file_path`` is read first; it must provide a 'Date'
	    column in day/month/year form and a 'Price' column. If the file does
	    not exist, the US 10-year Treasury yield ('^TNX') is downloaded from
	    Yahoo Finance instead.

	    Parameters:
	    -----------
	    csv_file_path : str
	        Path to the Australian bond yield CSV file
	    start_date : str
	        First date to keep / request
	    end_date : str
	        Last date to keep / request (a pandas Timestamp is also accepted)

	    Returns:
	    --------
	    tuple: (treasury_data, proxy_name, proxy_ticker) where treasury_data is
	        a date-indexed DataFrame with a 'Close' column and the two strings
	        describe which proxy was loaded.
	    """
	    # Only the read itself can signal a missing file, so keep the try narrow
	    try:
	        au_yield_df = pd.read_csv(csv_file_path)
	    except FileNotFoundError:
	        print("✗ CSV file not found. Falling back to US Treasury as proxy...")
	        treasury_data = yf.download('^TNX', start=start_date, end=end_date, multi_level_index=False)
	        proxy_name = 'US 10Y Treasury (fallback)'
	        proxy_ticker = '^TNX'
	        print(f"✓ Loaded {proxy_name}")
	        return treasury_data, proxy_name, proxy_ticker

	    # Index by date in chronological order
	    au_yield_df['Date'] = pd.to_datetime(au_yield_df['Date'], format='%d/%m/%Y')
	    au_yield_df = au_yield_df.set_index('Date').sort_index()

	    # Non-numeric price cells become NaN here and are dropped when the
	    # series is later aligned with stock prices
	    treasury_data = pd.DataFrame({
	        'Close': pd.to_numeric(au_yield_df['Price'], errors='coerce')
	    })

	    # Honour the requested window, as the fallback download does
	    treasury_data = treasury_data.loc[start_date:end_date]

	    proxy_name = 'Australian 10Y Bond Yield'
	    proxy_ticker = 'AU 10Y (CSV)'
	    print("✓ Successfully loaded Australian 10Y Bond Yield from CSV")
	    if treasury_data.empty:
	        print(f"  Warning: CSV has no rows between {start_date} and {end_date}")
	    else:
	        print(f"  Data range: {treasury_data.index[0]} to {treasury_data.index[-1]}")

	    return treasury_data, proxy_name, proxy_ticker


	def plot_individual_charts(results: dict, proxy_name: str, proxy_ticker: str, treasury_data: pd.DataFrame):
	    """
	    Draw one dual-axis chart per ticker: closing price against the yield.

	    Parameters:
	    -----------
	    results : dict
	        Per-ticker entries; each must provide 'data' (a frame with
	        '<ticker>_Close' and 'Interest_Rate' columns) and 'correlation'.
	    proxy_name : str
	        Label for the interest-rate series.
	    proxy_ticker : str
	        Not used by this function; kept for a uniform call signature.
	    treasury_data : pd.DataFrame
	        Not used by this function; kept for a uniform call signature.
	    """
	    print("\n=== Creating Individual Charts ===")

	    for ticker, result in results.items():
	        data = result['data']
	        correlation = result['correlation']

	        fig, ax1 = plt.subplots(figsize=(10, 6))

	        # Stock price on the left axis
	        ax1.set_xlabel('Date')
	        ax1.set_ylabel(ticker, color='tab:blue')
	        ax1.plot(data.index, data[f'{ticker}_Close'], color='tab:blue',
	                 label=f'{ticker} Stock Price', linewidth=2)
	        ax1.tick_params(axis='y', labelcolor='tab:blue')

	        # Interest rate on a twin right axis
	        ax2 = ax1.twinx()
	        ax2.set_ylabel(f'{proxy_name}', color='tab:red')
	        ax2.plot(data.index, data['Interest_Rate'], color='tab:red',
	                 label=proxy_name, linewidth=2)
	        ax2.tick_params(axis='y', labelcolor='tab:red')

	        # Title set on an explicit axes rather than pyplot's current axes
	        ax1.set_title(f'{get_company_name(ticker)} vs {proxy_name}\nCorrelation: {correlation:.3f}',
	                      fontsize=12, pad=20)
	        ax1.grid(True, alpha=0.3)

	        # One legend covering both axes; attached to ax2, which is drawn on top
	        lines1, labels1 = ax1.get_legend_handles_labels()
	        lines2, labels2 = ax2.get_legend_handles_labels()
	        ax2.legend(lines1 + lines2, labels1 + labels2, loc='best', fontsize=9, framealpha=0.9)

	        # Attribution
	        ax1.text(0.95, 0.01, 'Source: Yahoo Finance | Investing.com',
	                 verticalalignment='bottom', horizontalalignment='right',
	                 transform=ax1.transAxes, color='grey', fontsize=6.5)

	        fig.tight_layout()
	        plt.show()
	        # Release the figure so repeated runs do not accumulate open figures
	        plt.close(fig)


	def plot_combined_chart(results: dict, proxy_name: str, proxy_ticker: str):
	    """
	    Draw every ticker, rebased to 100, against the yield on a second y-axis.

	    Parameters:
	    -----------
	    results : dict
	        Per-ticker entries; each must provide 'data' (a frame with
	        '<ticker>_Close' and 'Interest_Rate' columns) and 'correlation'.
	        Nothing is drawn when it is empty.
	    proxy_name : str
	        Label for the interest-rate series; also used in the chart title.
	    proxy_ticker : str
	        Not used by this function; kept for a uniform call signature.
	    """
	    print("\n=== Creating Combined Chart ===")

	    if not results:
	        print("No data to plot")
	        return

	    fig, ax1 = plt.subplots(figsize=(12, 8))

	    # Palette is cycled when there are more tickers than colours
	    colors = ['tab:blue', 'tab:green', 'tab:orange', 'tab:purple', 'tab:brown', 'tab:pink']

	    ax1.set_xlabel('Date')
	    ax1.set_ylabel('Normalised Price (Base=100)', color='black')
	    # Name the proxy actually in use instead of assuming the Australian series
	    ax1.set_title(f'Stock Prices vs {proxy_name} (Normalised)', fontsize=14, pad=20)

	    for i, (ticker, result) in enumerate(results.items()):
	        closes = result['data'][f'{ticker}_Close']
	        # Rebase so the first aligned observation equals 100
	        normalised_price = (closes / closes.iloc[0]) * 100
	        ax1.plot(closes.index, normalised_price, color=colors[i % len(colors)],
	                 label=f'{ticker} (r={result["correlation"]:.3f})', linewidth=2)

	    ax1.tick_params(axis='y', labelcolor='black')
	    ax1.grid(True, alpha=0.3)

	    # Second y-axis for the yield
	    ax2 = ax1.twinx()
	    ax2.set_ylabel(f'{proxy_name} (%)', color='tab:red')

	    # Each ticker holds only the yield dates it overlaps, so merge all of
	    # them (one value per date) to avoid truncating the line to one ticker
	    interest_rate_data = pd.concat(
	        [result['data']['Interest_Rate'] for result in results.values()]
	    )
	    interest_rate_data = interest_rate_data[~interest_rate_data.index.duplicated()].sort_index()
	    ax2.plot(interest_rate_data.index, interest_rate_data, color='tab:red',
	             label=proxy_name, linewidth=2, alpha=0.8)
	    ax2.tick_params(axis='y', labelcolor='tab:red')

	    # Single legend covering both axes
	    lines1, labels1 = ax1.get_legend_handles_labels()
	    lines2, labels2 = ax2.get_legend_handles_labels()
	    ax1.legend(lines1 + lines2, labels1 + labels2, loc='lower right', fontsize=9, framealpha=0.9)

	    # Attribution
	    ax1.text(0.05, 0.01, 'Source: Yahoo Finance | Investing.com',
	             verticalalignment='bottom', horizontalalignment='left',
	             transform=ax1.transAxes, color='grey', fontsize=6.5)

	    fig.tight_layout()
	    plt.show()
	    # Release the figure so repeated runs do not accumulate open figures
	    plt.close(fig)


	def print_summary_statistics(results: dict, proxy_name: str, proxy_ticker: str, treasury_data: pd.DataFrame):
	    """
	    Print a text report of the correlation results.

	    Parameters:
	    -----------
	    results : dict
	        Per-ticker entries; each must provide 'data' (with an
	        'Interest_Rate' column), 'correlation', 'price_range',
	        'price_volatility' and 'data_points'.
	    proxy_name : str
	        Name of the interest-rate series, shown in the header.
	    proxy_ticker : str
	        Identifier of the interest-rate series, shown in the header.
	    treasury_data : pd.DataFrame
	        Not used by this function; kept for a uniform call signature.
	    """
	    print("\n" + "=" * 60)
	    print("CORRELATION ANALYSIS SUMMARY")
	    print("=" * 60)
	    print(f"Proxy used: {proxy_name} ({proxy_ticker})")

	    if not results:
	        print("No valid data found for analysis")
	        return

	    # Every ticker's frame carries its own copy of the yield series. Merge
	    # them and keep one observation per date, otherwise dates shared by
	    # several tickers are counted once per ticker in the statistics.
	    rates = pd.concat([result['data']['Interest_Rate'] for result in results.values()])
	    rates = rates[~rates.index.duplicated()]

	    print("\nInterest Rate Statistics:")
	    print(f"  Range: {rates.min():.3f}% - {rates.max():.3f}%")
	    print(f"  Volatility (std): {np.std(rates.to_numpy()):.3f}%")

	    print("\nIndividual Stock Analysis:")
	    print(f"{'Ticker':<8} {'Company':<25} {'Correlation':<12} {'Price Range':<20} {'Volatility':<12} {'Data Points':<12}")
	    print("-" * 90)

	    correlations = []
	    for ticker, result in results.items():
	        company_name = get_company_name(ticker)[:24]  # Truncate long names
	        correlation = result['correlation']
	        correlations.append(correlation)
	        price_min, price_max = result['price_range']
	        volatility = result['price_volatility']
	        data_points = result['data_points']

	        # Padding between fields keeps the row under the header columns
	        print(f"{ticker:<8} {company_name:<25} {correlation:>8.4f}    "
	              f"${price_min:>6.2f} - ${price_max:<7.2f} {volatility:>8.2f}    {data_points:>8}")

	    print("-" * 90)
	    # An undefined (NaN) correlation would otherwise turn the aggregate
	    # figures into NaN as well, so aggregate over the defined ones only
	    defined = [value for value in correlations if not np.isnan(value)]
	    if defined:
	        print(f"Average Correlation: {np.mean(defined):>8.4f}")
	        print(f"Correlation Range:   {min(defined):>8.4f} to {max(defined):<8.4f}")
	    else:
	        print("Average Correlation:      n/a")
	        print("Correlation Range:        n/a")


	def get_company_name(ticker: str) -> str:
	    """Get company name from ticker (basic mapping for common Australian stocks).

	    Returns the ticker unchanged when it is not in the mapping.
	    """
	    company_names = {
	        'AFG.AX': 'Australian Finance Group',
	        'PPM.AX': 'Pepper Money Limited',
	        'CKF.AX': 'Collins Foods Limited',
	        'EDV.AX': 'Endeavour Group Limited',
	        'NCK.AX': 'Nick Scali Limited',
	        'PMV.AX': 'Premier Investments Limited',
	        'DMP.AX': 'Domino\'s Pizza Enterprises',
	        'TPW.AX': 'Temple & Webster Group',
	    }
	    return company_names.get(ticker, ticker)


	# Example usage
	if __name__ == "__main__":
	    # Example 1: Individual plots for 4 tickers
	    tickers = ['AFG.AX', 'PPM.AX', 'CKF.AX', 'EDV.AX']

	    print("Example 1: Individual plots")
	    results1 = analyze_stocks_vs_interest_rates(
	        tickers=tickers,
	        start_date='2021-07-01',
	        plot_type='individual'
	    )

	    # Example 2: the same tickers on one normalised chart
	    print("\n" + "="*60)
	    print("Example 2: Combined plot")
	    results2 = analyze_stocks_vs_interest_rates(
	        tickers=tickers,
	        start_date='2021-07-01',
	        plot_type='combined'
	    )

	    # Example 3: both chart styles for a two-ticker subset
	    print("\n" + "="*60)
	    print("Example 3: Both individual and combined plots")
	    results3 = analyze_stocks_vs_interest_rates(
	        tickers=['AFG.AX', 'PPM.AX'],
	        start_date='2021-07-01',  # Same date range as other examples
	        plot_type='both'
	    )
# No results found