The Epsilon Trap - Experimental Code for Blog Post
import torch
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.animation as animation
from mpl_toolkits.mplot3d import Axes3D
import pandas as pd

# ============================================================================
# CONFIGURATION
# ============================================================================

# Optimization parameters
STEPS = 400
LEARNING_RATE = 0.5
START_POS = [1.0, 1.0]
SCALE_X = 1e-4  # Creates the "Micro-Canyon" geometry

# Epsilon values to test
EPS_LIST = [1e-6, 1e-8, 1e-10]
COLORS = ['blue', 'green', 'red']  # Explicit mapping for clarity

# Animation parameters
FPS = 15
DURATION_SEC = 16

# ============================================================================
# CORE FUNCTIONS
# ============================================================================

def loss_fn(x, y):
    """Micro-Canyon loss landscape: steep in Y, flat in X."""
    return (x * SCALE_X)**2 + y**2
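
# Rough gradient scales for this landscape (a back-of-the-envelope aside, not
# part of the original gist): dL/dx = 2 * SCALE_X**2 * x and dL/dy = 2 * y, so
# at the start position (1, 1) the X-gradient is about 2e-8 while the
# Y-gradient is 2. It is this tiny X-gradient that drives Adam's sqrt(v_hat)
# denominator down toward epsilon, the regime the experiment is designed to hit.
def _analytic_gradients(x, y):
    """Analytic gradients of loss_fn, for sanity-checking the scales above."""
    return 2 * SCALE_X**2 * x, 2 * y
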
def run_optimizer(epsilon, track_metrics=False):
    """
    Run Adam optimizer with specified epsilon value.

    Args:
        epsilon: Adam's epsilon hyperparameter
        track_metrics: If True, also track gradients and update sizes

    Returns:
        If track_metrics=False: numpy array of shape (STEPS, 2) containing trajectory
        If track_metrics=True: tuple of (trajectory, gradients_x, updates_x)
    """
    pos = torch.tensor(START_POS, requires_grad=True, dtype=torch.float32)
    optimizer = torch.optim.Adam([pos], lr=LEARNING_RATE, eps=epsilon)

    path = []
    grads_x = [] if track_metrics else None
    updates_x = [] if track_metrics else None

    for _ in range(STEPS):
        # Store current position
        p_prev = pos.detach().clone()
        path.append(p_prev.numpy().copy())

        # Compute gradients
        optimizer.zero_grad()
        loss = loss_fn(pos[0], pos[1])
        loss.backward()

        # Track metrics if requested
        if track_metrics:
            grads_x.append(abs(pos.grad[0].item()))

        # Take optimization step
        optimizer.step()

        # Track update size if requested
        if track_metrics:
            p_curr = pos.detach()
            updates_x.append(abs(p_curr[0].item() - p_prev[0].item()))

    path = np.array(path)

    if track_metrics:
        return path, np.array(grads_x), np.array(updates_x)
    else:
        return path
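
# A minimal sketch of the textbook Adam update (Kingma & Ba), shown here only
# to make explicit where `eps` enters; this is not PyTorch's implementation
# and omits options such as weight decay and amsgrad. The step is
# lr * m_hat / (sqrt(v_hat) + eps): once sqrt(v_hat) drops below eps, the
# denominator is pinned near eps and the step size saturates at roughly
# lr * m_hat / eps, which is the "epsilon trap" ceiling probed below.
def adam_step_sketch(grad, m, v, t, lr=LEARNING_RATE,
                     beta1=0.9, beta2=0.999, eps=1e-8):
    """Return (update, m, v) for one Adam step on a scalar gradient."""
    m = beta1 * m + (1 - beta1) * grad      # first moment (EMA of gradients)
    v = beta2 * v + (1 - beta2) * grad**2   # second moment (EMA of squared gradients)
    m_hat = m / (1 - beta1**t)              # bias correction, t starts at 1
    v_hat = v / (1 - beta2**t)
    update = lr * m_hat / (np.sqrt(v_hat) + eps)
    return update, m, v
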
# ============================================================================
# VISUALIZATION 1: LOSS LANDSCAPE (ROTATING GIF)
# ============================================================================

def create_loss_landscape_gif(filename='elongated_canyon.gif'):
    """Generate rotating 3D visualization of the loss landscape."""
    print("Generating loss landscape...")

    # Generate surface data
    X_range = np.linspace(-6.0, 6.0, 120)
    Y_range = np.linspace(-2.0, 2.0, 120)
    X_grid, Y_grid = np.meshgrid(X_range, Y_range)
    Z_grid = loss_fn(X_grid, Y_grid)

    # Setup figure
    fig = plt.figure(figsize=(10, 5))
    ax = fig.add_subplot(111, projection='3d')

    # Plot surface and contours
    ax.plot_surface(X_grid, Y_grid, Z_grid, cmap='viridis',
                    alpha=0.8, edgecolor='none', rstride=2, cstride=2, antialiased=True)
    ax.contour(X_grid, Y_grid, Z_grid, zdir='z', offset=0,
               levels=25, cmap='viridis', alpha=0.6)

    # Styling
    ax.set_title("Micro-Canyon Landscape")
    ax.set_xlabel('X (Flat)')
    ax.set_ylabel('Y (Steep)')
    ax.set_zlabel('Loss')
    ax.set_xlim(X_range.min(), X_range.max())
    ax.set_ylim(Y_range.min(), Y_range.max())
    ax.set_zlim(0, Z_grid.max())
    ax.set_yticks([-2, -1, 0, 1, 2])
    ax.set_box_aspect((3, 1, 1))

    # Remove pane fills
    ax.xaxis.pane.fill = False
    ax.yaxis.pane.fill = False
    ax.zaxis.pane.fill = False

    # Animation function
    total_frames = FPS * DURATION_SEC

    def update(frame):
        progress = frame / total_frames
        azim = 45 + (progress * 360)
        elev = 30 + 5 * np.sin(progress * 2 * np.pi)
        ax.view_init(elev=elev, azim=azim)

    # Create and save animation
    print(f"Rendering animation ({total_frames} frames)...")
    ani = animation.FuncAnimation(fig, update, frames=total_frames,
                                  interval=1000/FPS, blit=False)
    ani.save(filename, writer='pillow', fps=FPS)
    print(f"Saved to {filename}")
    plt.close()
# ============================================================================
# VISUALIZATION 2: TRAJECTORY RACE (DUAL-VIEW GIF)
# ============================================================================

def create_trajectory_race_gif(trajectories, filename='adam_3d_eps_race.gif'):
    """Generate dual-view animation showing optimizer trajectories."""
    print("Generating trajectory race animation...")

    fig = plt.figure(figsize=(16, 8))

    # Create two 3D subplots
    ax1 = fig.add_subplot(1, 2, 1, projection='3d')
    ax2 = fig.add_subplot(1, 2, 2, projection='3d')

    # Surface data for context
    X = np.linspace(-0.2, 1.2, 50)
    Y = np.linspace(-0.5, 1.5, 50)
    X_grid, Y_grid = np.meshgrid(X, Y)
    Z_grid = loss_fn(X_grid, Y_grid)

    # Setup both axes
    for ax, title in zip([ax1, ax2],
                         ["View 1: Isometric Overview",
                          "View 2: Valley Floor (Worm's Eye)"]):
        ax.plot_surface(X_grid, Y_grid, Z_grid, cmap='gray',
                        alpha=0.15, edgecolor='none')
        ax.contour(X_grid, Y_grid, Z_grid, zdir='z', offset=0,
                   levels=20, cmap='gray', alpha=0.3)
        ax.set_title(title)
        ax.set_xlabel('X (Flat)')
        ax.set_ylabel('Y (Steep)')
        ax.set_zlabel('Loss')
        ax.set_xlim(-0.2, 1.2)
        ax.set_ylim(-0.5, 1.5)
        ax.set_zlim(0, 1.5)

    # Set viewing angles
    ax1.view_init(elev=30, azim=-60)  # Isometric
    ax2.view_init(elev=5, azim=-10)   # Worm's eye

    # Initialize lines and dots for each epsilon
    labels = [f'eps={eps}' for eps in EPS_LIST]
    lines_ax1 = [ax1.plot([], [], [], color=c, lw=2, label=l)[0]
                 for c, l in zip(COLORS, labels)]
    dots_ax1 = [ax1.plot([], [], [], color=c, marker='o', markersize=8)[0]
                for c in COLORS]
    lines_ax2 = [ax2.plot([], [], [], color=c, lw=2)[0] for c in COLORS]
    dots_ax2 = [ax2.plot([], [], [], color=c, marker='o', markersize=8)[0]
                for c in COLORS]
    ax1.legend(loc='upper right')

    # Animation update function
    def update(frame):
        idx = min(frame * 2, STEPS - 1)  # Speed up 2x
        for j, path in enumerate(trajectories):
            i = min(idx, len(path) - 1)

            # Trajectory up to current frame
            xs = path[:i+1, 0]
            ys = path[:i+1, 1]
            zs = loss_fn(xs, ys)

            # Current position
            cx, cy = [path[i, 0]], [path[i, 1]]
            cz = [loss_fn(path[i, 0], path[i, 1])]

            # Update both views
            for lines, dots in [(lines_ax1, dots_ax1), (lines_ax2, dots_ax2)]:
                lines[j].set_data(xs, ys)
                lines[j].set_3d_properties(zs)
                dots[j].set_data(cx, cy)
                dots[j].set_3d_properties(cz)
        return lines_ax1 + dots_ax1 + lines_ax2 + dots_ax2

    # Create and save animation
    ani = animation.FuncAnimation(fig, update, frames=STEPS//2,
                                  interval=30, blit=False)
    ani.save(filename, writer='pillow', fps=30)
    print(f"Saved to {filename}")
    plt.close()
# ============================================================================
# VISUALIZATION 3: AMPLIFICATION FACTOR ANALYSIS
# ============================================================================

def plot_amplification_analysis(results, filename='epsilon_amplification.png'):
    """
    Plot how epsilon affects the amplification factor (step size / gradient).
    Shows when optimizers enter the 'epsilon trap' regime.
    """
    print("Generating amplification analysis...")

    plt.style.use('seaborn-v0_8-whitegrid')
    fig, ax = plt.subplots(figsize=(12, 8))
    jet_colors = cm.jet(np.linspace(0, 1, len(EPS_LIST)))

    for i, eps in enumerate(EPS_LIST):
        color = jet_colors[i]
        data = results[eps]

        # Filter valid data
        mask = data['grads'] > 1e-12
        grads = data['grads'][mask]

        # Amplification = (step_size / learning_rate) / gradient
        gains = (data['updates'][mask] / LEARNING_RATE) / grads

        # Plot raw scatter (faint)
        ax.scatter(grads, gains, color=color, s=15, alpha=0.12, edgecolors='none')

        # Calculate binned median trend
        df = pd.DataFrame({'grad': grads, 'gain': gains})
        bins = np.logspace(np.log10(grads.min()), np.log10(grads.max()), 30)
        df['bin'] = pd.cut(df['grad'], bins, include_lowest=True)
        grouped = df.groupby('bin', observed=True)['gain']
        trend = grouped.median()
        lower = grouped.quantile(0.25)
        upper = grouped.quantile(0.75)
        bin_centers = [interval.mid for interval in trend.index]

        # Plot median trend
        ax.plot(bin_centers, trend.values, color=color, linewidth=4,
                label=rf'$\epsilon = 10^{{{int(np.log10(eps))}}}$')

        # Shade IQR region
        ax.fill_between(bin_centers, lower.values, upper.values,
                        color=color, alpha=0.20, edgecolor='none')

        # Theoretical ceiling (1/epsilon)
        ceiling = 1 / eps
        ax.axhline(ceiling, color=color, linestyle='--', linewidth=1.8,
                   alpha=0.65, label=f'Ceiling: 1/ε = {ceiling:.0e}')

    # Formatting
    ax.set_xlabel(r'Gradient Magnitude (log scale)', fontsize=14)
    ax.set_ylabel(r'Amplification Factor (log scale)', fontsize=14)
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.set_xlim(1e-10, 1e-7)
    ax.set_ylim(1e4, 1e11)
    ax.legend(fontsize=12, loc='upper right', frameon=True, shadow=True)
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig(filename, dpi=300, bbox_inches='tight')
    print(f"Saved to {filename}")
    plt.close()
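
# Where the 1/eps ceiling comes from (a short derivation under the simplifying
# assumption of a near-constant gradient g, a good approximation for the
# slowly-moving X coordinate): with m_hat ~ g and v_hat ~ g**2, the per-step
# amplification measured above is
#     (|update| / lr) / |g|  ~  (|g| / (|g| + eps)) / |g|  =  1 / (|g| + eps),
# which tends to 1/eps as |g| -> 0. The dashed lines in the plot mark exactly
# that limit for each epsilon.
def theoretical_amplification(grad_magnitude, eps):
    """Predicted amplification for a steady gradient of the given magnitude."""
    return 1.0 / (grad_magnitude + eps)
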
# ============================================================================
# UTILITY: CHECK FLOATING POINT PRECISION
# ============================================================================

def inspect_dtype(dtype, name):
    """Print floating point characteristics for a given dtype."""
    print(f"\n--- {name} ---")
    info = torch.finfo(dtype)
    print(f"Machine Epsilon (eps): {info.eps}")
    print(f"Smallest Normal (tiny): {info.tiny}")
    print(f"Max Value: {info.max}")

    # Find smallest subnormal empirically by halving until underflow
    val = torch.tensor(info.tiny, dtype=dtype)
    smallest_subnormal = val
    while val > 0:
        smallest_subnormal = val
        val = val / 2
    print(f"True Smallest (Subnormal): {smallest_subnormal.item()}")
    print("-" * 50)


def check_precision_limits():
    """Check BF16, FP16, and FP32 precision characteristics."""
    print("\nFloating Point Precision Analysis")
    print("=" * 50)
    try:
        inspect_dtype(torch.bfloat16, "BFLOAT16")
    except TypeError:
        print("BFLOAT16 not supported on this device/version.")
    inspect_dtype(torch.float16, "FP16 (Half)")
    inspect_dtype(torch.float32, "FP32 (Float)")
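
# A quick companion check (an aside, not part of the original pipeline): how
# the tested epsilon values are actually stored in half precision. float16's
# smallest subnormal is about 6e-8, so eps = 1e-8 or 1e-10 flushes to zero if
# optimizer state is kept in fp16, removing the safety floor entirely;
# bfloat16 keeps fp32's exponent range, so these values survive, but with far
# coarser precision.
def show_eps_storage():
    """Print how each tested epsilon round-trips through fp16 and bf16."""
    for eps in EPS_LIST:
        fp16_val = torch.tensor(eps, dtype=torch.float16).item()
        bf16_val = torch.tensor(eps, dtype=torch.bfloat16).item()
        print(f"eps={eps:.0e}: fp16 -> {fp16_val}, bf16 -> {bf16_val}")
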
# ============================================================================
# MAIN EXPERIMENT PIPELINE
# ============================================================================

def run_full_experiment():
    """Execute complete experimental pipeline."""
    print("=" * 70)
    print("THE EPSILON TRAP EXPERIMENT")
    print("=" * 70)

    # Step 1: Check precision limits
    check_precision_limits()

    # Step 2: Generate trajectories
    print("\n" + "=" * 70)
    print("Running optimizers...")
    print("=" * 70)

    trajectories = []
    results = {}

    for eps, color in zip(EPS_LIST, COLORS):
        print(f"\nRunning eps={eps}...")
        path, grads, updates = run_optimizer(eps, track_metrics=True)
        trajectories.append(path)
        results[eps] = {
            'path': path,
            'grads': grads,
            'updates': updates,
            'color': color
        }
        print(f"  Final position: {path[-1]}")
        print(f"  Steps taken: {len(path)}")

    # Step 3: Create visualizations
    print("\n" + "=" * 70)
    print("Generating visualizations...")
    print("=" * 70)

    create_loss_landscape_gif()
    create_trajectory_race_gif(trajectories)
    plot_amplification_analysis(results)

    print("\n" + "=" * 70)
    print("EXPERIMENT COMPLETE!")
    print("=" * 70)
    print("\nGenerated files:")
    print("  - elongated_canyon.gif (rotating loss landscape)")
    print("  - adam_3d_eps_race.gif (trajectory comparison)")
    print("  - epsilon_amplification.png (amplification analysis)")

    return results


# ============================================================================
# ENTRY POINT
# ============================================================================

if __name__ == "__main__":
    results = run_full_experiment()
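
# Note on runtime (an aside, not from the original gist): the GIF rendering
# dominates the wall-clock time. To reproduce only the quantitative result,
# the animation calls can be skipped and just the amplification plot produced:
#
#     results = {}
#     for eps, color in zip(EPS_LIST, COLORS):
#         path, grads, updates = run_optimizer(eps, track_metrics=True)
#         results[eps] = {'path': path, 'grads': grads,
#                         'updates': updates, 'color': color}
#     plot_amplification_analysis(results)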