import torch

# two leaf tensors with identical values — presumably so that two equivalent
# computation paths can be compared value-for-value and gradient-for-gradient
x1 = torch.randn(5)
x2 = x1.clone()
x1.requires_grad = True
x2.requires_grad = True
# arbitrary scalar coefficients
alpha = 1 - 0.5   # 0.5
beta = 1 + 0.53   # 1.53
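A minimal sketch of what a setup like this is typically for (my assumption; the gist may use these tensors differently): scaling before or after a matmul is algebraically identical, so both paths should produce matching outputs and matching gradients — the same associativity that lets MLA absorb one projection matrix into another at inference time. W here is a hypothetical weight, not taken from the gist.

W = torch.randn(5, 5)          # hypothetical weight, not from the gist
y1 = (alpha * x1) @ W          # scale the input first
y2 = x2 @ (alpha * W)          # absorb the scale into the weight
y1.sum().backward()
y2.sum().backward()
assert torch.allclose(y1, y2)
assert torch.allclose(x1.grad, x2.grad)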
joey00072 / mla.py
Created December 28, 2024 16:25
multi-head latent attention (MLA)
# https://x.com/shxf0072/status/1873038335427658011
import torch
import torch.nn as nn
import torch.nn.functional as F
from dataclasses import dataclass
from collections import OrderedDict
from ohara.modules.norm import RMSNorm  # RMSNorm from the author's ohara library (github.com/joey00072/ohara)
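# Rough idea of MLA (my summary, not part of the original gist): instead of
# caching full per-head keys and values, each token is projected down to a
# small shared latent (d_model -> kv_latent_dim), and the per-head K/V are
# re-expanded from that latent at attention time. This shrinks the KV cache
# by roughly a factor of (n_heads * head_dim) / kv_latent_dim.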