Sofian Mejjoute (Ryu1845)
@Chillee
Chillee / softmax_quack.py
Created July 10, 2025 21:07
Random Kernel Microbenchmarks
import argparse
import time
from typing import Type
import torch
import torch.nn.functional as F
import torch._inductor.config
torch._inductor.config.triton.multi_kernel = True
import torch
from torch import nn
from torch.distributed.tensor.placement_types import Replicate, Shard
from torch.testing._internal.distributed.fake_pg import FakeStore
import torch.distributed as dist
from torch.distributed.device_mesh import init_device_mesh
from torch.distributed.tensor import DTensor, Replicate
world_size = 4
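The preview cuts off before the benchmark harness itself. As a rough, hypothetical sketch of what a softmax kernel microbenchmark in this spirit could look like (not the gist's actual code), one might time eager vs. torch.compile'd softmax like this:
# Hypothetical sketch, not the gist's harness: time eager vs. compiled softmax.
import time
import torch
import torch.nn.functional as F

def bench(fn, x, iters=100):
    # Warm up, then time `iters` launches with device synchronization.
    for _ in range(10):
        fn(x)
    if x.is_cuda:
        torch.cuda.synchronize()
    start = time.perf_counter()
    for _ in range(iters):
        fn(x)
    if x.is_cuda:
        torch.cuda.synchronize()
    return (time.perf_counter() - start) / iters

device = "cuda" if torch.cuda.is_available() else "cpu"
x = torch.randn(8192, 8192, device=device)
eager = lambda t: F.softmax(t, dim=-1)
compiled = torch.compile(eager)
print("eager:   ", bench(eager, x))
print("compiled:", bench(compiled, x))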
@Birch-san
Birch-san / _06_fused_attention_blockptr_jvp.py
Last active September 2, 2025 02:13
Triton fused attention tutorial, updated with JVP support, albeit with only atol=1e-3 accuracy on the JVP.
from __future__ import annotations
"""
Fused Attention
===============
This is a Triton implementation of the Flash Attention v2 algorithm from Tri Dao (https://tridao.me/publications/flash2/flash2.pdf)
Credits: OpenAI kernel team
Extra Credits:
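For context on the atol=1e-3 claim above, a forward-mode check might be structured roughly as below. This is a hypothetical sketch, not the gist's test: ref_attention is a plain eager reference, and fused_attention stands in for the Triton kernel wrapped with forward-mode support.
# Hypothetical sketch: compare a fused attention's JVP against an eager reference.
import torch
import torch.nn.functional as F

def ref_attention(q, k, v):
    # Plain scaled dot-product attention as the ground-truth function.
    scale = q.shape[-1] ** -0.5
    return torch.softmax(q @ k.transpose(-2, -1) * scale, dim=-1) @ v

device = "cuda" if torch.cuda.is_available() else "cpu"
q, k, v = (torch.randn(2, 4, 128, 64, device=device) for _ in range(3))
tq, tk, tv = (torch.randn_like(t) for t in (q, k, v))

out_ref, jvp_ref = torch.func.jvp(ref_attention, (q, k, v), (tq, tk, tv))
# `fused_attention` is a stand-in name for the Triton kernel's forward-mode entry point:
# out_fused, jvp_fused = torch.func.jvp(fused_attention, (q, k, v), (tq, tk, tv))
# torch.testing.assert_close(jvp_fused, jvp_ref, atol=1e-3, rtol=0)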
@bagder
bagder / slop.md
Last active March 3, 2026 11:49
AI slop security reports submitted to curl

Slop

This collection is limited to reports that were submitted as security vulnerabilities to the curl bug-bounty program on HackerOne.

Several other issues not included here are highly suspicious as well.

Reports

  1. [Critical] Curl CVE-2023-38545 vulnerability code changes are disclosed on the internet. #2199174
@aksh-at
aksh-at / modal_quic_hole_punch.py
Last active May 4, 2025 12:51
Modal QUIC NAT hole-punching
"""
Proof-of-concept for NAT traversal and low-latency communication over QUIC
between two Modal containers.
In theory this could be used to establish a low-latency p2p connection between a
service running outside Modal and a Modal GPU container, e.g. for real-time
inference on a video stream. Please let us know if you try it!
Usage:
> modal run modal_quic_hole_punch.py
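The QUIC connection in the gist rides on a punched UDP path. As a hypothetical illustration of the underlying NAT traversal step only (not the gist's Modal/QUIC code), both peers learn each other's public (ip, port) out of band, then send datagrams to each other until one gets through, creating the NAT mappings a QUIC handshake can then reuse:
# Hypothetical sketch of plain UDP hole punching; peer_addr is assumed to come from a
# rendezvous service of some kind.
import socket
import time

def punch(local_port: int, peer_addr: tuple[str, int], attempts: int = 20):
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    sock.bind(("0.0.0.0", local_port))
    sock.settimeout(0.5)
    for _ in range(attempts):
        sock.sendto(b"punch", peer_addr)  # outbound packet opens our own NAT mapping
        try:
            data, addr = sock.recvfrom(1500)
            if data == b"punch":
                return sock, addr          # path is open; hand the socket to QUIC
        except socket.timeout:
            time.sleep(0.1)
    raise RuntimeError("hole punch failed")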
@tysam-code
tysam-code / diloco_nesterov_.7lr_.0_to_.9_momentum_1000_momentum_warmup_1-momentum_dampening_dampening_initial_step_bugfix_25_steps_all_run3.log
Created April 30, 2025 00:56
import os
import sys
with open(sys.argv[0]) as f:
code = f.read() # read the code of this file ASAP, for logging
import uuid
import time
import glob
import subprocess
import contextlib
from dataclasses import dataclass
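The preview above is just the log-file bootstrap; the hyperparameters in the filename (outer lr 0.7, momentum warmed up from 0.0 to 0.9 over 1000 steps, (1 - momentum) dampening, Nesterov) belong to a DiLoCo-style outer optimizer. A hypothetical sketch of that outer step, not the logged run's code, looks roughly like this:
# Hypothetical DiLoCo-style outer step: after H local optimizer steps per worker, the
# averaged parameter delta is used as a pseudo-gradient for an outer Nesterov-SGD update.
import torch
import torch.distributed as dist

@torch.no_grad()
def outer_step(global_params, local_params, outer_opt):
    world = dist.get_world_size()
    for g, l in zip(global_params, local_params):
        delta = g.detach() - l.detach()              # pseudo-gradient: how far this worker moved
        dist.all_reduce(delta, op=dist.ReduceOp.SUM)
        delta /= world                               # average across workers
        g.grad = delta
    outer_opt.step()                                 # e.g. SGD(lr=0.7, momentum=0.9, nesterov=True)
    outer_opt.zero_grad(set_to_none=True)
    for g, l in zip(global_params, local_params):
        l.copy_(g)                                   # resynchronize the local copy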
@joey00072
joey00072 / mla.py
Created December 28, 2024 16:25
multi head latent attention (MLA)
# https://x.com/shxf0072/status/1873038335427658011
import torch
import torch.nn as nn
import torch.nn.functional as F
from dataclasses import dataclass
from collections import OrderedDict
from ohara.modules.norm import RMSNorm
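The core idea behind MLA is to cache a small shared latent instead of full per-head keys and values. The following is a hypothetical minimal sketch of that idea, not the gist's implementation, and it omits DeepSeek's decoupled-RoPE path and query compression for brevity:
# Hypothetical sketch: K and V are reconstructed from a low-rank latent, so only the
# (b, t, kv_latent) tensor would need to be cached at inference time.
import torch
import torch.nn as nn
import torch.nn.functional as F

class LatentAttention(nn.Module):
    def __init__(self, d_model=512, n_heads=8, kv_latent=64):
        super().__init__()
        self.n_heads, self.d_head = n_heads, d_model // n_heads
        self.q_proj = nn.Linear(d_model, d_model, bias=False)
        self.kv_down = nn.Linear(d_model, kv_latent, bias=False)  # compress to latent
        self.k_up = nn.Linear(kv_latent, d_model, bias=False)     # latent -> per-head K
        self.v_up = nn.Linear(kv_latent, d_model, bias=False)     # latent -> per-head V
        self.o_proj = nn.Linear(d_model, d_model, bias=False)

    def forward(self, x):
        b, t, _ = x.shape
        latent = self.kv_down(x)                                   # the only thing to cache
        q = self.q_proj(x).view(b, t, self.n_heads, self.d_head).transpose(1, 2)
        k = self.k_up(latent).view(b, t, self.n_heads, self.d_head).transpose(1, 2)
        v = self.v_up(latent).view(b, t, self.n_heads, self.d_head).transpose(1, 2)
        out = F.scaled_dot_product_attention(q, k, v, is_causal=True)
        return self.o_proj(out.transpose(1, 2).reshape(b, t, -1))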
@zjlww
zjlww / model.py
Created December 7, 2024 01:39
Stripped AudioCodecModel from NeMo @ bde672e
from typing import Tuple
import torch
from torch import nn, Tensor
import torch.nn.functional as F
from einops import rearrange
from .modules import HiFiGANEncoder, HiFiGANDecoder, GroupFiniteScalarQuantizer
class AudioCodecModel(nn.Module):
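The GroupFiniteScalarQuantizer imported above is a finite scalar quantization (FSQ) variant. As a hypothetical sketch of plain FSQ only (not NeMo's grouped implementation), each latent channel is squashed, scaled to a fixed number of levels, and rounded with a straight-through estimator, so the "codebook" is just the grid of level combinations:
# Hypothetical FSQ sketch; the `levels` tuple is an illustrative choice.
import torch
from torch import nn, Tensor

class FSQ(nn.Module):
    def __init__(self, levels=(8, 5, 5, 5)):
        super().__init__()
        self.register_buffer("levels", torch.tensor(levels, dtype=torch.float32))

    def forward(self, z: Tensor) -> Tensor:
        # z: (..., len(levels)); bound to (-1, 1), map to [0, L-1] per channel, then round.
        half = (self.levels - 1) / 2
        scaled = torch.tanh(z) * half + half
        rounded = torch.round(scaled)
        scaled = scaled + (rounded - scaled).detach()  # straight-through gradient
        return (scaled - half) / half                  # back to roughly [-1, 1]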
@charlesfrye
charlesfrye / wrapper.py
Last active July 28, 2025 03:54
Train GPT-2 in five minutes -- for free!
# Train GPT-2 in five minutes -- for free
#
# ```bash
# pip install modal
# modal setup
# modal run wrapper.py
# ```
#
# Note that the end-to-end latency the first time is more like 25 minutes:
# - five minutes to install Torch (rip)
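The wrapper pattern the preview hints at is a Modal app that packages the training dependencies into an image and launches the run on a remote GPU. A minimal sketch, assuming the current modal API and with the training launch itself stubbed out (this is not charlesfrye's wrapper):
# Hypothetical Modal wrapper sketch; "train_gpt2.py" is a stand-in for the real launch.
import modal

app = modal.App("train-gpt2-sketch")
image = modal.Image.debian_slim().pip_install("torch", "numpy")

@app.function(gpu="H100", image=image, timeout=60 * 30)
def train():
    import subprocess
    # Stand-in for the actual training entry point.
    subprocess.run(["python", "train_gpt2.py"], check=True)

@app.local_entrypoint()
def main():
    train.remote()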
@crowsonkb
crowsonkb / ring_attn.py
Created October 10, 2024 16:19
Ring attention for PyTorch.
"""Ring attention for PyTorch.
See https://github.com/nshepperd/flash_attn_jax/blob/main/src/flash_attn_jax/ring_attention.py.
"""
import flash_attn.flash_attn_interface as fai
import torch
import torch.distributed as dist
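As a hypothetical sketch of the ring attention pattern itself (not crowsonkb's flash-attn-backed implementation): each rank keeps its query shard fixed, rotates key/value shards around the ring, and merges the per-block results with a numerically stable log-sum-exp update. This assumes float32 inputs, equal-length shards, and no causal mask.
# Hypothetical ring attention sketch with online-softmax merging across ring steps.
import torch
import torch.distributed as dist

def ring_attention(q, k, v):
    world, rank = dist.get_world_size(), dist.get_rank()
    scale = q.shape[-1] ** -0.5
    acc = torch.zeros_like(q)                                          # weighted-value accumulator
    lse = torch.full(q.shape[:-1], float("-inf"), device=q.device)     # running log-sum-exp
    k_cur, v_cur = k, v
    for step in range(world):
        s = (q @ k_cur.transpose(-1, -2)) * scale
        blk_lse = torch.logsumexp(s, dim=-1)
        blk_out = torch.softmax(s, dim=-1) @ v_cur
        new_lse = torch.logaddexp(lse, blk_lse)
        acc = acc * (lse - new_lse).exp().unsqueeze(-1) + blk_out * (blk_lse - new_lse).exp().unsqueeze(-1)
        lse = new_lse
        if step < world - 1:
            # Rotate K/V shards one hop around the ring.
            k_next, v_next = torch.empty_like(k_cur), torch.empty_like(v_cur)
            src, dst = (rank + 1) % world, (rank - 1) % world
            reqs = dist.batch_isend_irecv([
                dist.P2POp(dist.isend, k_cur, dst),
                dist.P2POp(dist.irecv, k_next, src),
                dist.P2POp(dist.isend, v_cur, dst),
                dist.P2POp(dist.irecv, v_next, src),
            ])
            for r in reqs:
                r.wait()
            k_cur, v_cur = k_next, v_next
    return acc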