Skip to content

Instantly share code, notes, and snippets.

View Steboss's full-sized avatar

Steboss Steboss

View GitHub Profile
@Steboss
Steboss / iteration_4.rs
Created January 16, 2026 10:36
Final iteration, where the model starts to use a more NEON-like style (ARM SIMD intrinsics)
use rayon::prelude::*;
use std::arch::aarch64::*;
use std::env;
use std::fs;
fn main() {
let args: Vec<String> = env::args().collect();
if args.len() != 4 {
eprintln!("Usage: {} M K N", args[0]);
return;
@Steboss
Steboss / iteration_0.rs
Created January 16, 2026 10:35
First iteration from the model. It generates a Strassen algorithm
use rayon::prelude::*;
use std::env;
const N: usize = 2; // Fixed block size
/// Copies a slice into a fixed-size array of `N * N` elements.
///
/// Optional helper: the length check makes size mistakes surface early,
/// which is handy while debugging.
///
/// # Panics
///
/// Panics with "Slice has incorrect length" when `slice.len()` is not `N * N`.
fn to_array(slice: &[f64]) -> [f64; N * N] {
    let fixed: Result<[f64; N * N], _> = slice.try_into();
    fixed.expect("Slice has incorrect length")
}
@Steboss
Steboss / matmulmanager.py
Created January 16, 2026 10:25
AutoGen manager for the matrix multiplication workflow, orchestrating all the agents
import logging
import re
from typing import List
from autogen_core import (
DefaultTopicId,
MessageContext,
RoutedAgent,
message_handler,
)
@Steboss
Steboss / basematmulagent.py
Created January 16, 2026 10:13
Base AutoGen class for agent workflows
import logging
import json
from typing import List
from autogen_core import (
DefaultTopicId,
MessageContext,
RoutedAgent,
message_handler,
)
@Steboss
Steboss / semantic_chunker.py
Created January 16, 2026 10:02
Create a vector database with a semantic chunker
import os
import re
import glob
import pymupdf4llm
from marker.converters.pdf import PdfConverter
from marker.models import create_model_dict
from marker.output import text_from_rendered
from langchain_core.documents import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_experimental.text_splitter import SemanticChunker
class BaseMatMulAgent(RoutedAgent):
"""
A generic agent that can handle RequestToSpeak, call tools, and publish messages.
"""
def __init__(
self,
description: str,
group_chat_topic_type: str,
model_client: ChatCompletionClient,
system_message: str,
@Steboss
Steboss / submit2.sh
Created June 5, 2025 09:29
Detailed version of the submit file
#!/bin/bash
#SBATCH -A your_account
#SBATCH -p your_partition
#SBATCH -N 4 # example with 4 nodes
#SBATCH -t 04:00:00 # max run time
#SBATCH -J "something_in_line_with_your_system"
export CONFIG="fuji-70B-v2-flash" # here you can insert, for example, fuji-7B-v2-flash
export CONTAINER="ghcr.io/nvidia/jax:axlearn" # this is our public jax-axlearn container
@Steboss
Steboss / submit.sh
Created June 4, 2025 13:03
Example for using SLURM in Fuji
#!/bin/bash
#SBATCH -A something
#SBATCH -p some partition
#SBATCH -N 2 # number of nodes to use
#SBATCH -t
#SBATCH -J
export CONFIG="fuji-70B-v2-flash"
export CONTAINER="my-container"
matmul forward matmul backward
C OpenMP 7.82 +/- 0.12 2.90 +/- 0.03
Rust base 28.52 +/- 0.32 12.25 +/- 0.16
Rust Rayon 4.14 +/- 0.01 4.24 +/- 0.02
Rust Blas 0.05 +/- 0.01 0.19 +/- 0.01
@Steboss
Steboss / average_benchmark.csv
Created January 24, 2024 21:22
Average time benchmark
# of lines average time (s)
1000 0.27 ± 0.01
10000 0.32 ± 0.01
100000 0.97 ± 0.02
1000000 7.27 ± 0.06
10000000 64.29 ± 0.10