Skip to content

Instantly share code, notes, and snippets.

@raphlinus
raphlinus / half_experiment.patch
Created November 19, 2025 23:22
Experiment with widen/narrow associated types
commit 49309841ba5cb7db0989a87c0a1d4b2947f02314
Merge: 3abe852 de4ec3f
Author: Raph Levien <raph@google.com>
Date: Wed Jun 11 08:12:42 2025 -0700
On gen2: half experiment
diff --cc fearless_simd/src/generated/simd_trait.rs
index 875ccad,875ccad..544068b
--- a/fearless_simd/src/generated/simd_trait.rs
@raphlinus
raphlinus / kbound.rs
Created November 14, 2025 05:04
Compute curvature bounds for cubic Bézier using interval arithmetic
use kurbo::{CubicBez, ParamCurveDeriv};
/// Compute bounds on curvature
pub fn kbound(c: CubicBez) -> (f64, f64) {
let q = c.deriv();
let p1xp0 = q.p1.to_vec2().cross(q.p0.to_vec2());
let p2xp0 = q.p2.to_vec2().cross(q.p0.to_vec2());
let p2xp1 = q.p2.to_vec2().cross(q.p1.to_vec2());
let c0 = 2. * p1xp0;
let c1 = 2. * (p2xp0 - 2.0 * p1xp0);
@raphlinus
raphlinus / neo_flatten.rs
Created November 12, 2025 04:43
prototype SIMD implementation of new flatten algorithm
use fearless_simd::{Level, Select, Simd, SimdInto, dispatch, f32x4};
/// A point in two dimensions with single-precision coordinates.
///
/// `#[repr(C)]` pins the memory layout to C rules, so `x` is stored
/// first and `y` immediately after it.
#[derive(Debug, Clone, Copy)]
#[repr(C)]
struct Point {
    /// Horizontal coordinate.
    x: f32,
    /// Vertical coordinate.
    y: f32,
}
impl Point {
@raphlinus
raphlinus / neon_flatten.rs
Created July 7, 2025 18:19
Sketch of Neon code for flattening
/// Evaluates, independently for each of the four `f32` lanes of `x`,
///
/// ```text
/// x / (1 - D + (D^4 + x^2 / 4)^(1/4))      with D = 0.67
/// ```
///
/// Going by the function name, this is a closed-form approximation of a
/// parabola integral used for curve flattening.
///
/// # Safety
///
/// The body consists of Neon intrinsics; presumably the caller must make
/// sure the code only runs on a CPU where Neon is available.
unsafe fn approx_parabola_integral(x: float32x4_t) -> float32x4_t {
    const D: f32 = 0.67;

    // Broadcast the scalar constants across all four lanes.
    let d_pow4 = vdupq_n_f32(D.powi(4));
    let quarter = vdupq_n_f32(0.25);
    let one_minus_d = vdupq_n_f32(1.0 - D);

    // Fused multiply-add: D^4 + 0.25 * (x * x).
    let radicand = vfmaq_f32(d_pow4, quarter, vmulq_f32(x, x));

    // Two successive square roots give the fourth root.
    let fourth_root = vsqrtq_f32(vsqrtq_f32(radicand));

    vdivq_f32(x, vaddq_f32(one_minus_d, fourth_root))
}
@raphlinus
raphlinus / hello_stickshift.rs
Created March 27, 2025 04:45
First code from stickshift
// Input:
// fn foo(x: [f32; 4], y: f32) -> [f32; 4] {
// x; x * (x - 2.0) * y
// }
fn foo(x: [f32; 4], y: f32) -> [f32; 4] {
unsafe {
let v__0 = ::core::mem::transmute::<[f32; 4usize], ::core::arch::aarch64::float32x4_t>(x);
let v__1 = y;
let v__2 = v__0;
let v__3 = v__0;
@raphlinus
raphlinus / flatten.rs
Created January 16, 2025 05:31
Snapshot of SIMD flatten implementation
// Copyright 2025 the Fearless_SIMD Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT
//! Example of fast flattening cubic Beziers.
// Arguably we should just take a kurbo dep (or do development
// in another crate), but here we can
use core::f32;
@raphlinus
raphlinus / ordered_queue_test.rs
Created December 7, 2024 22:42
Sketch of queue
use std::time::Duration;
use rand::Rng;
fn main() {
let (s, mut r) = ordered_channel::bounded(10);
for i in 0..100 {
let s_clone = s.clone();
rayon::spawn_fifo(move || {
let mut rng = rand::thread_rng();
let sleep_time = rng.gen_range(0..100);
@raphlinus
raphlinus / neon_to_srgb.rs
Created November 15, 2024 15:48
Neon implementation of linear to sRGB transfer function
// Copyright 2024 the Color Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
#[inline(never)]
pub unsafe fn to_srgb(rgba: [f32; 4]) -> [f32; 4] {
let v = aarch64::vld1q_f32(rgba.as_ptr());
let vabs = aarch64::vabsq_f32(v);
let bias = aarch64::vdupq_n_f32(-5.35862651e-04);
@raphlinus
raphlinus / simd_reduce_test.rs
Last active April 12, 2025 20:33
Comparison of scalar and SIMD max reduction
// run with `RUSTFLAGS='-C target-cpu=native' cargo +nightly bench`
#![feature(test)]
/// Smoke test: builds a 65536-element `u32` array that is all zeros
/// except for a single 42 at index 1, then prints the maximum reported
/// by the scalar routine followed by the one reported by the AVX2
/// routine, each on its own line.
fn main() {
    // Construct the input in a nested scope so the final binding is immutable.
    let a = {
        let mut values = [0u32; 65536];
        values[1] = 42;
        values
    };

    let scalar_result = scalar_max(&a);
    println!("{}", scalar_result);

    let simd_result = avx2_max(&a);
    println!("{}", simd_result);
}
@raphlinus
raphlinus / gist:5aca9de53f9d6b24933cb24d8a60df63
Created March 15, 2024 04:22
apparent miscompilation of flatten.wgsl
1 s_version 0x4004 4 0.01 2
2 s_inst_prefetch 0x3 4 0.01 1
3 s_getpc_b64 s[0:1] 4 0.03 5
4 s_mov_b32 s0, s2 4 0.05 9
5 s_load_dwordx4 s[4:7], s[0:1], null 4 0.01 1
6 s_load_dwordx4 s[12:15], s[0:1], 0x20 4 0.01 1
7 s_load_dwordx4 s[16:19], s[0:1], 0x40 4 0.01 1
8 v_lshl_add_u32 v3, s8, 8, v0 4 0.03 5
9 v_lshrrev_b32_e32 v0, 2, v3 4 0.01 1
10 s_waitcnt lgkmcnt(0)