Technical analysis of GitHub implementations for WASM compilation targeting web workers with Float32Array.
Related: spectrum-analyzer-trace-implementation-plan.md
| Algorithm | WASM Viability | Complexity | Best For |
|---|---|---|---|
| scipy find_peaks C++ | ✅ Emscripten | O(n²) worst | Full-featured, prominence/width |
| Z-score streaming | ✅ Emscripten/Rust | O(1)/sample | Real-time, fixed memory |
| Topological (Rust) | ✅ Native wasm32 | O(n log n) | Auto-ranking, no thresholds |
| AMPD C++ | O(n²) | Periodic signals only | |
| Tugbars embedded C | ✅ Emscripten | O(log n) | Single peak, FWHM validation |
Recommendation: Port Topological (Rust) for WASM - cleanest code, no_std compatible, O(n log n).
Source: johnhillross/find_peaks-CPP (5 ⭐)
// Phase 1: Find local maxima (handles plateau/flat peaks)
for (int i = 1; i < x.size() - 1; i++) {
if (x[i - 1] < x[i]) {
int iahead = i + 1;
// Skip plateau (consecutive equal values)
while (iahead < x.size() - 1 && x[iahead] == x[i]) {
iahead++;
}
if (x[iahead] < x[i]) {
// Peak found at midpoint of plateau
peaks.push_back((i + iahead - 1) / 2);
}
i = iahead;
}
}// Find nearest higher peak on each side, then find min within that range
int peak = copyPeaks[i];
double leftMin = x[peak], rightMin = x[peak];
// Scan left until higher peak or boundary
j = peak;
while (j >= imin && x[j] <= x[peak]) {
if (x[j] < leftMin) leftMin = x[j];
j--;
}
// Scan right until higher peak or boundary
j = peak;
while (j <= imax && x[j] <= x[peak]) {
if (x[j] < rightMin) rightMin = x[j];
j++;
}
// Prominence = peak height above highest surrounding minimum
currentProminence = x[peak] - max(leftMin, rightMin);// Height at which to measure width
double height = x[peak] - prominences[i] * relHeight; // relHeight = 0.5
// Interpolate left crossing point
while (j > imin && x[j] > height) j--;
if (x[j] < height) {
leftIp = j + (height - x[j]) / (x[j + 1] - x[j]); // Linear interpolation
}
// Interpolate right crossing point
while (j < imax && x[j] > height) j++;
if (x[j] < height) {
rightIp = j - (height - x[j]) / (x[j - 1] - x[j]);
}
currentWidth = rightIp - leftIp;| Aspect | Assessment |
|---|---|
| Dependencies | <vector>, <algorithm> - STL heavy |
| Memory | Dynamic allocation, peak list grows |
| SIMD potential | Low - branchy sequential scans |
| Emscripten binary | ~50KB+ with STL |
// Raw pointer API for zero-copy from JS Float32Array
extern "C" {
int find_peaks_wasm(
float* data, int len,
float prominence_min, float prominence_max,
float width_min, float width_max,
int distance,
int* out_peaks, int max_peaks
);
}Source: leandcesar/PeakDetection (73 ⭐) - C++/Arduino
// Fixed memory: 3 circular buffers of size `lag`
double *data, *avg, *std; // lag + 1 elements each
double add(double newSample) {
peak = 0;
int i = index % lag; // current position
int j = (index + 1) % lag; // next position
double deviation = newSample - avg[i];
if (deviation > threshold * std[i]) {
// Positive peak - blend into baseline
data[j] = influence * newSample + (1.0 - influence) * data[i];
peak = 1;
}
else if (deviation < -threshold * std[i]) {
// Negative peak (trough)
data[j] = influence * newSample + (1.0 - influence) * data[i];
peak = -1;
}
else {
data[j] = newSample;
}
// Update running statistics
avg[j] = getAvg(j, lag);
std[j] = getStd(j, lag);
index++;
return std[j];
}double getAvg(int start, int len) {
double x = 0.0;
for (int i = 0; i < len; ++i)
x += data[(start + i) % lag];
return x / len;
}
double getStd(int start, int len) {
double x1 = getAvg(start, len); // mean
double x2 = getPoint(start, len); // mean of squares
double std = x2 - x1 * x1; // variance
// Clamp to epsilon
if (std > -EPSILON && std < EPSILON)
return (std < 0.0) ? -EPSILON : EPSILON;
return sqrt(std);
}| Parameter | Default | Purpose |
|---|---|---|
lag |
32 | Window size for baseline stats |
threshold |
2 | Std deviations to trigger peak |
influence |
0.5 | How much peaks affect baseline (0-1) |
EPSILON |
0.01 | Min std to prevent div/0 |
| Aspect | Assessment |
|---|---|
| Memory | Fixed: 3 * lag * 8 = 768 bytes @ lag=32 |
| Complexity | O(lag) per sample (O(1) with Welford) |
| SIMD | Can vectorize avg/std loops |
| State | Requires persistent struct between calls |
// O(1) per sample with incremental statistics
struct ZScoreDetector {
float* data;
int lag, index;
float threshold, influence;
float running_sum, running_sum_sq; // Incremental stats
};
extern "C" {
ZScoreDetector* zscore_create(int lag, float threshold, float influence);
int zscore_add(ZScoreDetector* d, float sample); // Returns -1, 0, or 1
void zscore_free(ZScoreDetector* d);
}Source: caseykneale/topological_peak_detection (1 ⭐)
pub fn find_homologies<T: PartialOrd>(x: &[T]) -> Result<Vec<Homology>, Unsortable> {
// Step 1: Get indices sorted by value (descending)
let sorted_idxs = sorted_indices(x)?.into_iter();
let mut homologies = Vec::<Homology>::with_capacity(70);
// Step 2: Process from highest to lowest value
for set_idx in sorted_idxs.into_iter().rev() {
let mut found_home = false;
// Check if adjacent to existing region
for homology in (&mut homologies).iter_mut() {
match homology.is_adjacent(set_idx) {
Direction::Left => {
homology.extend_left();
found_home = true;
}
Direction::Right => {
homology.extend_right();
found_home = true;
}
Direction::None => continue,
}
if found_home { break; }
}
// New peak region born at this index
if !found_home {
homologies.push(Homology {
left_extent: set_idx,
right_extent: set_idx,
peak: set_idx, // Birth index IS the peak
});
}
}
Ok(homologies)
}pub struct Homology {
left_extent: usize, // Current left boundary of region
right_extent: usize, // Current right boundary
peak: usize, // Index where region was born (the peak)
}
impl Homology {
pub fn is_adjacent(&self, idx: usize) -> Direction {
if (self.left_extent > 0) && (idx == (self.left_extent - 1)) {
Direction::Left
} else if idx == self.right_extent + 1 {
Direction::Right
} else {
Direction::None
}
}
pub fn extend_left(&mut self) { self.left_extent -= 1; }
pub fn extend_right(&mut self) { self.right_extent += 1; }
}The algorithm works by "flooding" from highest to lowest values:
- Sort indices by value - O(n log n)
- Process top-to-bottom: when processing index i
- If adjacent to existing region → extend that region
- If isolated → birth new region (this IS a peak)
- Persistence = birth_value - death_value (when merged with higher region)
No thresholds needed - peaks are naturally ranked by topological persistence.
| Aspect | Assessment |
|---|---|
| Dependencies | core only - no_std compatible! |
| Memory | O(n) for sorted indices |
| Complexity | O(n log n) dominated by sort |
| SIMD | Sorting benefits from SIMD |
| Binary size | ~5-15KB estimated |
#[no_mangle]
pub extern "C" fn find_peaks_topo(
data: *const f32,
len: usize,
out_peaks: *mut usize,
max_peaks: usize
) -> usize {
let slice = unsafe { std::slice::from_raw_parts(data, len) };
let homologies = find_homologies(slice).unwrap_or_default();
let out_slice = unsafe { std::slice::from_raw_parts_mut(out_peaks, max_peaks) };
let mut count = 0;
for h in homologies.iter().take(max_peaks) {
out_slice[count] = h.get_peak_idx();
count += 1;
}
count
}Source: Tugbars/fast-peak-finding-algorithm (5 ⭐)
static float findPeakRec(MqsRawDataPoint_t a[], int size,
int l, int r, uint16_t *peakIndex,
int ignoreIndices[], int numIgnoreIndices) {
if (l > r) return -1;
int mid = (l + r) / 2;
float max_val = 0.0f;
int max_index = 0;
// Find max in current range
maxrow(a, size, mid, &max_val, &max_index, ignoreIndices, numIgnoreIndices);
if (mid == 0 || mid == size - 1) {
*peakIndex = max_index;
return max_val;
}
// Recurse toward higher neighbor
if (max_val < a[mid - 1].phaseAngle)
return findPeakRec(a, size, l, mid - 1, peakIndex, ignoreIndices, numIgnoreIndices);
else if (max_val < a[mid + 1].phaseAngle)
return findPeakRec(a, size, mid + 1, r, peakIndex, ignoreIndices, numIgnoreIndices);
else {
*peakIndex = max_index;
return max_val;
}
}bool processPeak(MqsRawDataPoint_t a[], int size, uint16_t *peakIndex, bool* isEdgeCase) {
int skippedIndices[3];
int skippedCount = 0;
int maxAttempts = 3;
do {
float peakValue = findPeakRec(a, size, 0, size - 1, peakIndex,
skippedIndices, skippedCount);
if (peakValue == -1) return false;
float prominence = findProminence(a, size - 1, *peakIndex);
if (prominence > 18.0f) { // Prominence threshold
int fwhm = calculateFWHM(a, size, *peakIndex, prominence);
// Edge case: peak near end of data
if (*peakIndex >= size - PEAK_THRESHOLD) {
*isEdgeCase = isPeakClimbing(a, size, *peakIndex, NOISE_TOLERANCE);
}
if (fwhm > 15) { // FWHM threshold
return true; // Valid peak
} else {
// Skip narrow peak, try again
skippedIndices[skippedCount++] = *peakIndex;
}
} else {
break;
}
} while (++retry < maxAttempts);
return false;
}static float findProminence(MqsRawDataPoint_t a[], int size, int peakIndex) {
int leftBoundary = 0;
int rightBoundary = size - 1;
float peak_val = a[peakIndex].phaseAngle;
// Find nearest higher peak on left
for (int i = peakIndex - 1; i >= 0; i--) {
if (a[i].phaseAngle > peak_val) {
leftBoundary = i;
break;
}
}
// Find nearest higher peak on right
for (int i = peakIndex + 1; i < size; i++) {
if (a[i].phaseAngle > peak_val) {
rightBoundary = i;
break;
}
}
// Find minimum within boundaries
float minValue = a[rightBoundary].phaseAngle;
for (int i = leftBoundary; i <= rightBoundary; i++) {
if (a[i].phaseAngle < minValue) {
minValue = a[i].phaseAngle;
}
}
return peak_val - minValue;
}static int calculateFWHM(MqsRawDataPoint_t a[], int size, int peakIndex, float prominence) {
float peakHeight = a[peakIndex].phaseAngle;
float contourLineHeight = peakHeight - prominence;
float halfProminenceHeight = contourLineHeight + (prominence / 2.0f);
// Find left crossing
int leftIndex = peakIndex;
while (leftIndex > 0 && a[leftIndex].phaseAngle > halfProminenceHeight) {
leftIndex--;
}
// Find right crossing
int rightIndex = peakIndex;
while (rightIndex < size - 1 && a[rightIndex].phaseAngle > halfProminenceHeight) {
rightIndex++;
}
return fabsf(rightIndex - leftIndex);
}| Aspect | Assessment |
|---|---|
| Dependencies | Pure C - <math.h>, <stdint.h> |
| Memory | Stack-based, minimal heap |
| Complexity | O(log n) for peak, O(n) for prominence |
| Use case | Single dominant peak with validation |
Source: xmhbbovru/ampd (18 ⭐) - C++
1. Detrend signal (subtract least-squares line)
2. Build Local Maxima Scalogram (LMS) matrix [el × n]:
- For each scale k = 1 to ceil(n/2)-1:
- For each point i:
- If x[i] > x[i-k] AND x[i] > x[i+k]: mark 0 (local max)
- Else: mark α + random
3. gamma[k] = sum of row k
4. lambda = argmin(gamma) - optimal scale
5. Peaks = columns where all values 0..lambda are zero
int n = x.size();
int el = ceil(n / 2) - 1;
Vad mpk(el * n); // O(n²) memory!
int row_start = 0;
for (int kix = 0; kix < el; kix++, row_start += n) {
// Boundaries can't be peaks
for (int i = 0; i <= kix; i++)
mpk[row_start + i] = alpha + alg_rand();
// Check local max condition at scale k
for (int i = kix + 1; i < n - kix - 1; i++) {
if (x[i] <= x[i - kix - 1] || x[i] <= x[i + kix + 1])
mpk[row_start + i] = alpha + alg_rand();
// else stays 0
}
for (int i = n - kix - 1; i < n; i++)
mpk[row_start + i] = alpha + alg_rand();
}| Aspect | Assessment |
|---|---|
| Dependencies | <valarray>, <random>, argtable2 |
| Memory | O(n²) - PROBLEMATIC |
| Complexity | O(n²) |
| Use case | Periodic/quasi-periodic signals only |
Not recommended for WASM - O(n²) memory prohibitive for real-time.
From dev.to benchmark:
| Approach | Time (ms) | Speedup |
|---|---|---|
| Pure JS | 1.403 | 1× |
| wasm-bindgen | 1.623 | 0.86× (slower!) |
| Raw WASM | 0.353 | 4× |
| Raw WASM + SIMD | 0.231 | 6× |
Key insight: wasm-bindgen copies Float32Array twice. Use raw pointers for zero-copy.
class WasmPeakFinder {
private memory: WebAssembly.Memory;
private dataPtr: number;
private resultPtr: number;
private instance: WebAssembly.Instance;
constructor(wasmModule: WebAssembly.Module, maxSize: number) {
this.memory = new WebAssembly.Memory({ initial: 256 });
this.instance = new WebAssembly.Instance(wasmModule, {
env: { memory: this.memory }
});
// Pre-allocate in WASM linear memory
this.dataPtr = this.instance.exports.malloc(maxSize * 4);
this.resultPtr = this.instance.exports.malloc(1000 * 4);
}
findPeaks(data: Float32Array): Uint32Array {
// Zero-copy write to WASM memory
const wasmData = new Float32Array(
this.memory.buffer,
this.dataPtr,
data.length
);
wasmData.set(data);
const numPeaks = (this.instance.exports.find_peaks as Function)(
this.dataPtr,
data.length,
this.resultPtr,
1000
);
// Zero-copy read results
return new Uint32Array(
this.memory.buffer,
this.resultPtr,
numPeaks
);
}
}# Cargo.toml
[lib]
crate-type = ["cdylib"]
[profile.release]
opt-level = 3
lto = true
# Build
cargo build --target wasm32-unknown-unknown --release
wasm-opt -O3 target/wasm32-unknown-unknown/release/peaks.wasm -o peaks.opt.wasmemcc -O3 -msimd128 \
-s WASM=1 \
-s EXPORTED_FUNCTIONS='["_find_peaks", "_malloc", "_free"]' \
-s EXPORTED_RUNTIME_METHODS='["ccall"]' \
-s ALLOW_MEMORY_GROWTH=1 \
findpeaks.c -o findpeaks.js- Port
topological_peak_detectionto no_std - Add f32 support (currently generic over PartialOrd)
- Export via
extern "C"with raw pointers - Compile with
wasm32-unknown-unknown
- Compute persistence = birth_value - death_value
- Sort peaks by persistence (most significant first)
- Optional: add min_persistence threshold
- Port prominence calculation from find_peaks-CPP
- Port FWHM calculation
- Add distance filtering
- Enable
wasm_simdfeature - Use SIMD sorting (pdqsort)
- Vectorize prominence scans
- Persistent Homology Peak Detection
- AMPD Paper - Scholkmann et al., Algorithms 2012
- Z-Score Algorithm (Stack Overflow)
- UMD Peak Finding Guide
- johnhillross/find_peaks-CPP - scipy port
- leandcesar/PeakDetection - Z-score Arduino
- caseykneale/topological_peak_detection - Rust persistent homology
- Tugbars/fast-peak-finding-algorithm - Embedded C
- xmhbbovru/ampd - AMPD C++
- Multi-trace parallelism: SharedArrayBuffer + multiple WASM instances?
- Peak ranking: by prominence, persistence, or height?
- Streaming vs batch: need both APIs?
- Memory budget per worker?
Step-by-step instructions for compiling caseykneale/topological_peak_detection to WASM.
Goal: Return all peak indices from Float32Array input, ranked by significance, for UI marker cycling.
# Install rustup if not present
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
# Add WASM target
rustup target add wasm32-unknown-unknown
# Install wasm-opt (binaryen) for optimization
# macOS:
brew install binaryen
# Linux:
apt install binaryen
# Or download from https://github.com/WebAssembly/binaryen/releasesrustc --version # should be 1.70+
rustup target list | grep wasm32-unknown-unknown # should show (installed)
wasm-opt --version # should show binaryen version# From monorepo root
mkdir -p packages/peak-finding-wasm/src
cd packages/peak-finding-wasm[package]
name = "peak-finding-wasm"
version = "0.1.0"
edition = "2021"
authors = ["Your Name"]
description = "Topological peak detection compiled to WASM"
[lib]
crate-type = ["cdylib"] # Required for WASM
[dependencies]
# No dependencies - zero external crates
[profile.release]
opt-level = 3 # Maximum optimization
lto = true # Link-time optimization
codegen-units = 1 # Better optimization, slower compile
panic = "abort" # Smaller binary, no unwinding
strip = true # Strip symbols
[profile.release.package."*"]
opt-level = 3//! Topological Peak Detection - WASM Export
//!
//! Based on: https://github.com/caseykneale/topological_peak_detection
//! Algorithm: Persistent homology for 1-D signal peak finding
#![no_std]
extern crate alloc;
use alloc::vec::Vec;
// ============================================================================
// CORE ALGORITHM (from caseykneale/topological_peak_detection)
// ============================================================================
#[derive(Debug)]
pub struct Unsortable;
/// Sort indices by their corresponding values in the slice
fn sorted_indices(v: &[f32]) -> Result<Vec<usize>, Unsortable> {
let mut indices: Vec<usize> = (0..v.len()).collect();
let mut errored = false;
indices.sort_by(|&a, &b| {
match v[a].partial_cmp(&v[b]) {
Some(ordering) => ordering,
None => {
errored = true;
core::cmp::Ordering::Equal
}
}
});
if errored {
return Err(Unsortable);
}
Ok(indices)
}
enum Direction {
Left,
Right,
None,
}
struct Homology {
left_extent: usize,
right_extent: usize,
peak: usize,
birth_value: f32, // Added: value at peak for persistence calc
}
impl Homology {
fn is_adjacent(&self, idx: usize) -> Direction {
if self.left_extent > 0 && idx == self.left_extent - 1 {
Direction::Left
} else if idx == self.right_extent + 1 {
Direction::Right
} else {
Direction::None
}
}
fn extend_left(&mut self) {
self.left_extent -= 1;
}
fn extend_right(&mut self) {
self.right_extent += 1;
}
}
/// Find all peaks using topological persistence
/// Returns peaks ordered by birth time (highest value first = most significant)
fn find_homologies(x: &[f32]) -> Result<Vec<Homology>, Unsortable> {
if x.is_empty() {
return Err(Unsortable);
}
let sorted_idxs = sorted_indices(x)?;
let mut homologies = Vec::<Homology>::with_capacity(64);
// Process from highest to lowest value (reverse of sorted order)
for set_idx in sorted_idxs.into_iter().rev() {
let mut found_home = false;
for homology in homologies.iter_mut() {
match homology.is_adjacent(set_idx) {
Direction::Left => {
homology.extend_left();
found_home = true;
break;
}
Direction::Right => {
homology.extend_right();
found_home = true;
break;
}
Direction::None => continue,
}
}
// New peak born at this index
if !found_home {
homologies.push(Homology {
left_extent: set_idx,
right_extent: set_idx,
peak: set_idx,
birth_value: x[set_idx],
});
}
}
Ok(homologies)
}
// ============================================================================
// WASM EXPORTS
// ============================================================================
/// Global allocator for no_std
mod allocator {
use core::alloc::{GlobalAlloc, Layout};
struct WasmAllocator;
unsafe impl GlobalAlloc for WasmAllocator {
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
// Simple bump allocator - relies on WASM linear memory
extern "C" {
fn wasm_alloc(size: usize, align: usize) -> *mut u8;
}
wasm_alloc(layout.size(), layout.align())
}
unsafe fn dealloc(&self, _ptr: *mut u8, _layout: Layout) {
// No-op: WASM memory freed when instance destroyed
}
}
#[global_allocator]
static ALLOCATOR: WasmAllocator = WasmAllocator;
}
// Simple allocator implementation (alternative: use wee_alloc crate)
static mut HEAP_POS: usize = 0;
const HEAP_START: usize = 65536; // Start after 64KB stack
#[no_mangle]
pub extern "C" fn wasm_alloc(size: usize, align: usize) -> *mut u8 {
unsafe {
let aligned = (HEAP_POS + align - 1) & !(align - 1);
HEAP_POS = aligned + size;
aligned as *mut u8
}
}
#[no_mangle]
pub extern "C" fn wasm_reset_heap() {
unsafe {
HEAP_POS = HEAP_START;
}
}
/// Main entry point: find peaks in f32 array
///
/// # Arguments
/// * `data_ptr` - Pointer to f32 array in WASM linear memory
/// * `data_len` - Number of f32 elements
/// * `out_ptr` - Pointer to output u32 array for peak indices
/// * `max_peaks` - Maximum peaks to return
///
/// # Returns
/// Number of peaks found (written to out_ptr)
///
/// # Safety
/// Caller must ensure pointers are valid and arrays are properly sized
#[no_mangle]
pub extern "C" fn find_peaks(
data_ptr: *const f32,
data_len: usize,
out_ptr: *mut u32,
max_peaks: usize,
) -> usize {
if data_ptr.is_null() || out_ptr.is_null() || data_len < 3 {
return 0;
}
let data = unsafe { core::slice::from_raw_parts(data_ptr, data_len) };
let out = unsafe { core::slice::from_raw_parts_mut(out_ptr, max_peaks) };
let homologies = match find_homologies(data) {
Ok(h) => h,
Err(_) => return 0, // NaN in data
};
let count = homologies.len().min(max_peaks);
for (i, h) in homologies.iter().take(count).enumerate() {
out[i] = h.peak as u32;
}
count
}
/// Extended version: returns peak indices AND values
/// Output format: [idx0, idx1, ..., idxN, val0, val1, ..., valN]
///
/// # Arguments
/// * `data_ptr` - Input f32 array
/// * `data_len` - Input length
/// * `out_indices_ptr` - Output u32 array for indices
/// * `out_values_ptr` - Output f32 array for values at peaks
/// * `max_peaks` - Maximum peaks
///
/// # Returns
/// Number of peaks found
#[no_mangle]
pub extern "C" fn find_peaks_with_values(
data_ptr: *const f32,
data_len: usize,
out_indices_ptr: *mut u32,
out_values_ptr: *mut f32,
max_peaks: usize,
) -> usize {
if data_ptr.is_null() || out_indices_ptr.is_null() || out_values_ptr.is_null() || data_len < 3 {
return 0;
}
let data = unsafe { core::slice::from_raw_parts(data_ptr, data_len) };
let out_indices = unsafe { core::slice::from_raw_parts_mut(out_indices_ptr, max_peaks) };
let out_values = unsafe { core::slice::from_raw_parts_mut(out_values_ptr, max_peaks) };
let homologies = match find_homologies(data) {
Ok(h) => h,
Err(_) => return 0,
};
let count = homologies.len().min(max_peaks);
for (i, h) in homologies.iter().take(count).enumerate() {
out_indices[i] = h.peak as u32;
out_values[i] = h.birth_value;
}
count
}
/// Allocate memory in WASM linear memory
/// Returns pointer to allocated region
#[no_mangle]
pub extern "C" fn alloc(size: usize) -> *mut u8 {
wasm_alloc(size, 8) // 8-byte alignment for f32/u32
}
/// Get required buffer size for given data length
/// Returns bytes needed for input + output buffers
#[no_mangle]
pub extern "C" fn get_buffer_size(data_len: usize, max_peaks: usize) -> usize {
let input_bytes = data_len * 4; // f32 input
let output_indices = max_peaks * 4; // u32 indices
let output_values = max_peaks * 4; // f32 values
input_bytes + output_indices + output_values + 1024 // padding
}
// Panic handler for no_std
#[panic_handler]
fn panic(_info: &core::panic::PanicInfo) -> ! {
loop {}
}The manual allocator above works but wee_alloc is battle-tested. Create this version instead:
[package]
name = "peak-finding-wasm"
version = "0.1.0"
edition = "2021"
[lib]
crate-type = ["cdylib"]
[dependencies]
wee_alloc = "0.4" # Tiny allocator for WASM
[profile.release]
opt-level = "z" # Optimize for size
lto = true
codegen-units = 1
panic = "abort"
strip = true#![no_std]
extern crate alloc;
use alloc::vec::Vec;
#[global_allocator]
static ALLOC: wee_alloc::WeeAlloc = wee_alloc::WeeAlloc::INIT;
// ... rest of algorithm code same as above ...
// Remove manual allocator module, keep only:
#[panic_handler]
fn panic(_: &core::panic::PanicInfo) -> ! {
loop {}
}Create build.sh:
#!/bin/bash
set -e
echo "Building WASM..."
cargo build --target wasm32-unknown-unknown --release
echo "Optimizing with wasm-opt..."
wasm-opt -O3 \
--strip-debug \
--strip-producers \
target/wasm32-unknown-unknown/release/peak_finding_wasm.wasm \
-o dist/peak-finding.wasm
echo "Checking size..."
ls -lh dist/peak-finding.wasm
echo "Done!"chmod +x build.sh
mkdir -p dist
./build.shExpected output: peak-finding.wasm ~5-15KB
/**
* Peak Finding WASM Loader
* Zero-copy Float32Array integration
*/
export interface PeakFinderWasm {
findPeaks(data: Float32Array): Uint32Array;
findPeaksWithValues(data: Float32Array): { indices: Uint32Array; values: Float32Array };
dispose(): void;
}
interface WasmExports {
memory: WebAssembly.Memory;
find_peaks: (dataPtr: number, dataLen: number, outPtr: number, maxPeaks: number) => number;
find_peaks_with_values: (
dataPtr: number,
dataLen: number,
outIndicesPtr: number,
outValuesPtr: number,
maxPeaks: number
) => number;
alloc: (size: number) => number;
get_buffer_size: (dataLen: number, maxPeaks: number) => number;
wasm_reset_heap?: () => void;
}
export async function createPeakFinder(
wasmPath: string,
maxDataLen: number = 8192,
maxPeaks: number = 256
): Promise<PeakFinderWasm> {
// Load WASM module
const response = await fetch(wasmPath);
const wasmBytes = await response.arrayBuffer();
// Instantiate with minimal imports
const { instance } = await WebAssembly.instantiate(wasmBytes, {
env: {
// Add any required imports here (usually none for no_std)
},
});
const exports = instance.exports as unknown as WasmExports;
const memory = exports.memory;
// Pre-allocate buffers in WASM memory
const inputBytes = maxDataLen * 4;
const outputIndicesBytes = maxPeaks * 4;
const outputValuesBytes = maxPeaks * 4;
const dataPtr = exports.alloc(inputBytes);
const outIndicesPtr = exports.alloc(outputIndicesBytes);
const outValuesPtr = exports.alloc(outputValuesBytes);
// Validate allocation
if (dataPtr === 0 || outIndicesPtr === 0 || outValuesPtr === 0) {
throw new Error('WASM memory allocation failed');
}
return {
findPeaks(data: Float32Array): Uint32Array {
if (data.length > maxDataLen) {
throw new Error(`Data length ${data.length} exceeds max ${maxDataLen}`);
}
// Zero-copy write to WASM memory
const wasmInput = new Float32Array(memory.buffer, dataPtr, data.length);
wasmInput.set(data);
// Call WASM function
const peakCount = exports.find_peaks(dataPtr, data.length, outIndicesPtr, maxPeaks);
// Zero-copy read results (create view, not copy)
// Note: This view is only valid until next WASM call!
return new Uint32Array(memory.buffer, outIndicesPtr, peakCount);
},
findPeaksWithValues(data: Float32Array): { indices: Uint32Array; values: Float32Array } {
if (data.length > maxDataLen) {
throw new Error(`Data length ${data.length} exceeds max ${maxDataLen}`);
}
const wasmInput = new Float32Array(memory.buffer, dataPtr, data.length);
wasmInput.set(data);
const peakCount = exports.find_peaks_with_values(
dataPtr,
data.length,
outIndicesPtr,
outValuesPtr,
maxPeaks
);
return {
indices: new Uint32Array(memory.buffer, outIndicesPtr, peakCount),
values: new Float32Array(memory.buffer, outValuesPtr, peakCount),
};
},
dispose(): void {
// Reset heap for next use (if implemented)
exports.wasm_reset_heap?.();
},
};
}
/**
* Worker-compatible loader (for use in Web Workers)
*/
export async function createPeakFinderFromBytes(
wasmBytes: ArrayBuffer,
maxDataLen: number = 8192,
maxPeaks: number = 256
): Promise<PeakFinderWasm> {
const { instance } = await WebAssembly.instantiate(wasmBytes, {});
const exports = instance.exports as unknown as WasmExports;
const memory = exports.memory;
const dataPtr = exports.alloc(maxDataLen * 4);
const outIndicesPtr = exports.alloc(maxPeaks * 4);
const outValuesPtr = exports.alloc(maxPeaks * 4);
return {
findPeaks(data: Float32Array): Uint32Array {
const wasmInput = new Float32Array(memory.buffer, dataPtr, data.length);
wasmInput.set(data);
const count = exports.find_peaks(dataPtr, data.length, outIndicesPtr, maxPeaks);
return new Uint32Array(memory.buffer, outIndicesPtr, count);
},
findPeaksWithValues(data: Float32Array) {
const wasmInput = new Float32Array(memory.buffer, dataPtr, data.length);
wasmInput.set(data);
const count = exports.find_peaks_with_values(
dataPtr, data.length, outIndicesPtr, outValuesPtr, maxPeaks
);
return {
indices: new Uint32Array(memory.buffer, outIndicesPtr, count),
values: new Float32Array(memory.buffer, outValuesPtr, count),
};
},
dispose() {},
};
}import { createPeakFinder } from './wasm-loader';
// Initialize once
const peakFinder = await createPeakFinder('/peak-finding.wasm');
// Use in render loop
function processTrace(traceData: Float32Array) {
const peakIndices = peakFinder.findPeaks(traceData);
// peakIndices[0] = most significant peak
// peakIndices[1] = second most significant, etc.
// Copy if you need to persist (view is invalidated on next call)
const peaksCopy = new Uint32Array(peakIndices);
return peaksCopy;
}
// For cycling through peaks in UI
let currentPeakIdx = 0;
function cycleToNextPeak(peaks: Uint32Array): number {
currentPeakIdx = (currentPeakIdx + 1) % peaks.length;
return peaks[currentPeakIdx]; // Index into original data
}{
"name": "@algorithms/peak-finding-wasm",
"version": "0.1.0",
"type": "module",
"main": "dist/index.js",
"types": "dist/index.d.ts",
"files": [
"dist/"
],
"scripts": {
"build:wasm": "./build.sh",
"build:ts": "tsc",
"build": "pnpm build:wasm && pnpm build:ts",
"clean": "rm -rf dist target"
},
"devDependencies": {
"typescript": "^5.0.0"
}
}{
"compilerOptions": {
"target": "ES2022",
"module": "ESNext",
"moduleResolution": "bundler",
"declaration": true,
"outDir": "dist",
"strict": true,
"noEmit": false
},
"include": ["src/**/*.ts"],
"exclude": ["node_modules"]
}import { describe, it, expect, beforeAll } from 'vitest';
import { createPeakFinder, PeakFinderWasm } from '../src/wasm-loader';
import { readFileSync } from 'fs';
import { join } from 'path';
describe('Peak Finding WASM', () => {
let peakFinder: PeakFinderWasm;
beforeAll(async () => {
// Load WASM bytes directly for Node.js testing
const wasmPath = join(__dirname, '../dist/peak-finding.wasm');
const wasmBytes = readFileSync(wasmPath);
const { instance } = await WebAssembly.instantiate(wasmBytes, {});
// ... setup peakFinder
});
it('finds sinusoid peaks', () => {
// Generate sinusoid: peaks at 500, 2500, 4500
const data = new Float32Array(6001);
for (let i = 0; i < data.length; i++) {
data[i] = Math.sin((i / 1000) * Math.PI);
}
const peaks = peakFinder.findPeaks(data);
expect(peaks.length).toBe(4); // 500, 2500, 4500, 6000
expect(peaks).toContain(500);
expect(peaks).toContain(2500);
expect(peaks).toContain(4500);
});
it('handles noisy data', () => {
const data = new Float32Array(1000);
for (let i = 0; i < data.length; i++) {
data[i] = Math.sin((i / 100) * Math.PI) + (Math.random() - 0.5) * 0.1;
}
const peaks = peakFinder.findPeaks(data);
expect(peaks.length).toBeGreaterThan(0);
});
it('returns empty for flat data', () => {
const data = new Float32Array(100).fill(5.0);
const peaks = peakFinder.findPeaks(data);
expect(peaks.length).toBe(0);
});
});# Check current size
wasm-opt --print-size dist/peak-finding.wasm
# Apply all size optimizations
wasm-opt -Oz --strip-debug --strip-producers \
--remove-unused-module-elements \
dist/peak-finding.wasm -o dist/peak-finding.min.wasm// Benchmark in browser
const iterations = 1000;
const data = new Float32Array(4096);
// ... fill with test data
console.time('wasm-peaks');
for (let i = 0; i < iterations; i++) {
peakFinder.findPeaks(data);
}
console.timeEnd('wasm-peaks');
// Expected: <1ms per call for 4096 samples- Check
maxDataLenmatches actual data size - Ensure WASM memory is large enough (increase
initialpages)
- Using
no_stdbut forgot#[panic_handler] - Missing import in instantiation (check console for specific import name)
- Check
[profile.release]settings - Ensure
panic = "abort"is set - Run
wasm-optwith-Oz
- Algorithm returns 0 peaks if NaN in data
- Pre-filter NaN before calling if needed
packages/peak-finding-wasm/
├── Cargo.toml
├── build.sh
├── package.json
├── tsconfig.json
├── src/
│ ├── lib.rs # Rust WASM code
│ ├── wasm-loader.ts # TypeScript wrapper
│ └── index.ts # Re-exports
├── dist/
│ ├── peak-finding.wasm
│ ├── index.js
│ └── index.d.ts
└── test/
└── basic.test.ts
SharedArrayBuffer enables zero-copy data sharing between main thread and worker, eliminating serialization overhead.
┌─────────────────┐ SharedArrayBuffer ┌──────────────────┐
│ Main Thread │◄──────────────────────────►│ Web Worker │
│ │ │ │
│ Float32Array │ ← same underlying memory │ Float32Array │
│ (trace data) │ │ (WASM input) │
│ │ │ │
│ Uint32Array │ ← same underlying memory │ Uint32Array │
│ (peak indices) │ │ (WASM output) │
└─────────────────┘ └──────────────────┘
SharedArrayBuffer requires:
- HTTPS (or localhost)
- Cross-Origin Isolation headers:
Cross-Origin-Opener-Policy: same-origin Cross-Origin-Embedder-Policy: require-corp
// vite.config.ts
export default defineConfig({
server: {
headers: {
'Cross-Origin-Opener-Policy': 'same-origin',
'Cross-Origin-Embedder-Policy': 'require-corp',
},
},
});Create src/shared-memory.ts:
/**
* Shared memory pool for main thread ↔ worker communication
*/
export interface SharedBuffers {
// Input: trace data from main thread
inputBuffer: SharedArrayBuffer;
inputView: Float32Array;
// Output: peak indices from worker
outputBuffer: SharedArrayBuffer;
outputView: Uint32Array;
// Control: synchronization flags
controlBuffer: SharedArrayBuffer;
controlView: Int32Array;
}
// Control indices
const CTRL_STATUS = 0; // 0=idle, 1=processing, 2=done
const CTRL_INPUT_LEN = 1; // Length of input data
const CTRL_OUTPUT_LEN = 2; // Number of peaks found
const CTRL_REQUEST_ID = 3; // Monotonic request counter
export function createSharedBuffers(maxDataLen: number, maxPeaks: number): SharedBuffers {
// Check SharedArrayBuffer support
if (typeof SharedArrayBuffer === 'undefined') {
throw new Error('SharedArrayBuffer not supported. Enable COOP/COEP headers.');
}
const inputBuffer = new SharedArrayBuffer(maxDataLen * 4); // Float32
const outputBuffer = new SharedArrayBuffer(maxPeaks * 4); // Uint32
const controlBuffer = new SharedArrayBuffer(16); // 4 Int32s
return {
inputBuffer,
inputView: new Float32Array(inputBuffer),
outputBuffer,
outputView: new Uint32Array(outputBuffer),
controlBuffer,
controlView: new Int32Array(controlBuffer),
};
}
/**
* Main thread API
*/
export class PeakFinderClient {
private buffers: SharedBuffers;
private worker: Worker;
private requestId = 0;
private pendingResolve: ((peaks: Uint32Array) => void) | null = null;
constructor(buffers: SharedBuffers, worker: Worker) {
this.buffers = buffers;
this.worker = worker;
// Listen for completion signals
this.worker.onmessage = (e) => {
if (e.data.type === 'peaks-ready' && this.pendingResolve) {
const len = this.buffers.controlView[CTRL_OUTPUT_LEN];
// Create copy since underlying buffer may be reused
const peaks = new Uint32Array(this.buffers.outputView.slice(0, len));
this.pendingResolve(peaks);
this.pendingResolve = null;
}
};
}
/**
* Find peaks in trace data
* Zero-copy write to shared buffer, worker reads directly
*/
async findPeaks(data: Float32Array): Promise<Uint32Array> {
// Write data to shared buffer (zero-copy if data is already a view)
this.buffers.inputView.set(data);
// Update control
this.buffers.controlView[CTRL_INPUT_LEN] = data.length;
this.buffers.controlView[CTRL_REQUEST_ID] = ++this.requestId;
// Signal worker using Atomics
Atomics.store(this.buffers.controlView, CTRL_STATUS, 1); // processing
Atomics.notify(this.buffers.controlView, CTRL_STATUS);
// Wait for response
return new Promise((resolve) => {
this.pendingResolve = resolve;
});
}
/**
* Synchronous version using Atomics.wait (blocks main thread - use sparingly!)
*/
findPeaksSync(data: Float32Array): Uint32Array {
this.buffers.inputView.set(data);
this.buffers.controlView[CTRL_INPUT_LEN] = data.length;
this.buffers.controlView[CTRL_REQUEST_ID] = ++this.requestId;
Atomics.store(this.buffers.controlView, CTRL_STATUS, 1);
Atomics.notify(this.buffers.controlView, CTRL_STATUS);
// Block until worker signals done (status = 2)
// WARNING: This blocks the main thread!
Atomics.wait(this.buffers.controlView, CTRL_STATUS, 1);
const len = this.buffers.controlView[CTRL_OUTPUT_LEN];
return new Uint32Array(this.buffers.outputView.slice(0, len));
}
}Create src/peak-worker.ts:
/// <reference lib="webworker" />
import { createPeakFinderFromBytes, type PeakFinderWasm } from './wasm-loader';
const CTRL_STATUS = 0;
const CTRL_INPUT_LEN = 1;
const CTRL_OUTPUT_LEN = 2;
const CTRL_REQUEST_ID = 3;
let peakFinder: PeakFinderWasm | null = null;
let inputView: Float32Array;
let outputView: Uint32Array;
let controlView: Int32Array;
// Initialize on first message
self.onmessage = async (e) => {
if (e.data.type === 'init') {
const { wasmBytes, inputBuffer, outputBuffer, controlBuffer, maxDataLen, maxPeaks } = e.data;
// Create views into shared buffers
inputView = new Float32Array(inputBuffer);
outputView = new Uint32Array(outputBuffer);
controlView = new Int32Array(controlBuffer);
// Initialize WASM with its own memory (separate from SharedArrayBuffer)
peakFinder = await createPeakFinderFromBytes(wasmBytes, maxDataLen, maxPeaks);
self.postMessage({ type: 'ready' });
// Start processing loop
processLoop();
}
};
async function processLoop() {
while (true) {
// Wait for work (status = 1)
const result = Atomics.wait(controlView, CTRL_STATUS, 0); // Wait while idle (0)
if (result === 'not-equal') {
// Status changed, check if we have work
const status = Atomics.load(controlView, CTRL_STATUS);
if (status === 1 && peakFinder) {
// Read input length
const inputLen = controlView[CTRL_INPUT_LEN];
// Copy from SharedArrayBuffer to local Float32Array for WASM
// (WASM can't directly use SharedArrayBuffer memory)
const inputData = new Float32Array(inputLen);
inputData.set(inputView.subarray(0, inputLen));
// Process
const peaks = peakFinder.findPeaks(inputData);
// Write results to shared output buffer
outputView.set(peaks);
controlView[CTRL_OUTPUT_LEN] = peaks.length;
// Signal completion
Atomics.store(controlView, CTRL_STATUS, 2); // done
Atomics.notify(controlView, CTRL_STATUS);
// Notify main thread via postMessage (for async API)
self.postMessage({ type: 'peaks-ready' });
// Reset to idle for next request
Atomics.store(controlView, CTRL_STATUS, 0);
}
}
}
}import { createSharedBuffers, PeakFinderClient } from './shared-memory';
async function setupPeakFinder() {
const MAX_DATA_LEN = 8192;
const MAX_PEAKS = 256;
// Create shared buffers
const buffers = createSharedBuffers(MAX_DATA_LEN, MAX_PEAKS);
// Load WASM bytes
const wasmResponse = await fetch('/peak-finding.wasm');
const wasmBytes = await wasmResponse.arrayBuffer();
// Create worker
const worker = new Worker(
new URL('./peak-worker.ts', import.meta.url),
{ type: 'module' }
);
// Initialize worker with shared buffers
worker.postMessage({
type: 'init',
wasmBytes,
inputBuffer: buffers.inputBuffer,
outputBuffer: buffers.outputBuffer,
controlBuffer: buffers.controlBuffer,
maxDataLen: MAX_DATA_LEN,
maxPeaks: MAX_PEAKS,
});
// Wait for ready
await new Promise<void>((resolve) => {
worker.onmessage = (e) => {
if (e.data.type === 'ready') resolve();
};
});
return new PeakFinderClient(buffers, worker);
}
// Usage
const peakFinder = await setupPeakFinder();
// In render loop
function onNewTraceData(traceData: Float32Array) {
// Async - doesn't block main thread
peakFinder.findPeaks(traceData).then((peaks) => {
console.log('Found peaks at indices:', peaks);
// Update UI markers
});
}| Approach | Data Copy | Latency | Main Thread Blocking |
|---|---|---|---|
| postMessage (transferable) | 1 copy (transfer) | ~0.5-2ms overhead | No |
| postMessage (structured clone) | 2 copies | ~1-5ms overhead | No |
| SharedArrayBuffer + Atomics | 0 copies* | ~0.1ms overhead | Optional |
*Note: WASM still needs its own copy since it can't directly access SharedArrayBuffer. The win is eliminating serialization/deserialization overhead.
Use SharedArrayBuffer when:
- Processing >60fps with large arrays (>4KB)
- Need synchronous results (Atomics.wait)
- Multiple workers sharing same data
- Latency-critical applications
Skip SharedArrayBuffer when:
- Occasional processing (<10fps)
- Small data (<1KB)
- Can't set COOP/COEP headers
- Need Safari <16.4 support
For continuous streaming without blocking:
/**
* Double buffer for lock-free producer/consumer
*/
class DoubleBuffer {
private bufferA: SharedArrayBuffer;
private bufferB: SharedArrayBuffer;
private viewA: Float32Array;
private viewB: Float32Array;
private activeBuffer: Int32Array; // 0 = A, 1 = B
constructor(size: number) {
this.bufferA = new SharedArrayBuffer(size * 4);
this.bufferB = new SharedArrayBuffer(size * 4);
this.viewA = new Float32Array(this.bufferA);
this.viewB = new Float32Array(this.bufferB);
this.activeBuffer = new Int32Array(new SharedArrayBuffer(4));
}
// Main thread writes to inactive buffer
getWriteBuffer(): Float32Array {
return Atomics.load(this.activeBuffer, 0) === 0 ? this.viewB : this.viewA;
}
// Worker reads from active buffer
getReadBuffer(): Float32Array {
return Atomics.load(this.activeBuffer, 0) === 0 ? this.viewA : this.viewB;
}
// Swap buffers atomically
swap() {
const current = Atomics.load(this.activeBuffer, 0);
Atomics.store(this.activeBuffer, 0, current === 0 ? 1 : 0);
}
}export { createPeakFinder, createPeakFinderFromBytes } from './wasm-loader';
export { createSharedBuffers, PeakFinderClient } from './shared-memory';
export type { PeakFinderWasm } from './wasm-loader';
export type { SharedBuffers } from './shared-memory';
// Convenience factory
export async function createPeakFinderWorker(wasmUrl: string) {
const { createSharedBuffers, PeakFinderClient } = await import('./shared-memory');
const MAX_DATA = 8192;
const MAX_PEAKS = 256;
const buffers = createSharedBuffers(MAX_DATA, MAX_PEAKS);
const wasmBytes = await fetch(wasmUrl).then(r => r.arrayBuffer());
const worker = new Worker(
new URL('./peak-worker.ts', import.meta.url),
{ type: 'module' }
);
worker.postMessage({
type: 'init',
wasmBytes,
inputBuffer: buffers.inputBuffer,
outputBuffer: buffers.outputBuffer,
controlBuffer: buffers.controlBuffer,
maxDataLen: MAX_DATA,
maxPeaks: MAX_PEAKS,
});
await new Promise<void>(resolve => {
const handler = (e: MessageEvent) => {
if (e.data.type === 'ready') {
worker.removeEventListener('message', handler);
resolve();
}
};
worker.addEventListener('message', handler);
});
return new PeakFinderClient(buffers, worker);
}import { createPeakFinderWorker } from '@algorithms/peak-finding-wasm';
// Initialize once
const peakFinder = await createPeakFinderWorker('/peak-finding.wasm');
// In your spectrum analyzer component
class SpectrumAnalyzer {
private peakFinder: PeakFinderClient;
private currentPeakIndex = 0;
private peaks: Uint32Array = new Uint32Array(0);
constructor(peakFinder: PeakFinderClient) {
this.peakFinder = peakFinder;
}
async updateTrace(traceData: Float32Array) {
this.peaks = await this.peakFinder.findPeaks(traceData);
this.currentPeakIndex = 0;
this.renderPeakMarker();
}
cycleNextPeak() {
if (this.peaks.length === 0) return;
this.currentPeakIndex = (this.currentPeakIndex + 1) % this.peaks.length;
this.renderPeakMarker();
}
cyclePrevPeak() {
if (this.peaks.length === 0) return;
this.currentPeakIndex = (this.currentPeakIndex - 1 + this.peaks.length) % this.peaks.length;
this.renderPeakMarker();
}
private renderPeakMarker() {
const peakIdx = this.peaks[this.currentPeakIndex];
// peakIdx is the index into traceData where the peak is located
console.log(`Peak ${this.currentPeakIndex + 1}/${this.peaks.length} at index ${peakIdx}`);
// Update your canvas/UI marker position
}
}- Add persistence calculation - rank peaks by topological persistence (birth - death value)
- Add min_persistence filter - threshold to filter noise peaks
- SIMD optimization - use
wasm_simdfor sorting - Streaming API - for real-time sample-by-sample detection
- Worker pool - multiple workers for parallel trace processing