Skip to content

Instantly share code, notes, and snippets.

@achille
Created January 4, 2026 07:33
Show Gist options
  • Select an option

  • Save achille/23680e9100db87565a8e67038797b27d to your computer and use it in GitHub Desktop.

Select an option

Save achille/23680e9100db87565a8e67038797b27d to your computer and use it in GitHub Desktop.
Context:
https://gist.github.com/achille/d1eadf82aa54056b9ded7706e8f56760
https://chatgpt.com/share/6959ed27-accc-800e-8e87-21aa81e93c07
Claude Benchmark:
Config: Moderate (150,100) - CORRECT SOLUTIONS ONLY
────────────────────────────────────────────────────────────────────────────────
Method Time max_err wrong/20000
─────────────────────────────────────────────────────────────────────────────
faded_opt (2 iter) 30.1 ns 0.18 0 ✅
chatgpt_2iter 30.1 ns 0.18 0 ✅ (same as faded_opt 2)
chatgpt_adaptive 35.5 ns 0.018 0 ✅
chatgpt_hybrid (2+1) 41.4 ns 0.020 0 ✅
faded_opt (3 iter) 47.6 ns 0.018 0 ✅
claude_rot (2 iter) 21.2 ns 52 1018 ❌ BROKEN
claude_rot (3 iter) 33.7 ns 43 909 ❌ BROKEN
claude_rot (4 iter) 43.8 ns 44 891 ❌ BROKEN
Chatgpt Benchmark:
Ran it **in my environment** (g++ `-O3 -march=native -ffast-math`, `float`), measuring:
* **runtime:** `ns/call` (median over 9 runs, 50k points)
* **accuracy:** absolute **distance error vs a robust double-precision reference** (max / avg / p99), plus `maxEqErr = |(x/a)^2+(y/b)^2-1|` for the returned point.
### Moderate (150, 100)
| method | ns/call | max dist err | avg dist err | p99 dist err | maxEqErr |
| ------------------ | ------: | -----------: | -----------: | -----------: | --------: |
| yours (hybrid 2+1) | 56.295 | 1.080e-02 | 1.209e-05 | 3.053e-05 | 4.520e-07 |
| yours − 3rd step | 38.502 | 1.879e-01 | 2.480e-04 | 1.169e-03 | 4.737e-07 |
| faded2 (opt) | 38.899 | 1.879e-01 | 2.480e-04 | 1.169e-03 | 4.737e-07 |
| faded3 (opt) | 68.299 | 8.955e-03 | 1.152e-05 | 3.146e-05 | 4.419e-07 |
| claude2 (rot) | 36.109 | 5.183e+01 | 9.692e-01 | 3.275e+01 | 4.495e-07 |
| claude3 (rot) | 49.230 | 4.905e+01 | 9.457e-01 | 3.277e+01 | 4.370e-07 |
### High ecc (200, 50)
| method | ns/call | max dist err | avg dist err | p99 dist err | maxEqErr |
| ------------------ | ------: | -----------: | -----------: | -----------: | --------: |
| yours (hybrid 2+1) | 59.147 | 5.636e-02 | 2.207e-05 | 1.830e-04 | 4.311e-07 |
| yours − 3rd step | 38.197 | 5.382e-01 | 9.483e-03 | 2.230e-01 | 4.467e-07 |
| faded2 (opt) | 39.917 | 5.382e-01 | 9.483e-03 | 2.230e-01 | 4.467e-07 |
| faded3 (opt) | 74.481 | 3.022e-02 | 8.965e-05 | 2.498e-03 | 4.407e-07 |
| claude2 (rot) | 33.235 | 1.357e+02 | 7.134e+00 | 1.224e+02 | 5.302e-07 |
| claude3 (rot) | 49.514 | 1.375e+02 | 6.699e+00 | 1.232e+02 | 4.801e-07 |
### Extreme (100, 10)
| method | ns/call | max dist err | avg dist err | p99 dist err | maxEqErr |
| ------------------ | ------: | -----------: | -----------: | -----------: | --------: |
| yours (hybrid 2+1) | 58.911 | 1.057e-01 | 2.983e-05 | 3.744e-05 | 4.497e-07 |
| yours − 3rd step | 38.725 | 5.581e-01 | 1.797e-03 | 4.816e-02 | 4.638e-07 |
| faded2 (opt) | 40.019 | 5.581e-01 | 1.797e-03 | 4.816e-02 | 4.638e-07 |
| faded3 (opt) | 71.495 | 1.985e-02 | 3.397e-05 | 7.202e-04 | 4.628e-07 |
| claude2 (rot) | 33.035 | 8.500e+01 | 5.297e+00 | 7.538e+01 | 5.413e-07 |
| claude3 (rot) | 49.750 | 8.260e+01 | 4.858e+00 | 7.578e+01 | 4.879e-07 |
### Circle (100, 100)
| method | ns/call | max dist err | avg dist err | p99 dist err | maxEqErr |
| ------------------ | ------: | -----------: | -----------: | -----------: | --------: |
| yours (hybrid 2+1) | 55.238 | 3.412e-05 | 5.876e-06 | 1.904e-05 | 4.422e-07 |
| yours − 3rd step | 39.317 | 3.146e-05 | 3.811e-06 | 1.570e-05 | 3.235e-07 |
| faded2 (opt) | 39.397 | 3.146e-05 | 3.811e-06 | 1.570e-05 | 3.235e-07 |
| faded3 (opt) | 70.926 | 3.133e-05 | 4.978e-06 | 1.678e-05 | 3.460e-07 |
| claude2 (rot) | 34.716 | 3.412e-05 | 5.096e-06 | 1.705e-05 | 4.538e-07 |
| claude3 (rot) | 49.998 | 3.412e-05 | 5.313e-06 | 1.706e-05 | 4.521e-07 |
Notes you’ll notice from the table:
* **`yours − 3rd step` and `faded2 (opt)` are the same algorithmic idea** (2 iterations of the optimized curvature update), hence the near-identical stats.
* **`claude2/claude3` match the ellipse equation (low maxEqErr) but are not the closest point** for non-circles (huge distance error vs robust reference).
If you want to rerun this on your target machine:
[Download the benchmark I used](sandbox:/mnt/data/ellipse_compare_table.cpp)
#include <cmath>
#include <cstdio>
#include <chrono>
#include <random>
#include <vector>
#include <algorithm>
#include <cstdint>
using Clock = std::chrono::high_resolution_clock;
struct Pt { float x,y; };
static inline float clamp01(float v){ return std::fmin(1.0f, std::fmax(0.0f, v)); }
// -----------------------------------------------------------------------------
// Robust reference (double): Eberly-style monotone root for t, solved by bisection
// Ellipse: (x/a)^2 + (y/b)^2 = 1, centered at origin, axis-aligned.
// Returns closest point on ellipse.
// -----------------------------------------------------------------------------
static inline Pt reference_eberly(float a, float b, float px, float py){
const double alpha = (double)a * (double)a;
const double beta = (double)b * (double)b;
const double sx = (px < 0.f) ? -1.0 : 1.0;
const double sy = (py < 0.f) ? -1.0 : 1.0;
const double x = std::fabs((double)px);
const double y = std::fabs((double)py);
if (x == 0.0 && y == 0.0) {
// Any point on ellipse is equally distant from origin; pick (0,b)
return {0.0f, (float)(sy * (double)b)};
}
auto F = [&](double t){
const double da = t + alpha;
const double db = t + beta;
const double termA = alpha * x * x / (da*da);
const double termB = beta * y * y / (db*db);
return termA + termB - 1.0;
};
const double insideTest = (x*x)/alpha + (y*y)/beta;
const bool outside = insideTest > 1.0;
double lo, hi;
if (outside) {
lo = 0.0;
hi = 1.0;
// Expand until F(hi) < 0
for (int i=0;i<80 && F(hi) > 0.0; ++i) hi *= 2.0;
} else {
const double m = std::min(alpha, beta);
// Avoid singularity at -m
lo = -m + std::numeric_limits<double>::epsilon()*m;
hi = 0.0;
}
// Bisection
double t = 0.5*(lo+hi);
for (int i=0;i<64;++i) {
t = 0.5*(lo+hi);
const double f = F(t);
if (f > 0.0) lo = t;
else hi = t;
}
const double cx = (alpha * x) / (t + alpha);
const double cy = (beta * y) / (t + beta);
return { (float)(sx*cx), (float)(sy*cy) };
}
// -----------------------------------------------------------------------------
// 0xfaded optimized curvature step (r/q combined) - parameterized iters
// -----------------------------------------------------------------------------
template<int ITERS>
static inline Pt faded_opt(float a, float b, float px, float py){
float px_abs = std::fabs(px), py_abs = std::fabs(py);
float tx = 0.70710678f, ty = 0.70710678f;
float a2 = a*a, b2 = b*b;
float ca = (a2-b2)/a, cb = (b2-a2)/b;
for (int i=0;i<ITERS;++i){
float x = a*tx, y = b*ty;
float tx3 = tx*tx*tx, ty3 = ty*ty*ty;
float ex = ca*tx3, ey = cb*ty3;
float rx = x - ex, ry = y - ey;
float qx = px_abs - ex, qy = py_abs - ey;
float rq = std::sqrt((rx*rx + ry*ry) / (qx*qx + qy*qy + 1e-30f));
tx = clamp01((qx * rq + ex) / a);
ty = clamp01((qy * rq + ey) / b);
float invt = 1.0f / std::sqrt(tx*tx + ty*ty);
tx *= invt; ty *= invt;
}
return { std::copysign(a*tx, px), std::copysign(b*ty, py) };
}
// -----------------------------------------------------------------------------
// Claude rotation-trick Newton - parameterized iters
// -----------------------------------------------------------------------------
template<int ITERS>
static inline Pt claude_rot(float a, float b, float px, float py){
float px_abs = std::fabs(px), py_abs = std::fabs(py);
float a2mb2 = a*a - b*b;
float nx = px_abs/a, ny = py_abs/b;
float len = std::sqrt(nx*nx + ny*ny + 1e-10f);
float c = nx/len, s = ny/len;
for (int i=0;i<ITERS;++i){
float f = a2mb2*s*c - px_abs*a*s + py_abs*b*c;
float fp = a2mb2*(c*c - s*s) - px_abs*a*c - py_abs*b*s;
if (std::fabs(fp) < 1e-10f) break;
float dt = f/fp;
float nc = c + dt*s;
float ns = s - dt*c;
float inv = 1.0f / std::sqrt(nc*nc + ns*ns);
c = nc*inv;
s = ns*inv;
}
return { std::copysign(a*c, px), std::copysign(b*s, py) };
}
// -----------------------------------------------------------------------------
// "Yours" hybrid: 2 curvature steps + 1 Newton snap
// -----------------------------------------------------------------------------
static inline Pt yours_hybrid(float a, float b, float px, float py){
float px_abs = std::fabs(px), py_abs = std::fabs(py);
float a2 = a*a, b2 = b*b;
float ca = (a2-b2)/a, cb = (b2-a2)/b;
float a2mb2 = a2 - b2;
float tx = 0.70710678f, ty = 0.70710678f;
// 2 curvature iterations
for (int i=0;i<2;++i){
float x = a*tx, y = b*ty;
float tx3 = tx*tx*tx, ty3 = ty*ty*ty;
float ex = ca*tx3, ey = cb*ty3;
float rx = x - ex, ry = y - ey;
float qx = px_abs - ex, qy = py_abs - ey;
float rq = std::sqrt((rx*rx + ry*ry) / (qx*qx + qy*qy + 1e-30f));
tx = clamp01((qx * rq + ex) / a);
ty = clamp01((qy * rq + ey) / b);
float invt = 1.0f / std::sqrt(tx*tx + ty*ty);
tx *= invt; ty *= invt;
}
// 1 Newton snap using (tx,ty) as (cos,sin)
float c = tx, s = ty;
float f = a2mb2*s*c - px_abs*a*s + py_abs*b*c;
float fp = a2mb2*(c*c - s*s) - px_abs*a*c - py_abs*b*s;
if (std::fabs(fp) > 1e-10f) {
float dt = f/fp;
float nc = c + dt*s;
float ns = s - dt*c;
float inv = 1.0f / std::sqrt(nc*nc + ns*ns);
c = nc*inv;
s = ns*inv;
}
return { std::copysign(a*c, px), std::copysign(b*s, py) };
}
// "yours minus third step" = just 2 curvature (same as faded_opt<2>)
static inline Pt yours_minus_third(float a, float b, float px, float py){
return faded_opt<2>(a,b,px,py);
}
volatile float sink;
static inline void escape(Pt p){ sink = p.x + p.y; }
struct Acc {
double max_dist_err;
double avg_dist_err;
double p99_dist_err;
double max_eq_err;
};
static inline double distf(float x, float y){ return std::sqrt((double)x*(double)x + (double)y*(double)y); }
template<typename F>
static Acc measure_accuracy(F fn, float a, float b, const std::vector<Pt>& pts){
std::vector<double> errs;
errs.reserve(pts.size());
double max_eq = 0.0;
const double invA = 1.0 / (double)a;
const double invB = 1.0 / (double)b;
for (const auto& p: pts){
Pt got = fn(a,b,p.x,p.y);
Pt ref = reference_eberly(a,b,p.x,p.y);
// eq error of got
double u = (double)got.x * invA;
double v = (double)got.y * invB;
double eq = std::fabs(u*u + v*v - 1.0);
if (std::isfinite(eq)) max_eq = std::max(max_eq, eq);
double dg = distf(p.x - got.x, p.y - got.y);
double dr = distf(p.x - ref.x, p.y - ref.y);
double de = std::fabs(dg - dr);
if (std::isfinite(de)) errs.push_back(de);
}
std::sort(errs.begin(), errs.end());
double sum = 0.0;
for (double e: errs) sum += e;
Acc out{};
out.max_eq_err = max_eq;
out.max_dist_err = errs.empty() ? 0.0 : errs.back();
out.avg_dist_err = errs.empty() ? 0.0 : sum / (double)errs.size();
out.p99_dist_err = errs.empty() ? 0.0 : errs[(size_t)((errs.size()-1) * 0.99)];
return out;
}
template<typename F>
static double bench_ns(F fn, float a, float b, const std::vector<Pt>& pts, int runs=9){
// warmup
for(int w=0; w<3; ++w) for(const auto& p: pts) escape(fn(a,b,p.x,p.y));
std::vector<double> times;
times.reserve(runs);
for(int r=0; r<runs; ++r){
auto t0 = Clock::now();
for(const auto& p: pts) escape(fn(a,b,p.x,p.y));
auto t1 = Clock::now();
double ns = std::chrono::duration<double,std::nano>(t1-t0).count();
times.push_back(ns / (double)pts.size());
}
std::sort(times.begin(), times.end());
return times[times.size()/2];
}
struct Row { const char* name; double ns; Acc acc; };
int main(){
std::mt19937 rng(12345);
std::uniform_real_distribution<float> ang(0.0f, 2.0f*(float)M_PI);
std::uniform_real_distribution<float> rad(0.1f, 3.0f);
const int N_time = 50000;
const int N_acc = 20000;
struct Cfg { float a,b; const char* name; } cfgs[] = {
{150,100,"Moderate (150,100)"},
{200, 50,"High ecc (200,50)"},
{100, 10,"Extreme (100,10)"},
{100,100,"Circle (100,100)"},
};
auto gen_pts = [&](float a,float b,int N){
std::vector<Pt> pts; pts.reserve(N);
for(int i=0;i<N;++i){
float t = ang(rng);
float r = rad(rng);
pts.push_back({a*r*std::cos(t), b*r*std::sin(t)});
}
return pts;
};
for(const auto& cfg: cfgs){
auto pts_time = gen_pts(cfg.a,cfg.b,N_time);
auto pts_acc = gen_pts(cfg.a,cfg.b,N_acc);
Row rows[6];
rows[0] = {"yours (hybrid 2+1)", bench_ns(yours_hybrid, cfg.a,cfg.b, pts_time), measure_accuracy(yours_hybrid, cfg.a,cfg.b, pts_acc)};
rows[1] = {"yours - 3rd step", bench_ns(yours_minus_third, cfg.a,cfg.b, pts_time), measure_accuracy(yours_minus_third, cfg.a,cfg.b, pts_acc)};
rows[2] = {"faded2 (opt)", bench_ns(faded_opt<2>, cfg.a,cfg.b, pts_time), measure_accuracy(faded_opt<2>, cfg.a,cfg.b, pts_acc)};
rows[3] = {"faded3 (opt)", bench_ns(faded_opt<3>, cfg.a,cfg.b, pts_time), measure_accuracy(faded_opt<3>, cfg.a,cfg.b, pts_acc)};
rows[4] = {"claude2 (rot)", bench_ns(claude_rot<2>, cfg.a,cfg.b, pts_time), measure_accuracy(claude_rot<2>, cfg.a,cfg.b, pts_acc)};
rows[5] = {"claude3 (rot)", bench_ns(claude_rot<3>, cfg.a,cfg.b, pts_time), measure_accuracy(claude_rot<3>, cfg.a,cfg.b, pts_acc)};
std::printf("CONFIG %s\n", cfg.name);
std::printf("name\t\tns_per_call\tmaxDistErr\tavgDistErr\tp99DistErr\tmaxEqErr\n");
for(const auto& row: rows){
std::printf("%s\t%.3f\t%.3e\t%.3e\t%.3e\t%.3e\n",
row.name, row.ns, row.acc.max_dist_err, row.acc.avg_dist_err, row.acc.p99_dist_err, row.acc.max_eq_err);
}
std::printf("\n");
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment