Created
January 4, 2026 07:33
-
-
Save achille/23680e9100db87565a8e67038797b27d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Context: | |
| https://gist.github.com/achille/d1eadf82aa54056b9ded7706e8f56760 | |
| https://chatgpt.com/share/6959ed27-accc-800e-8e87-21aa81e93c07 | |
| Claude Benchmark: | |
| Config: Moderate (150,100) - CORRECT SOLUTIONS ONLY | |
| ──────────────────────────────────────────────────────────────────────────────── | |
| Method Time max_err wrong/20000 | |
| ───────────────────────────────────────────────────────────────────────────── | |
| faded_opt (2 iter) 30.1 ns 0.18 0 ✅ | |
| chatgpt_2iter 30.1 ns 0.18 0 ✅ (same as faded_opt 2) | |
| chatgpt_adaptive 35.5 ns 0.018 0 ✅ | |
| chatgpt_hybrid (2+1) 41.4 ns 0.020 0 ✅ | |
| faded_opt (3 iter) 47.6 ns 0.018 0 ✅ | |
| claude_rot (2 iter) 21.2 ns 52 1018 ❌ BROKEN | |
| claude_rot (3 iter) 33.7 ns 43 909 ❌ BROKEN | |
| claude_rot (4 iter) 43.8 ns 44 891 ❌ BROKEN | |
| Chatgpt Benchmark: | |
| Ran it **in my environment** (g++ `-O3 -march=native -ffast-math`, `float`), measuring: | |
| * **runtime:** `ns/call` (median over 9 runs, 50k points) | |
| * **accuracy:** absolute **distance error vs a robust double-precision reference** (max / avg / p99), plus `maxEqErr = |(x/a)^2+(y/b)^2-1|` for the returned point. | |
| ### Moderate (150, 100) | |
| | method | ns/call | max dist err | avg dist err | p99 dist err | maxEqErr | | |
| | ------------------ | ------: | -----------: | -----------: | -----------: | --------: | | |
| | yours (hybrid 2+1) | 56.295 | 1.080e-02 | 1.209e-05 | 3.053e-05 | 4.520e-07 | | |
| | yours − 3rd step | 38.502 | 1.879e-01 | 2.480e-04 | 1.169e-03 | 4.737e-07 | | |
| | faded2 (opt) | 38.899 | 1.879e-01 | 2.480e-04 | 1.169e-03 | 4.737e-07 | | |
| | faded3 (opt) | 68.299 | 8.955e-03 | 1.152e-05 | 3.146e-05 | 4.419e-07 | | |
| | claude2 (rot) | 36.109 | 5.183e+01 | 9.692e-01 | 3.275e+01 | 4.495e-07 | | |
| | claude3 (rot) | 49.230 | 4.905e+01 | 9.457e-01 | 3.277e+01 | 4.370e-07 | | |
| ### High ecc (200, 50) | |
| | method | ns/call | max dist err | avg dist err | p99 dist err | maxEqErr | | |
| | ------------------ | ------: | -----------: | -----------: | -----------: | --------: | | |
| | yours (hybrid 2+1) | 59.147 | 5.636e-02 | 2.207e-05 | 1.830e-04 | 4.311e-07 | | |
| | yours − 3rd step | 38.197 | 5.382e-01 | 9.483e-03 | 2.230e-01 | 4.467e-07 | | |
| | faded2 (opt) | 39.917 | 5.382e-01 | 9.483e-03 | 2.230e-01 | 4.467e-07 | | |
| | faded3 (opt) | 74.481 | 3.022e-02 | 8.965e-05 | 2.498e-03 | 4.407e-07 | | |
| | claude2 (rot) | 33.235 | 1.357e+02 | 7.134e+00 | 1.224e+02 | 5.302e-07 | | |
| | claude3 (rot) | 49.514 | 1.375e+02 | 6.699e+00 | 1.232e+02 | 4.801e-07 | | |
| ### Extreme (100, 10) | |
| | method | ns/call | max dist err | avg dist err | p99 dist err | maxEqErr | | |
| | ------------------ | ------: | -----------: | -----------: | -----------: | --------: | | |
| | yours (hybrid 2+1) | 58.911 | 1.057e-01 | 2.983e-05 | 3.744e-05 | 4.497e-07 | | |
| | yours − 3rd step | 38.725 | 5.581e-01 | 1.797e-03 | 4.816e-02 | 4.638e-07 | | |
| | faded2 (opt) | 40.019 | 5.581e-01 | 1.797e-03 | 4.816e-02 | 4.638e-07 | | |
| | faded3 (opt) | 71.495 | 1.985e-02 | 3.397e-05 | 7.202e-04 | 4.628e-07 | | |
| | claude2 (rot) | 33.035 | 8.500e+01 | 5.297e+00 | 7.538e+01 | 5.413e-07 | | |
| | claude3 (rot) | 49.750 | 8.260e+01 | 4.858e+00 | 7.578e+01 | 4.879e-07 | | |
| ### Circle (100, 100) | |
| | method | ns/call | max dist err | avg dist err | p99 dist err | maxEqErr | | |
| | ------------------ | ------: | -----------: | -----------: | -----------: | --------: | | |
| | yours (hybrid 2+1) | 55.238 | 3.412e-05 | 5.876e-06 | 1.904e-05 | 4.422e-07 | | |
| | yours − 3rd step | 39.317 | 3.146e-05 | 3.811e-06 | 1.570e-05 | 3.235e-07 | | |
| | faded2 (opt) | 39.397 | 3.146e-05 | 3.811e-06 | 1.570e-05 | 3.235e-07 | | |
| | faded3 (opt) | 70.926 | 3.133e-05 | 4.978e-06 | 1.678e-05 | 3.460e-07 | | |
| | claude2 (rot) | 34.716 | 3.412e-05 | 5.096e-06 | 1.705e-05 | 4.538e-07 | | |
| | claude3 (rot) | 49.998 | 3.412e-05 | 5.313e-06 | 1.706e-05 | 4.521e-07 | | |
| Notes you’ll notice from the table: | |
| * **`yours − 3rd step` and `faded2 (opt)` are the same algorithmic idea** (2 iterations of the optimized curvature update), hence the near-identical stats. | |
| * **`claude2/claude3` match the ellipse equation (low maxEqErr) but are not the closest point** for non-circles (huge distance error vs robust reference). | |
| If you want to rerun this on your target machine: | |
| [Download the benchmark I used](sandbox:/mnt/data/ellipse_compare_table.cpp) | |
| #include <cmath> | |
| #include <cstdio> | |
| #include <chrono> | |
| #include <random> | |
| #include <vector> | |
| #include <algorithm> | |
| #include <cstdint> | |
| using Clock = std::chrono::high_resolution_clock; | |
| struct Pt { float x,y; }; | |
| static inline float clamp01(float v){ return std::fmin(1.0f, std::fmax(0.0f, v)); } | |
| // ----------------------------------------------------------------------------- | |
| // Robust reference (double): Eberly-style monotone root for t, solved by bisection | |
| // Ellipse: (x/a)^2 + (y/b)^2 = 1, centered at origin, axis-aligned. | |
| // Returns closest point on ellipse. | |
| // ----------------------------------------------------------------------------- | |
| static inline Pt reference_eberly(float a, float b, float px, float py){ | |
| const double alpha = (double)a * (double)a; | |
| const double beta = (double)b * (double)b; | |
| const double sx = (px < 0.f) ? -1.0 : 1.0; | |
| const double sy = (py < 0.f) ? -1.0 : 1.0; | |
| const double x = std::fabs((double)px); | |
| const double y = std::fabs((double)py); | |
| if (x == 0.0 && y == 0.0) { | |
| // Any point on ellipse is equally distant from origin; pick (0,b) | |
| return {0.0f, (float)(sy * (double)b)}; | |
| } | |
| auto F = [&](double t){ | |
| const double da = t + alpha; | |
| const double db = t + beta; | |
| const double termA = alpha * x * x / (da*da); | |
| const double termB = beta * y * y / (db*db); | |
| return termA + termB - 1.0; | |
| }; | |
| const double insideTest = (x*x)/alpha + (y*y)/beta; | |
| const bool outside = insideTest > 1.0; | |
| double lo, hi; | |
| if (outside) { | |
| lo = 0.0; | |
| hi = 1.0; | |
| // Expand until F(hi) < 0 | |
| for (int i=0;i<80 && F(hi) > 0.0; ++i) hi *= 2.0; | |
| } else { | |
| const double m = std::min(alpha, beta); | |
| // Avoid singularity at -m | |
| lo = -m + std::numeric_limits<double>::epsilon()*m; | |
| hi = 0.0; | |
| } | |
| // Bisection | |
| double t = 0.5*(lo+hi); | |
| for (int i=0;i<64;++i) { | |
| t = 0.5*(lo+hi); | |
| const double f = F(t); | |
| if (f > 0.0) lo = t; | |
| else hi = t; | |
| } | |
| const double cx = (alpha * x) / (t + alpha); | |
| const double cy = (beta * y) / (t + beta); | |
| return { (float)(sx*cx), (float)(sy*cy) }; | |
| } | |
| // ----------------------------------------------------------------------------- | |
| // 0xfaded optimized curvature step (r/q combined) - parameterized iters | |
| // ----------------------------------------------------------------------------- | |
| template<int ITERS> | |
| static inline Pt faded_opt(float a, float b, float px, float py){ | |
| float px_abs = std::fabs(px), py_abs = std::fabs(py); | |
| float tx = 0.70710678f, ty = 0.70710678f; | |
| float a2 = a*a, b2 = b*b; | |
| float ca = (a2-b2)/a, cb = (b2-a2)/b; | |
| for (int i=0;i<ITERS;++i){ | |
| float x = a*tx, y = b*ty; | |
| float tx3 = tx*tx*tx, ty3 = ty*ty*ty; | |
| float ex = ca*tx3, ey = cb*ty3; | |
| float rx = x - ex, ry = y - ey; | |
| float qx = px_abs - ex, qy = py_abs - ey; | |
| float rq = std::sqrt((rx*rx + ry*ry) / (qx*qx + qy*qy + 1e-30f)); | |
| tx = clamp01((qx * rq + ex) / a); | |
| ty = clamp01((qy * rq + ey) / b); | |
| float invt = 1.0f / std::sqrt(tx*tx + ty*ty); | |
| tx *= invt; ty *= invt; | |
| } | |
| return { std::copysign(a*tx, px), std::copysign(b*ty, py) }; | |
| } | |
| // ----------------------------------------------------------------------------- | |
| // Claude rotation-trick Newton - parameterized iters | |
| // ----------------------------------------------------------------------------- | |
| template<int ITERS> | |
| static inline Pt claude_rot(float a, float b, float px, float py){ | |
| float px_abs = std::fabs(px), py_abs = std::fabs(py); | |
| float a2mb2 = a*a - b*b; | |
| float nx = px_abs/a, ny = py_abs/b; | |
| float len = std::sqrt(nx*nx + ny*ny + 1e-10f); | |
| float c = nx/len, s = ny/len; | |
| for (int i=0;i<ITERS;++i){ | |
| float f = a2mb2*s*c - px_abs*a*s + py_abs*b*c; | |
| float fp = a2mb2*(c*c - s*s) - px_abs*a*c - py_abs*b*s; | |
| if (std::fabs(fp) < 1e-10f) break; | |
| float dt = f/fp; | |
| float nc = c + dt*s; | |
| float ns = s - dt*c; | |
| float inv = 1.0f / std::sqrt(nc*nc + ns*ns); | |
| c = nc*inv; | |
| s = ns*inv; | |
| } | |
| return { std::copysign(a*c, px), std::copysign(b*s, py) }; | |
| } | |
| // ----------------------------------------------------------------------------- | |
| // "Yours" hybrid: 2 curvature steps + 1 Newton snap | |
| // ----------------------------------------------------------------------------- | |
| static inline Pt yours_hybrid(float a, float b, float px, float py){ | |
| float px_abs = std::fabs(px), py_abs = std::fabs(py); | |
| float a2 = a*a, b2 = b*b; | |
| float ca = (a2-b2)/a, cb = (b2-a2)/b; | |
| float a2mb2 = a2 - b2; | |
| float tx = 0.70710678f, ty = 0.70710678f; | |
| // 2 curvature iterations | |
| for (int i=0;i<2;++i){ | |
| float x = a*tx, y = b*ty; | |
| float tx3 = tx*tx*tx, ty3 = ty*ty*ty; | |
| float ex = ca*tx3, ey = cb*ty3; | |
| float rx = x - ex, ry = y - ey; | |
| float qx = px_abs - ex, qy = py_abs - ey; | |
| float rq = std::sqrt((rx*rx + ry*ry) / (qx*qx + qy*qy + 1e-30f)); | |
| tx = clamp01((qx * rq + ex) / a); | |
| ty = clamp01((qy * rq + ey) / b); | |
| float invt = 1.0f / std::sqrt(tx*tx + ty*ty); | |
| tx *= invt; ty *= invt; | |
| } | |
| // 1 Newton snap using (tx,ty) as (cos,sin) | |
| float c = tx, s = ty; | |
| float f = a2mb2*s*c - px_abs*a*s + py_abs*b*c; | |
| float fp = a2mb2*(c*c - s*s) - px_abs*a*c - py_abs*b*s; | |
| if (std::fabs(fp) > 1e-10f) { | |
| float dt = f/fp; | |
| float nc = c + dt*s; | |
| float ns = s - dt*c; | |
| float inv = 1.0f / std::sqrt(nc*nc + ns*ns); | |
| c = nc*inv; | |
| s = ns*inv; | |
| } | |
| return { std::copysign(a*c, px), std::copysign(b*s, py) }; | |
| } | |
| // "yours minus third step" = just 2 curvature (same as faded_opt<2>) | |
| static inline Pt yours_minus_third(float a, float b, float px, float py){ | |
| return faded_opt<2>(a,b,px,py); | |
| } | |
| volatile float sink; | |
| static inline void escape(Pt p){ sink = p.x + p.y; } | |
| struct Acc { | |
| double max_dist_err; | |
| double avg_dist_err; | |
| double p99_dist_err; | |
| double max_eq_err; | |
| }; | |
| static inline double distf(float x, float y){ return std::sqrt((double)x*(double)x + (double)y*(double)y); } | |
| template<typename F> | |
| static Acc measure_accuracy(F fn, float a, float b, const std::vector<Pt>& pts){ | |
| std::vector<double> errs; | |
| errs.reserve(pts.size()); | |
| double max_eq = 0.0; | |
| const double invA = 1.0 / (double)a; | |
| const double invB = 1.0 / (double)b; | |
| for (const auto& p: pts){ | |
| Pt got = fn(a,b,p.x,p.y); | |
| Pt ref = reference_eberly(a,b,p.x,p.y); | |
| // eq error of got | |
| double u = (double)got.x * invA; | |
| double v = (double)got.y * invB; | |
| double eq = std::fabs(u*u + v*v - 1.0); | |
| if (std::isfinite(eq)) max_eq = std::max(max_eq, eq); | |
| double dg = distf(p.x - got.x, p.y - got.y); | |
| double dr = distf(p.x - ref.x, p.y - ref.y); | |
| double de = std::fabs(dg - dr); | |
| if (std::isfinite(de)) errs.push_back(de); | |
| } | |
| std::sort(errs.begin(), errs.end()); | |
| double sum = 0.0; | |
| for (double e: errs) sum += e; | |
| Acc out{}; | |
| out.max_eq_err = max_eq; | |
| out.max_dist_err = errs.empty() ? 0.0 : errs.back(); | |
| out.avg_dist_err = errs.empty() ? 0.0 : sum / (double)errs.size(); | |
| out.p99_dist_err = errs.empty() ? 0.0 : errs[(size_t)((errs.size()-1) * 0.99)]; | |
| return out; | |
| } | |
| template<typename F> | |
| static double bench_ns(F fn, float a, float b, const std::vector<Pt>& pts, int runs=9){ | |
| // warmup | |
| for(int w=0; w<3; ++w) for(const auto& p: pts) escape(fn(a,b,p.x,p.y)); | |
| std::vector<double> times; | |
| times.reserve(runs); | |
| for(int r=0; r<runs; ++r){ | |
| auto t0 = Clock::now(); | |
| for(const auto& p: pts) escape(fn(a,b,p.x,p.y)); | |
| auto t1 = Clock::now(); | |
| double ns = std::chrono::duration<double,std::nano>(t1-t0).count(); | |
| times.push_back(ns / (double)pts.size()); | |
| } | |
| std::sort(times.begin(), times.end()); | |
| return times[times.size()/2]; | |
| } | |
| struct Row { const char* name; double ns; Acc acc; }; | |
| int main(){ | |
| std::mt19937 rng(12345); | |
| std::uniform_real_distribution<float> ang(0.0f, 2.0f*(float)M_PI); | |
| std::uniform_real_distribution<float> rad(0.1f, 3.0f); | |
| const int N_time = 50000; | |
| const int N_acc = 20000; | |
| struct Cfg { float a,b; const char* name; } cfgs[] = { | |
| {150,100,"Moderate (150,100)"}, | |
| {200, 50,"High ecc (200,50)"}, | |
| {100, 10,"Extreme (100,10)"}, | |
| {100,100,"Circle (100,100)"}, | |
| }; | |
| auto gen_pts = [&](float a,float b,int N){ | |
| std::vector<Pt> pts; pts.reserve(N); | |
| for(int i=0;i<N;++i){ | |
| float t = ang(rng); | |
| float r = rad(rng); | |
| pts.push_back({a*r*std::cos(t), b*r*std::sin(t)}); | |
| } | |
| return pts; | |
| }; | |
| for(const auto& cfg: cfgs){ | |
| auto pts_time = gen_pts(cfg.a,cfg.b,N_time); | |
| auto pts_acc = gen_pts(cfg.a,cfg.b,N_acc); | |
| Row rows[6]; | |
| rows[0] = {"yours (hybrid 2+1)", bench_ns(yours_hybrid, cfg.a,cfg.b, pts_time), measure_accuracy(yours_hybrid, cfg.a,cfg.b, pts_acc)}; | |
| rows[1] = {"yours - 3rd step", bench_ns(yours_minus_third, cfg.a,cfg.b, pts_time), measure_accuracy(yours_minus_third, cfg.a,cfg.b, pts_acc)}; | |
| rows[2] = {"faded2 (opt)", bench_ns(faded_opt<2>, cfg.a,cfg.b, pts_time), measure_accuracy(faded_opt<2>, cfg.a,cfg.b, pts_acc)}; | |
| rows[3] = {"faded3 (opt)", bench_ns(faded_opt<3>, cfg.a,cfg.b, pts_time), measure_accuracy(faded_opt<3>, cfg.a,cfg.b, pts_acc)}; | |
| rows[4] = {"claude2 (rot)", bench_ns(claude_rot<2>, cfg.a,cfg.b, pts_time), measure_accuracy(claude_rot<2>, cfg.a,cfg.b, pts_acc)}; | |
| rows[5] = {"claude3 (rot)", bench_ns(claude_rot<3>, cfg.a,cfg.b, pts_time), measure_accuracy(claude_rot<3>, cfg.a,cfg.b, pts_acc)}; | |
| std::printf("CONFIG %s\n", cfg.name); | |
| std::printf("name\t\tns_per_call\tmaxDistErr\tavgDistErr\tp99DistErr\tmaxEqErr\n"); | |
| for(const auto& row: rows){ | |
| std::printf("%s\t%.3f\t%.3e\t%.3e\t%.3e\t%.3e\n", | |
| row.name, row.ns, row.acc.max_dist_err, row.acc.avg_dist_err, row.acc.p99_dist_err, row.acc.max_eq_err); | |
| } | |
| std::printf("\n"); | |
| } | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment