antisaling/fast_sincos.odin

## fast_sincos.odin
import "core:math/simd"

svec :: #simd[4]f32
suvec :: #simd[4]u32

// taken from JoltPhysics
fast_sincos :: proc "contextless" (v: svec) -> (sin, cos: svec) {
    // Implementation based on sinf.c from the cephes library, combines sinf and cosf in a single function, changes octants to quadrants and vectorizes it
    // Original implementation by Stephen L. Moshier (See: http://www.moshier.net/)

    // Make argument positive and remember sign for sin only since cos is symmetric around x (highest bit of a float is the sign bit)
    sin_sign := simd.and(transmute(suvec)v, suvec(0x80000000))
    x := transmute(svec)simd.xor(transmute(suvec)v, sin_sign)

    // x / (PI / 2) rounded to nearest int gives us the quadrant closest to x
    quadrant := suvec(0.6366197723675814 * x + svec(0.5))

    // Make x relative to the closest quadrant.
    // This does x = x - quadrant * PI / 2 using a two step Cody-Waite argument reduction.
    // This improves the accuracy of the result by avoiding loss of significant bits in the subtraction.
    // We start with x = x - quadrant * PI / 2, PI / 2 in hexadecimal notation is 0x3fc90fdb, we remove the lowest 16 bits to
    // get 0x3fc90000 (= 1.5703125) this means we can now multiply with a number of up to 2^16 without losing any bits.
    // This leaves us with: x = (x - quadrant * 1.5703125) - quadrant * (PI / 2 - 1.5703125).
    // PI / 2 - 1.5703125 in hexadecimal is 0x39fdaa22, stripping the lowest 12 bits we get 0x39fda000 (= 0.0004837512969970703125)
    // This leaves uw with: x = ((x - quadrant * 1.5703125) - quadrant * 0.0004837512969970703125) - quadrant * (PI / 2 - 1.5703125 - 0.0004837512969970703125)
    // See: https://stackoverflow.com/questions/42455143/sine-cosine-modular-extended-precision-arithmetic
    // After this we have x in the range [-PI / 4, PI / 4].
    float_quadrant := svec(quadrant)
    x =
        ((x - float_quadrant * 1.5703125) - float_quadrant * 0.0004837512969970703125) -
        float_quadrant * 7.549789948768648e-8

    // Calculate x2 = x^2
    x2 := x * x

    // Taylor expansion:
    // Cos(x) = 1 - x^2/2! + x^4/4! - x^6/6! + x^8/8! + ... = (((x2/8!- 1/6!) * x2 + 1/4!) * x2 - 1/2!) * x2 + 1
    taylor_cos :=
        ((2.443315711809948e-5 * x2 - svec(1.388731625493765e-3)) * x2 + svec(4.166664568298827e-2)) * x2 * x2 -
        0.5 * x2 +
        svec(1.)
    // Sin(x) = x - x^3/3! + x^5/5! - x^7/7! + ... = ((-x2/7! + 1/5!) * x2 - 1/3!) * x2 * x + x
    taylor_sin := ((-1.9515295891e-4 * x2 + svec(8.3321608736e-3)) * x2 - svec(1.6666654611e-1)) * x2 * x + x

    // The lowest 2 bits of quadrant indicate the quadrant that we are in.
    // Let x be the original input value and x' our value that has been mapped to the range [-PI / 4, PI / 4].
    // since cos(x) = sin(x - PI / 2) and since we want to use the Taylor expansion as close as possible to 0,
    // we can alternate between using the Taylor expansion for sin and cos according to the following table:
    //
    // quadrant	 sin(x)		 cos(x)
    // XXX00b	 sin(x')	 cos(x')
    // XXX01b	 cos(x')	-sin(x')
    // XXX10b	-sin(x')	-cos(x')
    // XXX11b	-cos(x')	 sin(x')
    //
    // So: sin_sign = bit2, cos_sign = bit1 ^ bit2, bit1 determines if we use sin or cos Taylor expansion
    bit1 := simd.shl(quadrant, 31)
    bit2 := simd.and(simd.shl(quadrant, 30), suvec(0x80000000))

    // Select which one of the results is sin and which one is cos
    s := simd.select(bit1, taylor_cos, taylor_sin)
    c := simd.select(bit1, taylor_sin, taylor_cos)

    // Update the signs
    sin_sign = simd.xor(sin_sign, bit2)
    cos_sign := simd.xor(bit1, bit2)

    // Correct the signs
    sin = transmute(svec)simd.xor(transmute(suvec)s, sin_sign)
    cos = transmute(svec)simd.xor(transmute(suvec)c, cos_sign)
    return
}
	import "core:math/simd"

	svec :: #simd[4]f32
	suvec :: #simd[4]u32

	// taken from JoltPhysics
	fast_sincos :: proc "contextless" (v: svec) -> (sin, cos: svec) {
	// Implementation based on sinf.c from the cephes library, combines sinf and cosf in a single function, changes octants to quadrants and vectorizes it
	// Original implementation by Stephen L. Moshier (See: http://www.moshier.net/)

	// Make argument positive and remember sign for sin only since cos is symmetric around x (highest bit of a float is the sign bit)
	sin_sign := simd.and(transmute(suvec)v, suvec(0x80000000))
	x := transmute(svec)simd.xor(transmute(suvec)v, sin_sign)

	// x / (PI / 2) rounded to nearest int gives us the quadrant closest to x
	quadrant := suvec(0.6366197723675814 * x + svec(0.5))

	// Make x relative to the closest quadrant.
	// This does x = x - quadrant * PI / 2 using a two step Cody-Waite argument reduction.
	// This improves the accuracy of the result by avoiding loss of significant bits in the subtraction.
	// We start with x = x - quadrant * PI / 2, PI / 2 in hexadecimal notation is 0x3fc90fdb, we remove the lowest 16 bits to
	// get 0x3fc90000 (= 1.5703125) this means we can now multiply with a number of up to 2^16 without losing any bits.
	// This leaves us with: x = (x - quadrant * 1.5703125) - quadrant * (PI / 2 - 1.5703125).
	// PI / 2 - 1.5703125 in hexadecimal is 0x39fdaa22, stripping the lowest 12 bits we get 0x39fda000 (= 0.0004837512969970703125)
	// This leaves uw with: x = ((x - quadrant * 1.5703125) - quadrant * 0.0004837512969970703125) - quadrant * (PI / 2 - 1.5703125 - 0.0004837512969970703125)
	// See: https://stackoverflow.com/questions/42455143/sine-cosine-modular-extended-precision-arithmetic
	// After this we have x in the range [-PI / 4, PI / 4].
	float_quadrant := svec(quadrant)
	x =
	((x - float_quadrant * 1.5703125) - float_quadrant * 0.0004837512969970703125) -
	float_quadrant * 7.549789948768648e-8

	// Calculate x2 = x^2
	x2 := x * x

	// Taylor expansion:
	// Cos(x) = 1 - x^2/2! + x^4/4! - x^6/6! + x^8/8! + ... = (((x2/8!- 1/6!) * x2 + 1/4!) * x2 - 1/2!) * x2 + 1
	taylor_cos :=
	((2.443315711809948e-5 * x2 - svec(1.388731625493765e-3)) * x2 + svec(4.166664568298827e-2)) * x2 * x2 -
	0.5 * x2 +
	svec(1.)
	// Sin(x) = x - x^3/3! + x^5/5! - x^7/7! + ... = ((-x2/7! + 1/5!) * x2 - 1/3!) * x2 * x + x
	taylor_sin := ((-1.9515295891e-4 * x2 + svec(8.3321608736e-3)) * x2 - svec(1.6666654611e-1)) * x2 * x + x

	// The lowest 2 bits of quadrant indicate the quadrant that we are in.
	// Let x be the original input value and x' our value that has been mapped to the range [-PI / 4, PI / 4].
	// since cos(x) = sin(x - PI / 2) and since we want to use the Taylor expansion as close as possible to 0,
	// we can alternate between using the Taylor expansion for sin and cos according to the following table:
	//
	// quadrant sin(x) cos(x)
	// XXX00b sin(x') cos(x')
	// XXX01b cos(x') -sin(x')
	// XXX10b -sin(x') -cos(x')
	// XXX11b -cos(x') sin(x')
	//
	// So: sin_sign = bit2, cos_sign = bit1 ^ bit2, bit1 determines if we use sin or cos Taylor expansion
	bit1 := simd.shl(quadrant, 31)
	bit2 := simd.and(simd.shl(quadrant, 30), suvec(0x80000000))

	// Select which one of the results is sin and which one is cos
	s := simd.select(bit1, taylor_cos, taylor_sin)
	c := simd.select(bit1, taylor_sin, taylor_cos)

	// Update the signs
	sin_sign = simd.xor(sin_sign, bit2)
	cos_sign := simd.xor(bit1, bit2)

	// Correct the signs
	sin = transmute(svec)simd.xor(transmute(suvec)s, sin_sign)
	cos = transmute(svec)simd.xor(transmute(suvec)c, cos_sign)
	return
	}
No results found