Skip to content

Instantly share code, notes, and snippets.

@yuygfgg
Created January 12, 2026 15:34
Show Gist options
  • Select an option

  • Save yuygfgg/f047015b95d86cc374e9f5b96bf33154 to your computer and use it in GitHub Desktop.

Select an option

Save yuygfgg/f047015b95d86cc374e9f5b96bf33154 to your computer and use it in GitHub Desktop.
Tiny software IEEE 754 binary32 implementation with 32bit integer arithmetics (C11)
/*
 * Internal representation:
 * - `negative` : sign bit
 * - `exponent` : unbiased exponent in [-126 .. 127] for finite values,
 *                and 128 for NaN/Inf (special marker)
 * - `mantissa` : 24-bit mantissa in [0 .. 2^24)
 *     - normal:    mantissa in [2^23 .. 2^24) (hidden bit present)
 *     - subnormal: mantissa in (0 .. 2^23), exponent = -126
 *     - zero:      mantissa = 0, exponent = -126
 */
#ifndef softfloat_H
#define softfloat_H
#include <stdbool.h>
#include <stddef.h>
/*
 * Integer aliases used throughout the library. All arithmetic assumes both
 * types are exactly 4 bytes wide; the static assertions below stop the build
 * with a readable message on any platform where that does not hold.
 */
typedef unsigned int softfloat_u32_t;
typedef int softfloat_i32_t;
_Static_assert(sizeof(softfloat_u32_t) == 4,
               "softfloat_u32_t must be exactly 4 bytes wide");
_Static_assert(sizeof(softfloat_i32_t) == 4,
               "softfloat_i32_t must be exactly 4 bytes wide");
/**
 * @def softfloat_MANT_BITS
 * @brief Width of the explicit fraction field of IEEE-754 binary32 (23 bits).
 */
#define softfloat_MANT_BITS 23
/**
 * @def softfloat_EXP_BIAS
 * @brief Bias added to the unbiased exponent in the binary32 encoding (127).
 */
#define softfloat_EXP_BIAS 127
/**
 * @def softfloat_EXP_MIN
 * @brief Smallest unbiased exponent of a finite value (-126).
 *
 * This is raw exponent 1 with the bias removed. Zero and subnormal values are
 * stored with this exponent as well in the library's internal format.
 */
#define softfloat_EXP_MIN (1 - softfloat_EXP_BIAS)
/**
 * @def softfloat_EXP_SPECIAL
 * @brief Internal exponent marker for NaN and infinity (128).
 *
 * This is raw exponent 0xff with the bias removed, one above the largest
 * finite unbiased exponent.
 */
#define softfloat_EXP_SPECIAL (0xff - softfloat_EXP_BIAS)
/**
 * @def softfloat_HIDDEN_BIT
 * @brief Implicit leading 1 of a normal mantissa, i.e. 2^23.
 */
#define softfloat_HIDDEN_BIT (1u << softfloat_MANT_BITS)
/**
 * @def softfloat_MANT_MASK
 * @brief Selects the 23 explicit fraction bits (everything below the hidden bit).
 */
#define softfloat_MANT_MASK (softfloat_HIDDEN_BIT - 1u)
/**
 * @def softfloat_MANT24_ONE
 * @brief 2^24, the first value that no longer fits the 24-bit internal mantissa.
 */
#define softfloat_MANT24_ONE (softfloat_HIDDEN_BIT << 1)
/**
 * @def softfloat_MANT24_MASK
 * @brief Selects all 24 bits of the internal mantissa (hidden bit included).
 */
#define softfloat_MANT24_MASK (softfloat_MANT24_ONE - 1u)
/**
 * @typedef softfloat_t
 * @brief Integer-only representation of an IEEE-754 binary32 value.
 *
 * Fields:
 * - `negative` (`bool`): sign bit (`true` => negative).
 * - `exponent` (`int`): unbiased exponent for finite values, or
 *   `softfloat_EXP_SPECIAL` for NaN/Inf.
 * - `mantissa` (`softfloat_u32_t`): internal 24-bit mantissa payload. For
 *   normal numbers it includes the hidden bit; when `exponent` is
 *   `softfloat_EXP_SPECIAL`, zero means infinity and non-zero means NaN.
 *
 * The field order matters: `softfloat_make()` fills the struct with a
 * positional initializer.
 *
 * Recommended construction:
 * - Use `softfloat_from_bits()` / `softfloat_to_bits()` to interface with binary32 bit patterns.
 * - Use `softfloat_from_i32()` / `softfloat_from_u32()` for integer inputs.
 * - Use `softfloat_zero()` / `softfloat_inf()` / `softfloat_nan()` for special values.
 */
typedef struct softfloat_t {
bool negative; /* sign bit: true for negative values (including -0 and -inf) */
int exponent; /* unbiased exponent, or softfloat_EXP_SPECIAL for NaN/Inf */
softfloat_u32_t mantissa; /* 24-bit payload; hidden bit set for normal numbers */
} softfloat_t;
/**
 * @enum softfloat_err_t
 * @brief Error codes used by integer conversion helpers.
 *
 * Notes:
 * - Integer conversion in this library is truncation toward zero.
 * - Functions returning `softfloat_err_t` write a deterministic value to
 *   `*out` (currently 0) on every failure except `softfloat_ERR_NULL`, where
 *   there is no output object to write to.
 */
typedef enum softfloat_err_t {
/** Conversion succeeded. */
softfloat_OK = 0,
/** `out == NULL`; nothing was written. */
softfloat_ERR_NULL = 1,
/** Input is NaN. */
softfloat_ERR_NAN = 2,
/** Input is +/-Infinity. */
softfloat_ERR_INF = 3,
/** Unsigned conversion requested but input is negative (and not -0). */
softfloat_ERR_NEGATIVE = 4,
/** Result does not fit in the requested integer type. */
softfloat_ERR_OVERFLOW = 5,
} softfloat_err_t;
/**
 * @brief Assemble a `softfloat_t` from its three raw fields.
 *
 * @param negative Sign bit (`true` => negative).
 * @param exponent Unbiased exponent, or `softfloat_EXP_SPECIAL` for NaN/Inf.
 * @param mantissa Internal 24-bit mantissa payload.
 * @return A value whose fields are exactly the arguments given.
 *
 * The arguments are stored as-is: nothing is validated, masked or
 * normalized. Intended for low-level construction and internal helpers.
 */
static inline softfloat_t softfloat_make(bool negative, int exponent,
                                         softfloat_u32_t mantissa) {
    softfloat_t value;
    value.negative = negative;
    value.exponent = exponent;
    value.mantissa = mantissa;
    return value;
}
/**
 * @brief Build a signed zero.
 *
 * @param negative `true` yields -0, `false` yields +0.
 * @return Zero, stored with exponent `softfloat_EXP_MIN` and mantissa 0.
 */
static inline softfloat_t softfloat_zero(bool negative) {
    const softfloat_t zero = softfloat_make(negative, softfloat_EXP_MIN, 0u);
    return zero;
}
/**
 * @brief Build a signed infinity.
 *
 * @param negative `true` yields -inf, `false` yields +inf.
 * @return Infinity, stored with exponent `softfloat_EXP_SPECIAL` and mantissa 0.
 */
static inline softfloat_t softfloat_inf(bool negative) {
    const softfloat_t infinity =
        softfloat_make(negative, softfloat_EXP_SPECIAL, 0u);
    return infinity;
}
/**
 * @brief Report whether `f` is a NaN.
 *
 * A NaN carries the special exponent marker together with a non-zero mantissa.
 *
 * @param f `softfloat_t` value.
 * @return `true` if `f` is a NaN, `false` otherwise.
 */
static inline bool softfloat_isnan(softfloat_t f) {
    if (f.exponent != softfloat_EXP_SPECIAL)
        return false;
    return f.mantissa != 0u;
}
/**
 * @brief Report whether `f` is an infinity of either sign.
 *
 * An infinity carries the special exponent marker together with a zero mantissa.
 *
 * @param f `softfloat_t` value.
 * @return `true` if `f` is +inf or -inf, `false` otherwise.
 */
static inline bool softfloat_isinf(softfloat_t f) {
    if (f.exponent != softfloat_EXP_SPECIAL)
        return false;
    return f.mantissa == 0u;
}
/**
 * @brief Report whether `f` is finite, i.e. neither NaN nor infinity.
 *
 * NaN and infinity are exactly the values that carry the special exponent
 * marker, so checking the exponent alone is sufficient.
 *
 * @param f `softfloat_t` value.
 * @return `true` if finite, `false` if NaN or infinity.
 */
static inline bool softfloat_isfinite(softfloat_t f) {
    return f.exponent != softfloat_EXP_SPECIAL;
}
/**
 * @brief Report whether `f` is a finite normal number.
 *
 * Zero and subnormal values have the hidden bit clear and are therefore
 * rejected, as are NaN and infinity.
 *
 * @param f `softfloat_t` value.
 * @return `true` if `f` is a finite normal number, `false` otherwise.
 */
static inline bool softfloat_isnormal(softfloat_t f) {
    if (!softfloat_isfinite(f))
        return false;
    return f.mantissa >= softfloat_HIDDEN_BIT;
}
/**
 * @brief Flip the sign of a value; NaNs are returned untouched.
 *
 * @param f `softfloat_t` value.
 * @return `f` with its sign inverted, or `f` itself when it is a NaN.
 */
static inline softfloat_t softfloat_neg(softfloat_t f) {
    if (!softfloat_isnan(f))
        f.negative = !f.negative;
    return f;
}
/**
 * @brief Construct a positive NaN carrying `payload` (preserved as far as possible).
 *
 * @param payload `softfloat_u32_t` payload stored in the internal mantissa field.
 * Only the low 24 bits are kept. If the low 23 bits (the part that survives
 * packing to binary32) are all zero, bit 0 is forced to 1 so the value
 * cannot be mistaken for an infinity.
 *
 * @return A NaN value (`softfloat_isnan(...)` will be true).
 *
 * Notes:
 * - The result is a *quiet* NaN in the IEEE 754-2008 sense only when bit 22
 *   of `payload` is set; this function does not force that bit. The
 *   arithmetic routines in this header pass `softfloat_MANT24_MASK`, which
 *   has it set.
 * - Defined `static inline` so the header can be included from several
 *   translation units without duplicate-symbol link errors.
 */
static inline softfloat_t softfloat_nan(softfloat_u32_t payload) {
    /*
     * Internally 24 bits are kept; packing to binary32 drops bit 23, so the
     * non-zero check must look at the low 23 bits only.
     */
    payload &= softfloat_MANT24_MASK;
    if ((payload & softfloat_MANT_MASK) == 0u)
        payload |= 1u;
    return softfloat_make(false, softfloat_EXP_SPECIAL, payload);
}
/**
 * @brief Decode an IEEE-754 binary32 bit-pattern into `softfloat_t`.
 *
 * @param bits `softfloat_u32_t` containing the exact IEEE-754 binary32 layout:
 * sign (bit 31), exponent (bits 30..23), mantissa/fraction (bits 22..0).
 *
 * @return The corresponding `softfloat_t` value (including NaN/Inf/subnormal/zero).
 *
 * Mapping of the raw exponent field:
 * - 0      => zero or subnormal: exponent `softfloat_EXP_MIN`, fraction stored as-is.
 * - 0xff   => infinity or NaN: exponent `softfloat_EXP_SPECIAL`, fraction stored as-is.
 * - others => normal: bias removed and the hidden bit restored.
 *
 * Defined `static inline` so the header can be included from several
 * translation units without duplicate-symbol link errors.
 */
static inline softfloat_t softfloat_from_bits(softfloat_u32_t bits) {
    const bool negative = ((bits >> 31) & 1u) != 0u;
    const softfloat_u32_t raw_exponent = (bits >> softfloat_MANT_BITS) & 0xffu;
    const softfloat_u32_t raw_mantissa = bits & softfloat_MANT_MASK;

    if (raw_exponent == 0u) /* zero or subnormal */
        return softfloat_make(negative, softfloat_EXP_MIN, raw_mantissa);
    if (raw_exponent == 0xffu) /* inf / nan */
        return softfloat_make(negative, softfloat_EXP_SPECIAL, raw_mantissa);

    /* Normal number: restore the hidden bit (it is never set in the fraction). */
    return softfloat_make(negative, (int)raw_exponent - softfloat_EXP_BIAS,
                          raw_mantissa | softfloat_HIDDEN_BIT);
}
/**
 * @brief Encode a `softfloat_t` into an IEEE-754 binary32 bit-pattern.
 *
 * @param f `softfloat_t` value. Expected to be well-formed (exponent within
 * [`softfloat_EXP_MIN` .. 127] or `softfloat_EXP_SPECIAL`); other exponents
 * are not validated.
 * @return `softfloat_u32_t` containing the exact IEEE-754 binary32 layout.
 *
 * Notes:
 * - A NaN whose only set mantissa bit is the internal bit 23 would lose its
 *   payload when that bit is dropped; fraction bit 0 is set instead so the
 *   result stays a NaN.
 * - Defined `static inline` so the header can be included from several
 *   translation units without duplicate-symbol link errors.
 */
static inline softfloat_u32_t softfloat_to_bits(softfloat_t f) {
    const softfloat_u32_t sign_bit = f.negative ? 1u : 0u;
    softfloat_u32_t raw_mantissa = f.mantissa & softfloat_MANT_MASK;
    softfloat_u32_t raw_exponent;

    if (f.exponent == softfloat_EXP_SPECIAL) {
        raw_exponent = 0xffu;
        if (f.mantissa != 0u && raw_mantissa == 0u)
            raw_mantissa = 1u; /* keep NaN distinguishable from infinity */
    } else if (f.exponent == softfloat_EXP_MIN &&
               f.mantissa < softfloat_HIDDEN_BIT) {
        raw_exponent = 0u; /* zero or subnormal */
    } else {
        raw_exponent = (softfloat_u32_t)(f.exponent + softfloat_EXP_BIAS);
    }
    return (sign_bit << 31) | (raw_exponent << softfloat_MANT_BITS) |
           raw_mantissa;
}
/**
 * @brief Convert a signed 32-bit integer to `softfloat_t`.
 *
 * @param value `softfloat_i32_t` input.
 * @return `softfloat_t` representing that integer value. The result is exact
 * when the magnitude fits in 24 significant bits; otherwise it is rounded to
 * nearest, ties-to-even, like the arithmetic routines in this header.
 *
 * Defined `static inline` so the header can be included from several
 * translation units without duplicate-symbol link errors.
 */
static inline softfloat_t softfloat_from_i32(softfloat_i32_t value) {
    if (value == 0)
        return softfloat_zero(false);

    const bool negative = (value < 0);
    /* Unsigned negation is well defined even for the most negative input. */
    const softfloat_u32_t magnitude =
        negative ? (0u - (softfloat_u32_t)value) : (softfloat_u32_t)value;

    int exponent = softfloat_MANT_BITS;
    softfloat_u32_t mantissa = magnitude;

    /* Small magnitudes: shift up until the hidden bit is set (exact). */
    while (mantissa < softfloat_HIDDEN_BIT) {
        mantissa <<= 1;
        exponent--;
    }

    /*
     * Large magnitudes: shift down, remembering the last bit shifted out
     * (guard) and whether any earlier dropped bit was set (sticky).
     */
    softfloat_u32_t guard = 0u;
    softfloat_u32_t sticky = 0u;
    while (mantissa >= softfloat_MANT24_ONE) {
        sticky |= guard;
        guard = mantissa & 1u;
        mantissa >>= 1;
        exponent++;
    }

    /* Round to nearest, ties to even. */
    if (guard != 0u && (sticky != 0u || (mantissa & 1u) != 0u)) {
        mantissa++;
        if (mantissa == softfloat_MANT24_ONE) {
            mantissa >>= 1;
            exponent++;
        }
    }
    return softfloat_make(negative, exponent, mantissa);
}
/**
 * @brief Convert an unsigned 32-bit integer to `softfloat_t`.
 *
 * @param value `softfloat_u32_t` input.
 * @return `softfloat_t` representing that integer value. The result is exact
 * when the value fits in 24 significant bits; otherwise it is rounded to
 * nearest, ties-to-even, like the arithmetic routines in this header.
 *
 * Defined `static inline` so the header can be included from several
 * translation units without duplicate-symbol link errors.
 */
static inline softfloat_t softfloat_from_u32(softfloat_u32_t value) {
    if (value == 0u)
        return softfloat_zero(false);

    int exponent = softfloat_MANT_BITS;
    softfloat_u32_t mantissa = value;

    /* Small values: shift up until the hidden bit is set (exact). */
    while (mantissa < softfloat_HIDDEN_BIT) {
        mantissa <<= 1;
        exponent--;
    }

    /*
     * Large values: shift down, remembering the last bit shifted out
     * (guard) and whether any earlier dropped bit was set (sticky).
     */
    softfloat_u32_t guard = 0u;
    softfloat_u32_t sticky = 0u;
    while (mantissa >= softfloat_MANT24_ONE) {
        sticky |= guard;
        guard = mantissa & 1u;
        mantissa >>= 1;
        exponent++;
    }

    /* Round to nearest, ties to even. */
    if (guard != 0u && (sticky != 0u || (mantissa & 1u) != 0u)) {
        mantissa++;
        if (mantissa == softfloat_MANT24_ONE) {
            mantissa >>= 1;
            exponent++;
        }
    }
    return softfloat_make(false, exponent, mantissa);
}
/**
 * @brief Convert `softfloat_t` to `softfloat_i32_t` by truncation toward zero.
 *
 * @param f `softfloat_t` input.
 * @param out Output pointer (`softfloat_i32_t*`). Must not be NULL.
 *
 * @return `softfloat_OK` on success, otherwise an error code:
 * - `softfloat_ERR_NULL`: `out == NULL`
 * - `softfloat_ERR_NAN`: input is NaN
 * - `softfloat_ERR_INF`: input is +/-Infinity
 * - `softfloat_ERR_OVERFLOW`: result does not fit in `softfloat_i32_t`
 *
 * Whenever `out` is non-NULL, `*out` is set to a deterministic value
 * (0 on failure).
 *
 * Defined `static inline` so the header can be included from several
 * translation units without duplicate-symbol link errors.
 */
static inline softfloat_err_t softfloat_to_i32(softfloat_t f,
                                               softfloat_i32_t* out) {
    if (out == NULL)
        return softfloat_ERR_NULL;
    *out = 0;
    if (softfloat_isnan(f))
        return softfloat_ERR_NAN;
    if (softfloat_isinf(f))
        return softfloat_ERR_INF;
    if (f.mantissa == 0u)
        return softfloat_OK; /* +/-0 converts to 0 */

    /* The mantissa is an integer scaled by 2^(exponent - 23). */
    const int shift = f.exponent - softfloat_MANT_BITS;
    if (shift > 8)
        return softfloat_ERR_OVERFLOW; /* 24 bits << 9 no longer fit in 32 */

    softfloat_u32_t mag;
    if (shift >= 0)
        mag = f.mantissa << (softfloat_u32_t)shift;
    else if (shift <= -32)
        mag = 0u; /* shifting by >= width would be undefined */
    else
        mag = f.mantissa >> (softfloat_u32_t)(-shift);

    if (f.negative) {
        if (mag > 2147483648u)
            return softfloat_ERR_OVERFLOW;
        /* The most negative value cannot be produced by negating a positive. */
        *out = (mag == 2147483648u) ? (-2147483647 - 1)
                                    : -(softfloat_i32_t)mag;
        return softfloat_OK;
    }
    if (mag > 2147483647u)
        return softfloat_ERR_OVERFLOW;
    *out = (softfloat_i32_t)mag;
    return softfloat_OK;
}
/**
 * @brief Convert `softfloat_t` to `softfloat_u32_t` by truncation toward zero.
 *
 * @param f `softfloat_t` input.
 * @param out Output pointer (`softfloat_u32_t*`). Must not be NULL.
 *
 * @return `softfloat_OK` on success, otherwise an error code:
 * - `softfloat_ERR_NULL`: `out == NULL`
 * - `softfloat_ERR_NAN`: input is NaN
 * - `softfloat_ERR_INF`: input is +/-Infinity
 * - `softfloat_ERR_NEGATIVE`: input is negative and non-zero (this includes
 *   values in (-1, 0) that would truncate to 0)
 * - `softfloat_ERR_OVERFLOW`: result does not fit in `softfloat_u32_t`
 *
 * Whenever `out` is non-NULL, `*out` is set to a deterministic value
 * (0 on failure). Both +0 and -0 convert to 0 successfully.
 *
 * Defined `static inline` so the header can be included from several
 * translation units without duplicate-symbol link errors.
 */
static inline softfloat_err_t softfloat_to_u32(softfloat_t f,
                                               softfloat_u32_t* out) {
    if (out == NULL)
        return softfloat_ERR_NULL;
    *out = 0u;
    if (softfloat_isnan(f))
        return softfloat_ERR_NAN;
    if (softfloat_isinf(f))
        return softfloat_ERR_INF;
    if (f.mantissa == 0u)
        return softfloat_OK; /* +/-0 converts to 0 */
    if (f.negative)
        return softfloat_ERR_NEGATIVE;

    /* The mantissa is an integer scaled by 2^(exponent - 23). */
    const int shift = f.exponent - softfloat_MANT_BITS;
    if (shift > 8)
        return softfloat_ERR_OVERFLOW; /* 24 bits << 9 no longer fit in 32 */

    if (shift >= 0)
        *out = f.mantissa << (softfloat_u32_t)shift;
    else if (shift <= -32)
        *out = 0u; /* shifting by >= width would be undefined */
    else
        *out = f.mantissa >> (softfloat_u32_t)(-shift);
    return softfloat_OK;
}
/**
 * @brief Boolean-returning wrapper around `softfloat_to_i32`.
 *
 * @param f `softfloat_t` input.
 * @param out `softfloat_i32_t*` output pointer, forwarded unchanged.
 * @return `true` when the conversion reported `softfloat_OK`, `false` otherwise.
 */
static inline bool softfloat_try_to_i32(softfloat_t f, softfloat_i32_t* out) {
    const softfloat_err_t status = softfloat_to_i32(f, out);
    return status == softfloat_OK;
}
/**
 * @brief Boolean-returning wrapper around `softfloat_to_u32`.
 *
 * @param f `softfloat_t` input.
 * @param out `softfloat_u32_t*` output pointer, forwarded unchanged.
 * @return `true` when the conversion reported `softfloat_OK`, `false` otherwise.
 */
static inline bool softfloat_try_to_u32(softfloat_t f, softfloat_u32_t* out) {
    const softfloat_err_t status = softfloat_to_u32(f, out);
    return status == softfloat_OK;
}
/**
 * @brief Expression-style helper: returns the converted value and optionally reports the error.
 *
 * @param f `softfloat_t` input.
 * @param err Optional output pointer (`softfloat_err_t*`). May be NULL, in
 * which case the status of the conversion is discarded.
 * @return Converted value (truncated toward zero) on success; 0 on failure.
 *
 * Defined `static inline` so the header can be included from several
 * translation units without duplicate-symbol link errors.
 */
static inline softfloat_i32_t softfloat_i32(softfloat_t f,
                                            softfloat_err_t* err) {
    softfloat_i32_t result = 0;
    const softfloat_err_t status = softfloat_to_i32(f, &result);
    if (err != NULL)
        *err = status;
    return result;
}
/**
 * @brief Expression-style helper: returns the converted value and optionally reports the error.
 *
 * @param f `softfloat_t` input.
 * @param err Optional output pointer (`softfloat_err_t*`). May be NULL, in
 * which case the status of the conversion is discarded.
 * @return Converted value (truncated toward zero) on success; 0 on failure.
 *
 * Defined `static inline` so the header can be included from several
 * translation units without duplicate-symbol link errors.
 */
static inline softfloat_u32_t softfloat_u32(softfloat_t f,
                                            softfloat_err_t* err) {
    softfloat_u32_t result = 0u;
    const softfloat_err_t status = softfloat_to_u32(f, &result);
    if (err != NULL)
        *err = status;
    return result;
}
/**
 * @brief Equality comparison.
 *
 * @param lhs `softfloat_t` left operand.
 * @param rhs `softfloat_t` right operand.
 * @return `true` if equal, `false` otherwise.
 *
 * Notes:
 * - NaNs compare unequal to everything, including themselves.
 * - +0 compares equal to -0.
 * - All other values are compared field by field, so both operands are
 *   expected to be in the library's canonical (normalized) form.
 * - Defined `static inline` so the header can be included from several
 *   translation units without duplicate-symbol link errors.
 */
static inline bool softfloat_eq(softfloat_t lhs, softfloat_t rhs) {
    if (softfloat_isnan(lhs) || softfloat_isnan(rhs))
        return false;

    /* Zeros are equal regardless of sign. */
    const bool both_zero = softfloat_isfinite(lhs) && softfloat_isfinite(rhs) &&
                           lhs.mantissa == 0u && rhs.mantissa == 0u;
    if (both_zero)
        return true;

    return lhs.mantissa == rhs.mantissa && lhs.exponent == rhs.exponent &&
           lhs.negative == rhs.negative;
}
/**
 * @brief Inequality comparison: the logical negation of `softfloat_eq`.
 *
 * @param lhs `softfloat_t` left operand.
 * @param rhs `softfloat_t` right operand.
 * @return `true` if the operands are not equal, `false` if they are.
 *
 * Notes:
 * - A NaN on either side makes the result `true`, even for a NaN compared
 *   with itself.
 * - +0 and -0 are equal, so `softfloat_ne(+0, -0)` is `false`.
 */
static inline bool softfloat_ne(softfloat_t lhs, softfloat_t rhs) {
    const bool equal = softfloat_eq(lhs, rhs);
    return !equal;
}
/**
 * @brief Less-than comparison.
 *
 * @param lhs `softfloat_t` left operand.
 * @param rhs `softfloat_t` right operand.
 * @return `true` if `lhs < rhs`, `false` otherwise.
 *
 * Notes:
 * - NaNs are unordered: any comparison with NaN returns false.
 * - +0 is not less than -0 (they compare equal).
 * - Defined `static inline` so the header can be included from several
 *   translation units without duplicate-symbol link errors.
 */
static inline bool softfloat_lt(softfloat_t lhs, softfloat_t rhs) {
    if (softfloat_isnan(lhs) || softfloat_isnan(rhs))
        return false;
    if (softfloat_eq(lhs, rhs))
        return false;

    /* Different signs (and not both zero): the negative one is smaller. */
    if (lhs.negative != rhs.negative)
        return lhs.negative;

    /*
     * Same sign, values differ. Magnitudes order by exponent first, then by
     * mantissa; infinity carries the largest exponent marker.
     */
    const bool lhs_smaller_magnitude =
        (lhs.exponent < rhs.exponent) ||
        (lhs.exponent == rhs.exponent && lhs.mantissa < rhs.mantissa);

    /* For negative operands the larger magnitude is the smaller value. */
    return lhs.negative != lhs_smaller_magnitude;
}
/**
 * @brief Greater-than comparison.
 *
 * @param lhs `softfloat_t` left operand.
 * @param rhs `softfloat_t` right operand.
 * @return `true` if `lhs > rhs`, `false` otherwise.
 *
 * Notes:
 * - NaNs are unordered: any comparison with NaN returns false. The explicit
 *   NaN check is required because "not less and not equal" would otherwise
 *   be true for NaN operands.
 * - Defined `static inline` so the header can be included from several
 *   translation units without duplicate-symbol link errors.
 */
static inline bool softfloat_gt(softfloat_t lhs, softfloat_t rhs) {
    if (softfloat_isnan(lhs) || softfloat_isnan(rhs))
        return false;
    return !softfloat_lt(lhs, rhs) && !softfloat_eq(lhs, rhs);
}
/**
 * @brief Less-or-equal comparison, built from `softfloat_lt` and `softfloat_eq`.
 *
 * @param lhs `softfloat_t` left operand.
 * @param rhs `softfloat_t` right operand.
 * @return `true` if `lhs <= rhs`, `false` otherwise.
 *
 * Notes:
 * - A NaN on either side yields `false`.
 * - +0 and -0 compare equal.
 */
static inline bool softfloat_le(softfloat_t lhs, softfloat_t rhs) {
    if (softfloat_lt(lhs, rhs))
        return true;
    return softfloat_eq(lhs, rhs);
}
/**
 * @brief Greater-or-equal comparison, built from `softfloat_gt` and `softfloat_eq`.
 *
 * @param lhs `softfloat_t` left operand.
 * @param rhs `softfloat_t` right operand.
 * @return `true` if `lhs >= rhs`, `false` otherwise.
 *
 * Notes:
 * - A NaN on either side yields `false`.
 * - +0 and -0 compare equal.
 */
static inline bool softfloat_ge(softfloat_t lhs, softfloat_t rhs) {
    if (softfloat_gt(lhs, rhs))
        return true;
    return softfloat_eq(lhs, rhs);
}
/*
 * Shift `x` right by one bit while keeping a "sticky" record of the bit that
 * fell off: the dropped bit is OR-ed into the new least significant bit.
 */
static inline softfloat_u32_t softfloat__shr1_sticky_u32(softfloat_u32_t x) {
    const softfloat_u32_t dropped = x & 1u;
    return (x >> 1) | dropped;
}
/**
 * @brief Add two values.
 *
 * @param lhs `softfloat_t` left operand.
 * @param rhs `softfloat_t` right operand.
 * @return Sum as `softfloat_t`.
 *
 * Special cases:
 * - NaN propagates: if either operand is NaN, result is NaN.
 * - inf + inf (same sign) => that infinity; inf + (-inf) => NaN.
 * - (-0) + (-0) => -0; any other sum of two zeros => +0.
 * - Exact cancellation results in +0.
 * - Overflow past the largest finite exponent yields a signed infinity.
 *
 * Rounding: round-to-nearest, ties-to-even.
 *
 * Defined `static inline` so the header can be included from several
 * translation units without duplicate-symbol link errors.
 */
static inline softfloat_t softfloat_add(softfloat_t lhs, softfloat_t rhs) {
    softfloat_t a = lhs;
    softfloat_t b = rhs;

    if (softfloat_isnan(a) || softfloat_isnan(b))
        return softfloat_nan(softfloat_MANT24_MASK);
    if (softfloat_isinf(a) && softfloat_isinf(b)) {
        if (a.negative == b.negative)
            return a;
        return softfloat_nan(softfloat_MANT24_MASK);
    }
    if (softfloat_isinf(a))
        return a;
    if (softfloat_isinf(b))
        return b;
    if (a.mantissa == 0 && b.mantissa == 0) {
        return softfloat_zero(a.negative && b.negative);
    }

    /* Make `a` the operand with the larger (or equal) exponent. */
    if (a.exponent < b.exponent) {
        const softfloat_t tmp = a;
        a = b;
        b = tmp;
    }

    /* Reserve three low bits for guard, round and sticky. */
    a.mantissa <<= 3;
    b.mantissa <<= 3;

    /* Align `b` to `a`'s exponent, folding dropped bits into the sticky bit. */
    while (a.exponent > b.exponent) {
        b.mantissa = softfloat__shr1_sticky_u32(b.mantissa);
        b.exponent++;
    }

    /* After alignment the larger mantissa decides the sign of the result. */
    softfloat_t sum = softfloat_make(
        (a.mantissa >= b.mantissa) ? a.negative : b.negative, a.exponent, 0);
    if (a.negative == b.negative) {
        sum.mantissa = a.mantissa + b.mantissa;
    } else if (a.mantissa >= b.mantissa) {
        sum.mantissa = a.mantissa - b.mantissa;
    } else {
        sum.mantissa = b.mantissa - a.mantissa;
    }

    /* Renormalize upward after cancellation, stopping at the subnormal range. */
    while (sum.mantissa < (softfloat_HIDDEN_BIT << 3) &&
           sum.exponent > softfloat_EXP_MIN) {
        sum.mantissa <<= 1;
        sum.exponent--;
    }
    /* Renormalize downward after a carry out of the 24-bit mantissa. */
    while (sum.mantissa >= (softfloat_MANT24_ONE << 3)) {
        sum.mantissa = softfloat__shr1_sticky_u32(sum.mantissa);
        sum.exponent++;
    }

    /* Round to nearest, ties to even, using the three extra bits. */
    const softfloat_u32_t g = (sum.mantissa >> 2) & 1u;
    const softfloat_u32_t r = (sum.mantissa >> 1) & 1u;
    const softfloat_u32_t s = sum.mantissa & 1u;
    sum.mantissa >>= 3;
    if (g && (r || s || (sum.mantissa & 1u))) {
        sum.mantissa++;
        if (sum.mantissa == softfloat_MANT24_ONE) {
            sum.mantissa >>= 1;
            sum.exponent++;
        }
    }

    if (sum.exponent >= softfloat_EXP_SPECIAL)
        return softfloat_inf(sum.negative);
    if (sum.mantissa == 0)
        return softfloat_zero(false);
    return sum;
}
/**
 * @brief Subtract two values (lhs - rhs).
 *
 * @param lhs `softfloat_t` left operand.
 * @param rhs `softfloat_t` right operand.
 * @return Difference as `softfloat_t`.
 *
 * Implemented as `softfloat_add(lhs, -rhs)`: the sign of `rhs` is flipped and
 * all special-case handling and rounding are inherited from `softfloat_add`.
 *
 * Defined `static inline` so the header can be included from several
 * translation units without duplicate-symbol link errors.
 */
static inline softfloat_t softfloat_sub(softfloat_t lhs, softfloat_t rhs) {
    rhs.negative = !rhs.negative;
    return softfloat_add(lhs, rhs);
}
/**
 * @brief Multiply two values.
 *
 * @param lhs `softfloat_t` left operand.
 * @param rhs `softfloat_t` right operand.
 * @return Product as `softfloat_t`.
 *
 * Special cases:
 * - NaN propagates.
 * - inf * 0 => NaN.
 * - inf * finite => inf with sign = XOR of signs.
 * - 0 * finite => zero with sign = XOR of signs.
 * - Overflow past the largest finite exponent yields a signed infinity.
 *
 * Rounding: round-to-nearest, ties-to-even.
 *
 * The 48-bit product of the two 24-bit mantissas is built from four 12x12-bit
 * partial products so that every intermediate fits in 32 bits.
 *
 * Defined `static inline` so the header can be included from several
 * translation units without duplicate-symbol link errors.
 */
static inline softfloat_t softfloat_mul(softfloat_t lhs, softfloat_t rhs) {
    softfloat_t a = lhs;
    softfloat_t b = rhs;

    if (softfloat_isnan(a) || softfloat_isnan(b))
        return softfloat_nan(softfloat_MANT24_MASK);
    if (softfloat_isinf(a) || softfloat_isinf(b)) {
        if ((softfloat_isfinite(a) && a.mantissa == 0) ||
            (softfloat_isfinite(b) && b.mantissa == 0)) {
            return softfloat_nan(softfloat_MANT24_MASK);
        }
        return softfloat_inf(a.negative != b.negative);
    }
    if (a.mantissa == 0 || b.mantissa == 0)
        return softfloat_zero(a.negative != b.negative);

    /* +1 because `mantissa` below holds the product shifted right by 24, not 23. */
    int exponent = a.exponent + b.exponent + 1;
    const bool negative = (a.negative != b.negative);

    /* Split each mantissa into 12-bit halves and form the partial products. */
    const softfloat_u32_t a_hi = a.mantissa >> 12;
    const softfloat_u32_t a_lo = a.mantissa & ((1u << 12) - 1u);
    const softfloat_u32_t b_hi = b.mantissa >> 12;
    const softfloat_u32_t b_lo = b.mantissa & ((1u << 12) - 1u);
    const softfloat_u32_t hihi = a_hi * b_hi;
    const softfloat_u32_t hilo = a_hi * b_lo;
    const softfloat_u32_t lohi = a_lo * b_hi;
    const softfloat_u32_t lolo = a_lo * b_lo;

    /* `mantissa` = upper 24 bits of the product, `mantissa_low` = lower 24. */
    softfloat_u32_t mantissa_low =
        lolo +
        ((hilo & ((1u << 12) - 1u)) + (lohi & ((1u << 12) - 1u))) * (1u << 12);
    softfloat_u32_t mantissa =
        hihi + (hilo >> 12) + (lohi >> 12) + (mantissa_low >> 24);
    mantissa_low &= softfloat_MANT24_MASK;

    /* Normalize upward, pulling bits in from the low word. */
    while (mantissa < softfloat_HIDDEN_BIT && exponent > softfloat_EXP_MIN) {
        mantissa = (mantissa << 1) | (mantissa_low >> 23);
        mantissa_low = (mantissa_low << 1) & softfloat_MANT24_MASK;
        exponent--;
    }
    /* Denormalize into the subnormal range, keeping dropped bits sticky. */
    while (exponent < softfloat_EXP_MIN) {
        const softfloat_u32_t lost = mantissa_low & 1u;
        mantissa_low = (mantissa_low + ((mantissa & 1u) << 24)) >> 1;
        mantissa_low |= lost;
        mantissa >>= 1;
        exponent++;
    }

    /* Bit 23 of the low word is the round bit; bits 22..0 act as sticky. */
    if ((mantissa_low >> 23) != 0 &&
        ((mantissa_low & ((1u << 23) - 1u)) != 0 || (mantissa & 1u) != 0)) {
        mantissa++;
        if (mantissa == softfloat_MANT24_ONE) {
            mantissa >>= 1;
            exponent++;
        }
    }

    if (exponent >= softfloat_EXP_SPECIAL)
        return softfloat_inf(negative);
    return softfloat_make(negative, exponent, mantissa);
}
/**
 * @brief Divide two values (a / b).
 *
 * @param a `softfloat_t` numerator.
 * @param b `softfloat_t` denominator.
 * @return Quotient as `softfloat_t`.
 *
 * Special cases:
 * - NaN propagates.
 * - inf / inf => NaN.
 * - 0 / 0 => NaN.
 * - finite / inf => signed zero.
 * - inf / finite => inf (with sign = XOR of signs).
 * - finite / 0 => inf (with sign = XOR of signs).
 * - 0 / finite => signed zero.
 * - Overflow past the largest finite exponent yields a signed infinity.
 *
 * Rounding: round-to-nearest, ties-to-even. When the quotient falls into the
 * subnormal range, the bits shifted out of the quotient are tracked as
 * guard/sticky bits together with the division remainder, so results just
 * above a half-way point round up correctly (e.g. bit patterns
 * 0x00800003 / 0x4B800002 give 0x00000001, not 0).
 *
 * Defined `static inline` so the header can be included from several
 * translation units without duplicate-symbol link errors.
 */
static inline softfloat_t softfloat_div(softfloat_t a, softfloat_t b) {
    if (softfloat_isnan(a) || softfloat_isnan(b) ||
        (softfloat_isinf(a) && softfloat_isinf(b)) ||
        (softfloat_isfinite(a) && softfloat_isfinite(b) && a.mantissa == 0 &&
         b.mantissa == 0)) {
        return softfloat_nan(softfloat_MANT24_MASK);
    }

    const bool negative = (a.negative != b.negative);
    if (softfloat_isinf(b))
        return softfloat_zero(negative);
    if (softfloat_isinf(a))
        return softfloat_inf(negative);
    if (b.mantissa == 0)
        return softfloat_inf(negative);
    if (a.mantissa == 0)
        return softfloat_zero(negative);

    /* Integer part of the mantissa quotient; the rest comes bit by bit. */
    softfloat_u32_t mantissa = a.mantissa / b.mantissa;
    softfloat_u32_t remainder = a.mantissa % b.mantissa;
    int exponent = a.exponent - b.exponent + softfloat_MANT_BITS;

    /* Long division: one more quotient bit per step until normalized. */
    while (mantissa < softfloat_HIDDEN_BIT && exponent > softfloat_EXP_MIN) {
        remainder <<= 1;
        mantissa = (mantissa << 1) + (remainder / b.mantissa);
        remainder %= b.mantissa;
        exponent--;
    }

    bool round_up;
    if (exponent < softfloat_EXP_MIN) {
        /*
         * Subnormal result: shift the quotient down. `guard` is the last bit
         * shifted out; `sticky` records every less significant discarded
         * bit, including a non-zero division remainder.
         */
        softfloat_u32_t guard = 0u;
        softfloat_u32_t sticky = (remainder != 0u) ? 1u : 0u;
        do {
            sticky |= guard;
            guard = mantissa & 1u;
            mantissa >>= 1;
            exponent++;
        } while (exponent < softfloat_EXP_MIN);
        round_up = (guard != 0u) && (sticky != 0u || (mantissa & 1u) != 0u);
    } else {
        /* No bits were dropped: compare twice the remainder with the divisor. */
        const softfloat_u32_t twice = remainder << 1;
        round_up = (twice > b.mantissa) ||
                   (twice == b.mantissa && (mantissa & 1u) != 0u);
    }

    if (round_up) {
        mantissa++;
        if (mantissa == softfloat_MANT24_ONE) {
            mantissa >>= 1;
            exponent++;
        }
    }

    if (exponent >= softfloat_EXP_SPECIAL)
        return softfloat_inf(negative);
    return softfloat_make(negative, exponent, mantissa);
}
#endif /* softfloat_H */
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment