Created
September 21, 2024 21:27
-
-
Save diaphragm-workplace/6b73bb65b888d72c966839d43f295a3f to your computer and use it in GitHub Desktop.
float16 golang test
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package main | |
| /* | |
| #cgo CFLAGS: -march=native -mtune=native -Ofast -flto | |
| #cgo LDFLAGS: -march=native -mtune=native -Ofast | |
| #include <stdint.h> | |
| // Optimized out into a single vector instruction on x86 compatibles and aarch64 | |
| void float32_to_float16( | |
| float a, float b, float c, float d, | |
| float e, float f, float g, float h, | |
| uint16_t* res_a, uint16_t* res_b, uint16_t* res_c, uint16_t* res_d, | |
| uint16_t* res_e, uint16_t* res_f, uint16_t* res_g, uint16_t* res_h) | |
| { | |
| _Float16 tmp_a = (_Float16)a; | |
| _Float16 tmp_b = (_Float16)b; | |
| _Float16 tmp_c = (_Float16)c; | |
| _Float16 tmp_d = (_Float16)d; | |
| _Float16 tmp_e = (_Float16)e; | |
| _Float16 tmp_f = (_Float16)f; | |
| _Float16 tmp_g = (_Float16)g; | |
| _Float16 tmp_h = (_Float16)h; | |
| *res_a = *(uint16_t*)&tmp_a; | |
| *res_b = *(uint16_t*)&tmp_b; | |
| *res_c = *(uint16_t*)&tmp_c; | |
| *res_d = *(uint16_t*)&tmp_d; | |
| *res_e = *(uint16_t*)&tmp_e; | |
| *res_f = *(uint16_t*)&tmp_f; | |
| *res_g = *(uint16_t*)&tmp_g; | |
| *res_h = *(uint16_t*)&tmp_h; | |
| } | |
| void float16_to_float32( | |
| uint16_t a, uint16_t b, uint16_t c, uint16_t d, | |
| uint16_t e, uint16_t f, uint16_t g, uint16_t h, | |
| float* res_a, float* res_b, float* res_c, float* res_d, | |
| float* res_e, float* res_f, float* res_g, float* res_h) | |
| { | |
| _Float16 tmp_a = *(_Float16*)&a; | |
| _Float16 tmp_b = *(_Float16*)&b; | |
| _Float16 tmp_c = *(_Float16*)&c; | |
| _Float16 tmp_d = *(_Float16*)&d; | |
| _Float16 tmp_e = *(_Float16*)&e; | |
| _Float16 tmp_f = *(_Float16*)&f; | |
| _Float16 tmp_g = *(_Float16*)&g; | |
| _Float16 tmp_h = *(_Float16*)&h; | |
| *res_a = (float)tmp_a; | |
| *res_b = (float)tmp_b; | |
| *res_c = (float)tmp_c; | |
| *res_d = (float)tmp_d; | |
| *res_e = (float)tmp_e; | |
| *res_f = (float)tmp_f; | |
| *res_g = (float)tmp_g; | |
| *res_h = (float)tmp_h; | |
| } | |
| */ | |
| import "C" | |
| // No need to import the entire fmt and math packages just to prove a point... | |
| // Not a big performance impact, just me hating myself. | |
| import "os" | |
| import "time" | |
| import "strconv" | |
| // Software implementation for comparison, see comment near the end | |
| // Type defined by this package is also equivalent to uint16 | |
| import "github.com/x448/float16" | |
// Untyped mathematical constants used as sample inputs for the conversions.
// They are declared locally so the math package does not have to be imported.
const (
	// E is Euler's number, the base of the natural logarithm.
	E = 2.71828182845904523536028747135266249775724709369995957496696763

	// Pi is the ratio of a circle's circumference to its diameter.
	Pi = 3.14159265358979323846264338327950288419716939937510582097494459

	// Phi is the golden ratio.
	Phi = 1.61803398874989484820458683436563811772030917980576286213544862

	// Sqrt2 is the square root of 2.
	Sqrt2 = 1.41421356237309504880168872420969807856967187537694807317667974

	// SqrtE is the square root of E.
	SqrtE = 1.64872127070012814684865078781416357165377610071014801157507931

	// SqrtPi is the square root of Pi.
	SqrtPi = 1.77245385090551602729816748334114518279754945612238712821380779

	// SqrtPhi is the square root of Phi.
	SqrtPhi = 1.27201964951406896425242246173749149171560804184009624861664038

	// Ln2 is the natural logarithm of 2.
	Ln2 = 0.693147180559945309417232121458176568075500134360255254120680009
)
// log writes message followed by a newline to standard output.
func log(message string) {
	line := message + "\n"
	// Best-effort output: a failed write to stdout is deliberately ignored.
	_, _ = os.Stdout.WriteString(line)
}
| func printSince(a time.Time, name string) { | |
| log(name + " took " + time.Since(a).String()) | |
| } | |
// fF32 formats a in plain decimal notation (no exponent) using the fewest
// digits that still identify the float32 value uniquely.
func fF32(a float32) string {
	digits := strconv.AppendFloat(nil, float64(a), 'f', -1, 32)
	return string(digits)
}
| func format8float32(a, b, c, d, e, f, g, h float32) string { | |
| return (fF32(a) + " " + fF32(b) + " " + fF32(c) + " " + fF32(d) + " " + fF32(e) + " " + fF32(f) + " " + fF32(g) + " " + fF32(h)) | |
| } | |
| func f32tof16( | |
| a, b, c, d, e, f, g, h float32, | |
| resA, resB, resC, resD, resE, resF, resG, resH *uint16) { | |
| // I *will* regret this later. | |
| defer printSince(time.Now(), "f32tof16") | |
| C.float32_to_float16( | |
| C.float(a), C.float(b), C.float(c), C.float(d), | |
| C.float(e), C.float(f), C.float(g), C.float(h), | |
| (*C.uint16_t)(resA), (*C.uint16_t)(resB), (*C.uint16_t)(resC), (*C.uint16_t)(resD), | |
| (*C.uint16_t)(resE), (*C.uint16_t)(resF), (*C.uint16_t)(resG), (*C.uint16_t)(resH), | |
| ) | |
| } | |
| func f16tof32( | |
| a, b, c, d, e, f, g, h uint16, | |
| resA, resB, resC, resD, resE, resF, resG, resH *float32) { | |
| defer printSince(time.Now(), "f16tof32") | |
| C.float16_to_float32( | |
| C.uint16_t(a), C.uint16_t(b), C.uint16_t(c), C.uint16_t(d), | |
| C.uint16_t(e), C.uint16_t(f), C.uint16_t(g), C.uint16_t(h), | |
| (*C.float)(resA), (*C.float)(resB), (*C.float)(resC), (*C.float)(resD), | |
| (*C.float)(resE), (*C.float)(resF), (*C.float)(resG), (*C.float)(resH), | |
| ) | |
| } | |
| func f16tof32_without_log( | |
| a, b, c, d, e, f, g, h uint16, | |
| resA, resB, resC, resD, resE, resF, resG, resH *float32) { | |
| C.float16_to_float32( | |
| C.uint16_t(a), C.uint16_t(b), C.uint16_t(c), C.uint16_t(d), | |
| C.uint16_t(e), C.uint16_t(f), C.uint16_t(g), C.uint16_t(h), | |
| (*C.float)(resA), (*C.float)(resB), (*C.float)(resC), (*C.float)(resD), | |
| (*C.float)(resE), (*C.float)(resF), (*C.float)(resG), (*C.float)(resH), | |
| ) | |
| } | |
| func main() { | |
| var f16f32 [65536]float32 | |
| a, b, c, d, e, f, g, h := new(uint16), new(uint16), new(uint16), new(uint16), new(uint16), new(uint16), new(uint16), new(uint16) | |
| i, j, k, l, m, n, o, p := new(float32), new(float32), new(float32), new(float32), new(float32), new(float32), new(float32), new(float32) | |
| // originally used *uint16, sigsegv, fastest solution is this | |
| log("Originals: " + format8float32(E, Pi, Phi, Sqrt2, SqrtE, SqrtPi, SqrtPhi, Ln2)) | |
| f32tof16(E, Pi, Phi, Sqrt2, SqrtE, SqrtPi, SqrtPhi, Ln2, | |
| a, b, c, d, e, f, g, h) | |
| f16tof32(*a, *b, *c, *d, *e, *f, *g, *h, i, j, k, l, m, n, o, p) | |
| // That's a lotta arguements! And barely readable. | |
| // It's worth to demonstrate the hypothetical performance benefit of having this implemented on language level and a loop getting optimized this way, however. | |
| // I am not sure if it's spelled optimized or optimised, and at this point I am too afraid to ask... | |
| log("Converted: " + format8float32(*i, *j, *k, *l, *m, *n, *o, *p) + | |
| "\n Making a lookup table using it (also demostrates the speed of looping through 65535 values)...") | |
| func() { | |
| defer printSince(time.Now(), "Generation of little bobby tables") | |
| ti, tj, tk, tl, tm, tn, to, tp := new(float32), new(float32), new(float32), new(float32), new(float32), new(float32), new(float32), new(float32) | |
| past_one := false | |
| for c := uint16(0); c != 0 || !past_one; c += 8 { | |
| past_one = true | |
| // Gawd, this looks so wrong... | |
| f16tof32_without_log(c, c+1, c+2, c+3, c+4, c+5, c+6, c+7, ti, tj, tk, tl, tm, tn, to, tp) | |
| f16f32[c], f16f32[c+1], f16f32[c+2], f16f32[c+3], f16f32[c+4], f16f32[c+5], f16f32[c+6], f16f32[c+7] = *ti, *tj, *tk, *tl, *tm, *tn, *to, *tp | |
| //log(strconv.FormatUint(uint64(c), 10)) // the debug | |
| } | |
| }() | |
| // I am too tired to scroll back and make those into actual functions... | |
| /* | |
| func() { | |
| var tmp_str string | |
| for i, j := range f16f32 { | |
| tmp_str += strconv.FormatUint(uint64(i), 10) + ": " + fF32(j) | |
| log(strconv.FormatUint(uint64(i), 10)) // the debug | |
| } | |
| log(tmp_str) | |
| }() | |
| */ // was too slow | |
| log("Originals: " + format8float32(E, Pi, Phi, Sqrt2, SqrtE, SqrtPi, SqrtPhi, Ln2)) | |
| // At this point I don't know what I am doing anymore... | |
| func() { | |
| defer printSince(time.Now(), "float32 >> float16 in software") | |
| // Nvm, I am stupid. I can't just lookup backwards (unless the float32 rounds up to a float16). | |
| // I used someone else's module. | |
| var ta, tb, tc, td, te, tf, tg, th uint16 | |
| ta, tb, tc, td, te, tf, tg, th = uint16(float16.Fromfloat32(*i)), uint16(float16.Fromfloat32(*j)), uint16(float16.Fromfloat32(*k)), uint16(float16.Fromfloat32(*l)), uint16(float16.Fromfloat32(*m)), uint16(float16.Fromfloat32(*n)), uint16(float16.Fromfloat32(*o)), uint16(float16.Fromfloat32(*p)) | |
| a, b, c, d, e, f, g, h = &ta, &tb, &tc, &td, &te, &tf, &tg, &th | |
| }() | |
| // Kill me. | |
| func() { | |
| defer printSince(time.Now(), "Lookup of float16 >> float32") | |
| i, j, k, l, m, n, o, p = &f16f32[*a], &f16f32[*b], &f16f32[*c], &f16f32[*d], &f16f32[*e], &f16f32[*f], &f16f32[*g], &f16f32[*h] | |
| }() | |
| log("Converted: " + format8float32(*i, *j, *k, *l, *m, *n, *o, *p)) | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment