Last active
November 4, 2025 19:50
-
-
Save mlt/2713e66ddc2297b581459bab909b7967 to your computer and use it in GitHub Desktop.
Benchmark of different ways to convert Allegro 4 int color to ALLEGRO_COLOR in Allegro5 to be considered for allegro4-to-5
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <stdio.h> | |
| #include <allegro.h> | |
| #include <immintrin.h> | |
| /* | |
| MSVC compiler flags used: | |
| /Oy /O2 /Ot /Oi /Ob2 /Gy /arch:AVX /fp:fast | |
| Elapsed time for a5color_original: 88.65 seconds | |
| Elapsed time for a5color_manual: 81.68 seconds | |
| Elapsed time for a5color_manual2: 73.42 seconds | |
| Elapsed time for a5color_blank: 12.96 seconds | |
| Elapsed time for a5color_avx: 24.57 seconds | |
| Elapsed time for a5color_avx2: 22.08 seconds | |
| We subtract the blank time from all times to get net times, then normalize them with respect to the original conversion (a5color_original = 100%): | |
| Elapsed time for a5color_original 88.65 seconds 75.69 100% | |
| Elapsed time for a5color_manual 81.68 seconds 68.72 91% | |
| Elapsed time for a5color_manual2 73.42 seconds 60.46 80% | |
| Elapsed time for a5color_blank 12.96 seconds 0 0% | |
| Elapsed time for a5color_avx 24.57 seconds 11.61 15% | |
| Elapsed time for a5color_avx2 22.08 seconds 9.12 12% | |
| */ | |
| static __m128i rgb_shifts; | |
| typedef ALLEGRO_COLOR (*func) (int a4color); | |
| static ALLEGRO_COLOR a5color_original(const int a4color) | |
| { | |
| return a4color != MASK_COLOR_32 ? | |
| al_map_rgb(getr32(a4color), getg32(a4color), getb32(a4color)) : | |
| (ALLEGRO_COLOR) { 0 }; | |
| } | |
| static ALLEGRO_COLOR a5color_manual(const int a4color) | |
| { | |
| return a4color != MASK_COLOR_32 ? | |
| /* this is incorrect for Windows but does not matter for a test */ | |
| al_map_rgb(a4color & 255, (a4color >> 8) & 255, (a4color >> 16) & 255) : | |
| (ALLEGRO_COLOR) { | |
| 0 | |
| }; | |
| } | |
| static ALLEGRO_COLOR a5color_manual2(const int a4color) | |
| { | |
| return a4color != MASK_COLOR_32 ? | |
| /* this is incorrect for Windows but does not matter for a test */ | |
| (ALLEGRO_COLOR) { (a4color & 255)/255.0f, ((a4color >> 8) & 255)/255.0f, ((a4color >> 16) & 255)/255.0f, 1.0f } | |
| : (ALLEGRO_COLOR) { 0 }; | |
| } | |
| static ALLEGRO_COLOR a5color_avx(const int a4color) | |
| { | |
| if (a4color == MASK_COLOR_32) | |
| return (ALLEGRO_COLOR) { 0 }; | |
| ALLEGRO_COLOR ret; | |
| __m128i ci = _mm_shuffle_epi8(_mm_set_epi32(0, 0, 0x000000FF, a4color), rgb_shifts); | |
| __m128 c = _mm_cvtepi32_ps(_mm_cvtepu8_epi32(ci)); | |
| __m128 f = _mm_mul_ps(c, _mm_set1_ps(1.0 / 255.0f)); | |
| _mm_storeu_ps((float *)&ret, f); | |
| return ret; | |
| } | |
| static ALLEGRO_COLOR a5color_avx2(int a4color) | |
| { | |
| if (a4color == MASK_COLOR_32) | |
| return (ALLEGRO_COLOR) { 0 }; | |
| a4color |= 0xFF000000; | |
| ALLEGRO_COLOR ret; | |
| /* we don't adjust shifts for this case, but for the sake of a test, it does not matter */ | |
| __m128i ci = _mm_shuffle_epi8(_mm_loadu_si32(&a4color), rgb_shifts); | |
| __m128 c = _mm_cvtepi32_ps(_mm_cvtepu8_epi32(ci)); | |
| __m128 f = _mm_mul_ps(c, _mm_set1_ps(1.0 / 255.0f)); | |
| _mm_storeu_ps((float *)&ret, f); | |
| return ret; | |
| } | |
| static ALLEGRO_COLOR a5color_blank(const int a4color) | |
| { | |
| return (ALLEGRO_COLOR) { 0 }; | |
| } | |
| #define BENCHMARK(f) benchmark(f, #f) | |
| static void benchmark(func f, const char *name) { | |
| clock_t start_time = clock(); | |
| for (long long i = 0; i < 1000000000; i++) { | |
| // f(0x00ff00ff); | |
| f(0x000000ff); | |
| f(0x0000ff00); | |
| f(0x00ff0000); | |
| f(0x00000000); | |
| f(0x00000000); | |
| f(0x000000ff); | |
| f(0x0000ff00); | |
| f(0x00ff0000); | |
| f(0x00000000); | |
| f(0x00000000); | |
| } | |
| clock_t end_time = clock(); | |
| double elapsed_time_seconds = (double)(end_time - start_time) / CLOCKS_PER_SEC; | |
| printf("Elapsed time for %s: %.02f seconds\n", name, elapsed_time_seconds); | |
| } | |
| #pragma optimize( "", off ) | |
| int main() { | |
| rgb_shifts = _mm_setr_epi8(2, 1, 0, 4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); | |
| BENCHMARK(a5color_original); | |
| BENCHMARK(a5color_manual); | |
| BENCHMARK(a5color_manual2); | |
| BENCHMARK(a5color_blank); | |
| BENCHMARK(a5color_avx); | |
| BENCHMARK(a5color_avx2); | |
| return 0; | |
| } | |
| #pragma optimize( "", on ) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment