Skip to content

Instantly share code, notes, and snippets.

@mlt
Last active November 4, 2025 19:50
Show Gist options
  • Select an option

  • Save mlt/2713e66ddc2297b581459bab909b7967 to your computer and use it in GitHub Desktop.

Select an option

Save mlt/2713e66ddc2297b581459bab909b7967 to your computer and use it in GitHub Desktop.
Benchmark of different ways to convert an Allegro 4 int color to an Allegro 5 ALLEGRO_COLOR, to be considered for allegro4-to-5
#include <stdio.h>
#include <time.h>
#include <allegro.h>
#include <immintrin.h>
/*
MSVC compiler flags used:
/Oy /O2 /Ot /Oi /Ob2 /Gy /arch:AVX /fp:fast
Elapsed time for a5color_original: 88.65 seconds
Elapsed time for a5color_manual: 81.68 seconds
Elapsed time for a5color_manual2: 73.42 seconds
Elapsed time for a5color_blank: 12.96 seconds
Elapsed time for a5color_avx: 24.57 seconds
Elapsed time for a5color_avx2: 22.08 seconds
We subtract the blank time from every measurement to get net times, then normalize with respect to the original (formal) conversion, a5color_original = 100%:
Elapsed time for a5color_original 88.65 seconds 75.69 100%
Elapsed time for a5color_manual 81.68 seconds 68.72 91%
Elapsed time for a5color_manual2 73.42 seconds 60.46 80%
Elapsed time for a5color_blank 12.96 seconds 0 0%
Elapsed time for a5color_avx 24.57 seconds 11.61 15%
Elapsed time for a5color_avx2 22.08 seconds 9.12 12%
*/
/* Control mask handed to _mm_shuffle_epi8 by the SIMD-based conversion code.
   It has no initializer here; main() assigns it before running any benchmark. */
static __m128i rgb_shifts;
/* Common signature of every converter under test: takes a packed int color
   and returns an ALLEGRO_COLOR. Used by benchmark() to call them uniformly. */
typedef ALLEGRO_COLOR (*func) (int a4color);
/*
 * Reference conversion: unpack the channels with getr32/getg32/getb32 and
 * pass them to al_map_rgb.
 *
 * a4color: packed int color.
 * Returns an all-zero ALLEGRO_COLOR when a4color equals MASK_COLOR_32,
 * otherwise whatever al_map_rgb produces for the unpacked channels.
 */
static ALLEGRO_COLOR a5color_original(const int a4color)
{
    if (a4color == MASK_COLOR_32) {
        return (ALLEGRO_COLOR) { 0 };
    }
    return al_map_rgb(getr32(a4color), getg32(a4color), getb32(a4color));
}
/*
 * Variant that extracts the three channel bytes with shifts and masks
 * instead of the getX32 helpers, then calls al_map_rgb.
 *
 * a4color: packed int color.
 * Returns an all-zero ALLEGRO_COLOR when a4color equals MASK_COLOR_32.
 */
static ALLEGRO_COLOR a5color_manual(const int a4color)
{
    if (a4color == MASK_COLOR_32) {
        return (ALLEGRO_COLOR) { 0 };
    }

    /* this is incorrect for Windows but does not matter for a test */
    const int low_byte = a4color & 255;
    const int mid_byte = (a4color >> 8) & 255;
    const int high_byte = (a4color >> 16) & 255;

    /* Bytes are passed in low-to-high order as the r, g, b arguments. */
    return al_map_rgb(low_byte, mid_byte, high_byte);
}
static ALLEGRO_COLOR a5color_manual2(const int a4color)
{
return a4color != MASK_COLOR_32 ?
/* this is incorrect for Windows but does not matter for a test */
(ALLEGRO_COLOR) { (a4color & 255)/255.0f, ((a4color >> 8) & 255)/255.0f, ((a4color >> 16) & 255)/255.0f, 1.0f }
: (ALLEGRO_COLOR) { 0 };
}
static ALLEGRO_COLOR a5color_avx(const int a4color)
{
if (a4color == MASK_COLOR_32)
return (ALLEGRO_COLOR) { 0 };
ALLEGRO_COLOR ret;
__m128i ci = _mm_shuffle_epi8(_mm_set_epi32(0, 0, 0x000000FF, a4color), rgb_shifts);
__m128 c = _mm_cvtepi32_ps(_mm_cvtepu8_epi32(ci));
__m128 f = _mm_mul_ps(c, _mm_set1_ps(1.0 / 255.0f));
_mm_storeu_ps((float *)&ret, f);
return ret;
}
static ALLEGRO_COLOR a5color_avx2(int a4color)
{
if (a4color == MASK_COLOR_32)
return (ALLEGRO_COLOR) { 0 };
a4color |= 0xFF000000;
ALLEGRO_COLOR ret;
/* we don't adjust shifts for this case, but for the sake of a test, it does not matter */
__m128i ci = _mm_shuffle_epi8(_mm_loadu_si32(&a4color), rgb_shifts);
__m128 c = _mm_cvtepi32_ps(_mm_cvtepu8_epi32(ci));
__m128 f = _mm_mul_ps(c, _mm_set1_ps(1.0 / 255.0f));
_mm_storeu_ps((float *)&ret, f);
return ret;
}
/*
 * Baseline "converter" that does no work: it ignores its argument and
 * returns an all-zero ALLEGRO_COLOR. Its timing captures the cost of the
 * call and loop alone.
 */
static ALLEGRO_COLOR a5color_blank(const int a4color)
{
    (void)a4color; /* intentionally unused */
    const ALLEGRO_COLOR zeroed = { 0 };
    return zeroed;
}
/* Run benchmark() on a converter, using its identifier as the printed label. */
#define BENCHMARK(f) benchmark(f, #f)

/*
 * Time a converter with clock() and print the elapsed seconds to stdout.
 *
 * f:    converter to exercise; its return values are discarded.
 * name: label inserted into the printed line.
 *
 * Each round makes the same ten calls in the same order. The calls are kept
 * spelled out rather than folded into an inner loop, so the shape of the
 * measured code stays as it was.
 */
static void benchmark(func f, const char *name)
{
    enum { ROUNDS = 1000000000 };

    const clock_t began = clock();
    for (long long round = 0; round < ROUNDS; round++) {
        /* A call with 0x00ff00ff is present but disabled: */
        // f(0x00ff00ff);
        f(0x000000ff);
        f(0x0000ff00);
        f(0x00ff0000);
        f(0x00000000);
        f(0x00000000);
        f(0x000000ff);
        f(0x0000ff00);
        f(0x00ff0000);
        f(0x00000000);
        f(0x00000000);
    }
    const clock_t finished = clock();

    const clock_t ticks = finished - began;
    const double seconds = (double)ticks / CLOCKS_PER_SEC;
    printf("Elapsed time for %s: %.02f seconds\n", name, seconds);
}
/* Optimization is switched off around main() via the optimize pragma and
   restored afterwards. */
#pragma optimize( "", off )
/*
 * Entry point: install the shuffle mask used by the SIMD converters, then
 * time every converter in turn. Always returns 0.
 */
int main()
{
    /* A shuffle-control byte with its top bit set makes _mm_shuffle_epi8
       write zero to that destination byte. */
    enum { ZERO_LANE = 0x80 };

    /* Destination bytes 0..3 take source bytes 2, 1, 0 and 4; the remaining
       twelve bytes are cleared. */
    rgb_shifts = _mm_setr_epi8(2, 1, 0, 4,
                               ZERO_LANE, ZERO_LANE, ZERO_LANE, ZERO_LANE,
                               ZERO_LANE, ZERO_LANE, ZERO_LANE, ZERO_LANE,
                               ZERO_LANE, ZERO_LANE, ZERO_LANE, ZERO_LANE);

    BENCHMARK(a5color_original);
    BENCHMARK(a5color_manual);
    BENCHMARK(a5color_manual2);
    BENCHMARK(a5color_blank);
    BENCHMARK(a5color_avx);
    BENCHMARK(a5color_avx2);

    return 0;
}
#pragma optimize( "", on )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment