Last active
November 5, 2025 22:47
-
-
Save vurtun/6a4f284f75e3133586b04b6692dac0fd to your computer and use it in GitHub Desktop.
animation sampling
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include <immintrin.h> // AVX, includes SSE2-SSE4.2 | |
| #include <assert.h> | |
| #include <math.h> | |
| #include <stdint.h> | |
| #include <pthread.h> | |
| #include <stdlib.h> | |
| #include <stdio.h> | |
| #define MAX_ANIM 1024 | |
| #define MAX_ANM_CLIPS 256 | |
| #define BATCH_SIZE 4 | |
| #define MAX_JOINTS 256 | |
| #define ANIM_TRK_ELM_CNT 7 // x, y, z, scl, rot_x, rot_y, rot_z | |
| #define align32 __attribute__((aligned(32))) | |
/* Animation track kinds. The values index the per-track `off` / `scl`
 * dequantization arrays of struct anm_clip. */
enum anm_trks {
  ANM_TRK_POS, /* translation: 3 components (x, y, z) */
  ANM_TRK_ROT, /* rotation: one quaternion packed into 32 bits */
  ANM_TRK_SCL, /* scale: 1 component (uniform scale) */
  ANM_TRK_CNT  /* number of track kinds */
};
| struct anm_clip { | |
| int joint_cnt; | |
| int frame_cnt; // Total number of frames | |
| float off[ANM_TRK_CNT]; // Quantization offset factors for pos, rot, scl | |
| float scl[ANM_TRK_CNT]; // Quantization scaling factors for pos, rot, scl | |
| struct { | |
| align32 unsigned short const *pos_x; | |
| align32 unsigned short const *pos_y; | |
| align32 unsigned short const *pos_z; | |
| align32 unsigned const *rot_pos; | |
| align32 unsigned short const *scl_s; | |
| } keys; | |
| struct { | |
| align32 unsigned short const *frame_to_key_pos; | |
| align32 unsigned short const *frame_to_key_rot; | |
| align32 unsigned short const *frame_to_key_scl; | |
| } blks; | |
| }; | |
| struct anm_cache { | |
| align32 float pos_x[2 * MAX_JOINTS]; | |
| align32 float pos_y[2 * MAX_JOINTS]; | |
| align32 float pos_z[2 * MAX_JOINTS]; | |
| align32 float scl_s[2 * MAX_JOINTS]; | |
| align32 float rot_x[2 * MAX_JOINTS]; | |
| align32 float rot_y[2 * MAX_JOINTS]; | |
| align32 float rot_z[2 * MAX_JOINTS]; | |
| align32 float rot_w[2 * MAX_JOINTS]; | |
| int cached_anim; | |
| int cached_frame; | |
| int cached_next; | |
| int prev_slot; | |
| }; | |
| struct anm_sys { | |
| short free_idx_cnt; | |
| short free_idx[MAX_ANIM]; | |
| align32 int anim[MAX_ANIM]; | |
| align32 float exact_frame[MAX_ANIM]; | |
| align32 int frame[MAX_ANIM]; | |
| align32 int frame_nxt[MAX_ANIM]; | |
| align32 int frame_cnt[MAX_ANIM]; | |
| align32 float t_0[MAX_ANIM]; | |
| align32 float t_1[MAX_ANIM]; | |
| struct anm_cache caches[MAX_ANIM]; | |
| }; | |
| static struct anm_sys anm_sys; | |
/* Identifiers of the built-in clips; each value indexes the anm_clips table. */
enum anm_clip_id {
  ANM_CLIP_DEFAULT, /* the built-in single-frame default clip */
  ANM_CLIP_CNT,     /* number of clips */
};
| static const align32 unsigned short anm_def_pos_x[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; | |
| static const align32 unsigned short anm_def_pos_y[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; | |
| static const align32 unsigned short anm_def_pos_z[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; | |
| static const align32 unsigned anm_def_rot[] = { | |
| 0xc0000000, 0xc0000000, 0xc0000000, 0xc0000000, | |
| 0xc0000000, 0xc0000000, 0xc0000000, 0xc0000000 | |
| }; | |
| static const align32 unsigned short anm_def_scl[] = {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}; | |
| static const align32 unsigned short anm_def_frame_to_key_pos[] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; | |
| static const align32 unsigned short anm_def_frame_to_key_rot[] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; | |
| static const align32 unsigned short anm_def_frame_to_key_scl[] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; | |
| static const struct anm_clip anm_def_clip = { | |
| .joint_cnt = 8, | |
| .frame_cnt = 1, | |
| .off = {0.0f, 0.0f, 0.0f}, | |
| .scl = {1.0f, 1.0f, 1.0f}, | |
| .keys = { | |
| .pos_x = anm_def_pos_x, | |
| .pos_y = anm_def_pos_y, | |
| .pos_z = anm_def_pos_z, | |
| .rot_pos = anm_def_rot, | |
| .scl_s = anm_def_scl, | |
| }, | |
| .blks = { | |
| .frame_to_key_pos = anm_def_frame_to_key_pos, | |
| .frame_to_key_rot = anm_def_frame_to_key_rot, | |
| .frame_to_key_scl = anm_def_frame_to_key_scl, | |
| }, | |
| }; | |
| static const struct anm_clip *anm_clips[ANM_CLIP_CNT] = { | |
| [ANM_CLIP_DEFAULT] = &anm_def_clip, | |
| }; | |
| extern void | |
| anm_init(struct anm_sys *ans) { | |
| ans->free_idx_cnt = MAX_ANIM; | |
| for (int i = 0; i < MAX_ANIM; ++i) { | |
| ans->free_idx[i] = MAX_ANIM - i - 1; | |
| } | |
| for (int i = 0; i < MAX_ANM_CLIPS; ++i) { | |
| ans->caches[i].cached_anim = -1; | |
| } | |
| } | |
| extern int | |
| anm_add(struct anm_sys *ans, enum anm_clip_id clip_id) { | |
| const struct anm_clip *clip = anm_clips[clip_id]; | |
| int anm = ans->free_idx[--ans->free_idx_cnt]; | |
| ans->anim[anm] = clip_id; | |
| ans->frame[anm] = 0; | |
| ans->frame_nxt[anm] = 1; | |
| ans->exact_frame[anm] = 0.0f; | |
| ans->frame_cnt[anm] = clip->frame_cnt; | |
| ans->t_0[anm] = 0.0f; | |
| ans->t_1[anm] = 0.0f; | |
| return anm; | |
| } | |
| extern void | |
| anm_del(struct anm_sys *ans, int anm) { | |
| ans->free_idx[ans->free_idx_cnt++] = anm; | |
| ans->anim[anm] = 0; | |
| ans->frame[anm] = 0; | |
| ans->frame_nxt[anm] = 0; | |
| ans->exact_frame[anm] = 0.0f; | |
| ans->frame_cnt[anm] = 0; | |
| ans->t_0[anm] = 0.0f; | |
| ans->t_1[anm] = 0.0f; | |
| } | |
/*
 * Decode one 10-bit field of eight packed quaternions.
 *
 * bits: per lane, the field in bits 0..9 (9-bit magnitude in bits 0..8,
 *       sign flag in bit 9); higher bits are ignored.
 * mag:  receives the unsigned magnitude, scaled to [0, 1/sqrt(2)].
 *
 * Returns the signed component value.
 */
static inline __m256
qdec_field_avx(__m256i bits, __m256 *mag) {
  const __m256 scale = _mm256_mul_ps(_mm256_set1_ps(1.0f / 511.0f),
                                     _mm256_set1_ps(0.707106781f));
  const __m256i raw = _mm256_and_si256(bits, _mm256_set1_epi32(511));
  const __m256 pos = _mm256_mul_ps(_mm256_cvtepi32_ps(raw), scale);
  /* _mm256_blendv_ps selects on bit 31 of each mask lane only, so the sign
   * flag (bit 9) has to be shifted up into bit 31. Converting the flag to
   * 0.0f / 1.0f would leave bit 31 clear and never negate anything. */
  const __m256 neg = _mm256_castsi256_ps(_mm256_slli_epi32(bits, 22));
  *mag = pos;
  return _mm256_blendv_ps(pos, _mm256_sub_ps(_mm256_setzero_ps(), pos), neg);
}

/*
 * Decode eight "smallest three" packed quaternions.
 *
 * Layout of each 32-bit word: bits 30..31 hold the index (0=x, 1=y, 2=z, 3=w)
 * of the component that was dropped; bits 0..9, 10..19 and 20..29 hold the
 * other three components as 10-bit sign/magnitude fields. The dropped
 * component is rebuilt as sqrt(max(1 - sum of squares, 0)) and is therefore
 * never negative.
 *
 * qout_x/y/z/w: receive the eight decoded quaternions, one component each.
 * qin:          eight packed quaternions; must be 32-byte aligned.
 */
static inline void
qdec_avx(__m256 *qout_x, __m256 *qout_y, __m256 *qout_z, __m256 *qout_w, const unsigned *qin) {
  const __m256 one = _mm256_set1_ps(1.0f);
  const __m256 zero = _mm256_setzero_ps();
  const __m256i q = _mm256_load_si256((const __m256i *)qin);
  const __m256i top = _mm256_srli_epi32(q, 30);

  /* The three stored fields, lowest bits first. */
  __m256 mag0, mag1, mag2;
  const __m256 val0 = qdec_field_avx(q, &mag0);
  const __m256 val1 = qdec_field_avx(_mm256_srli_epi32(q, 10), &mag1);
  const __m256 val2 = qdec_field_avx(_mm256_srli_epi32(q, 20), &mag2);

  /* Dropped component from the unit-length constraint. */
  __m256 sqr = _mm256_mul_ps(mag0, mag0);
  sqr = _mm256_fmadd_ps(mag1, mag1, sqr);
  sqr = _mm256_fmadd_ps(mag2, mag2, sqr);
  const __m256 root = _mm256_sqrt_ps(_mm256_max_ps(_mm256_sub_ps(one, sqr), zero));

  const __m256 is_x = _mm256_castsi256_ps(_mm256_cmpeq_epi32(top, _mm256_set1_epi32(0)));
  const __m256 is_y = _mm256_castsi256_ps(_mm256_cmpeq_epi32(top, _mm256_set1_epi32(1)));
  const __m256 is_z = _mm256_castsi256_ps(_mm256_cmpeq_epi32(top, _mm256_set1_epi32(2)));
  const __m256 is_w = _mm256_castsi256_ps(_mm256_cmpeq_epi32(top, _mm256_set1_epi32(3)));

  /* Scatter: the stored fields fill the remaining components in x, y, z, w
   * order starting from the highest field (val2), skipping the dropped one.
   *   top=0: (root, val2, val1, val0)    top=1: (val2, root, val1, val0)
   *   top=2: (val2, val1, root, val0)    top=3: (val2, val1, val0, root) */
  *qout_x = _mm256_blendv_ps(val2, root, is_x);
  const __m256 y_stored = _mm256_blendv_ps(val1, val2, is_x);
  *qout_y = _mm256_blendv_ps(y_stored, root, is_y);
  const __m256 z_stored = _mm256_blendv_ps(val0, val1, _mm256_or_ps(is_x, is_y));
  *qout_z = _mm256_blendv_ps(z_stored, root, is_z);
  *qout_w = _mm256_blendv_ps(val0, root, is_w);
}
| extern void | |
| anim_update(float* out, struct anm_sys *ans, int anm_start, int anm_end) { | |
| { | |
| // Precompute linear coefficients and update frames for all models | |
| __m256 zero = _mm256_setzero_ps(); | |
| __m256 one = _mm256_set1_ps(1.0f); | |
| __m256i one_i = _mm256_set1_epi32(1); | |
| __m256i zero_i = _mm256_setzero_si256(); // For lower clamp | |
| for (int i = anm_start; i < anm_end; i += 8) { | |
| __m256i frame_cnt = _mm256_load_si256((__m256i*)&ans->frame_cnt[i]); | |
| __m256 exact = _mm256_load_ps(&ans->exact_frame[i]); | |
| __m256i frame_i = _mm256_castps_si256(exact); // trunc toward zero | |
| __m256i frame_cnt_m1 = _mm256_sub_epi32(frame_cnt, one_i); | |
| __m256 frame_ps = _mm256_cvtepi32_ps(frame_i); | |
| __m256 frame_cnt_m1_ps = _mm256_cvtepi32_ps(frame_cnt_m1); | |
| __m256 gt_mask = _mm256_cmp_ps(frame_ps, frame_cnt_m1_ps, _CMP_GT_OQ); | |
| __m256i clamp_mask_i = _mm256_castps_si256(gt_mask); | |
| frame_i = _mm256_blendv_epi8(frame_i, frame_cnt_m1, clamp_mask_i); | |
| __m256 lt_mask = _mm256_cmp_ps(frame_ps, zero, _CMP_LT_OQ); // < 0? | |
| __m256i lt_mask_i = _mm256_castps_si256(lt_mask); | |
| frame_i = _mm256_blendv_epi8(frame_i, zero_i, lt_mask_i); | |
| __m256 t = _mm256_sub_ps(exact, _mm256_cvtepi32_ps(frame_i)); | |
| __m256i frame_1_tst = _mm256_add_epi32(frame_i, one_i); | |
| __m256 tst_ps = _mm256_cvtepi32_ps(frame_1_tst); | |
| __m256 cnt_ps = _mm256_cvtepi32_ps(frame_cnt); | |
| __m256 ge_mask = _mm256_cmp_ps(tst_ps, cnt_ps, _CMP_GE_OQ); | |
| __m256i sel_mask_i = _mm256_castps_si256(ge_mask); | |
| __m256i frame_1 = _mm256_blendv_epi8(frame_1_tst, frame_i, sel_mask_i); | |
| __m256 t_clamped = _mm256_min_ps(_mm256_max_ps(t, zero), one); | |
| // Compute linear basis functions | |
| __m256 h00 = _mm256_sub_ps(one, t_clamped); | |
| __m256 h01 = t_clamped; | |
| // Store | |
| _mm256_store_ps(&ans->t_0[i], h00); | |
| _mm256_store_ps(&ans->t_1[i], h01); | |
| _mm256_store_si256((__m256i*)&ans->frame_nxt[i], frame_1); | |
| _mm256_store_si256((__m256i*)&ans->frame[i], frame_i); | |
| } | |
| } | |
| for (int m = anm_start; m < anm_end; ++m) { | |
| int anm_id = ans->anim[m]; | |
| const struct anm_clip *clp = anm_clips[anm_id]; | |
| int frame = ans->frame[m]; | |
| int frame_next = ans->frame_nxt[m]; | |
| int key_idx_pos[2] = {clp->blks.frame_to_key_pos[frame], clp->blks.frame_to_key_pos[frame_next]}; | |
| int key_idx_rot[2] = {clp->blks.frame_to_key_rot[frame], clp->blks.frame_to_key_rot[frame_next]}; | |
| int key_idx_scl[2] = {clp->blks.frame_to_key_scl[frame], clp->blks.frame_to_key_scl[frame_next]}; | |
| const __m256 pos_off = _mm256_set1_ps(clp->off[ANM_TRK_POS]); | |
| const __m256 rot_off = _mm256_set1_ps(clp->off[ANM_TRK_ROT]); | |
| const __m256 scl_off = _mm256_set1_ps(clp->off[ANM_TRK_SCL]); | |
| const __m256 pos_scl = _mm256_set1_ps(clp->scl[ANM_TRK_POS]); | |
| const __m256 rot_scl = _mm256_set1_ps(clp->scl[ANM_TRK_ROT]); | |
| const __m256 scl_scl = _mm256_set1_ps(clp->scl[ANM_TRK_SCL]); | |
| __m256 h00 = _mm256_set1_ps(ans->t_0[m]); | |
| __m256 h01 = _mm256_set1_ps(ans->t_1[m]); | |
| const int stride = clp->joint_cnt; | |
| float *out_ptr = out + m * ANIM_TRK_ELM_CNT * stride; | |
| int anm_mod = ans->caches[m].cached_anim == anm_id; | |
| int full_hit = (ans->caches[m].cached_frame == frame && ans->caches[m].cached_next == frame_next); | |
| int advance = !full_hit && (ans->caches[m].cached_frame != -1) && (frame == ans->caches[m].cached_next); | |
| if (anm_mod || !full_hit) { | |
| if (!anm_mod && advance) { | |
| // Double-buffer toggle: Swap slots (new prev = old next; new next = old prev) | |
| ans->caches[m].prev_slot = 1 - ans->caches[m].prev_slot; | |
| ans->caches[m].cached_frame = frame; | |
| ans->caches[m].cached_next = frame_next; | |
| // Dequant only new frame_next to new next_slot | |
| int next_slot = 1 - ans->caches[m].prev_slot; // Now the free slot (old prev) | |
| int base_pos1 = key_idx_pos[1] * stride; | |
| int base_scl1 = key_idx_scl[1] * stride; | |
| int base_rot1 = key_idx_rot[1] * stride; | |
| for (int jj = 0; jj < clp->joint_cnt; jj += 8) { | |
| int j1 = jj; // Batch offset | |
| // Position frame 1 (new next) | |
| __m128i shorts1_x = _mm_load_si128((__m128i*)(clp->keys.pos_x + base_pos1 + j1)); | |
| __m256i mag1_x = _mm256_cvtepu16_epi32(shorts1_x); | |
| __m256 pos1_x = _mm256_add_ps(_mm256_mul_ps(_mm256_cvtepi32_ps(mag1_x), pos_scl), pos_off); | |
| _mm256_store_ps(&ans->caches[m].pos_x[next_slot * stride + jj], pos1_x); | |
| __m128i shorts1_y = _mm_load_si128((__m128i*)(clp->keys.pos_y + base_pos1 + j1)); | |
| __m256i mag1_y = _mm256_cvtepu16_epi32(shorts1_y); | |
| __m256 pos1_y = _mm256_add_ps(_mm256_mul_ps(_mm256_cvtepi32_ps(mag1_y), pos_scl), pos_off); | |
| _mm256_store_ps(&ans->caches[m].pos_y[next_slot * stride + jj], pos1_y); | |
| __m128i shorts1_z = _mm_load_si128((__m128i*)(clp->keys.pos_z + base_pos1 + j1)); | |
| __m256i mag1_z = _mm256_cvtepu16_epi32(shorts1_z); | |
| __m256 pos1_z = _mm256_add_ps(_mm256_mul_ps(_mm256_cvtepi32_ps(mag1_z), pos_scl), pos_off); | |
| _mm256_store_ps(&ans->caches[m].pos_z[next_slot * stride + jj], pos1_z); | |
| // Scale frame 1 (new next) | |
| __m128i shorts_s1 = _mm_load_si128((__m128i*)(clp->keys.scl_s + base_scl1 + j1)); | |
| __m256i mag_s1 = _mm256_cvtepu16_epi32(shorts_s1); | |
| __m256 scl1 = _mm256_add_ps(_mm256_mul_ps(_mm256_cvtepi32_ps(mag_s1), scl_scl), scl_off); | |
| _mm256_store_ps(&ans->caches[m].scl_s[next_slot * stride + jj], scl1); | |
| // Rotation frame 1 (new next) | |
| align32 unsigned rot1_data[8]; | |
| __m256i rot1_packed = _mm256_load_si256((__m256i*)(clp->keys.rot_pos + base_rot1 + j1)); | |
| _mm256_store_si256((__m256i*)rot1_data, rot1_packed); | |
| __m256 rot1_x_raw, rot1_y_raw, rot1_z_raw, rot1_w_raw; | |
| qdec_avx(&rot1_x_raw, &rot1_y_raw, &rot1_z_raw, &rot1_w_raw, rot1_data); | |
| __m256 rot1_x = _mm256_add_ps(_mm256_mul_ps(rot1_x_raw, rot_scl), rot_off); | |
| _mm256_store_ps(&ans->caches[m].rot_x[next_slot * stride + jj], rot1_x); | |
| __m256 rot1_y = _mm256_add_ps(_mm256_mul_ps(rot1_y_raw, rot_scl), rot_off); | |
| _mm256_store_ps(&ans->caches[m].rot_y[next_slot * stride + jj], rot1_y); | |
| __m256 rot1_z = _mm256_add_ps(_mm256_mul_ps(rot1_z_raw, rot_scl), rot_off); | |
| _mm256_store_ps(&ans->caches[m].rot_z[next_slot * stride + jj], rot1_z); | |
| __m256 rot1_w = _mm256_add_ps(_mm256_mul_ps(rot1_w_raw, rot_scl), rot_off); | |
| _mm256_store_ps(&ans->caches[m].rot_w[next_slot * stride + jj], rot1_w); | |
| } | |
| } else { | |
| // Full miss/seek: Dequant both to fixed slots 0 (frame) / 1 (frame_next) | |
| ans->caches[m].prev_slot = 0; | |
| ans->caches[m].cached_frame = frame; | |
| ans->caches[m].cached_next = frame_next; | |
| // Dequant full pair to cache (slot 0: frame, slot 1: frame_next) | |
| for (int jj = 0; jj < clp->joint_cnt; jj += 8) { | |
| int base_pos0 = key_idx_pos[0] * stride + jj; | |
| int base_pos1 = key_idx_pos[1] * stride + jj; | |
| int base_scl0 = key_idx_scl[0] * stride + jj; | |
| int base_scl1 = key_idx_scl[1] * stride + jj; | |
| int base_rot0 = key_idx_rot[0] * stride + jj; | |
| int base_rot1 = key_idx_rot[1] * stride + jj; | |
| // Position frame 0 (slot 0) | |
| __m128i shorts0_x = _mm_load_si128((__m128i*)(clp->keys.pos_x + base_pos0)); | |
| __m256i mag0_x = _mm256_cvtepu16_epi32(shorts0_x); | |
| __m256 pos0_x = _mm256_add_ps(_mm256_mul_ps(_mm256_cvtepi32_ps(mag0_x), pos_scl), pos_off); | |
| _mm256_store_ps(&ans->caches[m].pos_x[0 * stride + jj], pos0_x); | |
| __m128i shorts0_y = _mm_load_si128((__m128i*)(clp->keys.pos_y + base_pos0)); | |
| __m256i mag0_y = _mm256_cvtepu16_epi32(shorts0_y); | |
| __m256 pos0_y = _mm256_add_ps(_mm256_mul_ps(_mm256_cvtepi32_ps(mag0_y), pos_scl), pos_off); | |
| _mm256_store_ps(&ans->caches[m].pos_y[0 * stride + jj], pos0_y); | |
| __m128i shorts0_z = _mm_load_si128((__m128i*)(clp->keys.pos_z + base_pos0)); | |
| __m256i mag0_z = _mm256_cvtepu16_epi32(shorts0_z); | |
| __m256 pos0_z = _mm256_add_ps(_mm256_mul_ps(_mm256_cvtepi32_ps(mag0_z), pos_scl), pos_off); | |
| _mm256_store_ps(&ans->caches[m].pos_z[0 * stride + jj], pos0_z); | |
| // Position frame 1 (slot 1) | |
| __m128i shorts1_x = _mm_load_si128((__m128i*)(clp->keys.pos_x + base_pos1)); | |
| __m256i mag1_x = _mm256_cvtepu16_epi32(shorts1_x); | |
| __m256 pos1_x = _mm256_add_ps(_mm256_mul_ps(_mm256_cvtepi32_ps(mag1_x), pos_scl), pos_off); | |
| _mm256_store_ps(&ans->caches[m].pos_x[1 * stride + jj], pos1_x); | |
| __m128i shorts1_y = _mm_load_si128((__m128i*)(clp->keys.pos_y + base_pos1)); | |
| __m256i mag1_y = _mm256_cvtepu16_epi32(shorts1_y); | |
| __m256 pos1_y = _mm256_add_ps(_mm256_mul_ps(_mm256_cvtepi32_ps(mag1_y), pos_scl), pos_off); | |
| _mm256_store_ps(&ans->caches[m].pos_y[1 * stride + jj], pos1_y); | |
| __m128i shorts1_z = _mm_load_si128((__m128i*)(clp->keys.pos_z + base_pos1)); | |
| __m256i mag1_z = _mm256_cvtepu16_epi32(shorts1_z); | |
| __m256 pos1_z = _mm256_add_ps(_mm256_mul_ps(_mm256_cvtepi32_ps(mag1_z), pos_scl), pos_off); | |
| _mm256_store_ps(&ans->caches[m].pos_z[1 * stride + jj], pos1_z); | |
| // Scale frame 0 (slot 0) | |
| __m128i shorts_s0 = _mm_load_si128((__m128i*)(clp->keys.scl_s + base_scl0)); | |
| __m256i mag_s0 = _mm256_cvtepu16_epi32(shorts_s0); | |
| __m256 scl0 = _mm256_add_ps(_mm256_mul_ps(_mm256_cvtepi32_ps(mag_s0), scl_scl), scl_off); | |
| _mm256_store_ps(&ans->caches[m].scl_s[0 * stride + jj], scl0); | |
| // Scale frame 1 (slot 1) | |
| __m128i shorts_s1 = _mm_load_si128((__m128i*)(clp->keys.scl_s + base_scl1)); | |
| __m256i mag_s1 = _mm256_cvtepu16_epi32(shorts_s1); | |
| __m256 scl1 = _mm256_add_ps(_mm256_mul_ps(_mm256_cvtepi32_ps(mag_s1), scl_scl), scl_off); | |
| _mm256_store_ps(&ans->caches[m].scl_s[1 * stride + jj], scl1); | |
| // Rotation frame 0 (slot 0) | |
| align32 unsigned rot0_data[8]; | |
| __m256i rot0_packed = _mm256_load_si256((__m256i*)(clp->keys.rot_pos + base_rot0)); | |
| _mm256_store_si256((__m256i*)rot0_data, rot0_packed); | |
| __m256 rot0_x_raw, rot0_y_raw, rot0_z_raw, rot0_w_raw; | |
| qdec_avx(&rot0_x_raw, &rot0_y_raw, &rot0_z_raw, &rot0_w_raw, rot0_data); | |
| __m256 rot0_x = _mm256_add_ps(_mm256_mul_ps(rot0_x_raw, rot_scl), rot_off); | |
| __m256 rot0_y = _mm256_add_ps(_mm256_mul_ps(rot0_y_raw, rot_scl), rot_off); | |
| __m256 rot0_z = _mm256_add_ps(_mm256_mul_ps(rot0_z_raw, rot_scl), rot_off); | |
| __m256 rot0_w = _mm256_add_ps(_mm256_mul_ps(rot0_w_raw, rot_scl), rot_off); | |
| _mm256_store_ps(&ans->caches[m].rot_x[0 * stride + jj], rot0_x); | |
| _mm256_store_ps(&ans->caches[m].rot_y[0 * stride + jj], rot0_y); | |
| _mm256_store_ps(&ans->caches[m].rot_z[0 * stride + jj], rot0_z); | |
| _mm256_store_ps(&ans->caches[m].rot_w[0 * stride + jj], rot0_w); | |
| // Rotation frame 1 (slot 1) | |
| align32 unsigned rot1_data[8]; | |
| __m256i rot1_packed = _mm256_load_si256((__m256i*)(clp->keys.rot_pos + base_rot1)); | |
| _mm256_store_si256((__m256i*)rot1_data, rot1_packed); | |
| __m256 rot1_x_raw, rot1_y_raw, rot1_z_raw, rot1_w_raw; | |
| qdec_avx(&rot1_x_raw, &rot1_y_raw, &rot1_z_raw, &rot1_w_raw, rot1_data); | |
| __m256 rot1_x = _mm256_add_ps(_mm256_mul_ps(rot1_x_raw, rot_scl), rot_off); | |
| __m256 rot1_y = _mm256_add_ps(_mm256_mul_ps(rot1_y_raw, rot_scl), rot_off); | |
| __m256 rot1_z = _mm256_add_ps(_mm256_mul_ps(rot1_z_raw, rot_scl), rot_off); | |
| __m256 rot1_w = _mm256_add_ps(_mm256_mul_ps(rot1_w_raw, rot_scl), rot_off); | |
| _mm256_store_ps(&ans->caches[m].rot_x[1 * stride + jj], rot1_x); | |
| _mm256_store_ps(&ans->caches[m].rot_y[1 * stride + jj], rot1_y); | |
| _mm256_store_ps(&ans->caches[m].rot_z[1 * stride + jj], rot1_z); | |
| _mm256_store_ps(&ans->caches[m].rot_w[1 * stride + jj], rot1_w); | |
| } | |
| } | |
| } | |
| int next_slot = 1 - ans->caches[m].prev_slot; | |
| int prev_idx_base = ans->caches[m].prev_slot * stride; | |
| int next_idx_base = next_slot * stride; | |
| float t = ans->exact_frame[m] - ans->frame[m]; | |
| // Joint loop: Use dynamic indices for loads | |
| for (int j = 0; j < clp->joint_cnt; j += 8) { | |
| int prev_idx = prev_idx_base + j; | |
| int next_idx = next_idx_base + j; | |
| // Position | |
| __m256 pos0_x = _mm256_load_ps(&ans->caches[m].pos_x[prev_idx]); | |
| __m256 pos1_x = _mm256_load_ps(&ans->caches[m].pos_x[next_idx]); | |
| __m256 pos0_y = _mm256_load_ps(&ans->caches[m].pos_y[prev_idx]); | |
| __m256 pos1_y = _mm256_load_ps(&ans->caches[m].pos_y[next_idx]); | |
| __m256 pos0_z = _mm256_load_ps(&ans->caches[m].pos_z[prev_idx]); | |
| __m256 pos1_z = _mm256_load_ps(&ans->caches[m].pos_z[next_idx]); | |
| // Scale | |
| __m256 scl0 = _mm256_load_ps(&ans->caches[m].scl_s[prev_idx]); | |
| __m256 scl1 = _mm256_load_ps(&ans->caches[m].scl_s[next_idx]); | |
| // Rotation | |
| __m256 rot0_x = _mm256_load_ps(&ans->caches[m].rot_x[prev_idx]); | |
| __m256 rot1_x = _mm256_load_ps(&ans->caches[m].rot_x[next_idx]); | |
| __m256 rot0_y = _mm256_load_ps(&ans->caches[m].rot_y[prev_idx]); | |
| __m256 rot1_y = _mm256_load_ps(&ans->caches[m].rot_y[next_idx]); | |
| __m256 rot0_z = _mm256_load_ps(&ans->caches[m].rot_z[prev_idx]); | |
| __m256 rot1_z = _mm256_load_ps(&ans->caches[m].rot_z[next_idx]); | |
| __m256 rot0_w = _mm256_load_ps(&ans->caches[m].rot_w[prev_idx]); | |
| __m256 rot1_w = _mm256_load_ps(&ans->caches[m].rot_w[next_idx]); | |
| // Linear interpolation for position and scale | |
| __m256 res_x = _mm256_fmadd_ps(h01, pos1_x, _mm256_mul_ps(h00, pos0_x)); | |
| __m256 res_y = _mm256_fmadd_ps(h01, pos1_y, _mm256_mul_ps(h00, pos0_y)); | |
| __m256 res_z = _mm256_fmadd_ps(h01, pos1_z, _mm256_mul_ps(h00, pos0_z)); | |
| __m256 res_scl = _mm256_fmadd_ps(h01, scl1, _mm256_mul_ps(h00, scl0)); | |
| // ONLERP for rotation | |
| __m256 zero = _mm256_setzero_ps(); | |
| __m256 one = _mm256_set1_ps(1.0f); | |
| __m256 tt = _mm256_set1_ps(t); | |
| __m256 ca = _mm256_add_ps( | |
| _mm256_add_ps(_mm256_mul_ps(rot0_x, rot1_x), _mm256_mul_ps(rot0_y, rot1_y)), | |
| _mm256_add_ps(_mm256_mul_ps(rot0_z, rot1_z), _mm256_mul_ps(rot0_w, rot1_w))); | |
| __m256 mask_neg = _mm256_cmp_ps(ca, zero, _CMP_LT_OS); | |
| __m256 fabs_mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff)); | |
| __m256 d = _mm256_and_ps(ca, fabs_mask); | |
| __m256 sign_mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x80000000)); | |
| rot1_x = _mm256_xor_ps(rot1_x, _mm256_and_ps(sign_mask, _mm256_castps_si256(mask_neg))); | |
| rot1_y = _mm256_xor_ps(rot1_y, _mm256_and_ps(sign_mask, _mm256_castps_si256(mask_neg))); | |
| rot1_z = _mm256_xor_ps(rot1_z, _mm256_and_ps(sign_mask, _mm256_castps_si256(mask_neg))); | |
| rot1_w = _mm256_xor_ps(rot1_w, _mm256_and_ps(sign_mask, _mm256_castps_si256(mask_neg))); | |
| // Compute A = 1.0904 + d * (-3.2452 + d * (3.55645 - d * 1.43519)) | |
| __m256 tmp_a1 = _mm256_fmadd_ps(d, _mm256_set1_ps(-1.43519f), _mm256_set1_ps(3.55645f)); | |
| __m256 tmp_a2 = _mm256_fmadd_ps(d, tmp_a1, _mm256_set1_ps(-3.2452f)); | |
| __m256 A = _mm256_fmadd_ps(d, tmp_a2, _mm256_set1_ps(1.0904f)); | |
| // Compute B = 0.848013 + d * (-1.06021 + d * 0.215638) | |
| __m256 tmp_b1 = _mm256_fmadd_ps(d, _mm256_set1_ps(0.215638f), _mm256_set1_ps(-1.06021f)); | |
| __m256 B = _mm256_fmadd_ps(d, tmp_b1, _mm256_set1_ps(0.848013f)); | |
| __m256 dt05 = _mm256_sub_ps(tt, _mm256_set1_ps(0.5f)); | |
| __m256 dt05_sq = _mm256_mul_ps(dt05, dt05); | |
| __m256 k = _mm256_fmadd_ps(A, dt05_sq, B); | |
| __m256 t_m05 = _mm256_sub_ps(tt, _mm256_set1_ps(0.5f)); | |
| __m256 t_m1 = _mm256_sub_ps(tt, one); | |
| __m256 term = _mm256_mul_ps(tt, _mm256_mul_ps(t_m05, t_m1)); | |
| __m256 ot = _mm256_fmadd_ps(k, term, tt); | |
| __m256 lt = _mm256_sub_ps(one, ot); | |
| __m256 res_rot_x = _mm256_fmadd_ps(ot, rot1_x, _mm256_mul_ps(lt, rot0_x)); | |
| __m256 res_rot_y = _mm256_fmadd_ps(ot, rot1_y, _mm256_mul_ps(lt, rot0_y)); | |
| __m256 res_rot_z = _mm256_fmadd_ps(ot, rot1_z, _mm256_mul_ps(lt, rot0_z)); | |
| __m256 res_rot_w = _mm256_fmadd_ps(ot, rot1_w, _mm256_mul_ps(lt, rot0_w)); | |
| // Normalize | |
| __m256 un = _mm256_fmadd_ps(res_rot_w, res_rot_w, | |
| _mm256_fmadd_ps(res_rot_z, res_rot_z, | |
| _mm256_fmadd_ps(res_rot_y, res_rot_y, | |
| _mm256_mul_ps(res_rot_x, res_rot_x)))); | |
| __m256 us0 = _mm256_rsqrt_ps(un); | |
| __m256 us1 = _mm256_mul_ps(_mm256_mul_ps(_mm256_set1_ps(0.5f), us0), | |
| _mm256_sub_ps(_mm256_set1_ps(3.0f), | |
| _mm256_mul_ps(_mm256_mul_ps(us0, us0), un))); | |
| res_rot_x = _mm256_mul_ps(res_rot_x, us1); | |
| res_rot_y = _mm256_mul_ps(res_rot_y, us1); | |
| res_rot_z = _mm256_mul_ps(res_rot_z, us1); | |
| res_rot_w = _mm256_mul_ps(res_rot_w, us1); // FIX: Scale w component too | |
| // Stream | |
| _mm256_stream_ps(out_ptr + 0 * stride + j, res_x); | |
| _mm256_stream_ps(out_ptr + 1 * stride + j, res_y); | |
| _mm256_stream_ps(out_ptr + 2 * stride + j, res_z); | |
| _mm256_stream_ps(out_ptr + 3 * stride + j, res_scl); | |
| _mm256_stream_ps(out_ptr + 4 * stride + j, res_rot_x); | |
| _mm256_stream_ps(out_ptr + 5 * stride + j, res_rot_y); | |
| _mm256_stream_ps(out_ptr + 6 * stride + j, res_rot_z); | |
| } | |
| } | |
| _mm_sfence(); // Ensure all streaming stores complete | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include "cgltf.h" | |
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <math.h> | |
| #include <string.h> | |
| #include <assert.h> | |
| #include <float.h> | |
| #include <stdbool.h> | |
| #include <limits.h> | |
| #include <stdint.h> | |
| #define MAX_JOINTS 256 | |
| #define MAX_ALL_JOINTS 1024 | |
| #define MAX_FRAMES (16*1024) | |
| #define DIM_MAX 4 | |
| #define MAT4_SIZE 16 | |
| typedef struct { | |
| uint64_t bits[MAX_FRAMES/64]; // 256*64 = 16384 bits | |
| } bitset_t; | |
/* Integer range given by its two end points. */
typedef struct {
  int start;
  int end;
} interval_t;

/* Decomposed transform: translation, rotation quaternion and per-axis scale. */
typedef struct {
  float tx, ty, tz;     /* translation */
  float rx, ry, rz, rw; /* rotation quaternion */
  float sx, sy, sz;     /* scale */
} transform_t;

/* 4x4 float matrix stored as 16 consecutive floats. */
typedef float mat4[16];
| static float g_pos_x[MAX_JOINTS * MAX_FRAMES]; | |
| static float g_pos_y[MAX_JOINTS * MAX_FRAMES]; | |
| static float g_pos_z[MAX_JOINTS * MAX_FRAMES]; | |
| static float g_rot_x[MAX_JOINTS * MAX_FRAMES]; | |
| static float g_rot_y[MAX_JOINTS * MAX_FRAMES]; | |
| static float g_rot_z[MAX_JOINTS * MAX_FRAMES]; | |
| static float g_rot_w[MAX_JOINTS * MAX_FRAMES]; | |
| static float g_scl_s[MAX_JOINTS * MAX_FRAMES]; | |
| static bitset_t g_used_pos_bits; | |
| static bitset_t g_used_rot_bits; | |
| static bitset_t g_used_scl_bits; | |
| static int g_key_pos_list[MAX_FRAMES]; | |
| static int g_key_rot_list[MAX_FRAMES]; | |
| static int g_key_scl_list[MAX_FRAMES]; | |
| static unsigned short g_frame_to_key_pos[MAX_FRAMES]; | |
| static unsigned short g_frame_to_key_rot[MAX_FRAMES]; | |
| static unsigned short g_frame_to_key_scl[MAX_FRAMES]; | |
| static int g_kept[MAX_FRAMES]; | |
| static float g_tmp_pos[3 * MAX_FRAMES]; | |
| static float g_tmp_rot[4 * MAX_FRAMES]; | |
| static float g_tmp_scl[MAX_FRAMES]; | |
| static unsigned short g_keys_pos_x[MAX_FRAMES * MAX_JOINTS]; | |
| static unsigned short g_keys_pos_y[MAX_FRAMES * MAX_JOINTS]; | |
| static unsigned short g_keys_pos_z[MAX_FRAMES * MAX_JOINTS]; | |
| static unsigned short g_keys_scl[MAX_FRAMES * MAX_JOINTS]; | |
| static unsigned g_keys_rot[MAX_FRAMES * MAX_JOINTS]; | |
| static int g_bone_indices[MAX_JOINTS]; | |
| static int g_parents[MAX_ALL_JOINTS]; | |
| static float g_globals[MAX_ALL_JOINTS * MAT4_SIZE]; | |
| static interval_t g_stack[MAX_FRAMES]; | |
/*
 * 32-bit FNV-1a hash of a NUL-terminated string.
 *
 * str: string to hash; must not be NULL.
 *
 * Returns the hash; the empty string hashes to the offset basis 0x811c9dc5.
 */
static uint32_t
fnv1a_32(const char *str) {
  const uint32_t fnv_prime = 0x01000193u;
  uint32_t h = 0x811c9dc5u; /* FNV offset basis */
  /* Bytes are read as unsigned so characters >= 0x80 do not sign-extend. */
  for (const unsigned char *p = (const unsigned char *)str; *p != '\0'; ++p) {
    h = (h ^ *p) * fnv_prime;
  }
  return h;
}
/*
 * Set a 4x4 matrix to identity.
 *
 * m: matrix to overwrite (a mat4, 16 floats).
 *
 * The parameter is declared as a plain float array: `mat4 restrict m`
 * restrict-qualifies the float element type, which C does not allow, and
 * restrict on a lone pointer parameter would gain nothing anyway.
 */
static void
mat4_set_identity(float m[16]) {
  for (int i = 0; i < 16; ++i) {
    /* The diagonal elements sit at indices 0, 5, 10 and 15. */
    m[i] = (i % 5 == 0) ? 1.0f : 0.0f;
  }
}
/*
 * 4x4 matrix product: out[i*4 + j] = sum over k of a[i*4 + k] * b[k*4 + j].
 *
 * Read as row-major this is out = a * b. With the column-major layout used
 * by mat4_from_trs (translation in elements 12..14) the same index formula
 * is out = b * a.
 *
 * a, b: input matrices (16 floats each).
 * out:  result; must not overlap a or b.
 *
 * The restrict qualifier sits inside the array declarator because
 * `const mat4 restrict a` would qualify the float elements and not compile.
 */
static void
mat4_mul(const float a[restrict 16], const float b[restrict 16], float out[restrict 16]) {
  for (int i = 0; i < 4; ++i) {
    for (int j = 0; j < 4; ++j) {
      float acc = 0.0f;
      for (int k = 0; k < 4; ++k) {
        acc += a[i * 4 + k] * b[k * 4 + j];
      }
      out[i * 4 + j] = acc;
    }
  }
}
/*
 * Invert an affine 4x4 matrix (column-major, translation in elements 12..14).
 *
 * Only the upper-left 3x3 block and the translation are read; the result's
 * last row is always written as (0, 0, 0, 1). When the 3x3 determinant is
 * smaller than 1e-8 in magnitude, `out` is set to identity instead.
 *
 * m:   matrix to invert.
 * out: receives the inverse; must not overlap m.
 *
 * The restrict qualifier sits inside the array declarator because
 * `const mat4 restrict m` would qualify the float elements and not compile.
 */
static void
mat4_invert(const float m[restrict 16], float out[restrict 16]) {
  const float det = m[0] * (m[5] * m[10] - m[9] * m[6]) -
                    m[4] * (m[1] * m[10] - m[9] * m[2]) +
                    m[8] * (m[1] * m[6] - m[5] * m[2]);
  if (fabsf(det) < 1e-8f) {
    mat4_set_identity(out); /* singular (or nearly so): fall back to identity */
    return;
  }
  const float idet = 1.0f / det;

  /* Inverse of the 3x3 block: adjugate divided by the determinant. */
  out[0] = (m[5] * m[10] - m[6] * m[9]) * idet;
  out[1] = (m[9] * m[2] - m[1] * m[10]) * idet;
  out[2] = (m[1] * m[6] - m[5] * m[2]) * idet;
  out[4] = (m[6] * m[8] - m[4] * m[10]) * idet;
  out[5] = (m[0] * m[10] - m[8] * m[2]) * idet;
  out[6] = (m[4] * m[2] - m[0] * m[6]) * idet;
  out[8] = (m[4] * m[9] - m[5] * m[8]) * idet;
  out[9] = (m[8] * m[1] - m[0] * m[9]) * idet;
  out[10] = (m[0] * m[5] - m[4] * m[1]) * idet;

  out[3] = out[7] = out[11] = 0.0f;
  out[15] = 1.0f;

  /* Inverse translation: -(inverse 3x3) * t. */
  out[12] = -(out[0] * m[12] + out[4] * m[13] + out[8] * m[14]);
  out[13] = -(out[1] * m[12] + out[5] * m[13] + out[9] * m[14]);
  out[14] = -(out[2] * m[12] + out[6] * m[13] + out[10] * m[14]);
}
/*
 * Build a column-major 4x4 matrix from translation, rotation and scale.
 *
 * pos: translation (x, y, z).
 * rot: rotation quaternion (x, y, z, w); used as given, not normalized here.
 * scl: per-axis scale; column i of the rotation is multiplied by scl[i].
 * out: receives the matrix; must not overlap the inputs.
 *
 * The restrict qualifier sits inside the array declarators because
 * `const float restrict pos[3]` qualifies the float itself and does not compile.
 */
static void
mat4_from_trs(const float pos[restrict 3], const float rot[restrict 4],
              const float scl[restrict 3], float out[restrict 16]) {
  const float x = rot[0], y = rot[1], z = rot[2], w = rot[3];
  const float xx = x * x, xy = x * y, xz = x * z, xw = x * w;
  const float yy = y * y, yz = y * z, yw = y * w;
  const float zz = z * z, zw = z * w;

  /* Column 0: rotated x axis, scaled. */
  out[0] = (1.0f - 2.0f * (yy + zz)) * scl[0];
  out[1] = (2.0f * (xy + zw)) * scl[0];
  out[2] = (2.0f * (xz - yw)) * scl[0];
  out[3] = 0.0f;

  /* Column 1: rotated y axis, scaled. */
  out[4] = (2.0f * (xy - zw)) * scl[1];
  out[5] = (1.0f - 2.0f * (xx + zz)) * scl[1];
  out[6] = (2.0f * (yz + xw)) * scl[1];
  out[7] = 0.0f;

  /* Column 2: rotated z axis, scaled. */
  out[8] = (2.0f * (xz + yw)) * scl[2];
  out[9] = (2.0f * (yz - xw)) * scl[2];
  out[10] = (1.0f - 2.0f * (xx + yy)) * scl[2];
  out[11] = 0.0f;

  /* Column 3: translation. */
  out[12] = pos[0];
  out[13] = pos[1];
  out[14] = pos[2];
  out[15] = 1.0f;
}
| static void | |
| mat4_decompose(const mat4 restrict m, float restrict pos[3], | |
| float restrict rot[4], float restrict scl[3]) { | |
| pos[0] = m[12]; | |
| pos[1] = m[13]; | |
| pos[2] = m[14]; | |
| scl[0] = sqrtf(m[0] * m[0] + m[1] * m[1] + m[2] * m[2]); | |
| scl[1] = sqrtf(m[4] * m[4] + m[5] * m[5] + m[6] * m[6]); | |
| scl[2] = sqrtf(m[8] * m[8] + m[9] * m[9] + m[10] * m[10]); | |
| if (fabsf(scl[0]) < 1e-8f) { | |
| scl[0] = 1.0f; | |
| } | |
| if (fabsf(scl[1]) < 1e-8f) { | |
| scl[1] = 1.0f; | |
| } | |
| if (fabsf(scl[2]) < 1e-8f) { | |
| scl[2] = 1.0f; | |
| } | |
| float r[3][3]; | |
| r[0][0] = m[0] / scl[0]; r[0][1] = m[4] / scl[1]; r[0][2] = m[8] / scl[2]; | |
| r[1][0] = m[1] / scl[0]; r[1][1] = m[5] / scl[1]; r[1][2] = m[9] / scl[2]; | |
| r[2][0] = m[2] / scl[0]; r[2][1] = m[6] / scl[1]; r[2][2] = m[10] / scl[2]; | |
| float trace = r[0][0] + r[1][1] + r[2][2]; | |
| if (trace > 0.0f) { | |
| float s = 0.5f / sqrtf(trace + 1.0f); | |
| rot[3] = 0.5f / s; | |
| rot[0] = (r[2][1] - r[1][2]) * s; | |
| rot[1] = (r[0][2] - r[2][0]) * s; | |
| rot[2] = (r[1][0] - r[0][1]) * s; | |
| } else if (r[0][0] > r[1][1] && r[0][0] > r[2][2]) { | |
| float s = 0.5f / sqrtf(1.0f + r[0][0] - r[1][1] - r[2][2]); | |
| rot[0] = 0.5f / s; | |
| rot[1] = (r[1][0] + r[0][1]) * s; | |
| rot[2] = (r[0][2] + r[2][0]) * s; | |
| rot[3] = (r[2][1] - r[1][2]) * s; | |
| } else if (r[1][1] > r[2][2]) { | |
| float s = 0.5f / sqrtf(1.0f + r[1][1] - r[0][0] - r[2][2]); | |
| rot[0] = (r[1][0] + r[0][1]) * s; | |
| rot[1] = 0.5f / s; | |
| rot[2] = (r[1][2] + r[2][1]) * s; | |
| rot[3] = (r[0][2] - r[2][0]) * s; | |
| } else { | |
| float s = 0.5f / sqrtf(1.0f + r[2][2] - r[0][0] - r[1][1]); | |
| rot[0] = (r[0][2] + r[2][0]) * s; | |
| rot[1] = (r[1][2] + r[2][1]) * s; | |
| rot[2] = 0.5f / s; | |
| rot[3] = (r[1][0] - r[0][1]) * s; | |
| } | |
| float len = sqrtf(rot[0] * rot[0] + rot[1] * rot[1] + rot[2] * rot[2] + rot[3] * rot[3]); | |
| if (len > 1e-8f) { | |
| float inv_len = 1.0f / len; | |
| rot[0] *= inv_len; | |
| rot[1] *= inv_len; | |
| rot[2] *= inv_len; | |
| rot[3] *= inv_len; | |
| } | |
| } | |
| static void | |
| slerp(const float restrict *q0, const float restrict *q1, | |
| float t, float restrict *qout) { | |
| float q1_flip[4] = { q1[0], q1[1], q1[2], q1[3] }; | |
| float dot = q0[0] * q1[0] + q0[1] * q1[1] + q0[2] * q1[2] + q0[3] * q1[3]; | |
| if (dot < 0.0f) { | |
| q1_flip[0] = -q1[0]; | |
| q1_flip[1] = -q1[1]; | |
| q1_flip[2] = -q1[2]; | |
| q1_flip[3] = -q1[3]; | |
| dot = -dot; | |
| } | |
| if (dot > 0.9995f) { | |
| qout[0] = q0[0] + t * (q1_flip[0] - q0[0]); | |
| qout[1] = q0[1] + t * (q1_flip[1] - q0[1]); | |
| qout[2] = q0[2] + t * (q1_flip[2] - q0[2]); | |
| qout[3] = q0[3] + t * (q1_flip[3] - q0[3]); | |
| } else { | |
| float omega = acosf(dot); | |
| float so = sinf(omega); | |
| float a = sinf(omega * (1.0f - t)) / so; | |
| float b = sinf(omega * t) / so; | |
| qout[0] = a * q0[0] + b * q1_flip[0]; | |
| qout[1] = a * q0[1] + b * q1_flip[1]; | |
| qout[2] = a * q0[2] + b * q1_flip[2]; | |
| qout[3] = a * q0[3] + b * q1_flip[3]; | |
| } | |
| float norm = sqrtf(qout[0] * qout[0] + qout[1] * qout[1] + qout[2] * qout[2] + qout[3] * qout[3]); | |
| if (norm > 0.0f) { | |
| float inv_norm = 1.0f / norm; | |
| qout[0] *= inv_norm; | |
| qout[1] *= inv_norm; | |
| qout[2] *= inv_norm; | |
| qout[3] *= inv_norm; | |
| } | |
| } | |
| static void | |
| eval_vec3(const cgltf_animation *anim, const cgltf_node *node, | |
| double time, cgltf_target_path_type path, | |
| float restrict *x, float restrict *y, float restrict *z) { | |
| cgltf_channel *chan = NULL; | |
| for (size_t ci = 0; ci < anim->channels_count; ++ci) { | |
| cgltf_channel *c = &anim->channels[ci]; | |
| if (c->target_node == node && c->target_path == path) { | |
| if (chan) { | |
| fprintf(stderr, "Multiple channels for %s on node %s\n", | |
| (path == CGLTF_TARGET_PATH_TRANSLATION ? "translation" : "scale"), | |
| node->name ? node->name : "unnamed"); | |
| } | |
| chan = c; | |
| } | |
| } | |
| if (!chan) { | |
| *x = 0.0f; | |
| *y = 0.0f; | |
| *z = (path == CGLTF_TARGET_PATH_SCALE ? 1.0f : 0.0f); | |
| return; | |
| } | |
| cgltf_sampler *samp = &anim->samplers[chan->sampler_index]; | |
| cgltf_accessor *times_acc = samp->input; | |
| cgltf_accessor *data_acc = samp->output; | |
| if (times_acc->count == 0) { | |
| *x = 0.0f; | |
| *y = 0.0f; | |
| *z = (path == CGLTF_TARGET_PATH_SCALE ? 1.0f : 0.0f); | |
| return; | |
| } | |
| float tmin_f, tmax_f; | |
| cgltf_accessor_read_float(times_acc, 0, 0, &tmin_f); | |
| cgltf_accessor_read_float(times_acc, times_acc->count - 1, 0, &tmax_f); | |
| double tmin = (double)tmin_f; | |
| double tmax = (double)tmax_f; | |
| time = fmax(time, tmin); | |
| time = fmin(time, tmax); | |
| size_t count = times_acc->count; | |
| size_t i = 0; | |
| if (count > 1) { | |
| size_t low = 0, high = count - 1; | |
| while (low < high) { | |
| size_t mid = low + (high - low) / 2; | |
| float tmid_f; | |
| cgltf_accessor_read_float(times_acc, mid, 0, &tmid_f); | |
| if ((double)tmid_f <= time) { | |
| low = mid + 1; | |
| } else { | |
| high = mid; | |
| } | |
| } | |
| i = low - 1; | |
| if (i >= count) { | |
| i = count - 1; | |
| } | |
| if (i == (size_t)-1) { | |
| i = 0; | |
| } | |
| } | |
| float t0_f, t1_f = t0_f; | |
| cgltf_accessor_read_float(times_acc, i, 0, &t0_f); | |
| double t0 = (double)t0_f; | |
| bool at_end = (i == count - 1); | |
| float factor = 0.0f; | |
| if (!at_end) { | |
| cgltf_accessor_read_float(times_acc, i + 1, 0, &t1_f); | |
| double t1 = (double)t1_f; | |
| factor = (float)((time - t0) / (t1 - t0)); | |
| } | |
| float vx0, vy0, vz0; | |
| float vx1 = vx0, vy1 = vy0, vz1 = vz0; | |
| cgltf_accessor_read_float(data_acc, i, 0, &vx0); | |
| cgltf_accessor_read_float(data_acc, i, 1, &vy0); | |
| cgltf_accessor_read_float(data_acc, i, 2, &vz0); | |
| if (!at_end) { | |
| cgltf_accessor_read_float(data_acc, i + 1, 0, &vx1); | |
| cgltf_accessor_read_float(data_acc, i + 1, 1, &vy1); | |
| cgltf_accessor_read_float(data_acc, i + 1, 2, &vz1); | |
| vx0 += factor * (vx1 - vx0); | |
| vy0 += factor * (vy1 - vy0); | |
| vz0 += factor * (vz1 - vz0); | |
| } | |
| *x = vx0; | |
| *y = vy0; | |
| *z = vz0; | |
| } | |
| static void | |
| eval_quat(const cgltf_animation *anim, const cgltf_node *node, | |
| double time, float restrict *x, float restrict *y, | |
| float restrict *z, float restrict *w) { | |
| cgltf_channel *chan = NULL; | |
| for (size_t ci = 0; ci < anim->channels_count; ++ci) { | |
| cgltf_channel *c = &anim->channels[ci]; | |
| if (c->target_node == node && c->target_path == CGLTF_TARGET_PATH_ROTATION) { | |
| if (chan) { | |
| fprintf(stderr, "Multiple rotation channels on node %s\n", node->name ? node->name : "unnamed"); | |
| } | |
| chan = c; | |
| } | |
| } | |
| if (!chan) { | |
| *x = 0.0f; | |
| *y = 0.0f; | |
| *z = 0.0f; | |
| *w = 1.0f; | |
| return; | |
| } | |
| cgltf_sampler *samp = &anim->samplers[chan->sampler_index]; | |
| cgltf_accessor *times_acc = samp->input; | |
| cgltf_accessor *data_acc = samp->output; | |
| if (times_acc->count == 0) { | |
| *x = 0.0f; | |
| *y = 0.0f; | |
| *z = 0.0f; | |
| *w = 1.0f; | |
| return; | |
| } | |
| float tmin_f, tmax_f; | |
| cgltf_accessor_read_float(times_acc, 0, 0, &tmin_f); | |
| cgltf_accessor_read_float(times_acc, times_acc->count - 1, 0, &tmax_f); | |
| double tmin = (double)tmin_f; | |
| double tmax = (double)tmax_f; | |
| time = fmax(time, tmin); | |
| time = fmin(time, tmax); | |
| size_t count = times_acc->count; | |
| size_t i = 0; | |
| if (count > 1) { | |
| size_t low = 0, high = count - 1; | |
| while (low < high) { | |
| size_t mid = low + (high - low) / 2; | |
| float tmid_f; | |
| cgltf_accessor_read_float(times_acc, mid, 0, &tmid_f); | |
| if ((double)tmid_f <= time) { | |
| low = mid + 1; | |
| } else { | |
| high = mid; | |
| } | |
| } | |
| i = low - 1; | |
| if (i >= count) { | |
| i = count - 1; | |
| } | |
| if (i == (size_t)-1){ | |
| i = 0; | |
| } | |
| } | |
| float t0_f, t1_f = t0_f; | |
| cgltf_accessor_read_float(times_acc, i, 0, &t0_f); | |
| double t0 = (double)t0_f; | |
| bool at_end = (i == count - 1); | |
| float factor = 0.0f; | |
| if (!at_end) { | |
| cgltf_accessor_read_float(times_acc, i + 1, 0, &t1_f); | |
| double t1 = (double)t1_f; | |
| factor = (float)((time - t0) / (t1 - t0)); | |
| } | |
| float q0[4], q1[4] = {0}; | |
| cgltf_accessor_read_float(data_acc, i, 0, &q0[0]); | |
| cgltf_accessor_read_float(data_acc, i, 1, &q0[1]); | |
| cgltf_accessor_read_float(data_acc, i, 2, &q0[2]); | |
| cgltf_accessor_read_float(data_acc, i, 3, &q0[3]); | |
| if (!at_end) { | |
| cgltf_accessor_read_float(data_acc, i + 1, 0, &q1[0]); | |
| cgltf_accessor_read_float(data_acc, i + 1, 1, &q1[1]); | |
| cgltf_accessor_read_float(data_acc, i + 1, 2, &q1[2]); | |
| cgltf_accessor_read_float(data_acc, i + 1, 3, &q1[3]); | |
| float qout[4]; | |
| slerp(q0, q1, factor, qout); | |
| q0[0] = qout[0]; | |
| q0[1] = qout[1]; | |
| q0[2] = qout[2]; | |
| q0[3] = qout[3]; | |
| } | |
| *x = q0[0]; | |
| *y = q0[1]; | |
| *z = q0[2]; | |
| *w = q0[3]; | |
| } | |
| static void | |
| eval_transform(const cgltf_animation *anim, const cgltf_node *node, | |
| double time, transform_t *xf) { | |
| xf->tx = 0.0f; xf->ty = 0.0f; xf->tz = 0.0f; | |
| xf->rx = 0.0f; xf->ry = 0.0f; xf->rz = 0.0f; xf->rw = 1.0f; | |
| xf->sx = 1.0f; xf->sy = 1.0f; xf->sz = 1.0f; | |
| eval_vec3(anim, node, time, CGLTF_TARGET_PATH_TRANSLATION, &xf->tx, &xf->ty, &xf->tz); | |
| eval_vec3(anim, node, time, CGLTF_TARGET_PATH_SCALE, &xf->sx, &xf->sy, &xf->sz); | |
| eval_quat(anim, node, time, &xf->rx, &xf->ry, &xf->rz, &xf->rw); | |
| } | |
| static inline void | |
| set_bit(bitset_t *bs, size_t idx) { | |
| bs->bits[idx / 64] |= (1ULL << (idx % 64)); | |
| } | |
| static inline bool | |
| test_bit(const bitset_t *bs, size_t idx) { | |
| return (bs->bits[idx / 64] & (1ULL << (idx % 64))) != 0; | |
| } | |
/*
 * qsort() comparator for ascending int order.
 *
 * Returns -1, 0 or 1. The comparison form is used instead of `a - b`
 * because the subtraction overflows (undefined behaviour) when the operands
 * are far apart, e.g. INT_MIN and 1.
 */
static int
cmp_int(const void *a, const void *b) {
    const int lhs = *(const int *)a;
    const int rhs = *(const int *)b;
    return (lhs > rhs) - (lhs < rhs);
}
| static void | |
| rdp_simplify(const float *data, int num, int dim, float eps, | |
| int restrict *kept, int restrict *count_out) { | |
| if (num <= 0) { | |
| *count_out = 0; | |
| return; | |
| } | |
| if (num == 1) { | |
| kept[0] = 0; | |
| *count_out = 1; | |
| return; | |
| } | |
| int stack_top = 0; | |
| g_stack[stack_top++] = (interval_t){0, num - 1}; | |
| int kept_size = 0; | |
| while (stack_top > 0) { | |
| interval_t iv = g_stack[--stack_top]; | |
| int start = iv.start; | |
| int end = iv.end; | |
| if (end - start < 1) { | |
| if (kept_size == 0 || kept[kept_size - 1] != start) { | |
| kept[kept_size++] = start; | |
| } | |
| continue; | |
| } | |
| float dx[DIM_MAX]; | |
| float len2 = 0.0f; | |
| for (int d = 0; d < dim; ++d) { | |
| dx[d] = data[end * dim + d] - data[start * dim + d]; | |
| len2 += dx[d] * dx[d]; | |
| } | |
| if (len2 <= 0.0f) { | |
| if (kept_size == 0 || kept[kept_size - 1] != start) { | |
| kept[kept_size++] = start; | |
| } | |
| if (kept_size == 0 || kept[kept_size - 1] != end) { | |
| kept[kept_size++] = end; | |
| } | |
| continue; | |
| } | |
| float max_dist2 = 0.0f; | |
| int max_i = start; | |
| float inv_len2 = 1.0f / len2; | |
| for (int i = start + 1; i < end; ++i) { | |
| float t_num = 0.0f; | |
| for (int d = 0; d < dim; ++d) { | |
| t_num += (data[i * dim + d] - data[start * dim + d]) * dx[d]; | |
| } | |
| float t = fmaxf(0.0f, fminf(1.0f, t_num * inv_len2)); | |
| float dist2 = 0.0f; | |
| for (int d = 0; d < dim; ++d) { | |
| float proj = data[start * dim + d] + t * dx[d]; | |
| float dif = data[i * dim + d] - proj; | |
| dist2 += dif * dif; | |
| } | |
| if (dist2 > max_dist2) { | |
| max_dist2 = dist2; | |
| max_i = i; | |
| } | |
| } | |
| float max_dist = sqrtf(max_dist2); | |
| if (max_dist <= eps) { | |
| if (kept_size == 0 || kept[kept_size - 1] != start) { | |
| kept[kept_size++] = start; | |
| } | |
| if (kept_size == 0 || kept[kept_size - 1] != end) { | |
| kept[kept_size++] = end; | |
| } | |
| } else { | |
| // Push right first, then left to simulate recursion order | |
| g_stack[stack_top++] = (interval_t){max_i, end}; | |
| g_stack[stack_top++] = (interval_t){start, max_i}; | |
| } | |
| } | |
| // sort and unique | |
| qsort(kept, kept_size, sizeof(int), cmp_int); | |
| int unique_size = 0; | |
| for (int i = 0; i < kept_size; ++i) { | |
| if (i == 0 || kept[i] != kept[i - 1]) { | |
| kept[unique_size++] = kept[i]; | |
| } | |
| } | |
| *count_out = unique_size; | |
| } | |
/*
 * Pack a quaternion (x, y, z, w) into 32 bits.
 *
 * The component with the largest magnitude is dropped and only its index is
 * stored (the first one wins ties). Each remaining component, taken in
 * index order, is stored as 10 bits: one sign bit, expressed relative to
 * the sign of the dropped component, and a 9-bit magnitude obtained by
 * scaling |value| * sqrt(2) to 0..511 with rounding and saturation.
 *
 * Resulting layout, most significant first:
 *   [index:2][sign:1 mag:9][sign:1 mag:9][sign:1 mag:9]
 */
static unsigned
qenc(const float *qin) {
    const unsigned mag_max = 511u;

    /* locate the dominant component */
    int largest = 0;
    float best = fabsf(qin[0]);
    for (int c = 1; c < 4; ++c) {
        const float cand = fabsf(qin[c]);
        if (cand > best) {
            best = cand;
            largest = c;
        }
    }

    const unsigned flip = (qin[largest] < 0.0f) ? 1u : 0u;
    unsigned packed = (unsigned)largest;
    for (int c = 0; c < 4; ++c) {
        if (c != largest) {
            const unsigned sign = ((qin[c] < 0.0f) ? 1u : 0u) ^ flip;
            unsigned mag = (unsigned)(mag_max * (fabsf(qin[c]) * 1.414213562f) + 0.5f);
            if (mag > mag_max) {
                mag = mag_max;
            }
            packed = (packed << 10u) | (sign << 9u) | mag;
        }
    }
    return packed;
}
| extern int | |
| main(int argc, char **argv) { | |
| if (argc < 3) { | |
| fprintf(stderr, "Usage: %s <gltf_file> <output_c_file> [clip_name] [joint_name1 [joint_name2 ...]]\n", argv[0]); | |
| fprintf(stderr, "If clip_name is provided, joint names follow it. Omitting joint names uses all skin joints.\n"); | |
| return 1; | |
| } | |
| char *gltf_path = argv[1]; | |
| char *out_path = argv[2]; | |
| char *clip_name = (argc > 3) ? argv[3] : "def"; | |
| int first_joint_arg = (argc > 3) ? 4 : 3; | |
| int num_joint_args = argc - first_joint_arg; | |
| if (num_joint_args > MAX_JOINTS) { | |
| fprintf(stderr, "Too many joint names provided: %d (max %d)\n", num_joint_args, MAX_JOINTS); | |
| return 1; | |
| } | |
| cgltf_data *scene = 0; | |
| cgltf_options options = { 0 }; | |
| cgltf_result res = cgltf_parse_file(&options, gltf_path, &scene); | |
| if (res != cgltf_result_success) { | |
| fprintf(stderr, "Failed to parse GLTF: %d\n", res); | |
| return 1; | |
| } | |
| res = cgltf_load_buffers(&options, scene, gltf_path); | |
| if (res != cgltf_result_success) { | |
| fprintf(stderr, "Failed to load GLTF buffers: %d\n", res); | |
| cgltf_free(scene); | |
| return 1; | |
| } | |
| cgltf_skin *skin = (scene->skins_count > 0) ? scene->skins : NULL; | |
| if (!skin || skin->joints_count == 0) { | |
| fprintf(stderr, "No skin or joints found\n"); | |
| cgltf_free(scene); | |
| return 1; | |
| } | |
| int g_all_cnt = (int)skin->joints_count; | |
| if (g_all_cnt > MAX_ALL_JOINTS) { | |
| fprintf(stderr, "Too many all joints: %d (max %d)\n", g_all_cnt, MAX_ALL_JOINTS); | |
| cgltf_free(scene); | |
| return 1; | |
| } | |
| // Build parents | |
| memset(g_parents, -1, sizeof(g_parents)); | |
| for (int k = 0; k < g_all_cnt; ++k) { | |
| cgltf_node *node_k = skin->joints[k]; | |
| for (size_t c = 0; c < node_k->children_count; ++c) { | |
| cgltf_node *ch = node_k->children[c]; | |
| int cidx = -1; | |
| for (int jj = 0; jj < g_all_cnt; ++jj) { | |
| if (skin->joints[jj] == ch) { | |
| cidx = jj; | |
| break; | |
| } | |
| } | |
| if (cidx != -1) { | |
| g_parents[cidx] = k; | |
| } | |
| } | |
| } | |
| // Selected hashes | |
| uint32_t selected_hashes[MAX_JOINTS] = {0}; | |
| for (int s = 0; s < num_joint_args; ++s) { | |
| selected_hashes[s] = fnv1a_32(argv[first_joint_arg + s]); | |
| } | |
| // Map to selected | |
| int sel_cnt = 0; | |
| unsigned long long found[MAX_JOINTS/64] = {0}; | |
| if (num_joint_args == 0) { | |
| for (int aj = 0; aj < g_all_cnt; ++aj) { | |
| g_bone_indices[aj] = aj; | |
| sel_cnt++; | |
| } | |
| } else { | |
| for (int aj = 0; aj < g_all_cnt; ++aj) { | |
| const char *n = skin->joints[aj]->name ? skin->joints[aj]->name : ""; | |
| uint32_t h = fnv1a_32(n); | |
| for (int s = 0; s < num_joint_args; ++s) { | |
| if (selected_hashes[s] == h && !(found[s/64] & (1llu << (s & 63)))) { | |
| found[s/64] = (1llu << (s & 63)); | |
| g_bone_indices[sel_cnt] = aj; | |
| sel_cnt++; | |
| break; | |
| } | |
| } | |
| } | |
| int missing = 0; | |
| for (int s = 0; s < num_joint_args; ++s) { | |
| if (!(found[s/64] & (1llu << (s & 63)))) { | |
| missing++; | |
| } | |
| } | |
| if (missing > 0) { | |
| fprintf(stderr, "Warning: %d skeleton joint names not found in skin\n", missing); | |
| } | |
| } | |
| int joint_cnt = sel_cnt; | |
| if (joint_cnt == 0 || joint_cnt > MAX_JOINTS) { | |
| fprintf(stderr, "Invalid selected joint count: %d (must be 1-%d)\n", joint_cnt, MAX_JOINTS); | |
| cgltf_free(scene); | |
| return 1; | |
| } | |
| if (num_joint_args > 0) { | |
| printf("Using %d selected joints (out of %d provided; %d total in skin)\n", joint_cnt, num_joint_args, g_all_cnt); | |
| } | |
| cgltf_animation *anim = 0; | |
| size_t clip_len = strlen(clip_name); | |
| if (clip_len > 0) { | |
| for (size_t s = 0; s < scene->animations_count; ++s) { | |
| cgltf_animation *candidate = &scene->animations[s]; | |
| if (candidate->name.length == clip_len && | |
| strncmp(candidate->name.data, clip_name, clip_len) == 0) { | |
| anim = candidate; | |
| break; | |
| } | |
| } | |
| } else if (scene->animations_count > 0) { | |
| anim = &scene->animations[0]; | |
| } | |
| if (!anim) { | |
| fprintf(stderr, "No animation available\n"); | |
| cgltf_free(scene); | |
| return 1; | |
| } | |
| double duration_sec = 0.0; | |
| int has_anim_data = 0; | |
| for (size_t s = 0; s < anim->samplers_count; ++s) { | |
| cgltf_accessor *times_acc = anim->samplers[s].input; | |
| if (times_acc && times_acc->count > 0) { | |
| has_anim_data = 1; | |
| float last_time_f; | |
| cgltf_accessor_read_float(times_acc, times_acc->count - 1, 0, &last_time_f); | |
| double last = (double)last_time_f; | |
| if (last > duration_sec) { | |
| duration_sec = last; | |
| } | |
| } | |
| } | |
| if (!has_anim_data || duration_sec <= 0.0) { | |
| duration_sec = 0.0; | |
| } | |
| double fps = 60.0; | |
| int frame_cnt = 1; | |
| if (duration_sec > 0.0) { | |
| frame_cnt = (int)roundf(duration_sec * fps) + 1; | |
| if (frame_cnt > MAX_FRAMES) { | |
| frame_cnt = MAX_FRAMES; | |
| } | |
| } | |
| double dt_sec = (frame_cnt > 1) ? duration_sec / (frame_cnt - 1.0) : 0.0; | |
| for (int i = 0; i < frame_cnt; ++i) { | |
| double time_sec = (i == frame_cnt - 1) ? duration_sec : i * dt_sec; | |
| // Compute all local TRS | |
| float all_local_pos[MAX_ALL_JOINTS * 3]; | |
| float all_local_rot[MAX_ALL_JOINTS * 4]; | |
| float all_local_scl[MAX_ALL_JOINTS * 3]; | |
| for (int aj = 0; aj < g_all_cnt; ++aj) { | |
| cgltf_node *node = skin->joints[aj]; | |
| transform_t xf; | |
| eval_transform(anim, node, time_sec, &xf); | |
| int off = aj * 3; | |
| all_local_pos[off + 0] = xf.tx; | |
| all_local_pos[off + 1] = xf.ty; | |
| all_local_pos[off + 2] = xf.tz; | |
| off = aj * 4; | |
| all_local_rot[off + 0] = xf.rx; | |
| all_local_rot[off + 1] = xf.ry; | |
| all_local_rot[off + 2] = xf.rz; | |
| all_local_rot[off + 3] = xf.rw; | |
| off = aj * 3; | |
| all_local_scl[off + 0] = xf.sx; | |
| all_local_scl[off + 1] = xf.sy; | |
| all_local_scl[off + 2] = xf.sz; | |
| } | |
| // Compute globals | |
| { | |
| mat4 id; | |
| mat4_set_identity(id); | |
| // Set root globals | |
| int all_cnt = (int)skin->joints_count; | |
| for (int r = 0; r < all_cnt; ++r) { | |
| if (g_parents[r] != -1) continue; | |
| float lpos[3] = {all_local_pos[r * 3 + 0], all_local_pos[r * 3 + 1], all_local_pos[r * 3 + 2]}; | |
| float lrot[4] = {all_local_rot[r * 4 + 0], all_local_rot[r * 4 + 1], all_local_rot[r * 4 + 2], all_local_rot[r * 4 + 3]}; | |
| float lscl[3] = {all_local_scl[r * 3 + 0], all_local_scl[r * 3 + 1], all_local_scl[r * 3 + 2]}; | |
| mat4 local_m; | |
| mat4_from_trs(lpos, lrot, lscl, local_m); | |
| mat4 root_g; | |
| mat4_mul(id, local_m, root_g); | |
| memcpy(g_globals + r * MAT4_SIZE, root_g, sizeof(mat4)); | |
| } | |
| // Iterative DFS traversal using stack to simulate recursion | |
| int stack[MAX_ALL_JOINTS]; | |
| int stack_top = 0; | |
| for (int r = 0; r < all_cnt; ++r) { | |
| if (g_parents[r] == -1) { | |
| stack[stack_top++] = r; | |
| } | |
| } | |
| while (stack_top > 0) { | |
| int idx = stack[--stack_top]; | |
| // Process children in reverse order to match recursion depth-first order | |
| cgltf_node *node = skin->joints[idx]; | |
| for (int cc = (int)node->children_count - 1; cc >= 0; --cc) { | |
| cgltf_node *chn = node->children[cc]; | |
| int cidx = -1; | |
| for (int jj = 0; jj < all_cnt; ++jj) { | |
| if (skin->joints[jj] == chn) { | |
| cidx = jj; | |
| break; | |
| } | |
| } | |
| if (cidx == -1) { | |
| continue; | |
| } | |
| float lpos[3] = {{all_local_pos[cidx * 3 + 0], all_local_pos[cidx * 3 + 1], all_local_pos[cidx * 3 + 2]}; | |
| float lrot[4] = {all_local_rot[cidx * 4 + 0], all_local_rot[cidx * 4 + 1], all_local_rot[cidx * 4 + 2], all_local_rot[cidx * 4 + 3]}; | |
| float lscl[3] = {all_local_scl[cidx * 3 + 0], all_local_scl[cidx * 3 + 1], all_local_scl[cidx * 3 + 2]}; | |
| mat4 local_m; | |
| mat4_from_trs(lpos, lrot, lscl, local_m); | |
| mat4 parent_g; | |
| memcpy(parent_g, g_globals + idx * MAT4_SIZE, sizeof(mat4)); | |
| mat4 g; | |
| mat4_mul(parent_g, local_m, g); | |
| memcpy(g_globals + cidx * MAT4_SIZE, g, sizeof(mat4)); | |
| // Push child to stack (safe since MAX_ALL_JOINTS limit) | |
| stack[stack_top++] = cidx; | |
| } | |
| } | |
| } | |
| // Back-compute locals for selected | |
| for (int j = 0; j < joint_cnt; ++j) { | |
| int idx = g_bone_indices[j]; | |
| int off = j * frame_cnt + i; | |
| mat4 g; | |
| memcpy(g, g_globals + idx * MAT4_SIZE, sizeof(mat4)); | |
| float lpos[3], lrot[4], lscl[3]; | |
| if (g_parents[idx] == -1) { | |
| mat4_decompose(g, lpos, lrot, lscl); | |
| } else { | |
| int pidx = g_parents[idx]; | |
| mat4 pg; | |
| memcpy(pg, g_globals + pidx * MAT4_SIZE, sizeof(mat4)); | |
| mat4 parent_inv; | |
| mat4_invert(pg, parent_inv); | |
| mat4 local_m; | |
| mat4_mul(parent_inv, g, local_m); | |
| mat4_decompose(local_m, lpos, lrot, lscl); | |
| } | |
| g_pos_x[off] = lpos[0]; | |
| g_pos_y[off] = lpos[1]; | |
| g_pos_z[off] = lpos[2]; | |
| g_rot_x[off] = lrot[0]; | |
| g_rot_y[off] = lrot[1]; | |
| g_rot_z[off] = lrot[2]; | |
| g_rot_w[off] = lrot[3]; | |
| g_scl_s[off] = (lscl[0] + lscl[1] + lscl[2]) / 3.0f; | |
| } | |
| } | |
| // Simplify | |
| float eps_pos = 0.01f; | |
| float eps_rot = 0.02f; | |
| float eps_scl = 0.001f; | |
| memset(g_used_pos_bits.bits, 0, sizeof(g_used_pos_bits.bits)); | |
| memset(g_used_rot_bits.bits, 0, sizeof(g_used_rot_bits.bits)); | |
| memset(g_used_scl_bits.bits, 0, sizeof(g_used_scl_bits.bits)); | |
| for (int j = 0; j < joint_cnt; ++j) { | |
| // Position | |
| for (int f = 0; f < frame_cnt; ++f) { | |
| int off = j * frame_cnt + f; | |
| g_tmp_pos[f * 3 + 0] = g_pos_x[off]; | |
| g_tmp_pos[f * 3 + 1] = g_pos_y[off]; | |
| g_tmp_pos[f * 3 + 2] = g_pos_z[off]; | |
| } | |
| int kept_count; | |
| rdp_simplify(g_tmp_pos, frame_cnt, 3, eps_pos, g_kept, &kept_count); | |
| for (int kk = 0; kk < kept_count; ++kk) { | |
| set_bit(&g_used_pos_bits, g_kept[kk]); | |
| } | |
| // Rotation | |
| for (int f = 0; f < frame_cnt; ++f) { | |
| int off = j * frame_cnt + f; | |
| g_tmp_rot[f * 4 + 0] = g_rot_x[off]; | |
| g_tmp_rot[f * 4 + 1] = g_rot_y[off]; | |
| g_tmp_rot[f * 4 + 2] = g_rot_z[off]; | |
| g_tmp_rot[f * 4 + 3] = g_rot_w[off]; | |
| } | |
| rdp_simplify(g_tmp_rot, frame_cnt, 4, eps_rot, g_kept, &kept_count); | |
| for (int kk = 0; kk < kept_count; ++kk) { | |
| set_bit(&g_used_rot_bits, g_kept[kk]); | |
| } | |
| // Scale | |
| for (int f = 0; f < frame_cnt; ++f) { | |
| int off = j * frame_cnt + f; | |
| g_tmp_scl[f] = g_scl_s[off]; | |
| } | |
| rdp_simplify(g_tmp_scl, frame_cnt, 1, eps_scl, g_kept, &kept_count); | |
| for (int kk = 0; kk < kept_count; ++kk) { | |
| set_bit(&g_used_scl_bits, g_kept[kk]); | |
| } | |
| } | |
| // Build key lists | |
| int num_pos_keys = 0; | |
| for (int f = 0; f < frame_cnt; ++f) { | |
| if (test_bit(&g_used_pos_bits, f)) { | |
| g_key_pos_list[num_pos_keys++] = f; | |
| } | |
| } | |
| int num_rot_keys = 0; | |
| for (int f = 0; f < frame_cnt; ++f) { | |
| if (test_bit(&g_used_rot_bits, f)) { | |
| g_key_rot_list[num_rot_keys++] = f; | |
| } | |
| } | |
| int num_scl_keys = 0; | |
| for (int f = 0; f < frame_cnt; ++f) { | |
| if (test_bit(&g_used_scl_bits, f)) { | |
| g_key_scl_list[num_scl_keys++] = f; | |
| } | |
| } | |
| // Compute frame_to_key | |
| for (int i = 0; i < frame_cnt; ++i) { | |
| int k = num_pos_keys - 1; | |
| while (k >= 0 && g_key_pos_list[k] > i) { | |
| --k; | |
| } | |
| g_frame_to_key_pos[i] = (unsigned short)(k < 0 ? 0 : k); | |
| k = num_rot_keys - 1; | |
| while (k >= 0 && g_key_rot_list[k] > i) { | |
| --k; | |
| } | |
| g_frame_to_key_rot[i] = (unsigned short)(k < 0 ? 0 : k); | |
| k = num_scl_keys - 1; | |
| while (k >= 0 && g_key_scl_list[k] > i) { | |
| --k; | |
| } | |
| g_frame_to_key_scl[i] = (unsigned short)(k < 0 ? 0 : k); | |
| } | |
| // Compute quantization params | |
| float pos_scl_val = 1.0f; | |
| float scl_scl_val = 1.0f; | |
| float rot_scl_val = 1.0f; | |
| float pos_off = 0.0f; | |
| float scl_off = 0.0f; | |
| float rot_off = 0.0f; | |
| if (num_pos_keys > 0) { | |
| float pmin = INFINITY; | |
| float pmax = -INFINITY; | |
| for (int kk = 0; kk < num_pos_keys; ++kk) { | |
| int f = g_key_pos_list[kk]; | |
| for (int j = 0; j < joint_cnt; ++j) { | |
| int off = j * frame_cnt + f; | |
| pmin = fminf(pmin, g_pos_x[off]); | |
| pmin = fminf(pmin, g_pos_y[off]); | |
| pmin = fminf(pmin, g_pos_z[off]); | |
| pmax = fmaxf(pmax, g_pos_x[off]); | |
| pmax = fmaxf(pmax, g_pos_y[off]); | |
| pmax = fmaxf(pmax, g_pos_z[off]); | |
| } | |
| } | |
| float range = pmax - pmin; | |
| pos_scl_val = (range > 0.0f) ? range / 65535.0f : 1.0f; | |
| pos_off = pmin; | |
| } | |
| if (num_scl_keys > 0) { | |
| float smin = INFINITY; | |
| float smax = -INFINITY; | |
| for (int kk = 0; kk < num_scl_keys; ++kk) { | |
| int f = g_key_scl_list[kk]; | |
| for (int j = 0; j < joint_cnt; ++j) { | |
| int off = j * frame_cnt + f; | |
| smin = fminf(smin, g_scl_s[off]); | |
| smax = fmaxf(smax, g_scl_s[off]); | |
| } | |
| } | |
| float range = smax - smin; | |
| scl_scl_val = (range > 0.0f) ? range / 65535.0f : 1.0f; | |
| scl_off = smin; | |
| } | |
| // Generate keys | |
| for (int kk = 0; kk < num_pos_keys; ++kk) { | |
| int f = g_key_pos_list[kk]; | |
| for (int j = 0; j < joint_cnt; ++j) { | |
| int idx = kk * joint_cnt + j; | |
| int off = j * frame_cnt + f; | |
| float valx = g_pos_x[off]; | |
| float qx = (valx - pos_off) / pos_scl_val; | |
| g_keys_pos_x[idx] = (unsigned short)roundf(fmaxf(0.0f, fminf(65535.0f, qx))); | |
| float valy = g_pos_y[off]; | |
| float qy = (valy - pos_off) / pos_scl_val; | |
| g_keys_pos_y[idx] = (unsigned short)roundf(fmaxf(0.0f, fminf(65535.0f, qy))); | |
| float valz = g_pos_z[off]; | |
| float qz = (valz - pos_off) / pos_scl_val; | |
| g_keys_pos_z[idx] = (unsigned short)roundf(fmaxf(0.0f, fminf(65535.0f, qz))); | |
| } | |
| } | |
| for (int kk = 0; kk < num_scl_keys; ++kk) { | |
| int f = g_key_scl_list[kk]; | |
| for (int j = 0; j < joint_cnt; ++j) { | |
| int idx = kk * joint_cnt + j; | |
| int off = j * frame_cnt + f; | |
| float vals = g_scl_s[off]; | |
| float qs = (vals - scl_off) / scl_scl_val; | |
| g_keys_scl[idx] = (unsigned short)roundf(fmaxf(0.0f, fminf(65535.0f, qs))); | |
| } | |
| } | |
| for (int kk = 0; kk < num_rot_keys; ++kk) { | |
| int f = g_key_rot_list[kk]; | |
| for (int j = 0; j < joint_cnt; ++j) { | |
| int idx = kk * joint_cnt + j; | |
| int off = j * frame_cnt + f; | |
| float qin[4] = {g_rot_x[off], g_rot_y[off], g_rot_z[off], g_rot_w[off]}; | |
| g_keys_rot[idx] = qenc(qin); | |
| } | |
| } | |
| // Output to C file | |
| FILE *out = fopen(out_path, "w"); | |
| if (!out) { | |
| perror("Failed to open output file"); | |
| cgltf_free(scene); | |
| return 1; | |
| } | |
| size_t total_pos = (size_t)num_pos_keys * joint_cnt; | |
| fprintf(out, "static const unsigned short anm_%s_pos_x[] = {\n", clip_name); | |
| for (size_t ii = 0; ii < total_pos; ii += 8) { | |
| bool line_start = true; | |
| for (int jj = 0; jj < 8 && (ii + jj) < total_pos; ++jj) { | |
| if (!line_start) fputc(' ', out); | |
| fprintf(out, "0x%04x,", g_keys_pos_x[ii + jj]); | |
| line_start = false; | |
| } | |
| fputc('\n', out); | |
| } | |
| fprintf(out, "};\n\n"); | |
| fprintf(out, "static const unsigned short anm_%s_pos_y[] = {\n", clip_name); | |
| for (size_t ii = 0; ii < total_pos; ii += 8) { | |
| bool line_start = true; | |
| for (int jj = 0; jj < 8 && (ii + jj) < total_pos; ++jj) { | |
| if (!line_start) fputc(' ', out); | |
| fprintf(out, "0x%04x,", g_keys_pos_y[ii + jj]); | |
| line_start = false; | |
| } | |
| fputc('\n', out); | |
| } | |
| fprintf(out, "};\n\n"); | |
| fprintf(out, "static const unsigned short anm_%s_pos_z[] = {\n", clip_name); | |
| for (size_t ii = 0; ii < total_pos; ii += 8) { | |
| bool line_start = true; | |
| for (int jj = 0; jj < 8 && (ii + jj) < total_pos; ++jj) { | |
| if (!line_start) fputc(' ', out); | |
| fprintf(out, "0x%04x,", g_keys_pos_z[ii + jj]); | |
| line_start = false; | |
| } | |
| fputc('\n', out); | |
| } | |
| fprintf(out, "};\n\n"); | |
| size_t total_rot = (size_t)num_rot_keys * joint_cnt; | |
| fprintf(out, "static const unsigned anm_%s_rot[] = {\n", clip_name); | |
| for (size_t ii = 0; ii < total_rot; ii += 8) { | |
| bool line_start = true; | |
| for (int jj = 0; jj < 8 && (ii + jj) < total_rot; ++jj) { | |
| if (!line_start) { | |
| fputc(' ', out); | |
| } | |
| fprintf(out, "0x%08x,", g_keys_rot[ii + jj]); | |
| line_start = false; | |
| } | |
| fputc('\n', out); | |
| } | |
| fprintf(out, "};\n\n"); | |
| size_t total_scl = (size_t)num_scl_keys * joint_cnt; | |
| fprintf(out, "static const unsigned short anm_%s_scl[] = {\n", clip_name); | |
| for (size_t ii = 0; ii < total_scl; ii += 8) { | |
| bool line_start = true; | |
| for (int jj = 0; jj < 8 && (ii + jj) < total_scl; ++jj) { | |
| if (!line_start) { | |
| fputc(' ', out); | |
| } | |
| fprintf(out, "0x%04x,", g_keys_scl[ii + jj]); | |
| line_start = false; | |
| } | |
| fputc('\n', out); | |
| } | |
| fprintf(out, "};\n\n"); | |
| fprintf(out, "static const unsigned short anm_%s_frame_to_key_pos[] = {\n", clip_name); | |
| for (int ii = 0; ii < frame_cnt; ii += 16) { | |
| bool line_start = true; | |
| for (int jj = 0; jj < 16 && (ii + jj) < frame_cnt; ++jj) { | |
| if (!line_start) { | |
| fputc(' ', out); | |
| } | |
| fprintf(out, "%hu,", g_frame_to_key_pos[ii + jj]); | |
| line_start = false; | |
| } | |
| fputc('\n', out); | |
| } | |
| fprintf(out, "};\n\n"); | |
| fprintf(out, "static const unsigned short anm_%s_frame_to_key_rot[] = {\n", clip_name); | |
| for (int ii = 0; ii < frame_cnt; ii += 16) { | |
| bool line_start = true; | |
| for (int jj = 0; jj < 16 && (ii + jj) < frame_cnt; ++jj) { | |
| if (!line_start) { | |
| fputc(' ', out); | |
| } | |
| fprintf(out, "%hu,", g_frame_to_key_rot[ii + jj]); | |
| line_start = false; | |
| } | |
| fputc('\n', out); | |
| } | |
| fprintf(out, "};\n\n"); | |
| fprintf(out, "static const unsigned short anm_%s_frame_to_key_scl[] = {\n", clip_name); | |
| for (int ii = 0; ii < frame_cnt; ii += 16) { | |
| bool line_start = true; | |
| for (int jj = 0; jj < 16 && (ii + jj) < frame_cnt; ++jj) { | |
| if (!line_start) { | |
| fputc(' ', out); | |
| } | |
| fprintf(out, "%hu,", g_frame_to_key_scl[ii + jj]); | |
| line_start = false; | |
| } | |
| fputc('\n', out); | |
| } | |
| fprintf(out, "};\n\n"); | |
| fprintf(out, "static const struct anm_clip anm_%s_clip = {\n", clip_name); | |
| fprintf(out, " .joint_cnt = %d,\n", joint_cnt); | |
| fprintf(out, " .frame_cnt = %d,\n", frame_cnt); | |
| fprintf(out, " .off = {%.6ff, %.6ff, %.6ff},\n", pos_off, rot_off, scl_off); | |
| fprintf(out, " .scl = {%.6ff, %.6ff, %.6ff},\n", pos_scl_val, rot_scl_val, scl_scl_val); | |
| fprintf(out, " .keys = {\n"); | |
| fprintf(out, " .pos_x = anm_%s_pos_x,\n", clip_name); | |
| fprintf(out, " .pos_y = anm_%s_pos_y,\n", clip_name); | |
| fprintf(out, " .pos_z = anm_%s_pos_z,\n", clip_name); | |
| fprintf(out, " .rot_pos = anm_%s_rot,\n", clip_name); | |
| fprintf(out, " .scl_s = anm_%s_scl,\n", clip_name); | |
| fprintf(out, " },\n"); | |
| fprintf(out, " .blks = {\n"); | |
| fprintf(out, " .frame_to_key_pos = anm_%s_frame_to_key_pos,\n", clip_name); | |
| fprintf(out, " .frame_to_key_rot = anm_%s_frame_to_key_rot,\n", clip_name); | |
| fprintf(out, " .frame_to_key_scl = anm_%s_frame_to_key_scl,\n", clip_name); | |
| fprintf(out, " },\n"); | |
| fprintf(out, "};\n"); | |
| fclose(out); | |
| cgltf_free(scene); | |
| cgltf_options_destroy(&options); | |
| printf("Generated animation clip '%s' with %d frames, pos_keys=%d, rot_keys=%d, scl_keys=%d\n", | |
| clip_name, frame_cnt, num_pos_keys, num_rot_keys, num_scl_keys); | |
| return 0; | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #include "cgltf.h" | |
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <math.h> | |
| #include <string.h> | |
| #include <assert.h> | |
| #include <float.h> | |
| #include <stdbool.h> | |
| #include <stdint.h> | |
| #include <ctype.h> | |
| #define MAX_JOINTS 256 | |
| typedef float mat4[16]; | |
/*
 * 32-bit FNV-1a hash of a NUL-terminated string.
 *
 * str: string to hash; the terminating NUL is not hashed. A NULL pointer is
 *      treated like the empty string (glTF node names are optional, so the
 *      caller may hand in a missing name).
 *
 * Returns the hash; the empty/NULL string yields the offset basis 0x811c9dc5.
 */
static uint32_t
fnv1a_32(const char *str) {
  uint32_t hash = 0x811c9dc5u; /* FNV offset basis */
  if (str == NULL) {
    return hash;
  }
  /* hash raw bytes as unsigned so chars >= 0x80 are not sign-extended */
  for (const unsigned char *s = (const unsigned char *)str; *s != '\0'; ++s) {
    hash ^= *s;
    hash *= 0x01000193u; /* FNV prime, wraps modulo 2^32 */
  }
  return hash;
}
| static void | |
| mat4_id(mat4 m) { | |
| m[0] = m[5] = m[10] = m[15] = 1.0f; | |
| m[1] = m[2] = m[3] = m[4] = m[6] = m[7] = 0.0f; | |
| m[8] = m[9] = m[11] = m[12] = m[13] = m[14] = 0.0f; | |
| } | |
/*
 * Multiply two flat 4x4 matrices: out[i*4 + j] = sum_k a[i*4 + k] * b[k*4 + j].
 *
 * a, b: input matrices, 16 floats each.
 * out:  receives the 16-float product; it must not overlap a or b because the
 *       inputs are still being read while the result is written.
 *
 * The parameters are spelled as restrict-qualified pointers rather than
 * `mat4 restrict`: applying `restrict` to the array typedef would qualify the
 * float element type, which is not a pointer and therefore not valid C.
 * A `mat4` argument decays to `float *`, so existing callers are unaffected.
 */
static void
mat4_mul(const float *restrict a, const float *restrict b, float *restrict out) {
  for (int i = 0; i < 4; ++i) {
    for (int j = 0; j < 4; ++j) {
      float acc = 0.0f;
      for (int k = 0; k < 4; ++k) {
        acc += a[i * 4 + k] * b[k * 4 + j];
      }
      out[i * 4 + j] = acc;
    }
  }
}
/*
 * Invert an affine 4x4 matrix.
 *
 * Only elements 0-2, 4-6, 8-10 (the 3x3 block) and 12-14 of m are read;
 * m[3], m[7], m[11] and m[15] are ignored and the result always carries
 * 0, 0, 0, 1 in those slots.
 *
 * m:   input matrix, 16 floats.
 * out: receives the inverse, 16 floats; must not overlap m.
 *
 * Returns 1 on success. Returns 0 and stores the identity matrix in out when
 * the determinant of the 3x3 block has magnitude below 1e-8 or is NaN.
 *
 * The parameters are restrict-qualified pointers instead of `mat4 restrict`,
 * since `restrict` cannot legally qualify the float elements of the array
 * typedef; `mat4` arguments decay to `float *`, so callers are unaffected.
 */
static int
mat4_try_invert(const float *restrict m, float *restrict out) {
  /* cofactors along the first three elements, shared by det and the inverse */
  const float c0 = m[5] * m[10] - m[6] * m[9];
  const float c1 = m[9] * m[2] - m[1] * m[10];
  const float c2 = m[1] * m[6] - m[5] * m[2];
  const float det = m[0] * c0 + m[4] * c1 + m[8] * c2;

  /* negated >= so that a NaN determinant is rejected as well */
  if (!(fabsf(det) >= 1e-8f)) {
    for (int i = 0; i < 16; ++i) {
      out[i] = (i % 5 == 0) ? 1.0f : 0.0f; /* fallback: identity */
    }
    return 0;
  }
  const float idet = 1.0f / det;

  /* inverse of the 3x3 block: adjugate scaled by 1/det */
  out[0] = c0 * idet;
  out[1] = c1 * idet;
  out[2] = c2 * idet;
  out[4] = (m[6] * m[8] - m[4] * m[10]) * idet;
  out[5] = (m[0] * m[10] - m[8] * m[2]) * idet;
  out[6] = (m[4] * m[2] - m[0] * m[6]) * idet;
  out[8] = (m[4] * m[9] - m[5] * m[8]) * idet;
  out[9] = (m[8] * m[1] - m[0] * m[9]) * idet;
  out[10] = (m[0] * m[5] - m[4] * m[1]) * idet;
  out[3] = out[7] = out[11] = 0.0f;
  out[15] = 1.0f;

  /* elements 12-14: negated inverse 3x3 block applied to m[12..14] */
  out[12] = -(out[0] * m[12] + out[4] * m[13] + out[8] * m[14]);
  out[13] = -(out[1] * m[12] + out[5] * m[13] + out[9] * m[14]);
  out[14] = -(out[2] * m[12] + out[6] * m[13] + out[10] * m[14]);
  return 1;
}
| static void | |
| mat4_compose(const float *restrict rot, const float *restrict pos, | |
| const float *restrict scl, mat4 out) { | |
| float xx = rot[0] * rot[0]; | |
| float xy = rot[0] * rot[1]; | |
| float xz = rot[0] * rot[2]; | |
| float xw = rot[0] * rot[3]; | |
| float yy = rot[1] * rot[1]; | |
| float yz = rot[1] * rot[2]; | |
| float yw = rot[1] * rot[3]; | |
| float zz = rot[2] * rot[2]; | |
| float zw = rot[2] * rot[3]; | |
| float rm00 = 1.0f - 2.0f * (yy + zz); | |
| float rm01 = 2.0f * (xy - zw); | |
| float rm02 = 2.0f * (xz + yw); | |
| float rm10 = 2.0f * (xy + zw); | |
| float rm11 = 1.0f - 2.0f * (xx + zz); | |
| float rm12 = 2.0f * (yz - xw); | |
| float rm20 = 2.0f * (xz - yw); | |
| float rm21 = 2.0f * (yz + xw); | |
| float rm22 = 1.0f - 2.0f * (xx + yy); | |
| out[0] = rm00 * scl[0]; | |
| out[1] = rm10 * scl[0]; | |
| out[2] = rm20 * scl[0]; | |
| out[4] = rm01 * scl[1]; | |
| out[5] = rm11 * scl[1]; | |
| out[6] = rm21 * scl[1]; | |
| out[8] = rm02 * scl[2]; | |
| out[9] = rm12 * scl[2]; | |
| out[10] = rm22 * scl[2]; | |
| out[3] = out[7] = out[11] = 0.0f; | |
| out[12] = pos[0]; | |
| out[13] = pos[1]; | |
| out[14] = pos[2]; | |
| out[15] = 1.0f; | |
| } | |
| extern int | |
| main(int argc, char **argv) { | |
| if (argc < 4) { | |
| fprintf(stderr, "Usage: %s <gltf_file> <output_c_file> <skeleton_name> [skin_name]\n", argv[0]); | |
| return 1; | |
| } | |
| const char *gltf_path = argv[1]; | |
| const char *out_path = argv[2]; | |
| const char *skeleton_name = argv[3]; | |
| const char *skin_name = (argc > 4) ? argv[4] : 0; | |
| /* load and parse the gltf file */ | |
| cgltf_options opt = {0}; | |
| cgltf_result res = cgltf_parse_file(&opt, gltf_path, &data); | |
| if (res != cgltf_result_success) { | |
| fprintf(stderr, "[GLTF]: Failed to parse GLTF: %d\n", res); | |
| return 1; | |
| } | |
| cgltf_data *data = 0; | |
| res = cgltf_load_buffers(&opt, data, gltf_path); | |
| if (res != cgltf_result_success) { | |
| fprintf(stderr, "[GLTF]: Failed to load GLTF buffers: %d\n", res); | |
| return 1; | |
| } | |
| /* try to find the skin */ | |
| const cgltf_skin *skn = 0; | |
| if (skin_name && skin_name[0] != '\0') { | |
| for (size_t s = 0u; s < data->skins_count; ++s) { | |
| const cgltf_skin *cur = &data->skins[s]; | |
| if (cur->name && !strcmp(cur->name, skin_name)) { | |
| skn = cur; | |
| break; | |
| } | |
| } | |
| if (!skn) { | |
| fprintf(stderr, "[GLTF]: Specified skin '%s' not found; falling back to first skin.\n", skin_name); | |
| } | |
| } | |
| if (!skn && data->skins_count > 0) { | |
| skn = &data->skins[0]; | |
| } | |
| if (!skn || skn->joints_count == 0) { | |
| fprintf(stderr, "[GLTF]: No skin or joints found\n"); | |
| return 1; | |
| } | |
| int skin_jnt_cnt = (int)skn->joints_count; | |
| if (skin_jnt_cnt > MAX_JOINTS) { | |
| fprintf(stderr, "[GLTF]: Too many joints: %d (max %d)\n", skin_jnt_cnt, MAX_JOINTS); | |
| return 1; | |
| } | |
| /* try to find skin root node */ | |
| const cgltf_node *root = 0; | |
| for (size_t i = 0; i < skin_jnt_cnt; ++i) { | |
| const cgltf_node *node = skn->joints[i]; | |
| if (!node->parent) { | |
| root = node; | |
| break; | |
| } | |
| } | |
| if (!root) { | |
| for (size_t i = 0; i < skin_jnt_cnt; ++i) { | |
| const cgltf_node *node = skn->joints[i]; | |
| if (node->parent && !node->parent->parent) { | |
| root = node->parent; | |
| break; | |
| } | |
| } | |
| } | |
| if (!root) { | |
| fprintf(stderr, "[GLTF]: Couldn't find the root node in skin: '%s'\n", skin_name); | |
| return 1; | |
| } | |
| /* iterate over hierarchy */ | |
| int bone_cnt = 0; | |
| struct skel_bone { | |
| int parent; | |
| unsigned hash; | |
| const char *name; | |
| mat4 local; | |
| mat4 skin_to_world; | |
| } | |
| skeleton[MAX_JOINTS]; | |
| { | |
| int stk_top = 0; | |
| struct stk_elm { | |
| int parent; | |
| const cgltf_node *node; | |
| mat4 skin_to_world; | |
| } stk[MAX_JOINTS]; | |
| { | |
| stk[stk_top].parent = -1; | |
| stk[stk_top].node = root; | |
| mat4_id(stk[stk_top].skin_to_world); | |
| stk_top++; | |
| } | |
| mat4 node_to_world[MAX_JOINTS]; | |
| while (stk_top > 1) { | |
| const struct stk_elm elm = stk[--stk_top]; | |
| const cgltf_node *cur = elm->node; | |
| /* skip non-skin joints */ | |
| size_t skin_jnt_idx = 0; | |
| for (; skin_jnt_idx < skin_jnt_cnt; ++skin_jnt_idx) { | |
| const cgltf_node *node = skn->joints[skin_jnt_idx]; | |
| if (cur == node) { | |
| break; | |
| } | |
| } | |
| if (skin_jnt_idx >= skin_jnt_cnt) { | |
| continue; | |
| } | |
| int bone_idx = bone_cnt++; | |
| struct skel_bone *bone = skeleton[bone_idx]; | |
| bone->parent = elm->parent; | |
| bone->name = cur->name; | |
| bone->hash = fnv1a_32(cur->name); | |
| mat4 jnt_to_world; | |
| if (cur->parent == -1) { | |
| memcpy(bone->local, cur->skin_to_world, sizeof(mat4)); | |
| mat4_id(jnt_to_world); | |
| } else { | |
| float pos[3] = {0,0,0}; | |
| if (node->has_translation) { | |
| pos[0] = node->translation[0]; | |
| pos[1] = node->translation[1]; | |
| pos[2] = node->translation[2]; | |
| } | |
| float rot[4] = {0,0,0,1.0f}; | |
| if (node->has_rotation) { | |
| rot[0] = node->rotation[0]; | |
| rot[1] = node->rotation[1]; | |
| rot[2] = node->rotation[2]; | |
| rot[3] = node->rotation[3]; | |
| } | |
| float scl[3] = {1.0f,1.0f,1.0f}; | |
| if (node->has_scale) { | |
| scl[0] = node->scale[0]; | |
| scl[1] = node->scale[1]; | |
| scl[2] = node->scale[2]; | |
| } | |
| mat4_compose(rot, pos, scl, bone->local); | |
| mat4_mul(bone->local, node_to_world[cur->parent], jnt_to_world); | |
| } | |
| memcpy(&node_to_world[bone_idx], jnt_to_world, sizeof(mat4)); | |
| // we want to have the skin to bone matrix so first create the world to skin matrix | |
| mat4 world_to_skin; | |
| if (!mat4_try_invert(cur->skin_to_world, world_to_skin)) { | |
| fprintf(stderr, "[GLTF]: failed to invert skin to world matrix in skin '%s' at joint '%s'\n", skin_name, bone->name); | |
| return 1; | |
| } | |
| // concatenate the bone to world and the world to skin matrices to one bone to skin matrix | |
| mat4 bone_to_skin; | |
| mat4_mul(jnt_to_world, world_to_skin, bone_to_skin); | |
| if (!mat4_try_invert(bone_to_skin, bone->skin_to_world)) { | |
| fprintf(stderr, "[GLTF]: failed to invert the bone to skin matrix in skin '%s' at joint '%s'\n", skin_name, bone->name); | |
| return 1; | |
| } | |
| // process all children (reverse to keep correct sequence through stack) | |
| for (size_t i = cur->children_count; i > 0; --i) { | |
| const struct stk_elm *elm = &stk[stk_top++]; | |
| elm->node = cur->children[i]; | |
| elm->parent = bone_idx; | |
| memcpy(elm->skin_to_world, world_to_skin, sizeof(mat4)); | |
| } | |
| } | |
| } | |
| // Output | |
| FILE *out = fopen(out_path, "w"); | |
| if (!out) { | |
| perror("Failed to open output file"); | |
| cgltf_free(data); | |
| return 1; | |
| } | |
| fprintf(out, "static const struct bone skl_%s[] = {\n", skeleton_name); | |
| for (int i = 0; i < joint_cnt; ++i) { | |
| const struct skel_bone *bone = &skeleton[i]; | |
| fprintf(out, " { .parent = %d, .hash = 0x%08x, .name = \"%s\", .skin_to_world = {", bone->parent, bone->hash, bone->name); | |
| fprintf(out, " { %.6ff, %.6ff, %.6ff },", bone->skin_to_world[0], bone->skin_to_world[1], bone->skin_to_world[2]); | |
| fprintf(out, " { %.6ff, %.6ff, %.6ff },", bone->skin_to_world[4], bone->skin_to_world[5], bone->skin_to_world[6]); | |
| fprintf(out, " { %.6ff, %.6ff, %.6ff },", bone->skin_to_world[8], bone->skin_to_world[9], bone->skin_to_world[10]); | |
| fprintf(out, " { %.6ff, %.6ff, %.6ff }", bone->skin_to_world[12], bone->skin_to_world[13], bone->skin_to_world[14]); | |
| fprintf(out, " },\n", ","); | |
| fprintf(out, " .pose = { %.6ff, %.6ff, %.6ff },", bone->local[0], bone->local[1], bone->local[2]); | |
| fprintf(out, " { %.6ff, %.6ff, %.6ff },", bone->local[4], bone->local[5], bone->local[6]); | |
| fprintf(out, " { %.6ff, %.6ff, %.6ff },", bone->local[8], bone->local[9], bone->local[10]); | |
| fprintf(out, " { %.6ff, %.6ff, %.6ff }", bone->local[12], bone->local[13], bone->local[14]); | |
| fprintf(out, " } }\n"); | |
| } | |
| fprintf(out, "};\n"); | |
| fclose(out); | |
| cgltf_free(data); | |
| cgltf_options_destroy(&opt); | |
| const char *skin_used = skin->name ? skin->name : "default"; | |
| printf("Generated skeleton '%s' with %d bones from skin '%s'\n", skeleton_name, joint_cnt, skin_used); | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment