-
-
Save vurtun/bbfedf979725f57c208be6ebda5c10f5 to your computer and use it in GitHub Desktop.
Multi-Core By Default
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // https://www.rfleury.com/p/multi-core-by-default | |
| // https://github.com/EpicGamesExt/raddebugger/blob/c738768e41153b8e598ef51aa57530cf71c19880/src/base/base_entry_point.c#L179 | |
| // https://github.com/EpicGamesExt/raddebugger/blob/master/src/radbin/radbin.c#L37 | |
#ifdef __linux__
#define _GNU_SOURCE // For pthread_setaffinity_np and sched.h extensions; must precede the first system header
#endif
#include <pthread.h>
#include <stdio.h> // FILE, fopen, fgets, sscanf, fprintf
#include <stdlib.h> // malloc, free
#include <string.h> // strchr, strtok
#ifdef __linux__
#include <sched.h> // For sched_param, SCHED_FIFO, sched_yield
#include <sys/sysinfo.h> // For fallback cpu_cnt
#endif
#ifdef __APPLE__
#include <mach/thread_policy.h> // For macOS real-time (Mach)
#include <mach/thread_act.h> // thread_policy_set
#include <mach/mach_init.h> // mach_thread_self
#include <sys/sysctl.h>
#endif
// Upper bound on the number of worker threads ("lanes") main() will start.
#define MAX_THRD_CNT 256

// Barrier type used to synchronize lanes. It is defined ahead of
// struct thrd_ctx because the struct declares a pointer to it.
// NOTE(review): macOS does not ship pthread_barrier_t; the Apple build
// presumably needs its own barrier implementation -- confirm.
#define sys_barrier pthread_barrier_t

// Per-lane context; main() fills one of these for every worker thread and
// passes its address as the thread's start argument.
struct thrd_ctx {
  int lane_idx;           // index of this lane, 0 .. lane_count - 1
  int lane_count;         // total number of lanes started by main()
  sys_barrier *barrier;   // barrier shared by all lanes
  void *broadcast_memory; // initialized to 0 by main(); not otherwise used here
};
| static sys_barrier* | |
| sys_barrier_mk(int cnt) { | |
| sys_barrier* barrier; | |
| pthread_barrier_init(&barrier, 0, cnt); | |
| return barrier; | |
| } | |
| static void | |
| sys_barrier_del(sys_barrier* barrier) { | |
| pthread_barrier_destroy(barrier) | |
| } | |
| static void | |
| sys_barrier_wait(sys_barrier* barrier) { | |
| pthread_barrier_wait(barrier); | |
| } | |
// Lane accessors. `c` is a pointer to a lane context (struct thrd_ctx).
//   lane_idx(c)  -> this lane's index
//   lane_cnt(c)  -> total number of lanes
//   lane_sync(c) -> wait on the lane context's barrier
#define lane_idx(c) ((c)->lane_idx)
#define lane_cnt(c) ((c)->lane_count)
#define lane_sync(c) sys_barrier_wait((c)->barrier)
// Worker-thread entry point, passed to pthread_create.
//
// params: pointer to this lane's struct thrd_ctx.
// Returns: always NULL. The return type is `void *` because that is the
// start-routine signature pthread_create requires.
//
// The thread first tries to switch itself to SCHED_FIFO, then waits on the
// barrier shared by all lanes.
static void *
thrd_entry_point(void *params) {
  struct thrd_ctx *ctx = params;

  // Requested real-time priority, capped at what the platform allows for
  // SCHED_FIFO so the request is not rejected as out of range.
  struct sched_param param = {0};
  int prio_max = sched_get_priority_max(SCHED_FIFO);
  param.sched_priority = 80;
  if (prio_max != -1 && param.sched_priority > prio_max) {
    param.sched_priority = prio_max;
  }
  if (pthread_setschedparam(pthread_self(), SCHED_FIFO, &param) != 0) {
    // Best effort: pthread_setschedparam returns the error number directly
    // (it does not set errno), e.g. EPERM without privileges. The thread
    // simply keeps running with its current scheduling policy.
  }

  if (lane_idx(ctx) == 0) {
    // Lane 0 only: placeholder, no work is done here yet.
  }
  lane_sync(ctx);
  return NULL;
}
| #ifdef __linux__ | |
// Non-hybrid fallback: treats every core reported by get_nprocs() as a
// "P" core. Writes ids 0..k-1 and returns k, never more than max_cpus.
static int
cpu_list_all_cores(int *p_core_list, int max_cpus) {
  int total = get_nprocs();
  int filled = total < max_cpus ? total : max_cpus;
  for (int i = 0; i < filled; ++i) {
    p_core_list[i] = i;
  }
  return filled;
}

// Collects the CPU ids of the performance cores.
//
// p_core_list: output array receiving the CPU ids.
// max_cpus:    capacity of p_core_list.
// Returns the number of ids written (never more than max_cpus); 0 when
// p_core_list is NULL or max_cpus <= 0.
//
// The ids are read from /sys/devices/cpu_core/cpus, a comma-separated list
// of ids and ranges such as "0-7,16-23". If that file is missing, unreadable
// or yields no ids, every core is reported instead.
static int
cpu_get_p_core_cnt(int *p_core_list, int max_cpus) {
  if (!p_core_list || max_cpus <= 0) {
    return 0;
  }
  FILE *fp = fopen("/sys/devices/cpu_core/cpus", "r");
  if (!fp) {
    return cpu_list_all_cores(p_core_list, max_cpus);
  }
  char buf[256];
  char *line = fgets(buf, sizeof(buf), fp);
  fclose(fp);
  if (!line) {
    // Nothing was read, so buf holds no valid string to parse.
    return cpu_list_all_cores(p_core_list, max_cpus);
  }

  int count = 0;
  char *tok = buf;
  while (tok && count < max_cpus) {
    // Split on ',' in place; avoids strtok's hidden static state.
    char *comma = strchr(tok, ',');
    if (comma) {
      *comma = '\0';
    }
    int first = 0, last = 0;
    int fields = sscanf(tok, "%d-%d", &first, &last);
    if (fields == 1) {
      last = first; // single id, e.g. "5"
    }
    if (fields >= 1 && first >= 0 && last >= first) {
      // Stop on id == last instead of testing id <= last, so the loop
      // never increments past the final id.
      for (int id = first; count < max_cpus; ++id) {
        p_core_list[count++] = id;
        if (id == last) {
          break;
        }
      }
    }
    tok = comma ? comma + 1 : NULL;
  }
  if (count == 0) {
    return cpu_list_all_cores(p_core_list, max_cpus);
  }
  return count;
}
| #elif defined(__APPLE__) | |
// Returns the number of performance cores on macOS.
//
// Queries "hw.perflevel0.physicalcpu" first (physical P-cores, e.g., 4 on
// M3 base). If that query fails or reports nothing, falls back to the total
// logical core count from "hw.ncpu" (treat all as P). Returns at least 1.
static int
cpu_get_p_core_cnt(void) {
  int p_cores = 0;
  size_t size = sizeof(p_cores); // sysctlbyname takes a size_t *, not an int *
  if (sysctlbyname("hw.perflevel0.physicalcpu", &p_cores, &size, NULL, 0) == 0 &&
      p_cores > 0) {
    return p_cores;
  }

  int total_cores = 0;
  size = sizeof(total_cores);
  if (sysctlbyname("hw.ncpu", &total_cores, &size, NULL, 0) != 0 || total_cores < 1) {
    return 1; // both queries failed: assume a single core
  }
  return total_cores;
}
| #endif | |
| int main(void) { | |
| #ifdef __linux__ | |
| int p_core_list[MAX_THRD_CNT]; | |
| int p_core_cnt = cpu_get_p_core_count(p_core_list, MAX_THRD_CNT); | |
| int thrd_cnt = min(p_core_cnt, MAX_THRD_CNT); // Use P-cores only | |
| #else | |
| int p_core_cnt = cpu_get_p_core_count(); | |
| int thrd_cnt = min(p_core_cnt, MAX_THRD_CNT); // Threads = physical P-cores | |
| #endif | |
| pthread_t threads[MAX_THRD_CNT]; | |
| struct thrd_ctx lane_ctx[MAX_THRD_CNT]; | |
| thrd_cnt = max(1, min(thrd_cnt, MAX_THRD_CNT)); | |
| sys_barrier *barrier = sys_barrier_mk(thrd_cnt); | |
| pthread_attr_t attr; | |
| { | |
| struct sched_param param; | |
| pthread_attr_init(&attr); | |
| param.sched_priority = 80; | |
| pthread_attr_setschedpolicy(&attr, SCHED_FIFO); | |
| pthread_attr_setschedparam(&attr, ¶m); | |
| pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED); // Override parent policy | |
| } | |
| for (int i = 0; i < thrd_cnt; i++) { | |
| lane_ctx[i].lane_idx = i; | |
| lane_ctx[i].barrier = barrier; | |
| lane_ctx[i].broadcast_memory = 0; | |
| lane_ctx[i].lane_count = thrd_cnt; | |
| if (pthread_create(&threads[i], &attr, thrd_entry_point, &lane_ctx[i]) != 0) { | |
| } | |
| // Pin thread i to core i | |
| #ifdef __APPLE__ | |
| thread_affinity_policy_data_t policy; | |
| policy.affinity_tag = i % cpu_cnt; // Tag-based pinning (macOS prefers tags over masks for efficiency) | |
| mach_port_t mach_thread = pthread_mach_thread_np(threads[i]); // Get Mach port | |
| if (thread_policy_set(mach_thread, THREAD_AFFINITY_POLICY, (thread_policy_t)&policy, THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS) { | |
| // Handle error | |
| } | |
| #elif __linux__ | |
| cpu_set_t cpuset; | |
| CPU_ZERO(&cpuset); | |
| CPU_SET(p_core_list[i], &cpuset); // Pin to specific P-core CPU ID | |
| pthread_setaffinity_np(threads[i], sizeof(cpu_set_t), &cpuset); | |
| if (pthread_setaffinity_np(threads[i], sizeof(cpu_set_t), &cpuset) != 0) { | |
| // Handle error (e.g., log "Failed to pin thread %d", i) | |
| } | |
| #endif | |
| } | |
| for (int i = 0; i < thrd_cnt; i++) { | |
| pthread_join(threads[i], NULL); | |
| } | |
| sys_barrier_del(barrier); // Fixed: added ; | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment