Skip to content

Instantly share code, notes, and snippets.

@vurtun
Last active October 23, 2025 09:59
Show Gist options
  • Select an option

  • Save vurtun/bbfedf979725f57c208be6ebda5c10f5 to your computer and use it in GitHub Desktop.

Select an option

Save vurtun/bbfedf979725f57c208be6ebda5c10f5 to your computer and use it in GitHub Desktop.
Multi-Core By Default
// https://www.rfleury.com/p/multi-core-by-default
// https://github.com/EpicGamesExt/raddebugger/blob/c738768e41153b8e598ef51aa57530cf71c19880/src/base/base_entry_point.c#L179
// https://github.com/EpicGamesExt/raddebugger/blob/master/src/radbin/radbin.c#L37
#define _GNU_SOURCE // For pthread_setaffinity_np and sched.h extensions; must precede any libc include
#include <stdio.h>  // FILE, fopen, fgets, sscanf
#include <stdlib.h> // malloc, free
#include <string.h> // strtok_r
#include <pthread.h>
#ifdef __linux__
#include <sched.h> // For sched_param, SCHED_FIFO, sched_yield
#include <sys/sysinfo.h> // For fallback cpu_cnt
#endif
#ifdef __APPLE__
#include <mach/thread_policy.h> // For macOS real-time (Mach)
#include <mach/mach_init.h> // mach_thread_self
#include <sys/sysctl.h>
#endif
#define MAX_THRD_CNT 256
struct thrd_ctx {
int lane_idx;
int lane_count;
sys_barrier *barrier;
void *broadcast_memory;
};
#define sys_barrier pthread_barrier_t
/* Creates a barrier for `cnt` participants on the heap.
 * Fixed: the original called pthread_barrier_init(&barrier, ...) through an
 * UNINITIALIZED pointer (and with the wrong pointer level), then returned
 * garbage. Heap allocation gives the barrier a lifetime beyond this frame.
 * Returns NULL on allocation or init failure. Caller owns the result and
 * must release it with sys_barrier_del(). */
static sys_barrier*
sys_barrier_mk(int cnt) {
  sys_barrier *barrier = malloc(sizeof *barrier);
  if (!barrier) {
    return 0;
  }
  if (pthread_barrier_init(barrier, 0, cnt) != 0) {
    free(barrier);
    return 0;
  }
  return barrier;
}
/* Destroys and frees a barrier created by sys_barrier_mk().
 * Fixed: the original was missing the ';' after pthread_barrier_destroy
 * (compile error) and leaked the allocation made by sys_barrier_mk(). */
static void
sys_barrier_del(sys_barrier* barrier) {
  if (!barrier) {
    return; /* tolerate failed sys_barrier_mk() */
  }
  pthread_barrier_destroy(barrier);
  free(barrier); /* matches heap allocation in sys_barrier_mk() */
}
/* Blocks the calling lane until all participants have arrived.
 * The "serial thread" return flag from pthread_barrier_wait is not needed
 * by any caller, so it is deliberately discarded. */
static void
sys_barrier_wait(sys_barrier* barrier) {
  (void)pthread_barrier_wait(barrier);
}
#define lane_idx(ctx) ((ctx)->lane_idx)   /* this lane's index in [0, lane_cnt) */
#define lane_cnt(ctx) ((ctx)->lane_count) /* total number of lanes */
#define lane_sync(ctx) sys_barrier_wait((ctx)->barrier) /* block until every lane reaches this point */
/* Worker thread entry point.
 * Fixed: pthread_create requires the signature void *(*)(void *); the
 * original returned void and performed a value cast `(struct thrd_ctx)params`
 * instead of a pointer cast, neither of which compiles.
 * Best-effort raises priority to SCHED_FIFO/80, then synchronizes with the
 * other lanes at the shared barrier. Lane 0 is the designated spot for
 * broadcast setup. Always returns NULL. */
static void *
thrd_entry_point(void *params) {
  struct thrd_ctx *ctx = (struct thrd_ctx *)params;
  struct sched_param param;
  param.sched_priority = 80;
  if (pthread_setschedparam(pthread_self(), SCHED_FIFO, &param) != 0) {
    /* Best effort: typically EPERM without privileges; continue at the
       default priority rather than failing the lane. */
  }
  if (lane_idx(ctx) == 0) {
    /* lane 0: broadcast-memory setup would go here */
  }
  lane_sync(ctx);
  return 0;
}
#ifdef __linux__
/* Fills p_core_list (capacity max_cpus) with the CPU ids of performance
 * cores and returns how many entries were written.
 * Reads the hybrid-CPU sysfs cpulist (e.g. "0-7,16-23"); on non-hybrid
 * systems (file absent/unreadable/empty) falls back to treating every
 * logical CPU as a P-core.
 * Fixed: fgets() result was unchecked (empty file parsed stack garbage);
 * the fallback returned get_nprocs() even when it exceeded the number of
 * entries actually written; strtok() replaced with reentrant strtok_r(). */
static int
cpu_get_p_core_cnt(int *p_core_list, int max_cpus) {
  char buf[256];
  FILE *fp = fopen("/sys/devices/cpu_core/cpus", "r");
  if (!fp || !fgets(buf, sizeof(buf), fp)) {
    if (fp) {
      fclose(fp);
    }
    int cpu_cnt = get_nprocs();
    if (cpu_cnt > max_cpus) {
      cpu_cnt = max_cpus; /* never report more entries than were written */
    }
    for (int i = 0; i < cpu_cnt; ++i) {
      p_core_list[i] = i;
    }
    return cpu_cnt;
  }
  fclose(fp);
  /* Parse comma-separated ranges ("0-7") and singletons ("12"). */
  int count = 0, start, end;
  char *save = 0;
  char *range = strtok_r(buf, ",", &save);
  while (range && count < max_cpus) {
    if (sscanf(range, "%d-%d", &start, &end) == 2) {
      for (int i = start; i <= end && count < max_cpus; ++i) {
        p_core_list[count++] = i;
      }
    } else if (sscanf(range, "%d", &start) == 1) {
      p_core_list[count++] = start;
    }
    range = strtok_r(NULL, ",", &save);
  }
  return count;
}
#elif defined(__APPLE__)
/* Returns the number of physical performance cores on Apple silicon
 * (perflevel0), falling back to the total logical core count when the
 * perflevel sysctl is unavailable (e.g. Intel Macs).
 * Fixed: sysctlbyname's oldlenp parameter is size_t*, but the original
 * passed an int* (wrong type/width on LP64); the fallback sysctl result
 * was also used unchecked. */
static int
cpu_get_p_core_cnt(void) {
  int p_cores = 0;
  size_t size = sizeof(p_cores);
  if (sysctlbyname("hw.perflevel0.physicalcpu", &p_cores, &size, NULL, 0) != 0) {
    /* Fallback: total logical cores (treat all as P-cores). */
    int total_cores = 1;
    size_t total_size = sizeof(total_cores);
    if (sysctlbyname("hw.ncpu", &total_cores, &total_size, NULL, 0) != 0) {
      total_cores = 1; /* last resort: single-threaded */
    }
    return total_cores;
  }
  return p_cores; /* physical P-cores (e.g., 4 on M3 base) */
}
#endif
/* Spawns one elevated-priority worker thread per performance core, pins
 * each to its own P-core (best effort), synchronizes them at a barrier,
 * then joins and cleans up.
 * Fixed: called nonexistent cpu_get_p_core_count(); used undefined min()/
 * max(); Apple path referenced undefined `cpu_cnt`; pthread_setaffinity_np
 * was called twice; sys_barrier_mk()/pthread_create() failures were
 * ignored; pthread_attr was never destroyed. */
int main(void) {
  int thrd_cnt;
#ifdef __linux__
  int p_core_list[MAX_THRD_CNT];
  thrd_cnt = cpu_get_p_core_cnt(p_core_list, MAX_THRD_CNT); /* use P-cores only */
#else
  thrd_cnt = cpu_get_p_core_cnt(); /* threads = physical P-cores */
#endif
  /* Clamp to [1, MAX_THRD_CNT] without relying on undefined min()/max(). */
  if (thrd_cnt < 1) thrd_cnt = 1;
  if (thrd_cnt > MAX_THRD_CNT) thrd_cnt = MAX_THRD_CNT;

  pthread_t threads[MAX_THRD_CNT];
  struct thrd_ctx lane_ctx[MAX_THRD_CNT];
  sys_barrier *barrier = sys_barrier_mk(thrd_cnt);
  if (!barrier) {
    return 1; /* no barrier, no lane synchronization */
  }
  pthread_attr_t attr;
  {
    /* Request SCHED_FIFO priority 80; PTHREAD_EXPLICIT_SCHED overrides the
       inherited policy. May fail at create time without privileges. */
    struct sched_param param;
    pthread_attr_init(&attr);
    param.sched_priority = 80;
    pthread_attr_setschedpolicy(&attr, SCHED_FIFO);
    pthread_attr_setschedparam(&attr, &param);
    pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
  }
  for (int i = 0; i < thrd_cnt; i++) {
    lane_ctx[i].lane_idx = i;
    lane_ctx[i].lane_count = thrd_cnt;
    lane_ctx[i].barrier = barrier;
    lane_ctx[i].broadcast_memory = 0;
    if (pthread_create(&threads[i], &attr, thrd_entry_point, &lane_ctx[i]) != 0) {
      /* SCHED_FIFO creation commonly fails with EPERM: retry with default
         attributes. A second failure is fatal — already-started lanes would
         deadlock at the barrier if we launched fewer threads than cnt. */
      if (pthread_create(&threads[i], NULL, thrd_entry_point, &lane_ctx[i]) != 0) {
        return 1;
      }
    }
    /* Pin thread i to P-core i (best effort; failure is non-fatal). */
#ifdef __APPLE__
    /* Tag 0 is THREAD_AFFINITY_TAG_NULL, so give each lane tag i + 1 to
       hint the scheduler to keep lanes on separate cores. */
    thread_affinity_policy_data_t policy;
    policy.affinity_tag = i + 1;
    mach_port_t mach_thread = pthread_mach_thread_np(threads[i]);
    if (thread_policy_set(mach_thread, THREAD_AFFINITY_POLICY,
                          (thread_policy_t)&policy,
                          THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS) {
      /* non-fatal: affinity is only a scheduler hint on macOS */
    }
#elif defined(__linux__)
    cpu_set_t cpuset;
    CPU_ZERO(&cpuset);
    CPU_SET(p_core_list[i], &cpuset); /* pin to specific P-core CPU id */
    if (pthread_setaffinity_np(threads[i], sizeof(cpu_set_t), &cpuset) != 0) {
      /* non-fatal: thread stays unpinned */
    }
#endif
  }
  for (int i = 0; i < thrd_cnt; i++) {
    pthread_join(threads[i], NULL);
  }
  pthread_attr_destroy(&attr);
  sys_barrier_del(barrier);
  return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment