Created
January 14, 2026 09:29
-
-
Save rrampage/92f0eb6bf56d7bb403aff069cc8f1d6b to your computer and use it in GitHub Desktop.
A userspace sandbox that uses a SOCKS proxy to restrict network access (inspired by oniux)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #define _GNU_SOURCE | |
| #include <arpa/inet.h> | |
| #include <errno.h> | |
| #include <fcntl.h> | |
| #include <linux/capability.h> | |
| #include <linux/if.h> | |
| #include <linux/if_tun.h> | |
| #include <linux/netlink.h> | |
| #include <linux/rtnetlink.h> | |
| #include <netdb.h> | |
| #include <netinet/in.h> | |
| #include <netinet/ip.h> | |
| #include <netinet/tcp.h> | |
| #include <netinet/udp.h> | |
| #include <poll.h> | |
| #include <sched.h> | |
| #include <stdio.h> | |
| #include <stdlib.h> | |
| #include <string.h> | |
| #include <sys/epoll.h> | |
| #include <sys/ioctl.h> | |
| #include <sys/mount.h> | |
| #include <sys/prctl.h> | |
| #include <sys/socket.h> | |
| #include <sys/stat.h> | |
| #include <sys/syscall.h> | |
| #include <sys/types.h> | |
| #include <sys/wait.h> | |
| #include <time.h> | |
| #include <unistd.h> | |
/* Table capacities and epoll tuning knobs. */
#define MAX_TCP 128          /* number of slots in tcp_flows[] */
#define MAX_UDP 64           /* number of slots in udp_flows[] */
#define MAX_EVENTS 64        /* presumably the epoll_wait() batch size; the use site is not in this part of the file */
#define EPOLL_TIMEOUT_MS 100 /* presumably the epoll_wait() timeout in ms; the use site is not in this part of the file */
/*
 * Epoll bookkeeping.
 *
 * Every descriptor registered with epoll carries a pointer to a
 * struct epoll_wrapper in its event data; the wrapper records which kind of
 * descriptor it is and which flow (if any) owns it.
 */
enum fd_type {
  FD_TUN = 1,       /* TUN descriptor */
  FD_TCP = 2,       /* socket owned by a struct tcp_flow */
  FD_UDP_RELAY = 3, /* relay socket owned by a struct udp_flow */
};

struct epoll_wrapper {
  enum fd_type type; /* discriminator for `flow` */
  int fd;            /* the registered descriptor */
  void *flow;        /* owning tcp_flow or udp_flow */
};

/* The process-wide epoll instance; -1 until one has been created. */
static int g_epfd = -1;
/*
 * Settings for the upstream SOCKS5 proxy.
 *
 * Filled in by parse_socks_url(); `enabled` is non-zero once a host has been
 * parsed. An empty `username` means "no authentication".
 */
struct socks_config {
  char host[256];     /* proxy host name or IPv4 literal, NUL-terminated */
  int port;           /* proxy TCP port */
  char username[256]; /* RFC 1929 user name, or "" */
  char password[256]; /* RFC 1929 password, or "" */
  int enabled;        /* non-zero when a proxy is configured */
};

/* The single proxy configuration used by this program (all zero = disabled). */
static struct socks_config socks_proxy = {0};
static int keep_root = 0; /* 0: drop capabilities and run as the host user */
static int verbose = 0;   /* non-zero: DBG() messages are printed */

/*
 * printf-style debug logging to stderr, prefixed with "[sockpuppet] " and
 * terminated with a newline. Expands to nothing observable unless `verbose`
 * is set. `fmt` must be a string literal because it is pasted between two
 * other literals.
 */
#define DBG(fmt, ...)                                                          \
  do {                                                                         \
    if (!verbose)                                                              \
      break;                                                                   \
    fprintf(stderr, "[sockpuppet] " fmt "\n", ##__VA_ARGS__);                  \
  } while (0)
/*
 * Host gateway addressing: the sandbox reaches host loopback services by
 * talking to 10.0.1.x, which is mapped to 127.0.0.x.
 *
 * The constants below are written as the integer a little-endian host sees
 * when it loads a network-byte-order IPv4 address (first octet in the low
 * byte).
 */
#define HOST_PING_IP 0x0100000a      /* 10.0.0.1 - only answers ping */
#define HOST_GATEWAY_BASE 0x0001000a /* 10.0.1.0 */
#define HOST_GATEWAY_MASK 0x00ffffff /* keeps the first three octets (/24) */
#define LOCALHOST_BASE 0x0000007f    /* 127.0.0.0 */
/* Capacity of the host access rule table. */
#define MAX_HOST_RULES 64

/*
 * One allow-rule for host gateway access. A rule matches a request when its
 * address, port and protocol parts all match (see is_gateway_allowed(), which
 * consults the wildcard_* flags rather than the zero values noted below).
 */
struct host_rule {
  uint8_t last_octet; /* x in 127.0.0.x (1-255), 0 = wildcard */
  uint16_t port;      /* port number, 0 = all ports */
  int proto;          /* IPPROTO_TCP, IPPROTO_UDP, or 0 for both */
  int wildcard_ip;    /* non-zero: rule applies to every 127.0.0.x */
  int wildcard_port;  /* non-zero: rule applies to every port */
};

static struct host_rule host_rules[MAX_HOST_RULES]; /* rule table */
static int host_rule_count = 0;                     /* rules in use */
static int host_allow_all = 0;                      /* set by --host=* */
| /* Check if IP is in gateway range (10.0.1.0/24) */ | |
| static int is_gateway_ip(uint32_t ip) { | |
| return (ip & HOST_GATEWAY_MASK) == HOST_GATEWAY_BASE; | |
| } | |
/*
 * Return the x of a gateway address 10.0.1.x, i.e. the most significant
 * byte of `ip` in the representation used by the HOST_* constants.
 */
static uint8_t gateway_last_octet(uint32_t ip) {
  const uint32_t top_byte = ip >> 24;
  return (uint8_t)top_byte;
}
| /* Convert gateway IP to localhost (10.0.1.x -> 127.0.0.x) */ | |
| static uint32_t gateway_to_localhost(uint32_t gw_ip) { | |
| uint8_t last = gateway_last_octet(gw_ip); | |
| return LOCALHOST_BASE | ((uint32_t)last << 24); | |
| } | |
| /* Check if gateway access is allowed for given IP, port, and protocol */ | |
| static int is_gateway_allowed(uint32_t gw_ip, uint16_t port, int proto) { | |
| if (!is_gateway_ip(gw_ip)) | |
| return 0; | |
| if (host_allow_all) | |
| return 1; | |
| uint8_t last = gateway_last_octet(gw_ip); | |
| for (int i = 0; i < host_rule_count; i++) { | |
| struct host_rule *r = &host_rules[i]; | |
| int ip_match = r->wildcard_ip || (r->last_octet == last); | |
| int port_match = r->wildcard_port || (r->port == port); | |
| int proto_match = (r->proto == 0) || (r->proto == proto); | |
| if (ip_match && port_match && proto_match) | |
| return 1; | |
| } | |
| return 0; | |
| } | |
/*
 * Rate limiting: at most MAX_CONNECTS_PER_SEC calls are accepted within one
 * wall-clock second (as reported by time()).
 */
#define MAX_CONNECTS_PER_SEC 50
static time_t rate_limit_time = 0; /* second the current window belongs to */
static int rate_limit_count = 0;   /* calls seen in that second */

/*
 * Account for one attempt. Returns non-zero while the attempt is within the
 * budget for the current second, 0 once the budget is exhausted.
 */
static int check_rate_limit(void) {
  const time_t current = time(NULL);

  /* A new second starts a fresh window. */
  if (current != rate_limit_time) {
    rate_limit_time = current;
    rate_limit_count = 0;
  }

  rate_limit_count += 1;
  return rate_limit_count <= MAX_CONNECTS_PER_SEC;
}
/*
 * States of a tracked TCP connection. The names follow the classic TCP state
 * machine; SP_TCP_CLOSED is deliberately 0 so zero-initialised flows start
 * out closed.
 */
enum tcp_state {
  SP_TCP_CLOSED = 0,
  SP_TCP_SYN_RECEIVED = 1,
  SP_TCP_ESTABLISHED = 2,
  SP_TCP_FIN_WAIT_1 = 3,
  SP_TCP_FIN_WAIT_2 = 4,
  SP_TCP_CLOSE_WAIT = 5,
  SP_TCP_CLOSING = 6,
  SP_TCP_LAST_ACK = 7,
  SP_TCP_TIME_WAIT = 8
};
| struct tcp_flow { | |
| uint32_t cli_ip; | |
| uint16_t cli_port; | |
| uint32_t srv_ip; | |
| uint16_t srv_port; | |
| uint32_t cli_isn; | |
| uint32_t srv_isn; | |
| uint32_t cli_next; | |
| uint32_t srv_next; | |
| int sock; | |
| enum tcp_state state; | |
| time_t last_active; | |
| /* TCP timestamp option (RFC 7323) */ | |
| int ts_ok; /* Timestamps negotiated */ | |
| uint32_t ts_recent; /* Last TSval received from client */ | |
| struct epoll_wrapper ew; /* Epoll registration */ | |
| }; | |
| static struct tcp_flow tcp_flows[MAX_TCP]; | |
| struct udp_flow { | |
| uint32_t cli_ip; | |
| uint16_t cli_port; | |
| uint32_t srv_ip; | |
| uint16_t srv_port; | |
| int tcp_ctrl; /* SOCKS5 TCP control connection (must stay open) */ | |
| int udp_relay; /* UDP socket to SOCKS relay */ | |
| time_t last_used; /* Last activity timestamp */ | |
| struct sockaddr_in relay_addr; /* Expected relay source for validation */ | |
| struct epoll_wrapper ew; /* Epoll registration */ | |
| }; | |
| static struct udp_flow udp_flows[MAX_UDP]; | |
| /* ---------- utilities ---------- */ | |
/*
 * Fatal-error exit: print `msg` followed by the description of the current
 * errno (via perror) and terminate the process with status 1.
 */
static void die(const char *msg) {
  const int failure_status = 1;

  perror(msg);
  exit(failure_status);
}
/*
 * Evaluate `x` exactly once and deliberately discard its value. Used to
 * silence warn_unused_result diagnostics (e.g. under FORTIFY_SOURCE) for
 * calls whose failure is intentionally ignored.
 */
#define IGNORE_RESULT(x)                                                       \
  do {                                                                         \
    (void)!(x);                                                                \
  } while (0)
/*
 * Write the string `data` to the existing file `path` in a single write().
 * The file is opened write-only without O_CREAT or O_TRUNC. Any failure,
 * including a short write, is fatal via die().
 */
static void write_file(const char *path, const char *data) {
  const int fd = open(path, O_WRONLY);
  if (fd < 0)
    die(path);

  const size_t want = strlen(data);
  const ssize_t done = write(fd, data, want);
  if (done != (ssize_t)want)
    die("write");

  close(fd);
}
| /* ---------- epoll helpers ---------- */ | |
| static void epoll_add_tcp(struct tcp_flow *f) { | |
| if (f->sock < 0 || g_epfd < 0) | |
| return; | |
| f->ew.type = FD_TCP; | |
| f->ew.fd = f->sock; | |
| f->ew.flow = f; | |
| struct epoll_event ev = {.events = EPOLLIN, .data.ptr = &f->ew}; | |
| epoll_ctl(g_epfd, EPOLL_CTL_ADD, f->sock, &ev); | |
| } | |
| static void epoll_add_udp(struct udp_flow *f) { | |
| if (f->udp_relay < 0 || g_epfd < 0) | |
| return; | |
| f->ew.type = FD_UDP_RELAY; | |
| f->ew.fd = f->udp_relay; | |
| f->ew.flow = f; | |
| struct epoll_event ev = {.events = EPOLLIN, .data.ptr = &f->ew}; | |
| epoll_ctl(g_epfd, EPOLL_CTL_ADD, f->udp_relay, &ev); | |
| } | |
| static void epoll_del(int fd) { | |
| if (fd >= 0 && g_epfd >= 0) | |
| epoll_ctl(g_epfd, EPOLL_CTL_DEL, fd, NULL); | |
| } | |
/*
 * Drop all capabilities (for rootless mode).
 *
 * Steps, in order:
 *   1. empty the capability bounding set,
 *   2. clear the effective/permitted/inheritable sets,
 *   3. clear the ambient set,
 *   4. mark the process non-dumpable,
 *   5. set NO_NEW_PRIVS.
 *
 * The bounding set is dropped first because PR_CAPBSET_DROP needs
 * CAP_SETPCAP; once capset() has cleared the effective set every drop would
 * fail with EPERM and be silently skipped. Any unexpected failure terminates
 * the process with status 1.
 */
static void drop_caps(void) {
  /* Drop bounding set - EINVAL/EPERM expected when cap doesn't exist or no
   * CAP_SETPCAP */
  for (int cap = 0; cap <= CAP_LAST_CAP; cap++) {
    if (prctl(PR_CAPBSET_DROP, cap, 0, 0, 0) < 0 && errno != EINVAL &&
        errno != EPERM) {
      perror("PR_CAPBSET_DROP");
      exit(1);
    }
  }

  /* Clear effective, permitted and inheritable sets of this thread. */
  struct __user_cap_header_struct hdr = {
      .version = _LINUX_CAPABILITY_VERSION_3,
      .pid = 0,
  };
  struct __user_cap_data_struct data[2] = {{0}};
  if (syscall(SYS_capset, &hdr, data) < 0) {
    perror("capset");
    exit(1);
  }

  /* Clear ambient capabilities - EINVAL/EPERM expected */
  if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0) < 0 &&
      errno != EINVAL && errno != EPERM) {
    perror("PR_CAP_AMBIENT_CLEAR_ALL");
    exit(1);
  }

  /* Disable core dumps (prevents leaking sensitive data) */
  if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0) {
    perror("PR_SET_DUMPABLE");
    exit(1);
  }

  /* Set NO_NEW_PRIVS */
  if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
    perror("PR_SET_NO_NEW_PRIVS");
    exit(1);
  }
}
| /* Parse SOCKS URL: [socks5://][user:pass@]host:port */ | |
| static int parse_socks_url(const char *url, struct socks_config *cfg) { | |
| const char *p = url; | |
| memset(cfg, 0, sizeof(*cfg)); | |
| cfg->port = 1080; /* default SOCKS port */ | |
| /* Skip protocol prefix if present */ | |
| if (strncmp(p, "socks5://", 9) == 0) | |
| p += 9; | |
| else if (strncmp(p, "socks://", 8) == 0) | |
| p += 8; | |
| else if (strncmp(p, "socks5h://", 10) == 0) | |
| p += 10; | |
| /* Check for authentication (user:pass@) */ | |
| const char *at = strchr(p, '@'); | |
| if (at) { | |
| const char *colon = strchr(p, ':'); | |
| if (colon && colon < at) { | |
| size_t ulen = (size_t)(colon - p); | |
| size_t plen = (size_t)(at - colon - 1); | |
| if (ulen < sizeof(cfg->username) && plen < sizeof(cfg->password)) { | |
| strncpy(cfg->username, p, ulen); | |
| strncpy(cfg->password, colon + 1, plen); | |
| } | |
| } | |
| p = at + 1; | |
| } | |
| /* Parse host:port */ | |
| const char *colon = strrchr(p, ':'); | |
| if (colon) { | |
| size_t hlen = (size_t)(colon - p); | |
| if (hlen < sizeof(cfg->host)) { | |
| strncpy(cfg->host, p, hlen); | |
| cfg->port = atoi(colon + 1); | |
| } | |
| } else { | |
| strncpy(cfg->host, p, sizeof(cfg->host) - 1); | |
| } | |
| cfg->enabled = (cfg->host[0] != '\0'); | |
| return cfg->enabled; | |
| } | |
/* Read exactly `want` bytes from a blocking descriptor, retrying on EINTR
 * and short reads. Returns 0 on success, -1 on error or EOF. */
static int socks5_connect_read_full(int fd, void *buf, size_t want) {
  uint8_t *dst = buf;
  while (want > 0) {
    ssize_t got = read(fd, dst, want);
    if (got < 0) {
      if (errno == EINTR)
        continue;
      return -1;
    }
    if (got == 0)
      return -1; /* peer closed mid-message */
    dst += got;
    want -= (size_t)got;
  }
  return 0;
}

/* Consume BND.ADDR and BND.PORT of a SOCKS5 reply whose ATYP is `atyp`, so
 * that no reply bytes are left in the stream. Returns 0 on success. */
static int socks5_connect_skip_bound(int fd, uint8_t atyp) {
  uint8_t scratch[255 + 2];
  size_t remaining;

  switch (atyp) {
  case 0x01: /* IPv4 */
    remaining = 4 + 2;
    break;
  case 0x04: /* IPv6 */
    remaining = 16 + 2;
    break;
  case 0x03: { /* domain name, length-prefixed */
    uint8_t dlen;
    if (socks5_connect_read_full(fd, &dlen, 1) < 0)
      return -1;
    remaining = (size_t)dlen + 2;
    break;
  }
  default:
    return -1;
  }
  return socks5_connect_read_full(fd, scratch, remaining);
}

/*
 * Connect to target via SOCKS5 proxy. Returns socket fd or -1 on error.
 *
 * cfg:         proxy host/port and optional RFC 1929 credentials.
 * target_ip:   IPv4 destination, already in network byte order.
 * target_port: destination port in host byte order.
 *
 * The whole handshake is performed with blocking I/O on a fresh socket. On
 * success the returned socket is positioned exactly after the proxy's
 * CONNECT reply, so the next bytes read belong to the target.
 */
static int socks5_connect(struct socks_config *cfg, uint32_t target_ip,
                          uint16_t target_port) {
  /* Connect to SOCKS proxy */
  int sock = socket(AF_INET, SOCK_STREAM, 0);
  if (sock < 0)
    return -1;

  struct sockaddr_in proxy_addr = {
      .sin_family = AF_INET,
      .sin_port = htons((uint16_t)cfg->port),
  };
  if (inet_pton(AF_INET, cfg->host, &proxy_addr.sin_addr) <= 0) {
    /* Not an IPv4 literal: resolve the host name. */
    struct addrinfo hints = {.ai_family = AF_INET, .ai_socktype = SOCK_STREAM};
    struct addrinfo *res = NULL;
    if (getaddrinfo(cfg->host, NULL, &hints, &res) != 0)
      goto fail;
    proxy_addr.sin_addr = ((struct sockaddr_in *)res->ai_addr)->sin_addr;
    freeaddrinfo(res);
  }
  if (connect(sock, (struct sockaddr *)&proxy_addr, sizeof(proxy_addr)) < 0)
    goto fail;

  /* SOCKS5 greeting: VER, NMETHODS, then NO AUTH and (optionally)
   * USERNAME/PASSWORD. Without credentials only the first 3 bytes go out. */
  const int has_auth = cfg->username[0] != '\0';
  const uint8_t greeting[4] = {0x05, (uint8_t)(has_auth ? 0x02 : 0x01), 0x00,
                               0x02};
  const size_t greeting_len = has_auth ? 4 : 3;
  if (write(sock, greeting, greeting_len) != (ssize_t)greeting_len)
    goto fail;

  /* Method selection */
  uint8_t method[2];
  if (socks5_connect_read_full(sock, method, 2) < 0 || method[0] != 0x05)
    goto fail;

  if (method[1] == 0x02) {
    /* Username/password auth (RFC 1929) */
    if (!has_auth)
      goto fail;
    size_t ulen = strlen(cfg->username);
    size_t plen = strlen(cfg->password);
    if (ulen > 255 || plen > 255)
      goto fail; /* length fields are a single byte */

    /* VER + ULEN + 255 + PLEN + 255 = 513 bytes worst case. */
    uint8_t auth[1 + 1 + 255 + 1 + 255];
    size_t off = 0;
    auth[off++] = 0x01; /* VER */
    auth[off++] = (uint8_t)ulen;
    memcpy(auth + off, cfg->username, ulen);
    off += ulen;
    auth[off++] = (uint8_t)plen;
    memcpy(auth + off, cfg->password, plen);
    off += plen;
    if (write(sock, auth, off) != (ssize_t)off)
      goto fail;

    uint8_t auth_resp[2];
    if (socks5_connect_read_full(sock, auth_resp, 2) < 0 ||
        auth_resp[1] != 0x00)
      goto fail;
  } else if (method[1] != 0x00) {
    /* Unsupported or no acceptable method */
    goto fail;
  }

  /* SOCKS5 CONNECT request */
  uint8_t req[10];
  req[0] = 0x05;                  /* VER */
  req[1] = 0x01;                  /* CMD = CONNECT */
  req[2] = 0x00;                  /* RSV */
  req[3] = 0x01;                  /* ATYP = IPv4 */
  memcpy(req + 4, &target_ip, 4); /* DST.ADDR (already network order) */
  uint16_t port_be = htons(target_port);
  memcpy(req + 8, &port_be, 2); /* DST.PORT */
  if (write(sock, req, sizeof(req)) != (ssize_t)sizeof(req))
    goto fail;

  /* CONNECT reply: fixed 4-byte header, then an address whose size depends
   * on ATYP. It must be consumed completely or it would be mistaken for
   * payload from the target. */
  uint8_t resp_hdr[4];
  if (socks5_connect_read_full(sock, resp_hdr, sizeof(resp_hdr)) < 0)
    goto fail;
  if (resp_hdr[0] != 0x05 || resp_hdr[1] != 0x00)
    goto fail;
  if (socks5_connect_skip_bound(sock, resp_hdr[3]) < 0)
    goto fail;

  return sock;

fail:
  close(sock);
  return -1;
}
| /* ---------- FD passing ---------- */ | |
/*
 * Pass the open descriptor `fd` to the peer of the UNIX socket `sock` using
 * an SCM_RIGHTS control message. One dummy payload byte ('X') accompanies
 * it. Failure of sendmsg() is fatal via die().
 */
static void send_fd(int sock, int fd) {
  char byte = 'X';
  struct iovec iov = {.iov_base = &byte, .iov_len = 1};

  /* The union forces the control buffer to be aligned for struct cmsghdr,
   * as the CMSG_* macros require. */
  union {
    char buf[CMSG_SPACE(sizeof(int))];
    struct cmsghdr align;
  } ctl;
  memset(&ctl, 0, sizeof(ctl));

  struct msghdr msg;
  memset(&msg, 0, sizeof(msg));
  msg.msg_iov = &iov;
  msg.msg_iovlen = 1;
  msg.msg_control = ctl.buf;
  msg.msg_controllen = sizeof(ctl.buf);

  struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
  cmsg->cmsg_level = SOL_SOCKET;
  cmsg->cmsg_type = SCM_RIGHTS;
  cmsg->cmsg_len = CMSG_LEN(sizeof(int));
  memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));

  if (sendmsg(sock, &msg, 0) < 0)
    die("sendmsg");
}
/*
 * Receive one descriptor sent with send_fd() over the UNIX socket `sock`.
 *
 * Returns the received descriptor. Terminates the process when recvmsg()
 * fails, when the peer closed the socket, or when the message does not carry
 * a complete SCM_RIGHTS control message holding an int.
 */
static int recv_fd(int sock) {
  char byte;
  struct iovec iov = {.iov_base = &byte, .iov_len = 1};

  /* The union forces the control buffer to be aligned for struct cmsghdr,
   * as the CMSG_* macros require. */
  union {
    char buf[CMSG_SPACE(sizeof(int))];
    struct cmsghdr align;
  } ctl;
  memset(&ctl, 0, sizeof(ctl));

  struct msghdr msg;
  memset(&msg, 0, sizeof(msg));
  msg.msg_iov = &iov;
  msg.msg_iovlen = 1;
  msg.msg_control = ctl.buf;
  msg.msg_controllen = sizeof(ctl.buf);

  ssize_t got = recvmsg(sock, &msg, 0);
  if (got < 0)
    die("recvmsg");

  /* CMSG_FIRSTHDR() is NULL when no control data arrived (for example after
   * the peer closed the socket); never dereference it unchecked. */
  struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
  if (got == 0 || (msg.msg_flags & MSG_CTRUNC) || cmsg == NULL ||
      cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS ||
      cmsg->cmsg_len < CMSG_LEN(sizeof(int))) {
    fprintf(stderr, "recv_fd: no file descriptor received\n");
    exit(1);
  }

  int fd;
  memcpy(&fd, CMSG_DATA(cmsg), sizeof(int));
  return fd;
}
| /* ---------- TUN and interface helpers for child process ---------- */ | |
/*
 * Open /dev/net/tun and attach to (or create) the TUN interface `name`,
 * configured without the packet-information header (IFF_NO_PI).
 *
 * Returns the TUN descriptor. Any failure is fatal via die(). A name longer
 * than IFNAMSIZ - 1 characters is truncated so that ifr_name is always
 * NUL-terminated.
 */
static int tun_create(const char *name) {
  int fd = open("/dev/net/tun", O_RDWR);
  if (fd < 0)
    die("open /dev/net/tun");

  struct ifreq ifr;
  memset(&ifr, 0, sizeof(ifr));
  ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
  /* snprintf always terminates; strncpy with the full size would not. */
  snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", name);

  if (ioctl(fd, TUNSETIFF, &ifr) < 0)
    die("TUNSETIFF");
  return fd;
}
/*
 * Bring interface `ifname` up by OR-ing IFF_UP | IFF_RUNNING into its flags
 * through the SIOCGIFFLAGS / SIOCSIFFLAGS ioctls. Any failure is fatal via
 * die(). A name longer than IFNAMSIZ - 1 characters is truncated so that
 * ifr_name is always NUL-terminated.
 */
static void if_up(const char *ifname) {
  int s = socket(AF_INET, SOCK_DGRAM, 0);
  if (s < 0)
    die("socket");

  struct ifreq ifr;
  memset(&ifr, 0, sizeof(ifr));
  /* snprintf always terminates; strncpy with the full size would not. */
  snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", ifname);

  if (ioctl(s, SIOCGIFFLAGS, &ifr) < 0)
    die("SIOCGIFFLAGS");
  ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
  if (ioctl(s, SIOCSIFFLAGS, &ifr) < 0)
    die("SIOCSIFFLAGS");

  close(s);
}
/*
 * Assign a point-to-point IPv4 address pair to interface `ifname`.
 *
 * local: dotted-quad address of this end (set with SIOCSIFADDR).
 * peer:  dotted-quad address of the far end (set with SIOCSIFDSTADDR).
 *
 * Every step is checked; a failure prints a diagnostic and terminates the
 * process with status 1, matching the other interface helpers. A name longer
 * than IFNAMSIZ - 1 characters is truncated so that ifr_name is always
 * NUL-terminated.
 */
static void if_addr_ptp(const char *ifname, const char *local,
                        const char *peer) {
  int s = socket(AF_INET, SOCK_DGRAM, 0);
  if (s < 0) {
    perror("socket");
    exit(1);
  }

  struct ifreq ifr;
  memset(&ifr, 0, sizeof(ifr));
  snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", ifname);

  struct sockaddr_in addr = {.sin_family = AF_INET};

  /* Local end */
  if (inet_pton(AF_INET, local, &addr.sin_addr) != 1) {
    fprintf(stderr, "if_addr_ptp: invalid local address: %s\n", local);
    exit(1);
  }
  memcpy(&ifr.ifr_addr, &addr, sizeof(addr));
  if (ioctl(s, SIOCSIFADDR, &ifr) < 0) {
    perror("SIOCSIFADDR");
    exit(1);
  }

  /* Peer end (ifr_dstaddr shares storage with ifr_addr inside struct ifreq) */
  if (inet_pton(AF_INET, peer, &addr.sin_addr) != 1) {
    fprintf(stderr, "if_addr_ptp: invalid peer address: %s\n", peer);
    exit(1);
  }
  memcpy(&ifr.ifr_dstaddr, &addr, sizeof(addr));
  if (ioctl(s, SIOCSIFDSTADDR, &ifr) < 0) {
    perror("SIOCSIFDSTADDR");
    exit(1);
  }

  close(s);
}
/*
 * Look up the kernel interface index of `ifname` with SIOCGIFINDEX.
 * Returns the index; any failure is fatal via die(). A name longer than
 * IFNAMSIZ - 1 characters is truncated so that ifr_name is always
 * NUL-terminated.
 */
static int if_index(const char *ifname) {
  int s = socket(AF_INET, SOCK_DGRAM, 0);
  if (s < 0)
    die("socket");

  struct ifreq ifr;
  memset(&ifr, 0, sizeof(ifr));
  /* snprintf always terminates; strncpy with the full size would not. */
  snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", ifname);

  if (ioctl(s, SIOCGIFINDEX, &ifr) < 0)
    die("SIOCGIFINDEX");

  close(s);
  return ifr.ifr_ifindex;
}
/*
 * Install an IPv4 default route (destination prefix length 0) in the main
 * routing table via gateway `gw` on interface `ifname`, using a single
 * RTM_NEWROUTE netlink request.
 *
 * gw: dotted-quad gateway address.
 *
 * Failure to create the socket, parse `gw`, or send the request terminates
 * the process with status 1. The kernel's reply is not read (no NLM_F_ACK
 * is requested).
 */
static void add_default_route(const char *ifname, const char *gw) {
  int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  if (fd < 0) {
    perror("socket");
    exit(1);
  }

  struct {
    struct nlmsghdr nlh;
    struct rtmsg rtm;
    char buf[256];
  } req = {0};

  req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
  req.nlh.nlmsg_type = RTM_NEWROUTE;
  req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
  req.rtm.rtm_family = AF_INET;
  req.rtm.rtm_table = RT_TABLE_MAIN;
  req.rtm.rtm_protocol = RTPROT_BOOT;
  req.rtm.rtm_scope = RT_SCOPE_UNIVERSE;
  req.rtm.rtm_type = RTN_UNICAST;

  /* Attribute 1: gateway address */
  struct rtattr *rta = (void *)req.buf;
  rta->rta_type = RTA_GATEWAY;
  rta->rta_len = RTA_LENGTH(4);
  if (inet_pton(AF_INET, gw, RTA_DATA(rta)) != 1) {
    fprintf(stderr, "add_default_route: invalid gateway address: %s\n", gw);
    exit(1);
  }
  req.nlh.nlmsg_len += rta->rta_len;

  /* Attribute 2: output interface index */
  rta = (void *)((char *)rta + rta->rta_len);
  rta->rta_type = RTA_OIF;
  rta->rta_len = RTA_LENGTH(4);
  int oif = if_index(ifname);
  memcpy(RTA_DATA(rta), &oif, sizeof(oif)); /* avoid a type-punned store */
  req.nlh.nlmsg_len += rta->rta_len;

  if (send(fd, &req, req.nlh.nlmsg_len, 0) < 0) {
    perror("send netlink");
    exit(1);
  }
  close(fd);
}
/*
 * Bring interface `ifname` up with an RTM_NEWLINK netlink request that sets
 * (and marks as changed) IFF_UP | IFF_RUNNING. Socket creation and send
 * failures are fatal via die(); the kernel's reply is not read.
 */
static void if_up_netlink(const char *ifname) {
  const int nl = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  if (nl < 0)
    die("socket");

  struct {
    struct nlmsghdr nlh;
    struct ifinfomsg ifi;
  } msg;
  memset(&msg, 0, sizeof(msg));

  /* Netlink header */
  msg.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
  msg.nlh.nlmsg_type = RTM_NEWLINK;
  msg.nlh.nlmsg_flags = NLM_F_REQUEST;

  /* Link description: which interface, and which flag bits to change */
  const unsigned int up_bits = IFF_UP | IFF_RUNNING;
  msg.ifi.ifi_family = AF_UNSPEC;
  msg.ifi.ifi_index = if_index(ifname);
  msg.ifi.ifi_flags = up_bits;
  msg.ifi.ifi_change = up_bits;

  const ssize_t sent = send(nl, &msg, msg.nlh.nlmsg_len, 0);
  if (sent < 0)
    die("send netlink");
  close(nl);
}
| /* ----------- TCP/IP helpers ------------------ */ | |
/*
 * Internet (one's-complement) checksum over `len` bytes at `buf`.
 *
 * The data is summed as native 16-bit words, a trailing odd byte is added as
 * its plain byte value, carries are folded back in, and the complement is
 * returned. Storing the result as a native uint16_t therefore yields the
 * correct on-the-wire bytes for even-length input.
 */
static uint16_t csum16(void *buf, size_t len) {
  const uint8_t *cursor = buf;
  uint32_t acc = 0;

  for (; len >= 2; len -= 2, cursor += 2) {
    uint16_t word;
    memcpy(&word, cursor, sizeof(word)); /* native-order 16-bit load */
    acc += word;
  }
  if (len == 1)
    acc += *cursor;

  /* Fold the carries above bit 15 back into the low 16 bits. */
  while (acc > 0xffff)
    acc = (acc >> 16) + (acc & 0xffff);

  return (uint16_t)~acc;
}
| /* ---------- Persistent UDP Flow Management ---------- */ | |
| static struct udp_flow *udp_find(uint32_t cip, uint16_t cport, uint32_t sip, | |
| uint16_t sport) { | |
| for (int i = 0; i < MAX_UDP; i++) { | |
| struct udp_flow *f = &udp_flows[i]; | |
| /* Full 4-tuple match for proper flow isolation */ | |
| if (f->udp_relay >= 0 && f->cli_ip == cip && f->cli_port == cport && | |
| f->srv_ip == sip && f->srv_port == sport) | |
| return f; | |
| } | |
| return NULL; | |
| } | |
| static struct udp_flow *udp_alloc(void) { | |
| /* First try to find an empty slot */ | |
| for (int i = 0; i < MAX_UDP; i++) { | |
| if (udp_flows[i].udp_relay < 0) | |
| return &udp_flows[i]; | |
| } | |
| /* Otherwise evict oldest */ | |
| struct udp_flow *oldest = &udp_flows[0]; | |
| for (int i = 1; i < MAX_UDP; i++) { | |
| if (udp_flows[i].last_used < oldest->last_used) | |
| oldest = &udp_flows[i]; | |
| } | |
| if (oldest->tcp_ctrl >= 0) | |
| close(oldest->tcp_ctrl); | |
| if (oldest->udp_relay >= 0) | |
| close(oldest->udp_relay); | |
| return oldest; | |
| } | |
/* Read exactly `want` bytes from a blocking descriptor, retrying on EINTR
 * and short reads. Returns 0 on success, -1 on error or EOF. */
static int udp_socks_read_full(int fd, void *buf, size_t want) {
  uint8_t *dst = buf;
  while (want > 0) {
    ssize_t got = read(fd, dst, want);
    if (got < 0) {
      if (errno == EINTR)
        continue;
      return -1;
    }
    if (got == 0)
      return -1; /* peer closed mid-message */
    dst += got;
    want -= (size_t)got;
  }
  return 0;
}

/*
 * Create a persistent SOCKS5 UDP ASSOCIATE connection.
 * Returns 0 on success, -1 on error. Fills in flow's tcp_ctrl and udp_relay.
 *
 * On success:
 *   - f->tcp_ctrl is the TCP control connection to the proxy,
 *   - f->udp_relay is a non-blocking UDP socket connected to the relay
 *     endpoint announced by the proxy, registered with epoll,
 *   - f->relay_addr records that relay endpoint.
 * On failure every socket opened here is closed and `f` is left untouched.
 *
 * Only IPv4 (ATYP 1) and domain-name (ATYP 3) relay addresses are accepted;
 * for a domain name, or an unspecified/loopback IPv4 address, the proxy's
 * own address is used as the relay address.
 */
static int udp_socks_setup(struct udp_flow *f, struct socks_config *cfg) {
  int udp_sock = -1;
  int tcp_sock = socket(AF_INET, SOCK_STREAM, 0);
  if (tcp_sock < 0)
    return -1;

  struct sockaddr_in proxy_addr = {
      .sin_family = AF_INET,
      .sin_port = htons((uint16_t)cfg->port),
  };
  if (inet_pton(AF_INET, cfg->host, &proxy_addr.sin_addr) <= 0) {
    /* Not an IPv4 literal: resolve the host name. */
    struct addrinfo hints = {.ai_family = AF_INET, .ai_socktype = SOCK_STREAM};
    struct addrinfo *res = NULL;
    if (getaddrinfo(cfg->host, NULL, &hints, &res) != 0)
      goto fail;
    proxy_addr.sin_addr = ((struct sockaddr_in *)res->ai_addr)->sin_addr;
    freeaddrinfo(res);
  }
  if (connect(tcp_sock, (struct sockaddr *)&proxy_addr, sizeof(proxy_addr)) <
      0)
    goto fail;

  /* SOCKS5 greeting: VER, NMETHODS, then NO AUTH and (optionally)
   * USERNAME/PASSWORD. Without credentials only the first 3 bytes go out. */
  const int has_auth = cfg->username[0] != '\0';
  const uint8_t greeting[4] = {0x05, (uint8_t)(has_auth ? 0x02 : 0x01), 0x00,
                               0x02};
  const size_t greeting_len = has_auth ? 4 : 3;
  if (write(tcp_sock, greeting, greeting_len) != (ssize_t)greeting_len)
    goto fail;

  uint8_t resp[2];
  if (udp_socks_read_full(tcp_sock, resp, 2) < 0 || resp[0] != 0x05)
    goto fail;

  if (resp[1] == 0x02) {
    /* Username/password auth (RFC 1929) */
    if (!has_auth)
      goto fail;
    size_t ulen = strlen(cfg->username);
    size_t plen = strlen(cfg->password);
    if (ulen > 255 || plen > 255)
      goto fail; /* length fields are a single byte */

    /* VER + ULEN + 255 + PLEN + 255 = 513 bytes worst case. */
    uint8_t auth[1 + 1 + 255 + 1 + 255];
    size_t off = 0;
    auth[off++] = 0x01;
    auth[off++] = (uint8_t)ulen;
    memcpy(auth + off, cfg->username, ulen);
    off += ulen;
    auth[off++] = (uint8_t)plen;
    memcpy(auth + off, cfg->password, plen);
    off += plen;
    if (write(tcp_sock, auth, off) != (ssize_t)off)
      goto fail;

    uint8_t auth_resp[2];
    if (udp_socks_read_full(tcp_sock, auth_resp, 2) < 0 ||
        auth_resp[1] != 0x00)
      goto fail;
  } else if (resp[1] != 0x00) {
    goto fail;
  }

  /* UDP ASSOCIATE request with an all-zero client address/port */
  const uint8_t assoc_req[10] = {0x05, 0x03, 0x00, 0x01, 0, 0, 0, 0, 0, 0};
  if (write(tcp_sock, assoc_req, sizeof(assoc_req)) !=
      (ssize_t)sizeof(assoc_req))
    goto fail;

  uint8_t resp_hdr[4];
  if (udp_socks_read_full(tcp_sock, resp_hdr, sizeof(resp_hdr)) < 0)
    goto fail;
  if (resp_hdr[0] != 0x05 || resp_hdr[1] != 0x00)
    goto fail;

  /* Relay endpoint: relay_ip in network order, relay_port in host order */
  uint32_t relay_ip = 0;
  uint16_t relay_port = 0;
  if (resp_hdr[3] == 0x01) {
    uint8_t addr_port[6];
    if (udp_socks_read_full(tcp_sock, addr_port, sizeof(addr_port)) < 0)
      goto fail;
    memcpy(&relay_ip, addr_port, 4);
    relay_port = (uint16_t)((addr_port[4] << 8) | addr_port[5]);
  } else if (resp_hdr[3] == 0x03) {
    uint8_t dlen;
    if (udp_socks_read_full(tcp_sock, &dlen, 1) < 0)
      goto fail;
    uint8_t domain_port[256 + 2];
    if (udp_socks_read_full(tcp_sock, domain_port, (size_t)dlen + 2) < 0)
      goto fail;
    /* The name is not resolved; the proxy's own address is used instead. */
    relay_ip = proxy_addr.sin_addr.s_addr;
    relay_port = (uint16_t)((domain_port[dlen] << 8) | domain_port[dlen + 1]);
  } else {
    goto fail;
  }

  /* 0.0.0.0 or 127.0.0.1 means "same host as the proxy". */
  if (relay_ip == 0 || relay_ip == htonl(0x7f000001))
    relay_ip = proxy_addr.sin_addr.s_addr;

  udp_sock = socket(AF_INET, SOCK_DGRAM, 0);
  if (udp_sock < 0)
    goto fail;

  struct sockaddr_in relay_addr = {
      .sin_family = AF_INET,
      .sin_port = htons(relay_port),
      .sin_addr.s_addr = relay_ip,
  };
  if (connect(udp_sock, (struct sockaddr *)&relay_addr, sizeof(relay_addr)) <
      0)
    goto fail;

  /* Make non-blocking */
  int fl = fcntl(udp_sock, F_GETFL);
  if (fl < 0 || fcntl(udp_sock, F_SETFL, fl | O_NONBLOCK) < 0)
    goto fail;

  f->tcp_ctrl = tcp_sock;
  f->udp_relay = udp_sock;
  f->relay_addr = relay_addr; /* Store for source validation */
  epoll_add_udp(f); /* Register with epoll for receiving relay responses */
  return 0;

fail:
  if (udp_sock >= 0)
    close(udp_sock);
  close(tcp_sock);
  return -1;
}
| /* Send UDP packet to SOCKS relay */ | |
| static void udp_send_to_relay(struct udp_flow *f, uint32_t dst_ip, | |
| uint16_t dst_port, const uint8_t *data, | |
| size_t len) { | |
| uint8_t pkt[65536]; | |
| pkt[0] = 0; | |
| pkt[1] = 0; | |
| pkt[2] = 0; /* RSV, FRAG */ | |
| pkt[3] = 0x01; /* ATYP = IPv4 */ | |
| memcpy(&pkt[4], &dst_ip, 4); | |
| pkt[8] = (uint8_t)(dst_port >> 8); | |
| pkt[9] = (uint8_t)(dst_port & 0xff); | |
| memcpy(&pkt[10], data, len); | |
| send(f->udp_relay, pkt, 10 + len, 0); | |
| f->last_used = time(NULL); | |
| } | |
| /* Inject UDP packet into TUN (response from server to client) */ | |
| static void udp_inject_tun(int tunfd, struct udp_flow *f, const uint8_t *data, | |
| size_t len) { | |
| uint8_t out[65536]; | |
| struct iphdr *ip = (struct iphdr *)out; | |
| struct udphdr *udp = (struct udphdr *)(out + sizeof(*ip)); | |
| memset(ip, 0, sizeof(*ip)); | |
| ip->version = 4; | |
| ip->ihl = 5; | |
| ip->ttl = 64; | |
| ip->protocol = IPPROTO_UDP; | |
| ip->saddr = f->srv_ip; | |
| ip->daddr = f->cli_ip; | |
| ip->tot_len = htons((uint16_t)(sizeof(*ip) + sizeof(*udp) + len)); | |
| ip->check = csum16(ip, sizeof(*ip)); | |
| udp->source = htons(f->srv_port); | |
| udp->dest = htons(f->cli_port); | |
| udp->len = htons((uint16_t)(sizeof(*udp) + len)); | |
| udp->check = 0; | |
| memcpy(out + sizeof(*ip) + sizeof(*udp), data, len); | |
| IGNORE_RESULT(write(tunfd, out, sizeof(*ip) + sizeof(*udp) + len)); | |
| } | |
| static void handle_udp(int tunfd, uint8_t *pkt, ssize_t len) { | |
| struct iphdr *ip = (struct iphdr *)pkt; | |
| size_t iphl = ip->ihl * 4; | |
| size_t ulen = (size_t)len; | |
| if (ulen < iphl + sizeof(struct udphdr)) | |
| return; | |
| struct udphdr *udp = (struct udphdr *)(pkt + iphl); | |
| uint16_t dport = ntohs(udp->dest); | |
| uint16_t sport = ntohs(udp->source); | |
| uint8_t *payload = pkt + iphl + sizeof(struct udphdr); | |
| size_t plen = ntohs(udp->len) - sizeof(struct udphdr); | |
| /* Check for host gateway access (10.0.1.x -> 127.0.0.x) */ | |
| uint32_t target_ip = ip->daddr; | |
| int is_gateway = is_gateway_ip(target_ip); | |
| if (is_gateway) { | |
| if (!is_gateway_allowed(target_ip, dport, IPPROTO_UDP)) { | |
| DBG("[parent] UDP to 10.0.1.%d:%d blocked", gateway_last_octet(target_ip), | |
| dport); | |
| return; | |
| } | |
| target_ip = gateway_to_localhost(ip->daddr); | |
| DBG("[parent] UDP gateway: 10.0.1.%d:%d -> 127.0.0.%d:%d", | |
| gateway_last_octet(ip->daddr), dport, gateway_last_octet(ip->daddr), | |
| dport); | |
| } | |
| if (socks_proxy.enabled && !is_gateway) { | |
| /* Find or create persistent UDP flow */ | |
| struct udp_flow *f = udp_find(ip->saddr, sport, ip->daddr, dport); | |
| if (!f) { | |
| f = udp_alloc(); | |
| memset(f, 0, sizeof(*f)); | |
| f->cli_ip = ip->saddr; | |
| f->cli_port = sport; | |
| f->srv_ip = ip->daddr; | |
| f->srv_port = dport; | |
| f->tcp_ctrl = -1; | |
| f->udp_relay = -1; | |
| if (udp_socks_setup(f, &socks_proxy) < 0) { | |
| f->udp_relay = -1; | |
| return; | |
| } | |
| } | |
| /* Update source port for response routing (may differ on reused flow) */ | |
| f->cli_port = sport; | |
| /* Forward to SOCKS relay */ | |
| udp_send_to_relay(f, ip->daddr, dport, payload, plen); | |
| } else { | |
| /* Direct UDP - simple request/response for non-SOCKS mode */ | |
| int s = socket(AF_INET, SOCK_DGRAM, 0); | |
| if (s < 0) | |
| return; | |
| struct sockaddr_in dst = { | |
| .sin_family = AF_INET, | |
| .sin_port = htons(dport), | |
| .sin_addr.s_addr = target_ip, /* Use potentially rewritten target */ | |
| }; | |
| sendto(s, payload, plen, 0, (struct sockaddr *)&dst, sizeof(dst)); | |
| DBG("UDP sent %zu bytes to %s:%d", plen, | |
| is_gateway ? "127.0.0.1" : "remote", dport); | |
| struct timeval tv = {.tv_sec = 2, .tv_usec = 0}; | |
| setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); | |
| uint8_t reply[65536]; | |
| ssize_t rlen = recvfrom(s, reply, sizeof(reply), 0, NULL, NULL); | |
| close(s); | |
| if (rlen <= 0) { | |
| DBG("UDP recv failed or timed out (rlen=%zd, errno=%d)", rlen, errno); | |
| return; | |
| } | |
| DBG("UDP recv got %zd bytes", rlen); | |
| /* Build reply packet */ | |
| uint8_t out[65536]; | |
| struct iphdr *rip = (struct iphdr *)out; | |
| struct udphdr *rudp = (struct udphdr *)(out + sizeof(*rip)); | |
| memset(rip, 0, sizeof(*rip)); | |
| rip->version = 4; | |
| rip->ihl = 5; | |
| rip->ttl = 64; | |
| rip->protocol = IPPROTO_UDP; | |
| rip->saddr = ip->daddr; | |
| rip->daddr = ip->saddr; | |
| rip->tot_len = | |
| htons((uint16_t)(sizeof(*rip) + sizeof(*rudp) + (size_t)rlen)); | |
| rip->check = csum16(rip, sizeof(*rip)); | |
| rudp->source = htons(dport); | |
| rudp->dest = htons(sport); | |
| rudp->len = htons((uint16_t)(sizeof(*rudp) + (size_t)rlen)); | |
| rudp->check = 0; | |
| memcpy(out + sizeof(*rip) + sizeof(*rudp), reply, (size_t)rlen); | |
| IGNORE_RESULT( | |
| write(tunfd, out, sizeof(*rip) + sizeof(*rudp) + (size_t)rlen)); | |
| } | |
| } | |
| /* Handle ICMP echo request (ping) to gateway - responds directly */ | |
| static void handle_icmp(int tunfd, uint8_t *pkt, ssize_t len) { | |
| struct iphdr *ip = (struct iphdr *)pkt; | |
| size_t iphl = ip->ihl * 4; | |
| if ((size_t)len < iphl + 8) /* ICMP header is 8 bytes minimum */ | |
| return; | |
| /* Only respond to ping on 10.0.0.1 (always allowed) */ | |
| if (ip->daddr != HOST_PING_IP) | |
| return; | |
| uint8_t *icmp = pkt + iphl; | |
| uint8_t type = icmp[0]; | |
| /* Only respond to echo request (type 8) */ | |
| if (type != 8) | |
| return; | |
| DBG("ICMP echo request to gateway - sending reply"); | |
| /* Build echo reply */ | |
| uint8_t out[65536]; | |
| size_t icmp_len = (size_t)len - iphl; | |
| struct iphdr *rip = (struct iphdr *)out; | |
| memset(rip, 0, sizeof(*rip)); | |
| rip->version = 4; | |
| rip->ihl = 5; | |
| rip->ttl = 64; | |
| rip->protocol = IPPROTO_ICMP; | |
| rip->saddr = ip->daddr; /* Gateway IP */ | |
| rip->daddr = ip->saddr; /* Client IP */ | |
| rip->tot_len = htons((uint16_t)(sizeof(*rip) + icmp_len)); | |
| rip->check = csum16(rip, sizeof(*rip)); | |
| /* Copy ICMP data and change type to echo reply (0) */ | |
| memcpy(out + sizeof(*rip), icmp, icmp_len); | |
| out[sizeof(*rip)] = 0; /* Type = echo reply */ | |
| /* Recalculate ICMP checksum */ | |
| uint8_t *ricmp = out + sizeof(*rip); | |
| ricmp[2] = 0; | |
| ricmp[3] = 0; | |
| uint16_t icmp_csum = csum16(ricmp, icmp_len); | |
| ricmp[2] = (uint8_t)(icmp_csum & 0xff); | |
| ricmp[3] = (uint8_t)(icmp_csum >> 8); | |
| IGNORE_RESULT(write(tunfd, out, sizeof(*rip) + icmp_len)); | |
| } | |
| static struct tcp_flow *tcp_find(uint32_t cip, uint16_t cport, uint32_t sip, | |
| uint16_t sport) { | |
| for (int i = 0; i < MAX_TCP; i++) { | |
| struct tcp_flow *f = &tcp_flows[i]; | |
| if (f->sock >= 0 && f->cli_ip == cip && f->cli_port == cport && | |
| f->srv_ip == sip && f->srv_port == sport) | |
| return f; | |
| } | |
| return NULL; | |
| } | |
| static struct tcp_flow *tcp_alloc(void) { | |
| for (int i = 0; i < MAX_TCP; i++) { | |
| if (tcp_flows[i].sock < 0) | |
| return &tcp_flows[i]; | |
| } | |
| return NULL; | |
| } | |
/*
 * Unfolded 16-bit sum over `len` bytes at `buf`.
 *
 * Each byte pair is combined little-endian (first byte is the low byte); a
 * trailing odd byte is added as its plain value. The 32-bit total is
 * returned without carry folding or complementing, so several partial sums
 * can be added together before the final fold.
 */
static uint32_t csum16_partial(const void *buf, size_t len) {
  const uint8_t *bytes = buf;
  uint32_t total = 0;
  size_t i = 0;

  for (; i + 1 < len; i += 2) {
    const uint32_t lo = bytes[i];
    const uint32_t hi = bytes[i + 1];
    total += lo + (hi << 8);
  }
  if (i < len)
    total += bytes[i];

  return total;
}
/*
 * TCP checksum over the IPv4 pseudo-header, the TCP header and the payload.
 *
 * ip:       supplies source and destination address for the pseudo-header.
 * tcp:      TCP header bytes; `tcp_len` bytes are summed exactly as they are
 *           (presumably the caller zeroes the check field first - TODO
 *           confirm at the call site).
 * payload:  segment data, `plen` bytes; not read when plen is 0.
 *
 * The three parts are summed with csum16_partial(), carries are folded and
 * the complement is returned.
 */
static uint16_t tcp_checksum(struct iphdr *ip, struct tcphdr *tcp,
                             size_t tcp_len, const uint8_t *payload,
                             size_t plen) {
  /* Pseudo-header laid out byte by byte:
   * src(4) dst(4) zero(1) proto(1) tcp-length(2, network order). */
  uint8_t pseudo[12];
  const uint32_t src = ip->saddr;
  const uint32_t dst = ip->daddr;
  const uint16_t seg_len = htons((uint16_t)(tcp_len + plen));

  memcpy(pseudo + 0, &src, sizeof(src));
  memcpy(pseudo + 4, &dst, sizeof(dst));
  pseudo[8] = 0;
  pseudo[9] = IPPROTO_TCP;
  memcpy(pseudo + 10, &seg_len, sizeof(seg_len));

  uint32_t acc = csum16_partial(pseudo, sizeof(pseudo));
  acc += csum16_partial(tcp, tcp_len);
  if (plen != 0)
    acc += csum16_partial(payload, plen);

  /* Fold the carries above bit 15 back into the low 16 bits. */
  while (acc > 0xffff)
    acc = (acc >> 16) + (acc & 0xffff);

  return (uint16_t)~acc;
}
/* Compute the Internet (ones' complement) checksum of `len` bytes at `buf`.
   The returned value is meant to be stored directly (in native byte order)
   into the checksum field of the header it was computed over.

   Words are loaded with memcpy rather than through a uint16_t pointer, so the
   buffer may have any alignment and any effective type (callers pass byte
   arrays). An odd trailing byte is treated as the first byte of a word padded
   with zero, which keeps the result correct regardless of host endianness. */
static uint16_t ip_checksum(const void *buf, size_t len) {
  const uint8_t *p = buf;
  uint32_t sum = 0;

  while (len > 1) {
    uint16_t word;
    memcpy(&word, p, sizeof(word));
    sum += word;
    p += 2;
    len -= 2;
  }
  if (len) {
    const uint8_t tail[2] = {p[0], 0};
    uint16_t word;
    memcpy(&word, tail, sizeof(word));
    sum += word;
  }
  /* Fold the carries back into the low 16 bits */
  while (sum >> 16)
    sum = (sum & 0xffff) + (sum >> 16);
  return (uint16_t)~sum;
}
| /* Send a TCP packet from server to client */ | |
| static void send_tcp_packet(int tunfd, struct tcp_flow *f, uint8_t flags, | |
| const uint8_t *payload, size_t plen) { | |
| uint8_t out[65536]; | |
| struct iphdr *ip = (struct iphdr *)out; | |
| struct tcphdr *tcp = (struct tcphdr *)(out + sizeof(*ip)); | |
| size_t tcp_hdr_len = sizeof(*tcp); | |
| size_t total_len = sizeof(*ip) + tcp_hdr_len + plen; | |
| memset(ip, 0, sizeof(*ip)); | |
| ip->version = 4; | |
| ip->ihl = 5; | |
| ip->ttl = 64; | |
| ip->protocol = IPPROTO_TCP; | |
| ip->saddr = f->srv_ip; | |
| ip->daddr = f->cli_ip; | |
| ip->tot_len = htons((uint16_t)total_len); | |
| memset(tcp, 0, sizeof(*tcp)); | |
| tcp->source = htons(f->srv_port); | |
| tcp->dest = htons(f->cli_port); | |
| tcp->seq = htonl(f->srv_next); | |
| tcp->ack_seq = htonl(f->cli_next); | |
| tcp->doff = (tcp_hdr_len / 4) & 0xF; | |
| tcp->ack = 1; | |
| if (flags & 0x08) | |
| tcp->psh = 1; /* PSH */ | |
| if (flags & 0x01) | |
| tcp->fin = 1; /* FIN */ | |
| tcp->window = htons(65535); | |
| if (plen > 0) | |
| memcpy(out + sizeof(*ip) + tcp_hdr_len, payload, plen); | |
| tcp->check = tcp_checksum(ip, tcp, tcp_hdr_len, payload, plen); | |
| ip->check = ip_checksum(ip, sizeof(*ip)); | |
| IGNORE_RESULT(write(tunfd, out, total_len)); | |
| /* Update sequence number for data sent */ | |
| if (plen > 0) | |
| f->srv_next += (uint32_t)plen; | |
| } | |
/* ---------- TCP Option Parsing ---------- */

/* Options of interest extracted from the option area of a TCP header. */
struct tcp_options {
  uint16_t mss;       /* MSS option value (kind 2); 536 when absent */
  uint8_t wscale;     /* Window scale shift count (kind 3) */
  uint32_t tsval;     /* Timestamp value (kind 8) */
  uint32_t tsecr;     /* Timestamp echo reply (kind 8) */
  int ts_present;     /* Non-zero when a timestamp option was seen */
  int sack_permitted; /* Non-zero when SACK-permitted (kind 4) was seen */
};

/* Read a 32-bit big-endian value from four bytes. The arithmetic is done in
   uint32_t so a top byte >= 0x80 never shifts into the sign bit of an int. */
static uint32_t tcp_opt_be32(const uint8_t *p) {
  return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
         ((uint32_t)p[2] << 8) | (uint32_t)p[3];
}

/* Parse TCP options from the options portion of TCP header.
   `opts`/`len` describe the option bytes only (the part after the fixed
   header). `out` is always reset first, with mss defaulting to 536.
   Recognized options whose length field is not the expected one are skipped,
   as are unknown option kinds.
   Returns 0 on success, -1 if options are malformed (truncated option or a
   length field below 2); in that case `out` holds whatever was parsed before
   the malformed option. */
static int parse_tcp_options(const uint8_t *opts, size_t len,
                             struct tcp_options *out) {
  memset(out, 0, sizeof(*out));
  out->mss = 536; /* Default MSS per RFC 879 */

  size_t i = 0;
  while (i < len) {
    const uint8_t kind = opts[i];
    if (kind == 0) /* End of option list */
      break;
    if (kind == 1) { /* NOP */
      i++;
      continue;
    }
    /* All other options have length field */
    if (i + 1 >= len)
      return -1;
    const uint8_t optlen = opts[i + 1];
    if (optlen < 2 || i + optlen > len)
      return -1;

    const uint8_t *val = opts + i + 2; /* first byte after kind/length */
    switch (kind) {
    case 2: /* MSS */
      if (optlen == 4)
        out->mss = (uint16_t)(((unsigned)val[0] << 8) | val[1]);
      break;
    case 3: /* Window Scale */
      if (optlen == 3)
        out->wscale = val[0];
      break;
    case 4: /* SACK Permitted */
      if (optlen == 2)
        out->sack_permitted = 1;
      break;
    case 8: /* Timestamp */
      if (optlen == 10) {
        out->ts_present = 1;
        out->tsval = tcp_opt_be32(val);
        out->tsecr = tcp_opt_be32(val + 4);
      }
      break;
    default: /* Unknown option: skip it using its length field */
      break;
    }
    i += optlen;
  }
  return 0;
}
| /* Build TCP options for SYN-ACK response. | |
| Returns the number of bytes written to buf. */ | |
| static size_t build_synack_options(const struct tcp_options *client_opts, | |
| uint8_t *buf, uint32_t our_tsval) { | |
| size_t off = 0; | |
| /* MSS option (kind=2, len=4) - always include */ | |
| buf[off++] = 2; | |
| buf[off++] = 4; | |
| buf[off++] = 0x05; /* MSS = 1460 */ | |
| buf[off++] = 0xb4; | |
| /* Timestamp option if client requested (kind=8, len=10) */ | |
| if (client_opts->ts_present) { | |
| buf[off++] = 8; | |
| buf[off++] = 10; | |
| buf[off++] = (uint8_t)(our_tsval >> 24); | |
| buf[off++] = (uint8_t)(our_tsval >> 16); | |
| buf[off++] = (uint8_t)(our_tsval >> 8); | |
| buf[off++] = (uint8_t)(our_tsval); | |
| buf[off++] = (uint8_t)(client_opts->tsval >> 24); | |
| buf[off++] = (uint8_t)(client_opts->tsval >> 16); | |
| buf[off++] = (uint8_t)(client_opts->tsval >> 8); | |
| buf[off++] = (uint8_t)(client_opts->tsval); | |
| } | |
| /* Pad to 4-byte boundary with NOPs */ | |
| while (off % 4 != 0) | |
| buf[off++] = 1; /* NOP */ | |
| return off; | |
| } | |
| /* Send a TCP RST packet */ | |
| static void send_tcp_rst(int tunfd, uint32_t saddr, uint32_t daddr, | |
| uint16_t sport, uint16_t dport, uint32_t seq, | |
| uint32_t ack_seq) { | |
| uint8_t out[64]; | |
| struct iphdr *ip = (struct iphdr *)out; | |
| struct tcphdr *tcp = (struct tcphdr *)(out + sizeof(*ip)); | |
| memset(out, 0, sizeof(out)); | |
| ip->version = 4; | |
| ip->ihl = 5; | |
| ip->ttl = 64; | |
| ip->protocol = IPPROTO_TCP; | |
| ip->saddr = saddr; | |
| ip->daddr = daddr; | |
| ip->tot_len = htons(sizeof(*ip) + sizeof(*tcp)); | |
| tcp->source = htons(sport); | |
| tcp->dest = htons(dport); | |
| tcp->seq = htonl(seq); | |
| tcp->ack_seq = htonl(ack_seq); | |
| tcp->doff = 5; | |
| tcp->rst = 1; | |
| tcp->ack = 1; | |
| tcp->window = 0; | |
| tcp->check = tcp_checksum(ip, tcp, sizeof(*tcp), NULL, 0); | |
| ip->check = ip_checksum(ip, sizeof(*ip)); | |
| IGNORE_RESULT(write(tunfd, out, sizeof(*ip) + sizeof(*tcp))); | |
| } | |
| /* Send RST for a given flow and clean it up */ | |
| static void tcp_flow_rst(int tunfd, struct tcp_flow *f) { | |
| if (f->sock >= 0) { | |
| epoll_del(f->sock); | |
| close(f->sock); | |
| } | |
| send_tcp_rst(tunfd, f->srv_ip, f->cli_ip, f->srv_port, f->cli_port, | |
| f->srv_next, f->cli_next); | |
| f->sock = -1; | |
| f->state = SP_TCP_CLOSED; | |
| } | |
| static void handle_tcp(int tunfd, uint8_t *pkt, ssize_t len) { | |
| if (len <= 0) | |
| return; | |
| size_t ulen = (size_t)len; | |
| struct iphdr *ip = (struct iphdr *)pkt; | |
| size_t iphl = ip->ihl * 4; | |
| if (ulen < iphl + sizeof(struct tcphdr)) | |
| return; | |
| struct tcphdr *tcp = (struct tcphdr *)(pkt + iphl); | |
| uint32_t cip = ip->saddr; | |
| uint32_t sip = ip->daddr; | |
| uint16_t cport = ntohs(tcp->source); | |
| uint16_t sport = ntohs(tcp->dest); | |
| /* ---------- RST ---------- */ | |
| if (tcp->rst) { | |
| struct tcp_flow *f = tcp_find(cip, cport, sip, sport); | |
| if (f && f->sock >= 0) { | |
| epoll_del(f->sock); | |
| close(f->sock); | |
| f->sock = -1; | |
| f->state = SP_TCP_CLOSED; | |
| } | |
| return; | |
| } | |
| /* ---------- SYN ---------- */ | |
| if (tcp->syn && !tcp->ack) { | |
| struct tcp_flow *f = tcp_find(cip, cport, sip, sport); | |
| /* Parse TCP options from SYN */ | |
| size_t tcp_hdr_len = tcp->doff * 4; | |
| struct tcp_options cli_opts; | |
| if (tcp_hdr_len > sizeof(struct tcphdr)) { | |
| const uint8_t *opt_start = (const uint8_t *)tcp + sizeof(struct tcphdr); | |
| size_t opt_len = tcp_hdr_len - sizeof(struct tcphdr); | |
| parse_tcp_options(opt_start, opt_len, &cli_opts); | |
| } else { | |
| memset(&cli_opts, 0, sizeof(cli_opts)); | |
| } | |
| if (!f) { | |
| /* Rate limit new connections */ | |
| if (!check_rate_limit()) { | |
| return; /* Too many connections - drop SYN silently */ | |
| } | |
| /* first SYN */ | |
| f = tcp_alloc(); | |
| if (!f) | |
| return; | |
| memset(f, 0, sizeof(*f)); | |
| f->cli_ip = cip; | |
| f->cli_port = cport; | |
| f->srv_ip = sip; | |
| f->srv_port = sport; | |
| f->cli_isn = ntohl(tcp->seq); | |
| f->cli_next = f->cli_isn + 1; | |
| f->srv_isn = (uint32_t)random(); | |
| f->srv_next = f->srv_isn + 1; | |
| /* Store timestamp negotiation state */ | |
| f->ts_ok = cli_opts.ts_present; | |
| if (cli_opts.ts_present) { | |
| f->ts_recent = cli_opts.tsval; | |
| } | |
| int s; | |
| int use_gateway = | |
| is_gateway_ip(sip) && is_gateway_allowed(sip, sport, IPPROTO_TCP); | |
| if (use_gateway) { | |
| /* Gateway access - connect to localhost (10.0.1.x -> 127.0.0.x) */ | |
| uint32_t local_ip = gateway_to_localhost(sip); | |
| DBG("[parent] TCP gateway: 10.0.1.%d:%d -> 127.0.0.%d:%d", | |
| gateway_last_octet(sip), sport, gateway_last_octet(sip), sport); | |
| s = socket(AF_INET, SOCK_STREAM, 0); | |
| if (s < 0) | |
| return; | |
| struct sockaddr_in dst = { | |
| .sin_family = AF_INET, | |
| .sin_port = htons(sport), | |
| .sin_addr.s_addr = local_ip, | |
| }; | |
| if (connect(s, (struct sockaddr *)&dst, sizeof(dst)) < 0) { | |
| close(s); | |
| f->sock = -1; | |
| send_tcp_rst(tunfd, sip, cip, sport, cport, 0, f->cli_next); | |
| return; | |
| } | |
| } else if (socks_proxy.enabled) { | |
| /* Connect via SOCKS5 proxy */ | |
| s = socks5_connect(&socks_proxy, sip, sport); | |
| if (s < 0) { | |
| f->sock = -1; | |
| /* Send RST on connection failure */ | |
| send_tcp_rst(tunfd, sip, cip, sport, cport, 0, f->cli_next); | |
| return; | |
| } | |
| } else { | |
| /* Direct connection */ | |
| s = socket(AF_INET, SOCK_STREAM, 0); | |
| if (s < 0) | |
| return; | |
| struct sockaddr_in dst = { | |
| .sin_family = AF_INET, | |
| .sin_port = htons(sport), | |
| .sin_addr.s_addr = sip, | |
| }; | |
| if (connect(s, (struct sockaddr *)&dst, sizeof(dst)) < 0) { | |
| close(s); | |
| f->sock = -1; | |
| /* Send RST on connection failure */ | |
| send_tcp_rst(tunfd, sip, cip, sport, cport, 0, f->cli_next); | |
| return; | |
| } | |
| } | |
| f->sock = s; | |
| f->state = SP_TCP_SYN_RECEIVED; | |
| f->last_active = time(NULL); | |
| } | |
| /* Build SYN-ACK with mirrored options */ | |
| uint8_t out[128]; | |
| struct iphdr *rip = (struct iphdr *)out; | |
| struct tcphdr *rtcp = (struct tcphdr *)(out + sizeof(*rip)); | |
| /* Build TCP options mirroring client's capabilities */ | |
| uint8_t opts[24]; | |
| uint32_t our_tsval = (uint32_t)time(NULL); | |
| size_t opts_len = build_synack_options(&cli_opts, opts, our_tsval); | |
| memset(rip, 0, sizeof(*rip)); | |
| rip->version = 4; | |
| rip->ihl = 5; | |
| rip->ttl = 64; | |
| rip->protocol = IPPROTO_TCP; | |
| rip->saddr = sip; | |
| rip->daddr = cip; | |
| memset(rtcp, 0, sizeof(*rtcp)); | |
| rtcp->source = htons(sport); | |
| rtcp->dest = htons(cport); | |
| rtcp->seq = htonl(f->srv_isn); | |
| rtcp->ack_seq = htonl(f->cli_next); | |
| rtcp->syn = 1; | |
| rtcp->ack = 1; | |
| size_t full_tcp_len = sizeof(struct tcphdr) + opts_len; | |
| rtcp->doff = (full_tcp_len / 4) & 0xF; | |
| rtcp->window = htons(65535); | |
| memcpy((uint8_t *)rtcp + sizeof(*rtcp), opts, opts_len); | |
| rip->tot_len = htons((uint16_t)(sizeof(*rip) + full_tcp_len)); | |
| rtcp->check = tcp_checksum(rip, rtcp, full_tcp_len, NULL, 0); | |
| rip->check = ip_checksum(rip, sizeof(*rip)); | |
| IGNORE_RESULT(write(tunfd, out, sizeof(*rip) + full_tcp_len)); | |
| return; | |
| } | |
| /* ---------- ACK / DATA ---------- */ | |
| if (tcp->ack && !tcp->syn) { | |
| struct tcp_flow *f = tcp_find(cip, cport, sip, sport); | |
| if (!f || f->sock < 0) | |
| return; | |
| uint32_t seq = ntohl(tcp->seq); | |
| /* Calculate payload */ | |
| size_t tcp_hdr_len = tcp->doff * 4; | |
| size_t payload_off = iphl + tcp_hdr_len; | |
| size_t payload_len = 0; | |
| if (ulen > payload_off) | |
| payload_len = ulen - payload_off; | |
| /* Check sequence number */ | |
| if (seq != f->cli_next) { | |
| /* Out of order or retransmit - just ACK what we have */ | |
| if (payload_len > 0) | |
| send_tcp_packet(tunfd, f, 0, NULL, 0); | |
| return; | |
| } | |
| if (f->state == SP_TCP_SYN_RECEIVED) { | |
| f->state = SP_TCP_ESTABLISHED; | |
| epoll_add_tcp( | |
| f); /* Register with epoll now that connection is established */ | |
| } | |
| /* Update activity time */ | |
| f->last_active = time(NULL); | |
| /* Forward payload to real server with partial send handling */ | |
| if (payload_len > 0) { | |
| uint8_t *payload = pkt + payload_off; | |
| size_t total_sent = 0; | |
| while (total_sent < payload_len) { | |
| ssize_t sent = | |
| send(f->sock, payload + total_sent, payload_len - total_sent, 0); | |
| if (sent < 0) { | |
| if (errno == EINTR) | |
| continue; | |
| if (errno == EAGAIN || errno == EWOULDBLOCK) | |
| break; | |
| /* Connection error - send RST and clean up */ | |
| tcp_flow_rst(tunfd, f); | |
| return; | |
| } | |
| if (sent == 0) | |
| break; | |
| total_sent += (size_t)sent; | |
| } | |
| if (total_sent > 0) { | |
| f->cli_next += (uint32_t)total_sent; | |
| /* Send ACK back to client */ | |
| send_tcp_packet(tunfd, f, 0, NULL, 0); | |
| } | |
| } | |
| /* Handle FIN from client */ | |
| if (tcp->fin) { | |
| f->cli_next++; | |
| send_tcp_packet(tunfd, f, 0x01, NULL, 0); /* FIN+ACK */ | |
| epoll_del(f->sock); | |
| close(f->sock); | |
| f->sock = -1; | |
| } | |
| return; | |
| } | |
| } | |
| static void event_loop(int tunfd, pid_t pid) { | |
| uint8_t buf[65536]; | |
| struct epoll_event events[MAX_EVENTS]; | |
| /* Register TUN fd (static wrapper on stack - never removed) */ | |
| struct epoll_wrapper tun_ew = {.type = FD_TUN, .fd = tunfd, .flow = NULL}; | |
| struct epoll_event tun_ev = {.events = EPOLLIN, .data.ptr = &tun_ew}; | |
| if (epoll_ctl(g_epfd, EPOLL_CTL_ADD, tunfd, &tun_ev) < 0) | |
| die("epoll_ctl TUN"); | |
| for (;;) { | |
| /* Check child status */ | |
| int status; | |
| if (waitpid(pid, &status, WNOHANG) > 0) | |
| if (WIFEXITED(status) || WIFSIGNALED(status)) | |
| break; | |
| int n = epoll_wait(g_epfd, events, MAX_EVENTS, EPOLL_TIMEOUT_MS); | |
| if (n < 0) { | |
| if (errno == EINTR) | |
| continue; | |
| break; | |
| } | |
| for (int i = 0; i < n; i++) { | |
| struct epoll_wrapper *ew = events[i].data.ptr; | |
| if (!ew) | |
| continue; | |
| switch (ew->type) { | |
| case FD_TUN: { | |
| /* Handle TUN packets (outgoing from child) */ | |
| ssize_t r = read(tunfd, buf, sizeof(buf)); | |
| if (r > 0) { | |
| struct iphdr *ip = (struct iphdr *)buf; | |
| if (ip->version == 4) { | |
| if (ip->protocol == IPPROTO_UDP) | |
| handle_udp(tunfd, buf, r); | |
| else if (ip->protocol == IPPROTO_TCP) | |
| handle_tcp(tunfd, buf, r); | |
| else if (ip->protocol == IPPROTO_ICMP) | |
| handle_icmp(tunfd, buf, r); | |
| } | |
| } | |
| break; | |
| } | |
| case FD_TCP: { | |
| /* Handle TCP server socket responses */ | |
| struct tcp_flow *f = ew->flow; | |
| if (f && f->sock >= 0 && f->state == SP_TCP_ESTABLISHED) { | |
| ssize_t r = recv(f->sock, buf, sizeof(buf) - 64, 0); | |
| if (r > 0) { | |
| /* Forward data to client */ | |
| send_tcp_packet(tunfd, f, 0x08, buf, (size_t)r); | |
| } else if (r == 0) { | |
| /* Server closed connection - send FIN to client */ | |
| send_tcp_packet(tunfd, f, 0x01, NULL, 0); | |
| epoll_del(f->sock); | |
| close(f->sock); | |
| f->sock = -1; | |
| } | |
| } | |
| break; | |
| } | |
| case FD_UDP_RELAY: { | |
| /* Handle UDP relay responses (incoming from SOCKS proxy) */ | |
| struct udp_flow *f = ew->flow; | |
| if (f && f->udp_relay >= 0) { | |
| struct sockaddr_in from; | |
| socklen_t fromlen = sizeof(from); | |
| ssize_t r = recvfrom(f->udp_relay, buf, sizeof(buf), 0, | |
| (struct sockaddr *)&from, &fromlen); | |
| /* Validate source address - drop spoofed packets */ | |
| if (r > 0 && (from.sin_addr.s_addr != f->relay_addr.sin_addr.s_addr || | |
| from.sin_port != f->relay_addr.sin_port)) { | |
| DBG("[parent] UDP spoof blocked: got %s:%d, expected %s:%d", | |
| inet_ntoa(from.sin_addr), ntohs(from.sin_port), | |
| inet_ntoa(f->relay_addr.sin_addr), | |
| ntohs(f->relay_addr.sin_port)); | |
| break; /* Packet from unexpected source */ | |
| } | |
| if (r > 10) { | |
| /* Validate FRAG field (byte 2) - we don't support fragmentation */ | |
| if (buf[2] != 0) | |
| break; | |
| /* Strip SOCKS5 UDP header */ | |
| size_t hdr_len = 10; | |
| if (buf[3] == 0x03) | |
| hdr_len = 4 + 1 + buf[4] + 2; | |
| else if (buf[3] == 0x04) | |
| hdr_len = 4 + 16 + 2; | |
| if ((size_t)r > hdr_len) { | |
| udp_inject_tun(tunfd, f, buf + hdr_len, (size_t)r - hdr_len); | |
| f->last_used = time(NULL); | |
| } | |
| } else if (r == 0) { | |
| /* Relay closed */ | |
| epoll_del(f->udp_relay); | |
| close(f->udp_relay); | |
| close(f->tcp_ctrl); | |
| f->udp_relay = -1; | |
| f->tcp_ctrl = -1; | |
| } | |
| } | |
| break; | |
| } | |
| } | |
| } | |
| /* Cleanup stale TCP flows (idle for >120 seconds) */ | |
| time_t now = time(NULL); | |
| for (int i = 0; i < MAX_TCP; i++) { | |
| if (tcp_flows[i].sock >= 0 && tcp_flows[i].state == SP_TCP_ESTABLISHED && | |
| (now - tcp_flows[i].last_active) > 120) { | |
| send_tcp_packet(tunfd, &tcp_flows[i], 0x01, NULL, 0); /* FIN */ | |
| epoll_del(tcp_flows[i].sock); | |
| close(tcp_flows[i].sock); | |
| tcp_flows[i].sock = -1; | |
| tcp_flows[i].state = SP_TCP_CLOSED; | |
| } | |
| } | |
| /* Cleanup stale UDP flows (idle for >30 seconds) */ | |
| for (int i = 0; i < MAX_UDP; i++) { | |
| if (udp_flows[i].udp_relay >= 0 && (now - udp_flows[i].last_used) > 30) { | |
| epoll_del(udp_flows[i].udp_relay); | |
| close(udp_flows[i].udp_relay); | |
| close(udp_flows[i].tcp_ctrl); | |
| udp_flows[i].udp_relay = -1; | |
| udp_flows[i].tcp_ctrl = -1; | |
| } | |
| } | |
| } | |
| } | |
| /* ---------- main ---------- */ | |
| int main(int argc, char **argv) { | |
| /* Parse our arguments first */ | |
| int cmd_start = 1; | |
| for (int i = 1; i < argc; i++) { | |
| if (strcmp(argv[i], "--socks") == 0 && i + 1 < argc) { | |
| parse_socks_url(argv[i + 1], &socks_proxy); | |
| if (socks_proxy.enabled) { | |
| fprintf(stderr, "Using SOCKS5 proxy: %s:%d%s\n", socks_proxy.host, | |
| socks_proxy.port, | |
| socks_proxy.username[0] ? " (with auth)" : ""); | |
| } | |
| i++; /* skip next arg (the proxy URL) */ | |
| cmd_start = i + 1; | |
| } else if (strcmp(argv[i], "--root") == 0) { | |
| keep_root = 1; | |
| cmd_start = i + 1; | |
| } else if (strcmp(argv[i], "--verbose") == 0 || | |
| strcmp(argv[i], "-v") == 0) { | |
| verbose = 1; | |
| cmd_start = i + 1; | |
| } else if (strncmp(argv[i], "--host=", 7) == 0) { | |
| const char *spec = argv[i] + 7; | |
| if (strcmp(spec, "*") == 0) { | |
| host_allow_all = 1; | |
| DBG("Host gateway: all IPs, ports, and protocols enabled"); | |
| } else if (host_rule_count < MAX_HOST_RULES) { | |
| struct host_rule *r = &host_rules[host_rule_count]; | |
| memset(r, 0, sizeof(*r)); | |
| /* Make a mutable copy for parsing */ | |
| char buf[128]; | |
| strncpy(buf, spec, sizeof(buf) - 1); | |
| buf[sizeof(buf) - 1] = '\0'; | |
| /* Parse protocol suffix /tcp or /udp */ | |
| char *slash = strchr(buf, '/'); | |
| if (slash) { | |
| *slash = '\0'; | |
| if (strcmp(slash + 1, "tcp") == 0) | |
| r->proto = IPPROTO_TCP; | |
| else if (strcmp(slash + 1, "udp") == 0) | |
| r->proto = IPPROTO_UDP; | |
| else { | |
| fprintf(stderr, "Invalid protocol: %s (use /tcp or /udp)\n", | |
| slash + 1); | |
| cmd_start = i + 1; | |
| continue; | |
| } | |
| } | |
| /* Parse 127.0.0.X:PORT format */ | |
| if (strncmp(buf, "127.0.0.", 8) == 0) { | |
| char *colon = strchr(buf + 8, ':'); | |
| if (colon) { | |
| *colon = '\0'; | |
| int last_octet = atoi(buf + 8); | |
| const char *port_str = colon + 1; | |
| r->last_octet = (uint8_t)last_octet; | |
| if (strcmp(port_str, "*") == 0) { | |
| r->wildcard_port = 1; | |
| } else { | |
| r->port = (uint16_t)atoi(port_str); | |
| } | |
| host_rule_count++; | |
| const char *proto_str = r->proto == IPPROTO_TCP ? "/tcp" | |
| : r->proto == IPPROTO_UDP ? "/udp" | |
| : ""; | |
| if (r->wildcard_port) | |
| DBG("Host gateway: 127.0.0.%d:*%s", r->last_octet, proto_str); | |
| else | |
| DBG("Host gateway: 127.0.0.%d:%d%s", r->last_octet, r->port, | |
| proto_str); | |
| } else { | |
| fprintf(stderr, "Invalid format: %s (expected 127.0.0.X:PORT)\n", | |
| spec); | |
| } | |
| } else { | |
| fprintf(stderr, "Invalid IP: %s (must be 127.0.0.X)\n", spec); | |
| } | |
| } | |
| cmd_start = i + 1; | |
| } else { | |
| /* First non-flag argument is the command */ | |
| cmd_start = i; | |
| break; | |
| } | |
| } | |
| if (cmd_start >= argc) { | |
| fprintf(stderr, "usage: %s [OPTIONS] <cmd> [args...]\n\n", argv[0]); | |
| fprintf(stderr, "Options:\n"); | |
| fprintf(stderr, " --socks <proxy> SOCKS5 proxy\n"); | |
| fprintf(stderr, " --root Keep root capabilities\n"); | |
| fprintf(stderr, " -v, --verbose Print debug info\n"); | |
| fprintf(stderr, | |
| "\nHost gateway (child accesses 10.0.1.x -> host 127.0.0.x):\n"); | |
| fprintf(stderr, " --host=127.0.0.X:PORT[/tcp|udp] Allow IP:PORT\n"); | |
| fprintf(stderr, | |
| " --host=127.0.0.X:*[/tcp|udp] Allow all ports on IP\n"); | |
| fprintf(stderr, " --host=* Allow everything\n"); | |
| fprintf(stderr, "\nExamples:\n"); | |
| fprintf(stderr, | |
| " --host=127.0.0.53:53/udp DNS via systemd-resolved\n"); | |
| fprintf(stderr, " --host=127.0.0.1:8080/tcp HTTP server\n"); | |
| fprintf(stderr, "\nNote: ping to 10.0.0.1 is always allowed\n"); | |
| return 1; | |
| } | |
| /* Shift argv to command */ | |
| argv = &argv[cmd_start]; | |
| argc -= cmd_start; | |
| int sp[2], ctl[2], sync[2]; | |
| (void)socketpair(AF_UNIX, SOCK_STREAM, 0, sp); | |
| (void)socketpair(AF_UNIX, SOCK_STREAM, 0, ctl); | |
| (void)socketpair(AF_UNIX, SOCK_STREAM, 0, sync); | |
| uid_t uid = getuid(); | |
| gid_t gid = getgid(); | |
| for (int i = 0; i < MAX_TCP; i++) | |
| tcp_flows[i].sock = -1; | |
| for (int i = 0; i < MAX_UDP; i++) { | |
| udp_flows[i].udp_relay = -1; | |
| udp_flows[i].tcp_ctrl = -1; | |
| } | |
| /* Create epoll instance for event loop */ | |
| g_epfd = epoll_create1(0); | |
| if (g_epfd < 0) | |
| die("epoll_create1"); | |
| pid_t pid = fork(); | |
| if (pid == 0) { | |
| /* ---------- child ---------- */ | |
| close(sp[0]); | |
| close(ctl[0]); | |
| close(sync[0]); | |
| if (unshare(CLONE_NEWUSER | CLONE_NEWNET | CLONE_NEWNS | CLONE_NEWIPC | | |
| CLONE_NEWUTS | CLONE_NEWPID) < 0) | |
| die("unshare"); | |
| DBG("Created namespaces: user, net, mnt, ipc, uts, pid"); | |
| /* CLONE_NEWPID requires a second fork - the child becomes PID 1 */ | |
| pid_t inner_pid = fork(); | |
| if (inner_pid < 0) | |
| die("fork (inner)"); | |
| if (inner_pid > 0) { | |
| /* Intermediate process: wait for inner child and exit with its status */ | |
| int status; | |
| waitpid(inner_pid, &status, 0); | |
| _exit(WIFEXITED(status) ? WEXITSTATUS(status) : 1); | |
| } | |
| /* ---------- inner child (PID 1 in new namespace) ---------- */ | |
| IGNORE_RESULT(write(sync[1], "1", 1)); | |
| IGNORE_RESULT(read(sync[1], &uid, sizeof(uid))); | |
| IGNORE_RESULT(read(sync[1], &gid, sizeof(gid))); | |
| mkdir("/proc", 0555); | |
| mount("proc", "/proc", "proc", 0, ""); | |
| DBG("Mounted /proc"); | |
| mkdir("/dev", 0755); | |
| mkdir("/dev/net", 0755); | |
| mount("/dev/net", "/dev/net", NULL, MS_BIND | MS_REC, NULL); | |
| DBG("Bind mounted /dev/net"); | |
| int rfd = open("/tmp/resolv.conf", O_CREAT | O_WRONLY | O_TRUNC, 0644); | |
| IGNORE_RESULT(write(rfd, "nameserver 8.8.8.8\n", 19)); | |
| close(rfd); | |
| mount("/tmp/resolv.conf", "/etc/resolv.conf", NULL, MS_BIND, NULL); | |
| DBG("Configured /etc/resolv.conf with nameserver 8.8.8.8"); | |
| int sockfd = recv_fd(ctl[1]); | |
| int tunfd = tun_create("tun0"); | |
| /* network config inside child netns */ | |
| if_up_netlink("lo"); | |
| if_addr_ptp("tun0", "10.0.0.2", "10.0.0.1"); | |
| if_up("tun0"); | |
| add_default_route("tun0", "10.0.0.1"); | |
| DBG("Network setup: tun0 (10.0.0.2 -> 10.0.0.1), lo up"); | |
| fcntl(tunfd, F_SETFD, FD_CLOEXEC); | |
| send_fd(ctl[1], tunfd); | |
| close(tunfd); | |
| close(sockfd); | |
| /* Drop capabilities and privileges unless --root specified */ | |
| if (!keep_root) { | |
| drop_caps(); | |
| DBG("Dropped capabilities"); | |
| } else { | |
| DBG("Keeping root capabilities (--root specified)"); | |
| } | |
| DBG("Executing: %s", argv[0]); | |
| execvp(argv[0], argv); | |
| die("exec"); | |
| } | |
| /* ---------- parent ---------- */ | |
| close(ctl[1]); | |
| close(sync[1]); | |
| char tmp; | |
| IGNORE_RESULT(read(sync[0], &tmp, 1)); | |
| char path[128], map[64]; | |
| snprintf(path, sizeof(path), "/proc/%d/setgroups", pid); | |
| write_file(path, "deny"); | |
| snprintf(path, sizeof(path), "/proc/%d/uid_map", pid); | |
| if (keep_root) { | |
| /* Root mode: map 0 inside -> host uid outside */ | |
| snprintf(map, sizeof(map), "0 %d 1\n", uid); | |
| } else { | |
| /* Rootless mode: 1:1 mapping (uid inside = uid outside) */ | |
| snprintf(map, sizeof(map), "%d %d 1\n", uid, uid); | |
| } | |
| write_file(path, map); | |
| snprintf(path, sizeof(path), "/proc/%d/gid_map", pid); | |
| if (keep_root) { | |
| snprintf(map, sizeof(map), "0 %d 1\n", gid); | |
| } else { | |
| snprintf(map, sizeof(map), "%d %d 1\n", gid, gid); | |
| } | |
| write_file(path, map); | |
| IGNORE_RESULT(write(sync[0], &uid, sizeof(uid))); | |
| IGNORE_RESULT(write(sync[0], &gid, sizeof(gid))); | |
| send_fd(ctl[0], sp[1]); | |
| close(sp[1]); | |
| int tunfd = recv_fd(ctl[0]); | |
| event_loop(tunfd, pid); | |
| waitpid(pid, NULL, 0); | |
| return 0; | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment