Skip to content

Instantly share code, notes, and snippets.

@mrpre
Created January 28, 2026 08:49
Show Gist options
  • Select an option

  • Save mrpre/eae853b72ac6a750f5d45d64ddac1e81 to your computer and use it in GitHub Desktop.

Select an option

Save mrpre/eae853b72ac6a750f5d45d64ddac1e81 to your computer and use it in GitHub Desktop.
ip_rt_bug reproducer
/*
* ip_rt_bug race condition reproducer
*
* Requires kernel with mdelay(2000) in icmp_route_lookup before ip_route_input.
*
* Race scenario:
* 1. Send packet with src=172.20.20.1 (not local), dst=172.30.0.1
* 2. Malformed IP option triggers icmp_send -> icmp_route_lookup
* 3. First xfrm_lookup returns -EPERM (block policy), enters reverse path
* 4. Kernel delays 2s before ip_route_input
* 5. During delay: userspace adds 172.20.20.1 as local + ICMP policy
* 6. ip_route_input returns LOCAL route with dst.output=ip_rt_bug
* 7. Second xfrm_lookup succeeds, returns this route for ICMP output
* 8. dst_output calls ip_rt_bug -> WARN_ON
*/
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/if.h>
#include <linux/if_tun.h>
#include <arpa/inet.h>
#include <netinet/ip.h>
/* Name of the TUN interface created by tun_alloc() and used in every ip/sysctl command. */
#define TUN_NAME "tun_race"
/*
 * Source address of the injected packet. It is not configured on the host
 * when the packet is sent; race_thread() adds it to TUN_NAME as a /32 later.
 */
#define LOCAL_ADDR "172.20.20.1"
/* Address assigned to the TUN interface itself (as a /24) during setup. */
#define TUN_ADDR "172.20.20.2"
/* Destination address of the injected packet; inside the 172.30.0.0/24 route added on TUN_NAME. */
#define REMOTE_ADDR "172.30.0.1"
/* File descriptor of the TUN device; set in main() from tun_alloc() and used to inject packets. */
static int tun_fd;
/*
 * Open /dev/net/tun and attach it to a TUN interface (IFF_TUN | IFF_NO_PI)
 * called `name`.
 *
 * name: requested interface name; must fit in IFNAMSIZ including the
 *       terminating NUL.
 *
 * Returns the open file descriptor on success. On failure returns -1 with
 * errno describing the cause: EINVAL for a NULL name, ENAMETOOLONG for a name
 * that does not fit, otherwise the errno left by open() or ioctl().
 */
static int tun_alloc(const char *name)
{
    struct ifreq ifr = { .ifr_flags = IFF_TUN | IFF_NO_PI };
    size_t name_len;
    int saved_errno;
    int fd;

    if (name == NULL) {
        errno = EINVAL;
        return -1;
    }

    /* Refuse instead of silently truncating to a different interface name. */
    name_len = strlen(name);
    if (name_len >= IFNAMSIZ) {
        errno = ENAMETOOLONG;
        return -1;
    }

    fd = open("/dev/net/tun", O_RDWR);
    if (fd < 0)
        return -1;

    /* ifr is zero-initialised, so copying the NUL too keeps it terminated. */
    memcpy(ifr.ifr_name, name, name_len + 1);

    if (ioctl(fd, TUNSETIFF, &ifr) < 0) {
        /* close() may overwrite errno; keep the ioctl failure for the caller. */
        saved_errno = errno;
        close(fd);
        errno = saved_errno;
        return -1;
    }

    return fd;
}
/*
 * Compute the 16-bit one's-complement checksum of `len` bytes at `data`.
 *
 * The buffer is summed as 16-bit words in memory order, so the returned value
 * can be stored directly into a header field without byte swapping. An odd
 * trailing byte is treated as if followed by a zero byte.
 *
 * data: start of the buffer; no alignment requirement.
 * len:  number of bytes to sum; values <= 0 are treated as an empty buffer.
 *
 * Returns the complemented, folded sum (0xffff for an empty buffer).
 */
static uint16_t ip_csum(const void *data, int len)
{
    const uint8_t *cursor = data;
    uint64_t sum = 0; /* wide enough that no int-sized input can overflow it */
    uint16_t word;

    while (len > 1) {
        /* memcpy avoids unaligned and aliasing-unsafe uint16_t loads. */
        memcpy(&word, cursor, sizeof(word));
        sum += word;
        cursor += sizeof(word);
        len -= 2;
    }

    if (len == 1) {
        /* Pad the last byte with a zero in memory order, whatever the host endianness. */
        word = 0;
        memcpy(&word, cursor, 1);
        sum += word;
    }

    /* Fold the carries back in until the sum fits in 16 bits. */
    while (sum >> 16)
        sum = (sum >> 16) + (sum & 0xffff);

    return (uint16_t)~sum;
}
/* Build packet with malformed timestamp option to trigger ICMP error */
/*
 * Fill `buf` with an IPv4/TCP packet from LOCAL_ADDR to REMOTE_ADDR whose
 * header carries a deliberately malformed timestamp option, intended to make
 * the receiver answer with an ICMP error.
 *
 * buf: destination buffer; its first 64 bytes are cleared before the packet
 *      is written.
 *
 * Returns the packet's total length in bytes (host byte order).
 */
static int build_packet(uint8_t *buf)
{
    /*
     * Eight option bytes (two 32-bit words):
     *   timestamp option (0x44) with length 5 and pointer 5 - malformed,
     *   two zero bytes, two NOPs, then END.
     */
    static const uint8_t bad_options[8] = {
        0x44, 5, 5, 0, 0, 1, 1, 0
    };
    const int hdr_words = 7; /* 5 fixed header words + 2 option words */
    const int hdr_bytes = hdr_words * 4;
    struct iphdr *ip = (struct iphdr *)buf;

    memset(buf, 0, 64);
    memcpy(buf + sizeof(*ip), bad_options, sizeof(bad_options));

    ip->version = 4;
    ip->ihl = hdr_words;
    ip->tot_len = htons(hdr_bytes + 20);
    ip->ttl = 64;
    ip->protocol = IPPROTO_TCP;
    ip->saddr = inet_addr(LOCAL_ADDR);
    ip->daddr = inet_addr(REMOTE_ADDR);

    /* The check field is still zero from the memset while the sum is taken. */
    ip->check = ip_csum(ip, hdr_bytes);

    return ntohs(ip->tot_len);
}
/* Race thread: modify network config during kernel delay */
static void *race_thread(void *arg)
{
(void)arg;
usleep(500000); /* 500ms into the 2s kernel delay */
/* Add LOCAL_ADDR as local - ip_route_input will return LOCAL route */
system("ip addr add " LOCAL_ADDR "/32 dev " TUN_NAME " 2>/dev/null");
/* Replace block policy with ICMP allow policy */
system("ip xfrm policy delete src 0.0.0.0/0 dst 0.0.0.0/0 dir out 2>/dev/null");
system("ip xfrm policy add src 0.0.0.0/0 dst 0.0.0.0/0 dir out flag icmp 2>/dev/null");
return NULL;
}
/*
 * Entry point of the reproducer.
 *
 * Usage: prog [iterations]   (default 3; must be a non-negative integer)
 *
 * Creates the TUN interface, configures addressing, routing and sysctls
 * through ip(8)/sysctl(8), then for each iteration:
 *   - resets state (LOCAL_ADDR removed, xfrm policies flushed, block policy added),
 *   - starts race_thread() to reconfigure the host shortly afterwards,
 *   - injects the crafted packet by writing it to the TUN fd,
 *   - joins the thread and sleeps 3 s before the next round.
 *
 * Returns 0 when every packet was injected, 1 on a usage error, a setup
 * failure, or if any injection failed.
 */
int main(int argc, char *argv[])
{
    uint8_t pkt[128];
    int pkt_len, i, n = 3;
    int rc = 0;

    if (argc > 1) {
        char *end;
        long v;

        /* strtol instead of atoi so garbage or out-of-range input is rejected. */
        errno = 0;
        v = strtol(argv[1], &end, 10);
        if (end == argv[1] || *end != '\0' || errno == ERANGE ||
            v < 0 || v > INT_MAX) {
            fprintf(stderr, "usage: %s [iterations]\n", argv[0]);
            return 1;
        }
        n = (int)v;
    }

    tun_fd = tun_alloc(TUN_NAME);
    if (tun_fd < 0) {
        perror("tun_alloc");
        return 1;
    }

    /* Setup TUN without LOCAL_ADDR initially */
    system("ip link set " TUN_NAME " up");
    system("ip addr add " TUN_ADDR "/24 dev " TUN_NAME);
    system("ip route add 172.30.0.0/24 dev " TUN_NAME);
    system("sysctl -qw net.ipv4.ip_forward=1");
    system("sysctl -qw net.ipv4.conf." TUN_NAME ".rp_filter=0");
    system("sysctl -qw net.ipv4.conf.all.rp_filter=0");
    system("sysctl -qw net.ipv4.conf." TUN_NAME ".accept_local=1");
    system("sysctl -qw net.ipv4.conf.all.accept_local=1");

    pkt_len = build_packet(pkt);

    for (i = 0; i < n; i++) {
        pthread_t th;
        ssize_t written;
        int err;

        /* Reset: remove LOCAL_ADDR, set block policy */
        system("ip addr del " LOCAL_ADDR "/32 dev " TUN_NAME " 2>/dev/null");
        system("ip xfrm policy flush");
        system("ip xfrm policy add src 0.0.0.0/0 dst 0.0.0.0/0 dir out action block priority 100");

        /* Without a running thread there is nothing to join and no race to run. */
        err = pthread_create(&th, NULL, race_thread, NULL);
        if (err != 0) {
            fprintf(stderr, "pthread_create: %s\n", strerror(err));
            close(tun_fd);
            return 1;
        }

        /* Report a failed injection; the iteration cannot trigger the bug then. */
        written = write(tun_fd, pkt, (size_t)pkt_len);
        if (written < 0) {
            perror("write");
            rc = 1;
        } else if (written != pkt_len) {
            fprintf(stderr, "short write: %zd of %d bytes\n", written, pkt_len);
            rc = 1;
        }

        pthread_join(th, NULL);
        sleep(3); /* Wait for kernel to finish */
    }

    close(tun_fd);
    printf("Done. Check: dmesg | grep -E '(ip_rt_bug|WARNING)'\n");
    return rc;
}

ip_rt_bug Race Condition Reproducer

This reproducer triggers a race condition in icmp_route_lookup() that causes ip_rt_bug() to be called, resulting in a kernel WARNING.

Bug Description

When sending an ICMP error reply, icmp_route_lookup() may use ip_route_input() to find a route in the reverse path. If the destination address becomes local (e.g., added via ip addr add) between the initial check and the route lookup, ip_route_input() returns a LOCAL route with dst.output = ip_rt_bug. This route is then incorrectly used for output, triggering ip_rt_bug().

Kernel Patch Required

Apply the following patch to net/ipv4/icmp.c to widen the race window:

@@ -549,6 +551,15 @@ static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4,
 			err = PTR_ERR(rt2);
 			goto relookup_failed;
 		}
+
+		/*
+		 * DEBUG: Delay before ip_route_input to widen race window
+		 * fl4_dec.daddr = 172.20.20.1 (0xac141401)
+		 * During delay, userspace can add this addr as local
+		 */
+		if (fl4_dec.daddr == htonl(0xac141401))
+			mdelay(2000);
+
 		/* Ugh! */
 		orefdst = skb_dstref_steal(skb_in);
 		err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr,

Then rebuild and boot into the patched kernel.

Build

gcc -o ip_rt_bug_race ip_rt_bug_race.c -lpthread

Run

sudo ./ip_rt_bug_race [iterations]

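Default is 3 iterations. Each iteration takes roughly 5 seconds: the packet write blocks for the 2-second kernel delay, and the program then sleeps 3 more seconds before the next attempt.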

Check Result

dmesg | grep -E '(ip_rt_bug|WARNING|cut here)'

If the bug is triggered, you will see a kernel WARNING with call trace from ip_rt_bug().

Expected Output (Bug Triggered)

[  367.806750] ------------[ cut here ]------------
[  367.806800] WARNING: net/ipv4/route.c:1275 at ip_rt_bug+0x21/0x30, CPU#0: ip_rt_bug_race/9897
[  367.807027] Modules linked in:
[  367.807094] CPU: 0 UID: 0 PID: 9897 Comm: ip_rt_bug_race Tainted: G        W           6.19.0-rc6-00850-gce31db7bb90b-dirty #118 PREEMPT(none)
[  367.807153] Tainted: [W]=WARN
[  367.807171] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[  367.807198] RIP: 0010:ip_rt_bug+0x21/0x30
[  367.807257] Code: 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 55 48 89 e5 53 48 89 d3 e8 6f f7 82 f8 ba 02 00 00 00 48 89 de 31 ff e8 f0 9b 79 ff 90 <0f> 0b 90 31 c0 48 8b 5d f8 c9 e9 00 6a 80
[  367.807291] RSP: 0018:ffff888102927448 EFLAGS: 00010293
[  367.807343] RAX: 0000000000000000 RBX: ffff8880122c3680 RCX: ffff888063c33f00
[  367.807369] RDX: ffff88810a808000 RSI: ffffffff88934d97 RDI: ffff88810009d680
[  367.807394] RBP: ffff888102927450 R08: dffffc0000000001 R09: 0000000000000019
[  367.807419] R10: 0000000000000000 R11: 00000000302f4c6b R12: ffffffff90156680
[  367.807443] R13: ffff8881015c2a00 R14: ffff8880122c3680 R15: ffff8881015c2b98
[  367.807469] FS:  00007f3020111740(0000) GS:ffff8880d3dff000(0000) knlGS:0000000000000000
[  367.807502] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  367.807527] CR2: 00005588693be008 CR3: 000000001af0a000 CR4: 0000000000752ef0
[  367.807551] PKRU: 55555554
[  367.807567] Call Trace:
[  367.807579]  <TASK>
[  367.807593]  ip_push_pending_frames+0x202/0x240
[  367.807662]  icmp_push_reply+0x30d/0x430
[  367.807719]  __icmp_send+0x1149/0x24f0
[  367.807761]  ? __pfx___icmp_send+0x10/0x10
[  367.807800]  ? __pfx_ip_route_input_slow+0x10/0x10
[  367.807838]  ? x64_sys_call+0x7d/0x2100
[  367.807934]  ? do_syscall_64+0xa4/0xf80
[  367.808022]  ? __sanitizer_cov_trace_switch+0x58/0xa0
[  367.808091]  ip_options_compile+0xa2/0xd0
[  367.808136]  ? __pfx_ip_options_compile+0x10/0x10
[  367.808183]  ip_rcv_finish_core+0x829/0x1950
[  367.808224]  ? ip_rcv_core+0x160/0xcd0
[  367.808264]  ip_rcv+0x2d7/0x420
[  367.808303]  ? __pfx_ip_rcv+0x10/0x10
[  367.808344]  ? __kasan_check_write+0x18/0x20
[  367.808420]  ? _copy_from_iter+0x24f/0x1560
[  367.808510]  __netif_receive_skb_one_core+0x185/0x1f0
[  367.808591]  ? __pfx_ip_rcv+0x10/0x10
[  367.808638]  ? __pfx___netif_receive_skb_one_core+0x10/0x10
[  367.808677]  ? __virtio_net_hdr_to_skb+0x59d/0x1410
[  367.808758]  ? __sanitizer_cov_trace_const_cmp1+0x1e/0x30
[  367.808799]  ? kvm_clock_get_cycles+0x46/0x70
[  367.808859]  ? __sanitizer_cov_trace_cmp4+0x1a/0x20
[  367.808897]  __netif_receive_skb+0x24/0x130
[  367.808933]  netif_receive_skb+0x90/0x450
[  367.808969]  ? __pfx_netif_receive_skb+0x10/0x10
[  367.809006]  ? virtio_net_hdr_tnl_to_skb+0x631/0x900
[  367.809054]  tun_get_user+0x3413/0x3fb0
[  367.809099]  ? wakeup_preempt+0xf5/0x250
[  367.809165]  ? __pfx_tun_get_user+0x10/0x10
[  367.809211]  ? __sanitizer_cov_trace_const_cmp4+0x1a/0x20
[  367.809251]  ? ref_tracker_alloc+0x2da/0x570
[  367.809306]  ? __pfx_ref_tracker_alloc+0x10/0x10
[  367.809350]  ? __sanitizer_cov_trace_const_cmp4+0x1a/0x20
[  367.809390]  ? put_pid.part.0+0xbc/0x150
[  367.809433]  ? put_pid+0x28/0x40
[  367.809462]  ? apparmor_file_permission+0x18f/0x310
[  367.809521]  tun_chr_write_iter+0xe4/0x220
[  367.809566]  ? tun_chr_write_iter+0xe4/0x220
[  367.809612]  vfs_write+0x6b8/0xe10
[  367.809665]  ? __pfx_tun_chr_write_iter+0x10/0x10
[  367.809712]  ? __pfx_vfs_write+0x10/0x10
[  367.809755]  ? __set_task_blocked+0xa9/0x210
[  367.809802]  ksys_write+0x128/0x230
[  367.809852]  ? __pfx_ksys_write+0x10/0x10
[  367.809894]  ? __audit_syscall_entry+0x38e/0x4e0
[  367.809955]  __x64_sys_write+0x76/0xb0
[  367.809998]  x64_sys_call+0x7d/0x2100
[  367.810044]  do_syscall_64+0xa4/0xf80
[  367.810097]  entry_SYSCALL_64_after_hwframe+0x77/0x7f
[  367.810133] RIP: 0033:0x7f301ff1c5ad
[  367.810163] Code: e5 48 83 ec 20 48 89 55 e8 48 89 75 f0 89 7d f8 e8 a8 bf f7 ff 48 8b 55 e8 48 8b 75 f0 41 89 c0 8b 7d f8 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 33 44 89 c7 48 89 4b
[  367.810196] RSP: 002b:00007ffd7f3696b0 EFLAGS: 00000293 ORIG_RAX: 0000000000000001
[  367.810230] RAX: ffffffffffffffda RBX: 00007ffd7f3698c8 RCX: 00007f301ff1c5ad
[  367.810256] RDX: 0000000000000030 RSI: 00007ffd7f369710 RDI: 0000000000000003
[  367.810279] RBP: 00007ffd7f3696d0 R08: 0000000000000000 R09: 00007ffd7f3695f7
[  367.810303] R10: 0000000000000008 R11: 0000000000000293 R12: 0000000000000002
[  367.810326] R13: 0000000000000000 R14: 000055884357fd38 R15: 00007f302015a000
[  367.810353]  </TASK>
[  367.810367] ---[ end trace 0000000000000000 ]---
root@bms-ytl-d1-app-10-251-176-23:~# QEMU: Terminated
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment