$ unshare --user --map-root-user --net --mount
[root@incensed-gawain ~]# echo $$
2646
strace -f slirp4netns --configure --mtu=65520 2646 tap0
Note: your file descriptors are still available
The parent creates a socketpair, forks a child, and waits for the child to exit; the child communicates back on the socketpair
socketpair(AF_UNIX, SOCK_STREAM, 0, [3, 4]) = 0
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f5bc798b810) = 2667
wait4(2667, strace: Process 2667 attached
<unfinished ...>
The child joins the user and network namespaces of PID 2646, then creates the tap interface inside them
[pid 2667] openat(AT_FDCWD, "/proc/2646/ns/user", O_RDONLY) = 5
[pid 2667] openat(AT_FDCWD, "/proc/2646/ns/net", O_RDONLY) = 6
[pid 2667] setns(5, CLONE_NEWUSER) = 0
[pid 2667] setns(6, CLONE_NEWNET) = 0
[pid 2667] close(5) = 0
[pid 2667] close(6) = 0
[pid 2667] openat(AT_FDCWD, "/dev/net/tun", O_RDWR) = 5
The tap fd in the child is 5
[pid 2667] ioctl(5, TUNSETIFF, 0x7ffd60075390) = 0
[pid 2667] socket(AF_INET, SOCK_DGRAM, IPPROTO_IP) = 6
[pid 2667] ioctl(6, SIOCSIFFLAGS, {ifr_name="tap0", ifr_flags=IFF_UP|IFF_RUNNING}) = 0
[pid 2667] ioctl(6, SIOCSIFMTU, {ifr_name="tap0", ifr_mtu=65520}) = 0
[pid 2667] ioctl(6, SIOCSIFADDR, {ifr_name="tap0", ifr_addr={sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("10.0.2.100")}}) = 0
[pid 2667] ioctl(6, SIOCSIFNETMASK, {ifr_name="tap0", ifr_netmask={sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("255.255.255.0")}}) = 0
[pid 2667] ioctl(6, SIOCADDRT, 0x7ffd60075390) = 0
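Use ancillary (control) data to send fd 5 back to the parent process running on the host.
The fd is carried in an SCM_RIGHTS control message attached to a one-byte payload (see msg_control in the sendmsg call below).
https://linux.die.net/man/2/sendmsg
Note: this is not the same thing as MSG_OOB, which the man page describes as:
MSG_OOB
Sends out-of-band data on sockets that support this notion (e.g., of type SOCK_STREAM); the underlying protocol must also support out-of-band data
The sendmsg call below passes flags=0, so no MSG_OOB out-of-band data is involved; only the SCM_RIGHTS ancillary data transfers the fd.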
[pid 2667] sendmsg(4, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="\0", iov_len=1}], msg_iovlen=1, msg_control=[{cmsg_len=20, cmsg_level=SOL_SOCKET, cmsg_type=SCM_RIGHTS, cmsg_data=[5]}], msg_controllen=20, msg_flags=0}, 0) = 1
[pid 2667] write(2, "sent tapfd=5 for tap0\n", 22sent tapfd=5 for tap0
) = 22
[pid 2667] close(4) = 0
[pid 2667] exit_group(0) = ?
[pid 2667] +++ exited with 0 +++
The parent picks up the tap fd (it is also numbered 5 on the parent's side). The parent reads from this fd to get packets from the container.
That is how network traffic makes it across the network namespace boundary, even though tap interfaces cannot cross a network namespace boundary.
<... wait4 resumed> [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 2667
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=2667, si_uid=1000, si_status=0, si_utime=0, si_stime=0} ---
recvmsg(3, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="\0", iov_len=1}], msg_iovlen=1, msg_control=[{cmsg_len=20, cmsg_level=SOL_SOCKET, cmsg_type=SCM_RIGHTS, cmsg_data=[5]}], msg_controllen=24, msg_flags=0}, 0) = 1
write(2, "received tapfd=5\n", 17received tapfd=5
) = 17
close(3) = 0
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(0x88, 0x1), ...}) = 0
write(1, "Starting slirp\n", 15Starting slirp
) = 15
write(1, "* MTU: 65520\n", 25* MTU: 65520
) = 25
write(1, "* Network: 10.0.2.0\n", 28* Network: 10.0.2.0
) = 28
write(1, "* Netmask: 255.255.255.0"..., 33* Netmask: 255.255.255.0
) = 33
write(1, "* Gateway: 10.0.2.2\n", 28* Gateway: 10.0.2.2
) = 28
write(1, "* DNS: 10.0.2.3\n", 28* DNS: 10.0.2.3
) = 28
write(1, "* Recommended IP: 10.0.2.100\n", 30* Recommended IP: 10.0.2.100
) = 30
write(1, "WARNING: 127.0.0.1:* on the host"..., 127WARNING: 127.0.0.1:* on the host is accessible as 10.0.2.2 (set --disable-host-loopback to prohibit connecting to 127.0.0.1:*)
) = 127
rt_sigaction(SIGPIPE, {sa_handler=SIG_IGN, sa_mask=[PIPE], sa_flags=SA_RESTORER|SA_RESTART, sa_restorer=0x7f5bc73e2f30}, {sa_handler=SIG_DFL, sa_mask=[], sa_flags=0}, 8) = 0
poll([{fd=5, events=POLLIN|POLLHUP}], 1, 1000) = 1 ([{fd=5, revents=POLLIN}])
read(5, "33\0\0\0\26\372N1\230}\325\206\335`\0\0\0\0$\0\1\0\0\0\0\0\0\0\0\0\0"..., 65536) = 90
brk(NULL) = 0xe03000
brk(0xe2e000) = 0xe2e000
poll([{fd=5, events=POLLIN|POLLHUP}], 1, 1000) = 1 ([{fd=5, revents=POLLIN}])