|
预备知识
1. 客户机物理页框到宿主机虚拟地址转换
http://blog.iyunv.com/zhuriyuxiao/article/details/8968781
http://www.tuicool.com/articles/NjY3uu
2. KVM API
简单的API例子
http://smilejay.com/2013/03/use-kvm-api/
hejie 同学的《使用KVM API实现Emulator Demo》
http://soulxu.github.io/blog/2014/08/11/use-kvm-api-write-emulator/
3. wenyi 同学的 《KVM 内存虚拟化及其实现》
http://www.ibm.com/developerworks/cn/linux/l-cn-kvm-mem/
4. KVM 官方文档
$ git clone http://git.kernel.org/pub/scm/virt/kvm/kvm.git
$ vim Documentation/virtual/kvm/api.txt
实战
该实例由Mark Wu同学提供。
KVM API General Description
· The kvm API is centered around file descriptors.
· An initial open("/dev/kvm") obtains a handle to the kvm subsystem; this handle can be used to issue system ioctls.
· A KVM_CREATE_VM ioctl on this handle will create a VM file descriptor which can be used to issue VM ioctls.
· A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu and return a file descriptor pointing to it.
· Finally, ioctls on a vcpu fd can be used to control the vcpu, including the important task of actually running guest code.
· KVM related file descriptors in qemu.
1 (gdb) p kvm_state->fd
2 $1 = 3
3 (gdb) p kvm_state->vmfd
4 $2 = 4
5 (gdb) info threads
6 4 Thread 0x7f86a60f0700 (LWP 13455) 0x00007f86ad0803dc in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
7 3 Thread 0x7f86a56ef700 (LWP 13456) 0x00007f86ad0803dc in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
8 2 Thread 0x7f86a6af1700 (LWP 13960) 0x00007f86ad08075b in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
9 * 1 Thread 0x7f86ae478940 (LWP 13453) 0x00007f86a97772f3 in select () from /lib64/libc.so.6
10 (gdb) t 3
11 [Switching to thread 3 (Thread 0x7f86a56ef700 (LWP 13456))]#0 0x00007f86ad0803dc in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
12 (gdb) bt
13 #0 0x00007f86ad0803dc in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
14 #1 0x00007f86ae60a2e9 in qemu_cond_wait (cond=, mutex=) at qemu-thread-posix.c:113
15 #2 0x00007f86ae67772f in qemu_kvm_wait_io_event (arg=0x7f86b10a0930) at /home/mark/Work/qemu/qemu/cpus.c:710
16 #3 qemu_kvm_cpu_thread_fn (arg=0x7f86b10a0930) at /home/mark/Work/qemu/qemu/cpus.c:745
17 #4 0x00007f86ad07c7f1 in start_thread () from /lib64/libpthread.so.0
18 #5 0x00007f86a977e70d in clone () from /lib64/libc.so.6
19 (gdb) p ((CPUX86State *)0x7f86b10a0930)->kvm_fd
20 $3 = 12
21 (gdb) t 4
22 [Switching to thread 4 (Thread 0x7f86a60f0700 (LWP 13455))]#0 0x00007f86ad0803dc in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
23 (gdb) bt
24 #0 0x00007f86ad0803dc in pthread_cond_wait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
25 #1 0x00007f86ae60a2e9 in qemu_cond_wait (cond=, mutex=) at qemu-thread-posix.c:113
26 #2 0x00007f86ae67772f in qemu_kvm_wait_io_event (arg=0x7f86b1088a00) at /home/mark/Work/qemu/qemu/cpus.c:710
27 #3 qemu_kvm_cpu_thread_fn (arg=0x7f86b1088a00) at /home/mark/Work/qemu/qemu/cpus.c:745
28 #4 0x00007f86ad07c7f1 in start_thread () from /lib64/libpthread.so.0
29 #5 0x00007f86a977e70d in clone () from /lib64/libc.so.6
30 (gdb) p ((CPUX86State *)0x7f86b1088a00)->kvm_fd
31 $4 = 11
· Dump KVM related files via crash
1 crash> files 15011
2 PID: 15011 TASK: ffff880053ea0100 CPU: 0 COMMAND: "qemu-system-x86"
3 ROOT: / CWD: /home/mark/Work/qemu/qemu
4 FD FILE DENTRY INODE TYPE PATH
5 0 ffff880050b8c8c0 ffff88000ad77a80 ffff880134d13318 CHR /dev/pts/4
6 1 ffff880050b8c8c0 ffff88000ad77a80 ffff880134d13318 CHR /dev/pts/4
7 2 ffff880050b8c8c0 ffff88000ad77a80 ffff880134d13318 CHR /dev/pts/4
8 3 ffff88008491fa80 ffff880134c9b0c0 ffff88013b372a78 CHR /dev/kvm
9 4 ffff88012eb52140 ffff8800ae376e40 ffff88013b71e2d8 REG anon_inode:/kvm-vm
10 5 ffff8801357e7180 ffff8800ae3760c0 ffff88013b71e2d8 REG anon_inode:/[signalfd]
11 6 ffff880014255a80 ffff8800ae376180 ffff88013b71e2d8 REG anon_inode:/[eventfd]
12 7 ffff880014255a80 ffff8800ae376180 ffff88013b71e2d8 REG anon_inode:/[eventfd]
13 8 ffff880136751bc0 ffff880089da2c80 ffff88003f6490c0 REG /home/mark/Work/qemu/images/fedora.img
14 9 ffff8800a3c4d480 ffff8800ae376300 ffff880134cb1358 FIFO
15 10 ffff88008adc6980 ffff8800ae376300 ffff880134cb1358 FIFO
16 11 ffff88008ae865c0 ffff88012256f440 ffff88013b71e2d8 REG anon_inode:/kvm-vcpu
17 12 ffff88007bb11ec0 ffff88012256f2c0 ffff88013b71e2d8 REG anon_inode:/kvm-vcpu
18 crash> p ((struct file *)0xffff88008491fa80)->f_op
19 $5 = (const struct file_operations *) 0xffffffffa04f0e40
20 crash> sym 0xffffffffa04f0e40
21 ffffffffa04f0e40 (d) kvm_chardev_ops [kvm]
22 crash> px *((struct file*)0xffff88007bb11ec0)->f_op
23 $7 = {
24 owner = 0xffffffffa05249a0,
25 llseek = 0,
26 read = 0,
27 write = 0,
28 :
29 ioctl = 0,
30 unlocked_ioctl = 0xffffffffa04bae00,
31 compat_ioctl = 0xffffffffa04bae00,
32 mmap = 0xffffffffa04b9220,
33 open = 0,
34 flush = 0,
35 release = 0xffffffffa04bd830,
36 fsync = 0,
37 aio_fsync = 0,
38 :
39 setlease = 0
40 }
41 crash> sym 0xffffffffa04bae00
42 ffffffffa04bae00 (t) kvm_vcpu_ioctl [kvm]
43 crash> sym 0xffffffffa04b9220
44 ffffffffa04b9220 (t) kvm_vcpu_mmap [kvm]
45 crash> px ((struct file *)0xffff88012eb52140)->private_data
46 $15 = (void *) 0xffff880137c6c000
47 crash> px vm_list
48 vm_list = $16 = {
49 next = 0xffff880137c6c280,
50 prev = 0xffff880137c6c280
51 }
52 crash> sym vm_list
53 ffffffffa04f0aa0 (D) vm_list [kvm]
54 crash> px ((struct kvm*)0xffff880137c6c000)->vm_list
55 $17 = {
56 next = 0xffffffffa04f0aa0,
57 prev = 0xffffffffa04f0aa0
58 }
CPU Virtualization
vCPU initialization
· qemu-kvm backtrace of vCPU initialization
1 (gdb) bt
2 #0 qemu_init_vcpu (_env=0x7ffff8b18a00) at /home/mark/Work/qemu/qemu/cpus.c:936
3 #1 0x00007ffff7e9f869 in cpu_x86_init (cpu_model=0x7ffff7f8fca9 "qemu64") at /home/mark/Work/qemu/qemu/target-i386/helper.c:1263
4 #2 0x00007ffff7ee1de0 in pc_new_cpu (cpu_model=0x7ffff7f8fca9 "qemu64") at /home/mark/Work/qemu/qemu/hw/pc.c:936
5 #3 pc_cpus_init (cpu_model=0x7ffff7f8fca9 "qemu64") at /home/mark/Work/qemu/qemu/hw/pc.c:963
6 #4 0x00007ffff7ee297c in pc_init1 (system_memory=0x7ffff8b113f0, system_io=0x7ffff8b114f0, ram_size=536870912, boot_device=0x7fffffffdf10 "cad",
7 kernel_filename=0x0, kernel_cmdline=0x7ffff7f668eb "", initrd_filename=0x0, cpu_model=0x0, pci_enabled=1, kvmclock_enabled=1)
8 at /home/mark/Work/qemu/qemu/hw/pc_piix.c:103
9 #5 0x00007ffff7ee30d8 in pc_init_pci (ram_size=536870912, boot_device=0x7fffffffdf10 "cad", kernel_filename=0x0, kernel_cmdline=0x7ffff7f668eb "",
10 initrd_filename=0x0, cpu_model=) at /home/mark/Work/qemu/qemu/hw/pc_piix.c:245
11 #6 0x00007ffff7de57a9 in main (argc=, argv=, envp=) at /home/mark/Work/qemu/qemu/vl.c:3351
1 qemu_init_vcpu
2 qemu_kvm_start_vcpu
3 qemu_thread_create(env->thread, qemu_kvm_cpu_thread_fn, env); /* One qemu thread per vCPU */
4 qemu_kvm_cpu_thread_fn
5 kvm_init_vcpu
6 +-->kvm_cpu_exec---+
7 | |
8 -------------------+
9
10 int kvm_init_vcpu(CPUState *env)
11 {
12 KVMState *s = kvm_state;
13 long mmap_size;
14 int ret;
15 DPRINTF("kvm_init_vcpu\n");
16 ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
17 if (ret < 0) {
18 DPRINTF("kvm_create_vcpu failed\n");
19 goto err;
20 }
21 env->kvm_fd = ret;
22 env->kvm_state = s;
23 env->kvm_vcpu_dirty = 1;
24 mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
25 if (mmap_size < 0) {
26 ret = mmap_size;
27 DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
28 goto err;
29 }
30 env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
31 env->kvm_fd, 0);
32 :
33 }
Guest execution
· qemu function kvm_cpu_exec
1 int kvm_cpu_exec(CPUState *env)
2 {
3 struct kvm_run *run = env->kvm_run;
4 int ret, run_ret;
5 DPRINTF("kvm_cpu_exec()\n");
6 if (kvm_arch_process_async_events(env)) {
7 env->exit_request = 0;
8 return EXCP_HLT;
9 }
10 cpu_single_env = env;
11 do {
12 if (env->kvm_vcpu_dirty) {
13 kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE);
14 env->kvm_vcpu_dirty = 0;
15 }
16 kvm_arch_pre_run(env, run);
17 if (env->exit_request) {
18 DPRINTF("interrupt exit requested\n");
19 /*
20 * KVM requires us to reenter the kernel after IO exits to complete
21 * instruction emulation. This self-signal will ensure that we
22 * leave ASAP again.
23 */
24 qemu_cpu_kick_self();
25 }
26 cpu_single_env = NULL;
27 qemu_mutex_unlock_iothread();
28 run_ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
29 qemu_mutex_lock_iothread();
30 cpu_single_env = env;
31 kvm_arch_post_run(env, run);
32 kvm_flush_coalesced_mmio_buffer();
33 if (run_ret < 0) {
34 if (run_ret == -EINTR || run_ret == -EAGAIN) {
35 DPRINTF("io window exit\n");
36 ret = EXCP_INTERRUPT;
37 break;
38 }
39 DPRINTF("kvm run failed %s\n", strerror(-run_ret));
40 abort();
41 }
42 switch (run->exit_reason) {
43 case KVM_EXIT_IO:
44 DPRINTF("handle_io\n");
45 kvm_handle_io(run->io.port,
46 (uint8_t *)run + run->io.data_offset,
47 run->io.direction,
48 run->io.size,
49 run->io.count);
50 ret = 0;
51 break;
52 case KVM_EXIT_MMIO:
53 DPRINTF("handle_mmio\n");
54 cpu_physical_memory_rw(run->mmio.phys_addr,
55 run->mmio.data,
56 run->mmio.len,
57 run->mmio.is_write);
58 ret = 0;
59 break;
60 :
61 }
62 } while (ret == 0);
63 :
64 return ret;
65 }
· kernel code path
1 sys_ioctl
2 do_vfs_ioctl
3 vfs_ioctl
4 kvm_vcpu_ioctl /* kvm_vcpu_fops.unlocked_ioctl */
5 kvm_arch_vcpu_ioctl_run
6 __vcpu_run
7 vcpu_enter_guest
8 vmx_vcpu_run /* kvm_x86_ops->run */
9 |
10 v vm entry
11 +-----------------+
12 | guest code |
13 | on this cpu |
14 +-------------------+
15 | vm exit
16 v
17 vmx_handle_exit /* kvm_x86_ops->handle_exit */
18 return kvm_vmx_exit_handlers[exit_reason](vcpu)
· kernel exit handlers
1 /*
2 * The exit handlers return 1 if the exit was handled fully and guest execution
3 * may resume. Otherwise they set the kvm_run parameter to indicate what needs
4 * to be done to userspace and return 0.
5 */
6 static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
7 [EXIT_REASON_EXCEPTION_NMI] = handle_exception,
8 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
9 [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault,
10 [EXIT_REASON_NMI_WINDOW] = handle_nmi_window,
11 [EXIT_REASON_IO_INSTRUCTION] = handle_io,
12 :
13 :
· guest runtime information shared between the kvm kernel module and qemu-kvm
1 env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
2 env->kvm_fd, 0);
3 (gdb) p ((struct CPUX86State*)0x7fcdbe63f930)->kvm_run
4 $2 = (struct kvm_run *) 0x7fcdbcfa2000
5 (gdb) p *((struct CPUX86State*)0x7fcdbe63f930)->kvm_run
6 $3 = {request_interrupt_window = 0 '\000', padding1 = "\000\000\000\000\000\000", exit_reason = 10, ready_for_interrupt_injection = 0 '\000', if_flag =
7 0 '\000', padding2 = "\000", cr8 = 0, apic_base = 4276094976, {hw = {hardware_exit_reason = 4276093104}, fail_entry = {hardware_entry_failure_reason =
8 4276093104}, ex = {exception = 4276093104, error_code = 0}, io = {direction = 176 '\260', size = 0 '\000', port = 65248, count = 0, data_offset =
9 513418191540584448}, debug = {arch = {exception = 4276093104, pad = 0, pc = 513418191540584448, dr6 = 4294967300, dr7 = 0}}, mmio = {phys_addr =
10 4276093104, data = "\000\000\000\000 \a \a", len = 4, is_write = 1 '\001'}, hypercall = {nr = 4276093104, args = {513418191540584448, 4294967300, 0, 0,
11 0, 0}, ret = 0, longmode = 0, pad = 0}, tpr_access = {rip = 4276093104, is_write = 0, pad = 119539488}, s390_sieic = {icptcode = 176 '\260', ipa =
12 65248, ipb = 0}, s390_reset_flags = 4276093104, dcr = {dcrn = 4276093104, data = 0, is_write = 0 '\000'}, internal = {suberror = 4276093104, ndata = 0,
13 data = {513418191540584448, 4294967300, 0 }}, osi = {gprs = {4276093104, 513418191540584448, 4294967300, 0 }},
14 papr_hcall = {nr = 4276093104, ret = 513418191540584448, args = {4294967300, 0, 0, 0, 0, 0, 0, 0, 0}}, padding =
15 "\260\000\340\376\000\000\000\000\000\000\000\000 \a \a\004\000\000\000\001", '\000' }}
1 crash> vtop 7fcdbcfa2000
2 VIRTUAL PHYSICAL
3 7fcdbcfa2000 12eb3c000
4 PML: 137dfd7f8 => 136ff7067
5 PUD: 136ff79b0 => 134069067
6 PMD: 134069f38 => 13671c067
7 PTE: 13671cd10 => 800000012eb3c067
8 PAGE: 12eb3c000
9
10 PTE PHYSICAL FLAGS
11 800000012eb3c067 12eb3c000
12 (PRESENT|RW|USER|ACCESSED|DIRTY|NX)
13
14 VMA START END FLAGS FILE
15 ffff8800aac39b70 7fcdbcfa2000 7fcdbcfa5000 fb anon_inode:/kvm-vcpu
16 PAGE PHYSICAL MAPPING INDEX CNT FLAGS
17 ffffea0004237520 12eb3c000 0 ffff8800b72c9980 2 40000000000014
18 crash> px ((struct kvm*)0xffff880137c6c000)->vcpus[1]->run
19 $23 = (struct kvm_run *) 0xffff88012eb3c000
20 crash> vtop 0xffff88012eb3c000
21 VIRTUAL PHYSICAL
22 ffff88012eb3c000 12eb3c000
23 PML4 DIRECTORY: ffffffff81a85000
24 PAGE DIRECTORY: 1a86063
25 PUD: 1a86020 => a067
26 PMD: aba8 => 800000012ea001e3
27 PAGE: 12ea00000 (2MB)
28
29
30 PTE PHYSICAL FLAGS
31 800000012ea001e3 12ea00000 (PRESENT|RW|ACCESSED|DIRTY|PSE|GLOBAL|NX)
32
33 PAGE PHYSICAL MAPPING INDEX CNT FLAGS
34 ffffea0004237520 12eb3c000 0 ffff8800b72c9980 2 40000000000014
35 crash> px ((struct file*)0xffff88007bb11ec0)->private_data
36 $30 = (void *) 0xffff88013860c2b8
37 crash> px ((struct kvm_vcpu *)0xffff88013860c2b8)->run
38 $31 = (struct kvm_run *) 0xffff88012eb3c000
Physical Memory Virtualization
Physical memory initialization
· Qemu backtrace
1 (gdb) bt
2 #0 kvm_set_user_memory_region (s=0x7ffff8b100a0, slot=0x7ffff8b100a0) at /home/mark/Work/qemu/qemu/kvm-all.c:168
3 #1 0x00007ffff7ea3fae in kvm_set_phys_mem (client=, start_addr=, size=,
4 phys_offset=, log_dirty=false) at /home/mark/Work/qemu/qemu/kvm-all.c:650
5 #2 kvm_client_set_memory (client=, start_addr=, size=, phys_offset=,
6 log_dirty=false) at /home/mark/Work/qemu/qemu/kvm-all.c:663
7 #3 0x00007ffff7e8405a in cpu_notify_set_memory (start_addr=0, size=134217728, phys_offset=0, region_offset=0, log_dirty=false)
8 at /home/mark/Work/qemu/qemu/exec.c:1742
9 #4 cpu_register_physical_memory_log (start_addr=0, size=134217728, phys_offset=0, region_offset=0, log_dirty=false)
10 at /home/mark/Work/qemu/qemu/exec.c:2675
11 #5 0x00007ffff7eaac70 in address_space_update_topology_pass (as=0x7ffff82f31e0, old_view=..., new_view=..., adding=true)
12 at /home/mark/Work/qemu/qemu/memory.c:731
13 #6 0x00007ffff7eacf31 in address_space_update_topology (as=0x7ffff82f31e0) at /home/mark/Work/qemu/qemu/memory.c:746
14 #7 0x00007ffff7ead514 in memory_region_update_topology () at /home/mark/Work/qemu/qemu/memory.c:760
15 #8 0x00007ffff7ee1787 in pc_memory_init (system_memory=0x7ffff8b11430, kernel_filename=, kernel_cmdline=0x7ffff7f668eb "",
16 initrd_filename=0x0, below_4g_mem_size=134217728, above_4g_mem_size=0, rom_memory=0x7ffff8b32240, ram_memory=0x7fffffffe188)
17 at /home/mark/Work/qemu/qemu/hw/pc.c:996
18 #9 0x00007ffff7ee2d96 in pc_init1 (system_memory=0x7ffff8b11430, system_io=0x7ffff8b11530, ram_size=134217728, boot_device=0x7fffffffe500 "cad",
19 kernel_filename=0x0, kernel_cmdline=0x7ffff7f668eb "", initrd_filename=0x0, cpu_model=0x0, pci_enabled=1, kvmclock_enabled=1)
20 at /home/mark/Work/qemu/qemu/hw/pc_piix.c:128
21 #10 0x00007ffff7ee30d8 in pc_init_pci (ram_size=134217728, boot_device=0x7fffffffe500 "cad", kernel_filename=0x0, kernel_cmdline=0x7ffff7f668eb "",
22 initrd_filename=0x0, cpu_model=) at /home/mark/Work/qemu/qemu/hw/pc_piix.c:245
23 #11 0x00007ffff7de57a9 in main (argc=, argv=, envp=) at /home/mark/Work/qemu/qemu/vl.c:3351
24 --------------------------------------------
25 kvm_set_user_memory_region
26 kvm_vm_ioctl
27 ioctl(kvm_context->vm_fd, KVM_SET_USER_MEMORY_REGION, ...)
Guest physical memory mapping
· Dump the GPA -> HVA -> HPA mapping via crash
1 crash> px vm_list
2 vm_list = $7 = {
3 next = 0xffff880080cb4280,
4 prev = 0xffff880080cb4280
5 }
6 crash> struct kvm.vm_list
7 struct kvm {
8 [640] struct list_head vm_list;
9 }
10 crash> px 0xffff880080cb4280-640
11 $8 = 0xffff880080cb4000
12 crash> pd ((struct kvm *)0xffff880080cb4000)->memslots
13 $9 = (struct kvm_memslots *) 0xffff880139326000
14 crash> px *((struct kvm *)0xffff880080cb4000)->memslots
15 $6 = {
16 nmemslots = 0x23,
17 memslots = {{
18 base_gfn = 0x0,
19 npages = 0xa0,
20 flags = 0x0,
21 rmap = 0xffffc90016aac000,
22 dirty_bitmap = 0x0,
23 lpage_info = {0xffffc900175d6000, 0xffffc900175d9000},
24 userspace_addr = 0x7f30dbe00000,
25 user_alloc = 0x1,
26 id = 0x0
27 }, {
28 base_gfn = 0xfffe0,
29 npages = 0x20,
30 flags = 0x0,
31 rmap = 0xffffc90016a82000,
32 dirty_bitmap = 0x0,
33 lpage_info = {0xffffc90016a85000, 0xffffc90016a88000},
34 userspace_addr = 0x7f310b1f0000,
35 user_alloc = 0x1,
36 id = 0x1
37 }, {
38 base_gfn = 0xc0,
39 npages = 0xc,
40 flags = 0x0,
41 rmap = 0xffffc9001787f000,
42 dirty_bitmap = 0x0,
43 lpage_info = {0xffffc90017882000, 0xffffc90017885000},
44 userspace_addr = 0x7f30dbec0000,
45 user_alloc = 0x1,
46 id = 0x2
47 }, {
48 base_gfn = 0xfc000,
49 npages = 0x800,
50 flags = 0x1,
51 rmap = 0xffffc90017b39000,
52 dirty_bitmap = 0xffffc90017b45000,
53 lpage_info = {0xffffc90017b3f000, 0xffffc90017b42000},
54 userspace_addr = 0x7f3101c00000,
55 user_alloc = 0x1,
56 id = 0x3
57 }, {
58 base_gfn = 0xcc,
59 npages = 0x24,
60 flags = 0x0,
61 rmap = 0xffffc90017990000,
62 dirty_bitmap = 0x0,
63 lpage_info = {0xffffc90017993000, 0xffffc90017996000},
64 userspace_addr = 0x7f30dbecc000,
65 user_alloc = 0x1,
66 id = 0x4
67 }, {
68 base_gfn = 0xf0,
69 npages = 0x10,
70 flags = 0x0,
71 rmap = 0xffffc90017999000,
72 dirty_bitmap = 0x0,
73 lpage_info = {0xffffc9001799c000, 0xffffc9001799f000},
74 userspace_addr = 0x7f30dbef0000,
75 user_alloc = 0x1,
76 id = 0x5
77 }, {
78 base_gfn = 0x100,
79 npages = 0x1ff00,
80 flags = 0x0,
81 rmap = 0xffffc900179a2000,
82 dirty_bitmap = 0x0,
83 lpage_info = {0xffffc90017aa4000, 0xffffc90017aa7000},
84 userspace_addr = 0x7f30dbf00000,
85 user_alloc = 0x1,
86 id = 0x6
87 }, {
88 base_gfn = 0x0,
89 npages = 0x0,
90 flags = 0x0,
91 rmap = 0x0,
92 dirty_bitmap = 0x0,
93 lpage_info = {0x0, 0x0},
94 userspace_addr = 0x0,
95 user_alloc = 0x0,
96 id = 0x0
97 },
98
99 On Guest:
100 [root@localhost ~]# ./hello
101 [0x400638]: Hello, world
102 crash> ps \|grep hello
103 2203 2112 0 ffff88001d68ae60 IN 0.1 4124 356 hello
104 crash> set 2203
105 PID: 2203
106 COMMAND: "hello"
107 TASK: ffff88001d68ae60 [THREAD_INFO: ffff88001da6c000]
108 CPU: 0
109 STATE: TASK_INTERRUPTIBLE
110 crash> rd 0x400638 2
111 400638: 77202c6f6c6c6548 255b000a646c726f Hello, world..[%
112 crash> vtop 0x400638
113 VIRTUAL PHYSICAL
114 400638 30b2638
115
116 PML: 1d669000 => 1dbfa067
117 PUD: 1dbfa000 => 1c82d067
118 PMD: 1c82d010 => 1ab49067
119 PTE: 1ab49000 => 30b2025
120 PAGE: 30b2000
121
122 On Host:
123 crash> px 0x7f30dbf00000+0x30b2638-0x100000
124 $7 = 0x7f30deeb2638
125 crash> rd 0x7f30deeb2638 2
126 7f30deeb2638: 77202c6f6c6c6548 255b000a646c726f Hello, world..[%
MMU Virtualization
Extended Page Table
· Overview
1 Guest CR3 EPT Base Pointer
2 | |
3 +-->+-------------------+ +---->+---------------------+
4 GVA--->| Guest Page Table |---> GPA ---> | Extended Page Table | ---> HPA
5 +-------------------+ +---------------------+
· EPT walkthrough
1 crash> px ((struct kvm_vcpu *)0xffff88007768c078)->arch.mmu
2 $18 = {
3 new_cr3 = 0xffffffffa04dca40 ,
4 page_fault = 0xffffffffa04e4410 ,
5 free = 0xffffffffa04e0870 ,
6 gva_to_gpa = 0xffffffffa04e4b70 ,
7 prefetch_page = 0xffffffffa04dc7a0 ,
8 sync_page = 0xffffffffa04dc7d0 ,
9 invlpg = 0xffffffffa04dc7e0 ,
10 root_hpa = 0x138457000,
11 root_level = 0x4,
12 shadow_root_level = 0x4,
13 base_role = {
14 word = 0x0,
15 {
16 glevels = 0x0,
17 level = 0x0,
18 quadrant = 0x0,
19 pad_for_nice_hex_output = 0x0,
20 direct = 0x0,
21 access = 0x0,
22 invalid = 0x0,
23 cr4_pge = 0x0,
24 nxe = 0x0,
25 cr0_wp = 0x0,
26 smep_andnot_wp = 0x0
27 }
28 },
29 pae_root = 0xffff88000d2c2000,
30 rsvd_bits_mask = {{0xfff0000000000, 0xfff0000000000, 0xfff0000000180, 0xfff0000000180}, {0x0, 0xfff00001fe000, 0xfff003fffe000, 0xfff0000000180}}
31 }
32
33 crash> px (0x30b2638>>39)&0x1ff
34 $19 = 0x0
35 crash> rd -p 0x138457000
36 138457000: 0000000043138007 ...C....
37 crash> px (0x30b2638>>30)&0x1ff
38 $20 = 0x0
39 crash> rd -p 0x43138000
40 43138000: 0000000108c3c007 ........
41 crash> px (0x30b2638>>21)&0x1ff
42 $21 = 0x18
43 crash> px (0x108c3c007 & ~0xfff)+ (8*0x18)
44 $22 = 0x108c3c0c0
45 crash> rd -p 0x108c3c0c0
46 108c3c0c0: 0000000125713007 .0q%....
47 crash> px (0x30b2638>>12)&0x1ff
48 $23 = 0xb2
49 crash> px (0x125713007 & ~0xfff) + (8*0xb2)
50 $24 = 0x125713590
51 crash> rd -p 0x125713590
52 125713590: 000000011289a277 w.......
53 crash> vtop 7f30deeb2638
54 VIRTUAL PHYSICAL
55 7f30deeb2638 11289a638
56 PML: 575e07f0 => 43236067
57 PUD: 43236618 => 7ef3c067
58 PMD: 7ef3c7b8 => 139495067
59 PTE: 139495590 => 800000011289a067
60 PAGE: 11289a000
61 PTE PHYSICAL FLAGS
62 800000011289a067 11289a000 (PRESENT|RW|USER|ACCESSED|DIRTY|NX)
63 VMA START END FLAGS FILE
64 ffff88005d100788 7f30dbe00000 7f30fbe00000 80100073
65 PAGE PHYSICAL MAPPING INDEX CNT FLAGS
66 ffffea0003c0e1b0 11289a000 ffff88000d307f61 7f30deeb2 1 4000000010006c
Shadow Page Table
· Overview
1 Guest CR3
2 |
3 +-->+-------------------+
4 GVA--->| Guest Page Table | ---> GPA
5 +-------------------+
6
7 Host CR3
8 |
9 +-->+-------------------+
10 GVA--->| Shadow Page Table | ---> HPA
11 +-------------------+
12 ~
· Shadow page table walkthrough (with option ept=no for kernel module kvm_intel)
1 crash> px ((struct kvm_vcpu *)0xffff88007768c078)->arch.mmu
2 mmu = {
3 new_cr3 = 0xffffffffa0914890 ,
4 page_fault = 0xffffffffa091a1b0 ,
5 free = 0xffffffffa0914880 ,
6 gva_to_gpa = 0xffffffffa0918b70 ,
7 prefetch_page = 0xffffffffa0915920 ,
8 sync_page = 0xffffffffa09177e0 ,
9 invlpg = 0xffffffffa0913b20 ,
10 root_hpa = 0x8886d000,
11 root_level = 0x4,
12 shadow_root_level = 0x4,
13 base_role = {
14 word = 0xe00004,
15 {
16 glevels = 0x4,
17 level = 0x0,
18 quadrant = 0x0,
19 pad_for_nice_hex_output = 0x0,
20 direct = 0x0,
21 access = 0x0,
22 invalid = 0x0,
23 cr4_pge = 0x1,
24 nxe = 0x1,
25 cr0_wp = 0x1,
26 smep_andnot_wp = 0x0
27 }
28 },
29 pae_root = 0xffff88008893e000,
30 rsvd_bits_mask = {{0xfff0000000000, 0xfff0000000000, 0xfff0000000180, 0xfff0000000180}, {0x0, 0xfff00001fe000, 0xfff003fffe000, 0xfff0000000180}}
31 },
32 crash> px (0x400608 >> 39) & 0x1ff
33 $17 = 0x0
34 crash> rd -p 0x8886d000
35 8886d000: 0000000081517027 'pQ.....
36 crash> px (0x400608 >> 30) & 0x1ff
37 $18 = 0x0
38 crash> px (0x81517027 & ~0xfff)
39 $19 = 0x81517000
40 crash> rd -p 0x81517000
41 81517000: 000000008159f027 '.Y.....
42 crash> px (0x400608 >> 21) & 0x1ff
43 $20 = 0x2
44 crash> px (0x8159f027 & ~0xfff)+(8*0x2)
45 $21 = 0x8159f010
46 crash> rd -p 0x8159f010
47 8159f010: 0000000069fd7027 'p.i....
48 crash> px (0x400608 >> 12) & 0x1ff
49 $22 = 0x0
50 crash> rd -p 0x69fd7000
51 69fd7000: 0000000055b99265 e..U....
52 crash> rd -p 55b99608 2
53 55b99608: 77202c6f6c6c6548 255b000a646c726f Hello, world..[%
|
|