Palacios Public Git Repository

To checkout Palacios execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


Merge branch 'Release-1.2' of ssh://palacios@newskysaw.cs.northwestern.edu//home...
[palacios.git] / palacios / src / devices / lnx_virtio_blk.c
1 /* 
2  * This file is part of the Palacios Virtual Machine Monitor developed
3  * by the V3VEE Project with funding from the United States National 
4  * Science Foundation and the Department of Energy.  
5  *
6  * The V3VEE Project is a joint project between Northwestern University
7  * and the University of New Mexico.  You can find out more at 
8  * http://www.v3vee.org
9  *
10  * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11  * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
12  * All rights reserved.
13  *
14  * Author: Jack Lange <jarusl@cs.northwestern.edu>
15  *
16  * This is free software.  You are permitted to use,
17  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
18  */
19
20 #include <palacios/vmm.h>
21 #include <palacios/vmm_dev_mgr.h>
22 #include <devices/lnx_virtio_pci.h>
23 #include <palacios/vm_guest_mem.h>
24
25 #include <devices/pci.h>
26
27
28
29 #ifndef CONFIG_DEBUG_VIRTIO_BLK
30 #undef PrintDebug
31 #define PrintDebug(fmt, args...)
32 #endif
33
34
35 #define SECTOR_SIZE 512
36
37 #define BLK_CAPACITY_PORT     20
38 #define BLK_MAX_SIZE_PORT     28
39 #define BLK_MAX_SEG_PORT      32
40 #define BLK_CYLINDERS_PORT    36
41 #define BLK_HEADS_PORT        38
42 #define BLK_SECTS_PORT        39
43
44 #define BLK_IN_REQ            0
45 #define BLK_OUT_REQ           1
46 #define BLK_SCSI_CMD          2
47
48 #define BLK_BARRIER_FLAG     0x80000000
49
50 #define BLK_STATUS_OK             0
51 #define BLK_STATUS_ERR            1
52 #define BLK_STATUS_NOT_SUPPORTED  2
53
54
/* Device-specific (block) configuration space, exposed to the guest
 * through the I/O ports that follow the generic virtio config region
 * (see BLK_*_PORT offsets above). Packed: this layout is guest ABI. */
struct blk_config {
    uint64_t capacity;   // disk size in 512-byte sectors (BLK_CAPACITY_PORT)
    uint32_t max_size;   // max segment size; meaningful with VIRTIO_SIZE_MAX
    uint32_t max_seg;    // max number of segments; meaningful with VIRTIO_SEG_MAX
    uint16_t cylinders;  // legacy geometry; meaningful with VIRTIO_LEGACY_GEOM
    uint8_t heads;
    uint8_t sectors;
} __attribute__((packed));
63
64
65
/* Header that leads every block request descriptor chain.
 * Packed: read directly out of guest memory in handle_kick(). */
struct blk_op_hdr {
    uint32_t type;    // BLK_IN_REQ, BLK_OUT_REQ, or BLK_SCSI_CMD
    uint32_t prior;   // request priority (unused by this implementation)
    uint64_t sector;  // starting sector; advanced as buffers are processed
} __attribute__((packed));
71
72 #define QUEUE_SIZE 128
73
74 /* Host Feature flags */
75 #define VIRTIO_BARRIER       0x01       /* Does host support barriers? */
76 #define VIRTIO_SIZE_MAX      0x02       /* Indicates maximum segment size */
77 #define VIRTIO_SEG_MAX       0x04       /* Indicates maximum # of segments */
78 #define VIRTIO_LEGACY_GEOM   0x10       /* Indicates support of legacy geometry */
79
80
/* Per-VM frontend state: one instance created by virtio_init(), shared
 * by every virtio block device attached through connect_fn(). */
struct virtio_dev_state {
    struct vm_device * pci_bus;   // PCI bus the block devices register on
    struct list_head dev_list;    // list of virtio_blk_state.dev_link entries
    struct guest_info * vm;       // owning guest (for address translation)
};
86
/* State for one virtio block device instance (one PCI function). */
struct virtio_blk_state {

    struct pci_device * pci_dev;       // PCI function fronting this device
    struct blk_config block_cfg;       // device-specific config space
    struct virtio_config virtio_cfg;   // generic virtio config space

    
    // Single request virtqueue (virtio-blk uses exactly one queue)
    struct virtio_queue queue;

    struct v3_dev_blk_ops * ops;       // backend read/write/capacity callbacks

    void * backend_data;               // opaque handle passed to ops

    int io_range_size;                 // I/O BAR size (power of two)

    struct virtio_dev_state * virtio_dev;  // back-pointer to frontend state

    struct list_head dev_link;         // entry in virtio_dev_state.dev_list
};
106
107
/* Device-manager free hook. Teardown is not implemented for this
 * device, so this unconditionally reports failure. */
static int virtio_free(struct vm_device * dev) {
    return -1;
}
111
112 static int blk_reset(struct virtio_blk_state * virtio) {
113
114     virtio->queue.ring_desc_addr = 0;
115     virtio->queue.ring_avail_addr = 0;
116     virtio->queue.ring_used_addr = 0;
117     virtio->queue.pfn = 0;
118     virtio->queue.cur_avail_idx = 0;
119
120     virtio->virtio_cfg.status = 0;
121     virtio->virtio_cfg.pci_isr = 0;
122     return 0;
123 }
124
125
/* Device-manager reset hook: reset every virtio block instance that
 * has been registered on this frontend. Always returns 0. */
static int virtio_reset(struct vm_device * dev) {
    struct virtio_dev_state * dev_state = (struct virtio_dev_state *)(dev->private_data);
    struct virtio_blk_state * blk_state = NULL;

    list_for_each_entry(blk_state, &(dev_state->dev_list), dev_link) {
        blk_reset(blk_state);
    }

    return 0;
}
136
137 static int handle_read_op(struct virtio_blk_state * blk_state, uint8_t * buf, uint64_t * sector, uint64_t len) {
138     int ret = -1;
139
140     PrintDebug("Reading Disk\n");
141     ret = blk_state->ops->read(buf, (*sector) * SECTOR_SIZE, len, (void *)(blk_state->backend_data));
142     *sector += (len / SECTOR_SIZE);
143
144     return ret;
145 }
146
147
148 static int handle_write_op(struct virtio_blk_state * blk_state, uint8_t * buf, uint64_t * sector, uint64_t len) {
149     int ret = -1;
150
151     PrintDebug("Writing Disk\n");
152     ret = blk_state->ops->write(buf, (*sector) * SECTOR_SIZE, len, (void *)(blk_state->backend_data));
153     *sector += (len / SECTOR_SIZE);
154
155     return ret;
156 }
157
158
159
160 // multiple block operations need to increment the sector 
161
162 static int handle_block_op(struct virtio_blk_state * blk_state, struct blk_op_hdr * hdr, 
163                            struct vring_desc * buf_desc, uint8_t * status) {
164     uint8_t * buf = NULL;
165
166     PrintDebug("Handling Block op\n");
167     if (guest_pa_to_host_va(blk_state->virtio_dev->vm, buf_desc->addr_gpa, (addr_t *)&(buf)) == -1) {
168         PrintError("Could not translate buffer address\n");
169         return -1;
170     }
171
172     PrintDebug("Sector=%p Length=%d\n", (void *)(addr_t)(hdr->sector), buf_desc->length);
173
174     if (hdr->type == BLK_IN_REQ) {
175         if (handle_read_op(blk_state, buf, &(hdr->sector), buf_desc->length) == -1) {
176             *status = BLK_STATUS_ERR;
177             return -1;
178         } else {
179             *status = BLK_STATUS_OK;
180         }
181     } else if (hdr->type == BLK_OUT_REQ) {
182         if (handle_write_op(blk_state, buf, &(hdr->sector), buf_desc->length) == -1) {
183             *status = BLK_STATUS_ERR;
184             return -1;
185         } else {
186             *status = BLK_STATUS_OK;
187         }
188     } else if (hdr->type == BLK_SCSI_CMD) {
189         PrintError("VIRTIO: SCSI Command Not supported!!!\n");
190         *status = BLK_STATUS_NOT_SUPPORTED;
191         return -1;
192     }
193
194     PrintDebug("Returning Status: %d\n", *status);
195
196     return 0;
197 }
198
199 static int get_desc_count(struct virtio_queue * q, int index) {
200     struct vring_desc * tmp_desc = &(q->desc[index]);
201     int cnt = 1;
202     
203     while (tmp_desc->flags & VIRTIO_NEXT_FLAG) {
204         tmp_desc = &(q->desc[tmp_desc->next]);
205         cnt++;
206     }
207
208     return cnt;
209 }
210
211
212
/* Drain the avail ring after a guest "kick" (write to the notify port).
 *
 * Each pending request is a descriptor chain of at least 3 entries:
 *   [0]       request header (struct blk_op_hdr)
 *   [1..n-2]  data buffers, executed one at a time via handle_block_op
 *   [n-1]     one-byte status written back into guest memory
 * After each completed chain the used ring is updated, and once all
 * pending requests are done the PCI interrupt is raised unless the
 * guest suppressed it with VIRTIO_NO_IRQ_FLAG.
 *
 * \return 0 on success, -1 on any translation or backend error
 */
static int handle_kick(struct virtio_blk_state * blk_state) {  
    struct virtio_queue * q = &(blk_state->queue);

    PrintDebug("VIRTIO KICK: cur_index=%d (mod=%d), avail_index=%d\n", 
               q->cur_avail_idx, q->cur_avail_idx % QUEUE_SIZE, q->avail->index);

    // Process every request published since we last caught up.
    while (q->cur_avail_idx < q->avail->index) {
        struct vring_desc * hdr_desc = NULL;
        struct vring_desc * buf_desc = NULL;
        struct vring_desc * status_desc = NULL;
        struct blk_op_hdr hdr;
        addr_t hdr_addr = 0;
        uint16_t desc_idx = q->avail->ring[q->cur_avail_idx % QUEUE_SIZE];
        int desc_cnt = get_desc_count(q, desc_idx);
        int i = 0;
        uint8_t * status_ptr = NULL;
        uint8_t status = BLK_STATUS_OK;
        uint32_t req_len = 0;

        PrintDebug("Descriptor Count=%d, index=%d\n", desc_cnt, q->cur_avail_idx % QUEUE_SIZE);

        // header + at least one data buffer + status byte
        if (desc_cnt < 3) {
            PrintError("Block operations must include at least 3 descriptors\n");
            return -1;
        }

        hdr_desc = &(q->desc[desc_idx]);


        PrintDebug("Header Descriptor (ptr=%p) gpa=%p, len=%d, flags=%x, next=%d\n", hdr_desc, 
                   (void *)(hdr_desc->addr_gpa), hdr_desc->length, hdr_desc->flags, hdr_desc->next);    

        if (guest_pa_to_host_va(blk_state->virtio_dev->vm, hdr_desc->addr_gpa, &(hdr_addr)) == -1) {
            PrintError("Could not translate block header address\n");
            return -1;
        }

        // We copy the block op header out because we are going to modify its contents
        // (handle_block_op advances hdr.sector per buffer); the guest's copy stays intact.
        memcpy(&hdr, (void *)hdr_addr, sizeof(struct blk_op_hdr));
        
        PrintDebug("Blk Op Hdr (ptr=%p) type=%d, sector=%p\n", (void *)hdr_addr, hdr.type, (void *)hdr.sector);

        desc_idx = hdr_desc->next;

        // Execute each data buffer; the last descriptor is the status byte.
        for (i = 0; i < desc_cnt - 2; i++) {
            uint8_t tmp_status = BLK_STATUS_OK;

            buf_desc = &(q->desc[desc_idx]);

            PrintDebug("Buffer Descriptor (ptr=%p) gpa=%p, len=%d, flags=%x, next=%d\n", buf_desc, 
                       (void *)(buf_desc->addr_gpa), buf_desc->length, buf_desc->flags, buf_desc->next);

            if (handle_block_op(blk_state, &hdr, buf_desc, &tmp_status) == -1) {
                PrintError("Error handling block operation\n");
                return -1;
            }

            // Latch the first failing status; never downgrade an error to OK.
            if (tmp_status != BLK_STATUS_OK) {
                status = tmp_status;
            }

            req_len += buf_desc->length;
            desc_idx = buf_desc->next;
        }

        status_desc = &(q->desc[desc_idx]);

        PrintDebug("Status Descriptor (ptr=%p) gpa=%p, len=%d, flags=%x, next=%d\n", status_desc, 
                   (void *)(status_desc->addr_gpa), status_desc->length, status_desc->flags, status_desc->next);

        if (guest_pa_to_host_va(blk_state->virtio_dev->vm, status_desc->addr_gpa, (addr_t *)&(status_ptr)) == -1) {
            PrintError("Could not translate status address\n");
            return -1;
        }

        req_len += status_desc->length;
        *status_ptr = status;

        // Publish the completed chain on the used ring.
        q->used->ring[q->used->index % QUEUE_SIZE].id = q->avail->ring[q->cur_avail_idx % QUEUE_SIZE];
        // NOTE(review): this reports the total descriptor length; the virtio
        // spec's 'len' is the number of bytes *written* to the buffers -- confirm.
        q->used->ring[q->used->index % QUEUE_SIZE].length = req_len;

        q->used->index++;
        q->cur_avail_idx++;
    }

    // Interrupt the guest unless it asked for no IRQ on this queue.
    if (!(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) {
        PrintDebug("Raising IRQ %d\n",  blk_state->pci_dev->config_header.intr_line);
        v3_pci_raise_irq(blk_state->virtio_dev->pci_bus, 0, blk_state->pci_dev);
        blk_state->virtio_cfg.pci_isr = 1;
    }

    return 0;
}
306
/* I/O-port write handler for this device's BAR 0 range.
 *
 * Decodes the port offset into the generic virtio config register it
 * addresses and applies the guest's write. Returns 'length' on
 * success, -1 on an illegal access.
 */
static int virtio_io_write(uint16_t port, void * src, uint_t length, void * private_data) {
    struct virtio_blk_state * blk_state = (struct virtio_blk_state *)private_data;
    // io_range_size is a power of two, so this masks off the BAR base.
    int port_idx = port % blk_state->io_range_size;


    PrintDebug("VIRTIO BLOCK Write for port %d (index=%d) len=%d, value=%x\n", 
               port, port_idx,  length, *(uint32_t *)src);



    switch (port_idx) {
        case GUEST_FEATURES_PORT:
            // Guest acknowledges the feature bits it will use.
            if (length != 4) {
                PrintError("Illegal write length for guest features\n");
                return -1;
            }
            
            blk_state->virtio_cfg.guest_features = *(uint32_t *)src;
            PrintDebug("Setting Guest Features to %x\n", blk_state->virtio_cfg.guest_features);

            break;
        case VRING_PG_NUM_PORT:
            // Guest supplies the page frame number of the vring; derive the
            // host-virtual pointers for the desc/avail/used rings from it.
            if (length == 4) {
                addr_t pfn = *(uint32_t *)src;
                addr_t page_addr = (pfn << VIRTIO_PAGE_SHIFT);


                blk_state->queue.pfn = pfn;
                
                // Vring layout: descriptor table, then avail ring, then
                // (page-aligned) used ring.
                blk_state->queue.ring_desc_addr = page_addr ;
                blk_state->queue.ring_avail_addr = page_addr + (QUEUE_SIZE * sizeof(struct vring_desc));
                blk_state->queue.ring_used_addr = ( blk_state->queue.ring_avail_addr + \
                                                 sizeof(struct vring_avail)    + \
                                                 (QUEUE_SIZE * sizeof(uint16_t)));
                
                // round up to next page boundary.
                blk_state->queue.ring_used_addr = (blk_state->queue.ring_used_addr + 0xfff) & ~0xfff;

                if (guest_pa_to_host_va(blk_state->virtio_dev->vm, blk_state->queue.ring_desc_addr, (addr_t *)&(blk_state->queue.desc)) == -1) {
                    PrintError("Could not translate ring descriptor address\n");
                    return -1;
                }


                if (guest_pa_to_host_va(blk_state->virtio_dev->vm, blk_state->queue.ring_avail_addr, (addr_t *)&(blk_state->queue.avail)) == -1) {
                    PrintError("Could not translate ring available address\n");
                    return -1;
                }


                if (guest_pa_to_host_va(blk_state->virtio_dev->vm, blk_state->queue.ring_used_addr, (addr_t *)&(blk_state->queue.used)) == -1) {
                    PrintError("Could not translate ring used address\n");
                    return -1;
                }

                PrintDebug("RingDesc_addr=%p, Avail_addr=%p, Used_addr=%p\n",
                           (void *)(blk_state->queue.ring_desc_addr),
                           (void *)(blk_state->queue.ring_avail_addr),
                           (void *)(blk_state->queue.ring_used_addr));

                PrintDebug("RingDesc=%p, Avail=%p, Used=%p\n", 
                           blk_state->queue.desc, blk_state->queue.avail, blk_state->queue.used);

            } else {
                PrintError("Illegal write length for page frame number\n");
                return -1;
            }
            break;
        case VRING_Q_SEL_PORT:
            // virtio-blk exposes exactly one queue; selecting any other is fatal.
            blk_state->virtio_cfg.vring_queue_selector = *(uint16_t *)src;

            if (blk_state->virtio_cfg.vring_queue_selector != 0) {
                PrintError("Virtio Block device only uses 1 queue, selected %d\n", 
                           blk_state->virtio_cfg.vring_queue_selector);
                return -1;
            }

            break;
        case VRING_Q_NOTIFY_PORT:
            // Guest kick: process everything pending on the avail ring.
            PrintDebug("Handling Kick\n");
            if (handle_kick(blk_state) == -1) {
                PrintError("Could not handle Block Notification\n");
                return -1;
            }
            break;
        case VIRTIO_STATUS_PORT:
            // Writing 0 to the status register is a device reset request.
            blk_state->virtio_cfg.status = *(uint8_t *)src;

            if (blk_state->virtio_cfg.status == 0) {
                PrintDebug("Resetting device\n");
                blk_reset(blk_state);
            }

            break;

        case VIRTIO_ISR_PORT:
            blk_state->virtio_cfg.pci_isr = *(uint8_t *)src;
            break;
        default:
            return -1;
            break;
    }

    return length;
}
412
413
/* I/O-port read handler for this device's BAR 0 range.
 *
 * Decodes the port offset into the virtio config register it
 * addresses and returns its value to the guest. Offsets past the
 * generic virtio config region fall through to the device-specific
 * block config (struct blk_config). Returns 'length' on success,
 * -1 on an illegal access.
 */
static int virtio_io_read(uint16_t port, void * dst, uint_t length, void * private_data) {
    struct virtio_blk_state * blk_state = (struct virtio_blk_state *)private_data;
    // io_range_size is a power of two, so this masks off the BAR base.
    int port_idx = port % blk_state->io_range_size;


    PrintDebug("VIRTIO BLOCK Read  for port %d (index =%d), length=%d\n", 
               port, port_idx, length);

    switch (port_idx) {
        case HOST_FEATURES_PORT:
            if (length != 4) {
                PrintError("Illegal read length for host features\n");
                return -1;
            }

            *(uint32_t *)dst = blk_state->virtio_cfg.host_features;
        
            break;
        case VRING_PG_NUM_PORT:
            if (length != 4) {
                PrintError("Illegal read length for page frame number\n");
                return -1;
            }

            *(uint32_t *)dst = blk_state->queue.pfn;

            break;
        case VRING_SIZE_PORT:
            if (length != 2) {
                PrintError("Illegal read length for vring size\n");
                return -1;
            }
                
            *(uint16_t *)dst = blk_state->queue.queue_size;

            break;

        case VIRTIO_STATUS_PORT:
            if (length != 1) {
                PrintError("Illegal read length for status\n");
                return -1;
            }

            *(uint8_t *)dst = blk_state->virtio_cfg.status;
            break;

        case VIRTIO_ISR_PORT:
            // Reading the ISR acknowledges the interrupt: clear it and
            // deassert the PCI line.
            *(uint8_t *)dst = blk_state->virtio_cfg.pci_isr;
            blk_state->virtio_cfg.pci_isr = 0;
            v3_pci_lower_irq(blk_state->virtio_dev->pci_bus, 0, blk_state->pci_dev);
            break;

        default:
            // Reads beyond the generic virtio registers index into the
            // device-specific block config structure.
            if ( (port_idx >= sizeof(struct virtio_config)) && 
                 (port_idx < (sizeof(struct virtio_config) + sizeof(struct blk_config))) ) {
                int cfg_offset = port_idx - sizeof(struct virtio_config);
                uint8_t * cfg_ptr = (uint8_t *)&(blk_state->block_cfg);

                memcpy(dst, cfg_ptr + cfg_offset, length);
                
            } else {
                PrintError("Read of Unhandled Virtio Read\n");
                return -1;
            }
          
            break;
    }

    return length;
}
484
485
486
487
/* Device-manager operations table for the virtio block frontend.
 * start/stop are unused; free is a stub that always fails. */
static struct v3_device_ops dev_ops = {
    .free = virtio_free,
    .reset = virtio_reset,
    .start = NULL,
    .stop = NULL,
};
494
495
496
497
498
/* Create the PCI function backing one virtio block instance.
 *
 * Sizes an I/O BAR large enough for the generic virtio config plus
 * the block config (rounded up to a power of two, as PCI requires),
 * registers the device on the PCI bus, fills in the standard virtio
 * block IDs/class codes, and initializes the virtio state.
 *
 * \return 0 on success, -1 if PCI registration fails
 */
static int register_dev(struct virtio_dev_state * virtio, struct virtio_blk_state * blk_state) {
    // initialize PCI
    struct pci_device * pci_dev = NULL;
    struct v3_pci_bar bars[6];
    int num_ports = sizeof(struct virtio_config) + sizeof(struct blk_config);
    int tmp_ports = num_ports;
    int i;



    // This gets the number of ports, rounded up to a power of 2
    blk_state->io_range_size = 1; // must be a power of 2
    
    // Shift once per significant bit of num_ports: yields 2^(bit count),
    // i.e. at least twice num_ports' top bit.
    while (tmp_ports > 0) {
        tmp_ports >>= 1;
        blk_state->io_range_size <<= 1;
    }
        
    // this is to account for any low order bits being set in num_ports
    // if there are none, then num_ports was already a power of 2 so we shift right to reset it
    if ((num_ports & ((blk_state->io_range_size >> 1) - 1)) == 0) {
        blk_state->io_range_size >>= 1;
    }
    
    // Only BAR 0 is used; mark the rest invalid.
    for (i = 0; i < 6; i++) {
        bars[i].type = PCI_BAR_NONE;
    }
    
    PrintDebug("Virtio-BLK io_range_size = %d\n", blk_state->io_range_size);
    
    bars[0].type = PCI_BAR_IO;
    bars[0].default_base_port = -1;   // let the PCI layer assign the base
    bars[0].num_ports = blk_state->io_range_size;
    
    bars[0].io_read = virtio_io_read;
    bars[0].io_write = virtio_io_write;
    bars[0].private_data = blk_state;
    
    pci_dev = v3_pci_register_device(virtio->pci_bus, PCI_STD_DEVICE, 
                                     0, PCI_AUTO_DEV_NUM, 0,
                                     "LNX_VIRTIO_BLK", bars,
                                     NULL, NULL, NULL, blk_state);
    
    if (!pci_dev) {
        PrintError("Could not register PCI Device\n");
        return -1;
    }
    
    // Standard virtio block device identification.
    pci_dev->config_header.vendor_id = VIRTIO_VENDOR_ID;
    pci_dev->config_header.subsystem_vendor_id = VIRTIO_SUBVENDOR_ID;
    
    
    pci_dev->config_header.device_id = VIRTIO_BLOCK_DEV_ID;
    pci_dev->config_header.class = PCI_CLASS_STORAGE;
    pci_dev->config_header.subclass = PCI_STORAGE_SUBCLASS_OTHER;
    
    pci_dev->config_header.subsystem_id = VIRTIO_BLOCK_SUBDEVICE_ID;
    
    
    pci_dev->config_header.intr_pin = 1;   // INTA#
    
    pci_dev->config_header.max_latency = 1; // ?? (qemu does it...)
    
    
    blk_state->pci_dev = pci_dev;
    
    /* Block configuration */
    blk_state->virtio_cfg.host_features = VIRTIO_SEG_MAX;
    // Reserve two descriptors per request for the header and status byte.
    blk_state->block_cfg.max_seg = QUEUE_SIZE - 2;

    // Virtio Block only uses one queue
    blk_state->queue.queue_size = QUEUE_SIZE;

    blk_state->virtio_dev = virtio;

    blk_reset(blk_state);


    return 0;
}
580
581
582 static int connect_fn(struct guest_info * info, 
583                       void * frontend_data, 
584                       struct v3_dev_blk_ops * ops, 
585                       v3_cfg_tree_t * cfg, 
586                       void * private_data) {
587
588     struct virtio_dev_state * virtio = (struct virtio_dev_state *)frontend_data;
589
590     struct virtio_blk_state * blk_state  = (struct virtio_blk_state *)V3_Malloc(sizeof(struct virtio_blk_state));
591     memset(blk_state, 0, sizeof(struct virtio_blk_state));
592
593     register_dev(virtio, blk_state);
594
595     blk_state->ops = ops;
596     blk_state->backend_data = private_data;
597
598     blk_state->block_cfg.capacity = ops->get_capacity(private_data) / SECTOR_SIZE;
599
600     PrintDebug("Virtio Capacity = %d -- 0x%p\n", (int)(blk_state->block_cfg.capacity), 
601                (void *)(addr_t)(blk_state->block_cfg.capacity));
602
603     return 0;
604 }
605
606
607 static int virtio_init(struct guest_info * vm, v3_cfg_tree_t * cfg) {
608     struct vm_device * pci_bus = v3_find_dev(vm, v3_cfg_val(cfg, "bus"));
609     struct virtio_dev_state * virtio_state = NULL;
610     char * name = v3_cfg_val(cfg, "name");
611
612     PrintDebug("Initializing VIRTIO Block device\n");
613
614     if (pci_bus == NULL) {
615         PrintError("VirtIO devices require a PCI Bus");
616         return -1;
617     }
618
619
620     virtio_state  = (struct virtio_dev_state *)V3_Malloc(sizeof(struct virtio_dev_state));
621     memset(virtio_state, 0, sizeof(struct virtio_dev_state));
622
623     INIT_LIST_HEAD(&(virtio_state->dev_list));
624     virtio_state->pci_bus = pci_bus;
625     virtio_state->vm = vm;
626
627     struct vm_device * dev = v3_allocate_device(name, &dev_ops, virtio_state);
628     if (v3_attach_device(vm, dev) == -1) {
629         PrintError("Could not attach device %s\n", name);
630         return -1;
631     }
632
633     if (v3_dev_add_blk_frontend(vm, name, connect_fn, (void *)virtio_state) == -1) {
634         PrintError("Could not register %s as block frontend\n", name);
635         return -1;
636     }
637
638     return 0;
639 }
640
641
642 device_register("LNX_VIRTIO_BLK", virtio_init)