Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches work the same way.
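For example, you can list the remote branches and then track one of the release branches (the branch name below is illustrative; use a name that `git branch -r` actually shows):

  git branch -r
  git checkout --track -b Release-1.2 origin/Release-1.2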


palacios/src/devices/lnx_virtio_blk.c
/*
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National
 * Science Foundation and the Department of Energy.
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org>
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */

#include <palacios/vmm.h>
#include <palacios/vmm_dev_mgr.h>
#include <devices/lnx_virtio_pci.h>
#include <palacios/vm_guest_mem.h>

#include <devices/pci.h>



#ifndef CONFIG_DEBUG_VIRTIO_BLK
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif
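
/* With CONFIG_DEBUG_VIRTIO_BLK unset, PrintDebug() expands to nothing,
 * so all of the debug statements below compile away in release builds. */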


#define SECTOR_SIZE 512

#define BLK_CAPACITY_PORT     20
#define BLK_MAX_SIZE_PORT     28
#define BLK_MAX_SEG_PORT      32
#define BLK_CYLINDERS_PORT    36
#define BLK_HEADS_PORT        38
#define BLK_SECTS_PORT        39

#define BLK_IN_REQ            0
#define BLK_OUT_REQ           1
#define BLK_SCSI_CMD          2

#define BLK_BARRIER_FLAG     0x80000000

#define BLK_STATUS_OK             0
#define BLK_STATUS_ERR            1
#define BLK_STATUS_NOT_SUPPORTED  2


struct blk_config {
    uint64_t capacity;
    uint32_t max_size;
    uint32_t max_seg;
    uint16_t cylinders;
    uint8_t heads;
    uint8_t sectors;
} __attribute__((packed));
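
/* These fields back the device-specific config space served by the
 * default case of virtio_io_read() below.  The BLK_*_PORT offsets above
 * give each field's position within the device's I/O BAR: judging by
 * BLK_CAPACITY_PORT, the generic virtio_config registers occupy the
 * first 20 bytes, so capacity (8 bytes) follows at offset 20, max_size
 * at 28, max_seg at 32, and so on. */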


struct blk_op_hdr {
    uint32_t type;
    uint32_t prior;
    uint64_t sector;
} __attribute__((packed));

#define QUEUE_SIZE 128

/* Host Feature flags */
#define VIRTIO_BARRIER       0x01       /* Does host support barriers? */
#define VIRTIO_SIZE_MAX      0x02       /* Indicates maximum segment size */
#define VIRTIO_SEG_MAX       0x04       /* Indicates maximum # of segments */
#define VIRTIO_LEGACY_GEOM   0x10       /* Indicates support of legacy geometry */
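
/* Feature negotiation sketch: the guest reads this bitmap from
 * HOST_FEATURES_PORT and writes back the subset it understands to
 * GUEST_FEATURES_PORT (both handled below).  A guest honoring segment
 * limits, for instance, would check VIRTIO_SEG_MAX before trusting the
 * max_seg field of the block config space. */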


struct virtio_dev_state {
    struct vm_device * pci_bus;
    struct list_head dev_list;
};

struct virtio_blk_state {

    struct pci_device * pci_dev;
    struct blk_config block_cfg;
    struct virtio_config virtio_cfg;


    struct virtio_queue queue;

    struct v3_dev_blk_ops * ops;

    void * backend_data;

    int io_range_size;

    struct virtio_dev_state * virtio_dev;

    struct list_head dev_link;
};


static int virtio_free(struct vm_device * dev) {
    // Deallocation is not implemented; freeing the device always fails
    return -1;
}

static int blk_reset(struct virtio_blk_state * virtio) {

    virtio->queue.ring_desc_addr = 0;
    virtio->queue.ring_avail_addr = 0;
    virtio->queue.ring_used_addr = 0;
    virtio->queue.pfn = 0;
    virtio->queue.cur_avail_idx = 0;

    virtio->virtio_cfg.status = 0;
    virtio->virtio_cfg.pci_isr = 0;
    return 0;
}
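
/* blk_reset() runs when the guest writes 0 to VIRTIO_STATUS_PORT (see
 * virtio_io_write() below), returning the ring state to its
 * pre-initialization values. */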



static int handle_read_op(struct virtio_blk_state * blk_state, uint8_t * buf, uint64_t * sector, uint64_t len) {
    int ret = -1;

    PrintDebug("Reading Disk\n");
    ret = blk_state->ops->read(buf, (*sector) * SECTOR_SIZE, len, (void *)(blk_state->backend_data));
    *sector += (len / SECTOR_SIZE);

    return ret;
}


static int handle_write_op(struct virtio_blk_state * blk_state, uint8_t * buf, uint64_t * sector, uint64_t len) {
    int ret = -1;

    PrintDebug("Writing Disk\n");
    ret = blk_state->ops->write(buf, (*sector) * SECTOR_SIZE, len, (void *)(blk_state->backend_data));
    *sector += (len / SECTOR_SIZE);

    return ret;
}



// Requests that span multiple buffers need the sector advanced between
// buffers, which is why the helpers above update *sector as they go.

static int handle_block_op(struct guest_info * core, struct virtio_blk_state * blk_state, struct blk_op_hdr * hdr,
                           struct vring_desc * buf_desc, uint8_t * status) {
    uint8_t * buf = NULL;

    PrintDebug("Handling Block op\n");
    if (v3_gpa_to_hva(core, buf_desc->addr_gpa, (addr_t *)&(buf)) == -1) {
        PrintError("Could not translate buffer address\n");
        return -1;
    }

    PrintDebug("Sector=%p Length=%d\n", (void *)(addr_t)(hdr->sector), buf_desc->length);

    if (hdr->type == BLK_IN_REQ) {
        if (handle_read_op(blk_state, buf, &(hdr->sector), buf_desc->length) == -1) {
            *status = BLK_STATUS_ERR;
            return -1;
        } else {
            *status = BLK_STATUS_OK;
        }
    } else if (hdr->type == BLK_OUT_REQ) {
        if (handle_write_op(blk_state, buf, &(hdr->sector), buf_desc->length) == -1) {
            *status = BLK_STATUS_ERR;
            return -1;
        } else {
            *status = BLK_STATUS_OK;
        }
    } else if (hdr->type == BLK_SCSI_CMD) {
        PrintError("VIRTIO: SCSI Command Not supported!!!\n");
        *status = BLK_STATUS_NOT_SUPPORTED;
        return -1;
    }

    PrintDebug("Returning Status: %d\n", *status);

    return 0;
}

static int get_desc_count(struct virtio_queue * q, int index) {
    struct vring_desc * tmp_desc = &(q->desc[index]);
    int cnt = 1;

    while (tmp_desc->flags & VIRTIO_NEXT_FLAG) {
        tmp_desc = &(q->desc[tmp_desc->next]);
        cnt++;
    }

    return cnt;
}
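
/* A virtio-blk request arrives as a descriptor chain of at least three
 * entries, which handle_kick() below walks in order:
 *
 *   desc[0]        struct blk_op_hdr (type, priority, sector)
 *   desc[1..n-2]   one or more data buffers
 *   desc[n-1]      a one-byte status, written back by the device
 *
 * get_desc_count() measures the chain by following the NEXT links. */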


static int handle_kick(struct guest_info * core, struct virtio_blk_state * blk_state) {
    struct virtio_queue * q = &(blk_state->queue);

    PrintDebug("VIRTIO KICK: cur_index=%d (mod=%d), avail_index=%d\n",
               q->cur_avail_idx, q->cur_avail_idx % QUEUE_SIZE, q->avail->index);

    while (q->cur_avail_idx != q->avail->index) {
        struct vring_desc * hdr_desc = NULL;
        struct vring_desc * buf_desc = NULL;
        struct vring_desc * status_desc = NULL;
        struct blk_op_hdr hdr;
        addr_t hdr_addr = 0;
        uint16_t desc_idx = q->avail->ring[q->cur_avail_idx % QUEUE_SIZE];
        int desc_cnt = get_desc_count(q, desc_idx);
        int i = 0;
        uint8_t * status_ptr = NULL;
        uint8_t status = BLK_STATUS_OK;
        uint32_t req_len = 0;

        PrintDebug("Descriptor Count=%d, index=%d\n", desc_cnt, q->cur_avail_idx % QUEUE_SIZE);

        if (desc_cnt < 3) {
            PrintError("Block operations must include at least 3 descriptors\n");
            return -1;
        }

        hdr_desc = &(q->desc[desc_idx]);


        PrintDebug("Header Descriptor (ptr=%p) gpa=%p, len=%d, flags=%x, next=%d\n", hdr_desc,
                   (void *)(hdr_desc->addr_gpa), hdr_desc->length, hdr_desc->flags, hdr_desc->next);

        if (v3_gpa_to_hva(core, hdr_desc->addr_gpa, &(hdr_addr)) == -1) {
            PrintError("Could not translate block header address\n");
            return -1;
        }

        // We copy the block op header out because we are going to modify its contents
        memcpy(&hdr, (void *)hdr_addr, sizeof(struct blk_op_hdr));

        PrintDebug("Blk Op Hdr (ptr=%p) type=%d, sector=%p\n", (void *)hdr_addr, hdr.type, (void *)hdr.sector);

        desc_idx = hdr_desc->next;

        for (i = 0; i < desc_cnt - 2; i++) {
            uint8_t tmp_status = BLK_STATUS_OK;

            buf_desc = &(q->desc[desc_idx]);

            PrintDebug("Buffer Descriptor (ptr=%p) gpa=%p, len=%d, flags=%x, next=%d\n", buf_desc,
                       (void *)(buf_desc->addr_gpa), buf_desc->length, buf_desc->flags, buf_desc->next);

            if (handle_block_op(core, blk_state, &hdr, buf_desc, &tmp_status) == -1) {
                PrintError("Error handling block operation\n");
                return -1;
            }

            if (tmp_status != BLK_STATUS_OK) {
                status = tmp_status;
            }

            req_len += buf_desc->length;
            desc_idx = buf_desc->next;
        }

        status_desc = &(q->desc[desc_idx]);

        PrintDebug("Status Descriptor (ptr=%p) gpa=%p, len=%d, flags=%x, next=%d\n", status_desc,
                   (void *)(status_desc->addr_gpa), status_desc->length, status_desc->flags, status_desc->next);

        if (v3_gpa_to_hva(core, status_desc->addr_gpa, (addr_t *)&(status_ptr)) == -1) {
            PrintError("Could not translate status address\n");
            return -1;
        }

        req_len += status_desc->length;
        *status_ptr = status;

        q->used->ring[q->used->index % QUEUE_SIZE].id = q->avail->ring[q->cur_avail_idx % QUEUE_SIZE];
        q->used->ring[q->used->index % QUEUE_SIZE].length = req_len; // the virtio spec wants the byte count the device wrote into the chain

        q->used->index++;
        q->cur_avail_idx++;
    }

    if (!(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) {
        PrintDebug("Raising IRQ %d\n",  blk_state->pci_dev->config_header.intr_line);
        v3_pci_raise_irq(blk_state->virtio_dev->pci_bus, 0, blk_state->pci_dev);
        blk_state->virtio_cfg.pci_isr = 1;
    }

    return 0;
}
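
/* The guest acknowledges the interrupt by reading VIRTIO_ISR_PORT,
 * which clears pci_isr and lowers the line (see virtio_io_read()). */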

static int virtio_io_write(struct guest_info * core, uint16_t port, void * src, uint_t length, void * private_data) {
    struct virtio_blk_state * blk_state = (struct virtio_blk_state *)private_data;
    int port_idx = port % blk_state->io_range_size;


    PrintDebug("VIRTIO BLOCK Write for port %d (index=%d) len=%d, value=%x\n",
               port, port_idx, length, *(uint32_t *)src);



    switch (port_idx) {
        case GUEST_FEATURES_PORT:
            if (length != 4) {
                PrintError("Illegal write length for guest features\n");
                return -1;
            }

            blk_state->virtio_cfg.guest_features = *(uint32_t *)src;
            PrintDebug("Setting Guest Features to %x\n", blk_state->virtio_cfg.guest_features);

            break;
        case VRING_PG_NUM_PORT:
            if (length == 4) {
                addr_t pfn = *(uint32_t *)src;
                addr_t page_addr = (pfn << VIRTIO_PAGE_SHIFT);


                blk_state->queue.pfn = pfn;

                blk_state->queue.ring_desc_addr = page_addr;
                blk_state->queue.ring_avail_addr = page_addr + (QUEUE_SIZE * sizeof(struct vring_desc));
                blk_state->queue.ring_used_addr = (blk_state->queue.ring_avail_addr +
                                                   sizeof(struct vring_avail) +
                                                   (QUEUE_SIZE * sizeof(uint16_t)));

                // round up to the next page boundary
                blk_state->queue.ring_used_addr = (blk_state->queue.ring_used_addr + 0xfff) & ~0xfff;
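                /* This follows the legacy virtio ring layout: one guest
                 * allocation holding the descriptor table, then the avail
                 * ring, then the used ring on the next page boundary.
                 * With QUEUE_SIZE = 128 and 16-byte descriptors, the
                 * avail ring starts 2048 bytes into the region. */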

                if (v3_gpa_to_hva(core, blk_state->queue.ring_desc_addr, (addr_t *)&(blk_state->queue.desc)) == -1) {
                    PrintError("Could not translate ring descriptor address\n");
                    return -1;
                }


                if (v3_gpa_to_hva(core, blk_state->queue.ring_avail_addr, (addr_t *)&(blk_state->queue.avail)) == -1) {
                    PrintError("Could not translate ring available address\n");
                    return -1;
                }


                if (v3_gpa_to_hva(core, blk_state->queue.ring_used_addr, (addr_t *)&(blk_state->queue.used)) == -1) {
                    PrintError("Could not translate ring used address\n");
                    return -1;
                }

                PrintDebug("RingDesc_addr=%p, Avail_addr=%p, Used_addr=%p\n",
                           (void *)(blk_state->queue.ring_desc_addr),
                           (void *)(blk_state->queue.ring_avail_addr),
                           (void *)(blk_state->queue.ring_used_addr));

                PrintDebug("RingDesc=%p, Avail=%p, Used=%p\n",
                           blk_state->queue.desc, blk_state->queue.avail, blk_state->queue.used);

            } else {
                PrintError("Illegal write length for page frame number\n");
                return -1;
            }
            break;
        case VRING_Q_SEL_PORT:
            blk_state->virtio_cfg.vring_queue_selector = *(uint16_t *)src;

            if (blk_state->virtio_cfg.vring_queue_selector != 0) {
                PrintError("Virtio Block device only uses 1 queue, selected %d\n",
                           blk_state->virtio_cfg.vring_queue_selector);
                return -1;
            }

            break;
        case VRING_Q_NOTIFY_PORT:
            PrintDebug("Handling Kick\n");
            if (handle_kick(core, blk_state) == -1) {
                PrintError("Could not handle Block Notification\n");
                return -1;
            }
            break;
        case VIRTIO_STATUS_PORT:
            blk_state->virtio_cfg.status = *(uint8_t *)src;

            if (blk_state->virtio_cfg.status == 0) {
                PrintDebug("Resetting device\n");
                blk_reset(blk_state);
            }

            break;

        case VIRTIO_ISR_PORT:
            blk_state->virtio_cfg.pci_isr = *(uint8_t *)src;
            break;
        default:
            return -1;
    }

    return length;
}


static int virtio_io_read(struct guest_info * core, uint16_t port, void * dst, uint_t length, void * private_data) {
    struct virtio_blk_state * blk_state = (struct virtio_blk_state *)private_data;
    int port_idx = port % blk_state->io_range_size;


    PrintDebug("VIRTIO BLOCK Read for port %d (index=%d), length=%d\n",
               port, port_idx, length);

    switch (port_idx) {
        case HOST_FEATURES_PORT:
            if (length != 4) {
                PrintError("Illegal read length for host features\n");
                return -1;
            }

            *(uint32_t *)dst = blk_state->virtio_cfg.host_features;

            break;
        case VRING_PG_NUM_PORT:
            if (length != 4) {
                PrintError("Illegal read length for page frame number\n");
                return -1;
            }

            *(uint32_t *)dst = blk_state->queue.pfn;

            break;
        case VRING_SIZE_PORT:
            if (length != 2) {
                PrintError("Illegal read length for vring size\n");
                return -1;
            }

            *(uint16_t *)dst = blk_state->queue.queue_size;

            break;

        case VIRTIO_STATUS_PORT:
            if (length != 1) {
                PrintError("Illegal read length for status\n");
                return -1;
            }

            *(uint8_t *)dst = blk_state->virtio_cfg.status;
            break;

        case VIRTIO_ISR_PORT:
            *(uint8_t *)dst = blk_state->virtio_cfg.pci_isr;
            blk_state->virtio_cfg.pci_isr = 0;
            v3_pci_lower_irq(blk_state->virtio_dev->pci_bus, 0, blk_state->pci_dev);
            break;

        default:
            if ( (port_idx >= sizeof(struct virtio_config)) &&
                 (port_idx < (sizeof(struct virtio_config) + sizeof(struct blk_config))) ) {
                // reads past the generic registers fall into the block config space
                int cfg_offset = port_idx - sizeof(struct virtio_config);
                uint8_t * cfg_ptr = (uint8_t *)&(blk_state->block_cfg);

                memcpy(dst, cfg_ptr + cfg_offset, length);

            } else {
                PrintError("Read from unhandled Virtio Block port %d\n", port_idx);
                return -1;
            }

            break;
    }

    return length;
}




static struct v3_device_ops dev_ops = {
    .free = virtio_free,

};




static int register_dev(struct virtio_dev_state * virtio, struct virtio_blk_state * blk_state) {
    // initialize PCI
    struct pci_device * pci_dev = NULL;
    struct v3_pci_bar bars[6];
    int num_ports = sizeof(struct virtio_config) + sizeof(struct blk_config);
    int tmp_ports = num_ports;
    int i;



    // Compute the number of ports, rounded up to the next power of 2
    blk_state->io_range_size = 1; // must be a power of 2

    while (tmp_ports > 0) {
        tmp_ports >>= 1;
        blk_state->io_range_size <<= 1;
    }

    // This accounts for any low order bits being set in num_ports:
    // if there are none, num_ports was already a power of 2, so we
    // shift right to undo the extra doubling.
    if ((num_ports & ((blk_state->io_range_size >> 1) - 1)) == 0) {
        blk_state->io_range_size >>= 1;
    }
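
    /* Worked example: num_ports = 24 (0b11000) shifts out after five
     * iterations, giving io_range_size = 32; 24 & 15 is nonzero, so 32
     * stands.  num_ports = 16 also yields 32, but 16 & 15 == 0, so the
     * final shift restores the exact power of two, 16. */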


    for (i = 0; i < 6; i++) {
        bars[i].type = PCI_BAR_NONE;
    }

    PrintDebug("Virtio-BLK io_range_size = %d\n", blk_state->io_range_size);

    bars[0].type = PCI_BAR_IO;
    bars[0].default_base_port = -1;
    bars[0].num_ports = blk_state->io_range_size;

    bars[0].io_read = virtio_io_read;
    bars[0].io_write = virtio_io_write;
    bars[0].private_data = blk_state;

    pci_dev = v3_pci_register_device(virtio->pci_bus, PCI_STD_DEVICE,
                                     0, PCI_AUTO_DEV_NUM, 0,
                                     "LNX_VIRTIO_BLK", bars,
                                     NULL, NULL, NULL, blk_state);

    if (!pci_dev) {
        PrintError("Could not register PCI Device\n");
        return -1;
    }

    pci_dev->config_header.vendor_id = VIRTIO_VENDOR_ID;
    pci_dev->config_header.subsystem_vendor_id = VIRTIO_SUBVENDOR_ID;


    pci_dev->config_header.device_id = VIRTIO_BLOCK_DEV_ID;
    pci_dev->config_header.class = PCI_CLASS_STORAGE;
    pci_dev->config_header.subclass = PCI_STORAGE_SUBCLASS_OTHER;

    pci_dev->config_header.subsystem_id = VIRTIO_BLOCK_SUBDEVICE_ID;


    pci_dev->config_header.intr_pin = 1;

    pci_dev->config_header.max_latency = 1; // unclear whether this is needed; qemu sets it


    blk_state->pci_dev = pci_dev;

    /* Block configuration */
    blk_state->virtio_cfg.host_features = VIRTIO_SEG_MAX;
    blk_state->block_cfg.max_seg = QUEUE_SIZE - 2; // leave room for the header and status descriptors

    // Virtio Block only uses one queue
    blk_state->queue.queue_size = QUEUE_SIZE;

    blk_state->virtio_dev = virtio;

    blk_reset(blk_state);


    return 0;
}


static int connect_fn(struct v3_vm_info * vm,
                      void * frontend_data,
                      struct v3_dev_blk_ops * ops,
                      v3_cfg_tree_t * cfg,
                      void * private_data) {

    struct virtio_dev_state * virtio = (struct virtio_dev_state *)frontend_data;

    struct virtio_blk_state * blk_state = (struct virtio_blk_state *)V3_Malloc(sizeof(struct virtio_blk_state));

    if (blk_state == NULL) {
        PrintError("Could not allocate virtio block state\n");
        return -1;
    }

    memset(blk_state, 0, sizeof(struct virtio_blk_state));

    if (register_dev(virtio, blk_state) == -1) {
        PrintError("Could not register virtio block device\n");
        return -1;
    }

    blk_state->ops = ops;
    blk_state->backend_data = private_data;

    blk_state->block_cfg.capacity = ops->get_capacity(private_data) / SECTOR_SIZE;

    PrintDebug("Virtio Capacity = %d -- 0x%p\n", (int)(blk_state->block_cfg.capacity),
               (void *)(addr_t)(blk_state->block_cfg.capacity));

    return 0;
}
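
/* For instance, a 1 GiB backing store reported by ops->get_capacity()
 * becomes a capacity of 1073741824 / 512 = 2097152 sectors. */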


static int virtio_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
    struct vm_device * pci_bus = v3_find_dev(vm, v3_cfg_val(cfg, "bus"));
    struct virtio_dev_state * virtio_state = NULL;
    char * dev_id = v3_cfg_val(cfg, "ID");

    PrintDebug("Initializing VIRTIO Block device\n");

    if (pci_bus == NULL) {
        PrintError("VirtIO devices require a PCI Bus\n");
        return -1;
    }


    virtio_state = (struct virtio_dev_state *)V3_Malloc(sizeof(struct virtio_dev_state));
    memset(virtio_state, 0, sizeof(struct virtio_dev_state));

    INIT_LIST_HEAD(&(virtio_state->dev_list));
    virtio_state->pci_bus = pci_bus;


    struct vm_device * dev = v3_allocate_device(dev_id, &dev_ops, virtio_state);

    if (v3_attach_device(vm, dev) == -1) {
        PrintError("Could not attach device %s\n", dev_id);
        return -1;
    }

    if (v3_dev_add_blk_frontend(vm, dev_id, connect_fn, (void *)virtio_state) == -1) {
        PrintError("Could not register %s as block frontend\n", dev_id);
        return -1;
    }

    return 0;
}


device_register("LNX_VIRTIO_BLK", virtio_init)