Palacios Public Git Repository

To checkout Palacios execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, execute:
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


d47243a3d3b0fb384f22d382c6a43d3d412241a3
[palacios.git] / palacios / src / devices / lnx_virtio_blk.c
1 /* 
2  * This file is part of the Palacios Virtual Machine Monitor developed
3  * by the V3VEE Project with funding from the United States National 
4  * Science Foundation and the Department of Energy.  
5  *
6  * The V3VEE Project is a joint project between Northwestern University
7  * and the University of New Mexico.  You can find out more at 
8  * http://www.v3vee.org
9  *
10  * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11  * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
12  * All rights reserved.
13  *
14  * Author: Jack Lange <jarusl@cs.northwestern.edu>
15  *
16  * This is free software.  You are permitted to use,
17  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
18  */
19
20 #include <palacios/vmm.h>
21 #include <palacios/vmm_dev_mgr.h>
22 #include <devices/lnx_virtio_pci.h>
23 #include <palacios/vm_guest_mem.h>
24
25 #include <devices/pci.h>
26
27
28
/* Compile PrintDebug out entirely unless virtio-blk debugging is configured */
#ifndef V3_CONFIG_DEBUG_VIRTIO_BLK
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


/* Disk transfer unit in bytes; sector numbers are in these units */
#define SECTOR_SIZE 512

/* I/O port offsets of the blk_config fields: each is 20 (the size of the
 * generic virtio config registers) plus the field's offset in blk_config */
#define BLK_CAPACITY_PORT     20
#define BLK_MAX_SIZE_PORT     28
#define BLK_MAX_SEG_PORT      32
#define BLK_CYLINDERS_PORT    36
#define BLK_HEADS_PORT        38
#define BLK_SECTS_PORT        39

/* Request types carried in blk_op_hdr.type */
#define BLK_IN_REQ            0      /* read from disk into guest buffer */
#define BLK_OUT_REQ           1      /* write guest buffer to disk */
#define BLK_SCSI_CMD          2      /* SCSI passthrough (not supported here) */

/* Barrier bit that may be OR'd into the request type */
#define BLK_BARRIER_FLAG     0x80000000

/* Per-request status byte written back into the guest's status descriptor */
#define BLK_STATUS_OK             0
#define BLK_STATUS_ERR            1
#define BLK_STATUS_NOT_SUPPORTED  2
54
/* Device-specific configuration space exposed to the guest after the
 * generic virtio registers; layout matches the BLK_*_PORT offsets above. */
struct blk_config {
    uint64_t capacity;      /* disk size in SECTOR_SIZE units */
    uint32_t max_size;      /* maximum segment size */
    uint32_t max_seg;       /* maximum number of segments per request */
    uint16_t cylinders;     /* legacy geometry fields */
    uint8_t heads;
    uint8_t sectors;
} __attribute__((packed));
63
64
65
/* Request header the guest places in the first descriptor of each chain */
struct blk_op_hdr {
    uint32_t type;      /* BLK_IN_REQ / BLK_OUT_REQ / BLK_SCSI_CMD */
    uint32_t prior;     /* request priority */
    uint64_t sector;    /* starting sector of the transfer */
} __attribute__((packed));

/* Number of entries in the (single) virtqueue */
#define QUEUE_SIZE 128

/* Host Feature flags */
#define VIRTIO_BARRIER       0x01       /* Does host support barriers? */
#define VIRTIO_SIZE_MAX      0x02       /* Indicates maximum segment size */
#define VIRTIO_SEG_MAX       0x04       /* Indicates maximum # of segments */
#define VIRTIO_LEGACY_GEOM   0x10       /* Indicates support of legacy geometry */
80
/* Per-frontend state: the PCI bus and the list of attached block devices */
struct virtio_dev_state {
    struct vm_device * pci_bus;
    struct list_head dev_list;      /* list of virtio_blk_state.dev_link */
};

/* Per-disk state for one virtio block PCI function */
struct virtio_blk_state {

    struct pci_device * pci_dev;            /* PCI identity registered for this disk */
    struct blk_config block_cfg;            /* device-specific config read by the guest */
    struct virtio_config virtio_cfg;        /* generic virtio register state */

    
    struct virtio_queue queue;              /* the single request virtqueue */

    struct v3_dev_blk_ops * ops;            /* backend read/write/capacity hooks */

    void * backend_data;                    /* opaque cookie passed to ops */

    int io_range_size;                      /* size of the I/O BAR (power of 2) */

    struct virtio_dev_state * virtio_dev;   /* back pointer to the frontend state */

    struct list_head dev_link;              /* membership in virtio_dev_state.dev_list */
};
105
106
107
108
109 static int blk_reset(struct virtio_blk_state * virtio) {
110
111     virtio->queue.ring_desc_addr = 0;
112     virtio->queue.ring_avail_addr = 0;
113     virtio->queue.ring_used_addr = 0;
114     virtio->queue.pfn = 0;
115     virtio->queue.cur_avail_idx = 0;
116
117     virtio->virtio_cfg.status = 0;
118     virtio->virtio_cfg.pci_isr = 0;
119     return 0;
120 }
121
122
123
124
125 static int handle_read_op(struct virtio_blk_state * blk_state, uint8_t * buf, uint64_t * sector, uint64_t len) {
126     int ret = -1;
127
128     PrintDebug("Reading Disk\n");
129     ret = blk_state->ops->read(buf, (*sector) * SECTOR_SIZE, len, (void *)(blk_state->backend_data));
130     *sector += (len / SECTOR_SIZE);
131
132     return ret;
133 }
134
135
136 static int handle_write_op(struct virtio_blk_state * blk_state, uint8_t * buf, uint64_t * sector, uint64_t len) {
137     int ret = -1;
138
139     PrintDebug("Writing Disk\n");
140     ret = blk_state->ops->write(buf, (*sector) * SECTOR_SIZE, len, (void *)(blk_state->backend_data));
141     *sector += (len / SECTOR_SIZE);
142
143     return ret;
144 }
145
146
147
148 // multiple block operations need to increment the sector 
149
150 static int handle_block_op(struct guest_info * core, struct virtio_blk_state * blk_state, struct blk_op_hdr * hdr, 
151                            struct vring_desc * buf_desc, uint8_t * status) {
152     uint8_t * buf = NULL;
153
154     PrintDebug("Handling Block op\n");
155     if (v3_gpa_to_hva(core, buf_desc->addr_gpa, (addr_t *)&(buf)) == -1) {
156         PrintError("Could not translate buffer address\n");
157         return -1;
158     }
159
160     PrintDebug("Sector=%p Length=%d\n", (void *)(addr_t)(hdr->sector), buf_desc->length);
161
162     if (hdr->type == BLK_IN_REQ) {
163         if (handle_read_op(blk_state, buf, &(hdr->sector), buf_desc->length) == -1) {
164             *status = BLK_STATUS_ERR;
165             return -1;
166         } else {
167             *status = BLK_STATUS_OK;
168         }
169     } else if (hdr->type == BLK_OUT_REQ) {
170         if (handle_write_op(blk_state, buf, &(hdr->sector), buf_desc->length) == -1) {
171             *status = BLK_STATUS_ERR;
172             return -1;
173         } else {
174             *status = BLK_STATUS_OK;
175         }
176     } else if (hdr->type == BLK_SCSI_CMD) {
177         PrintError("VIRTIO: SCSI Command Not supported!!!\n");
178         *status = BLK_STATUS_NOT_SUPPORTED;
179         return -1;
180     }
181
182     PrintDebug("Returning Status: %d\n", *status);
183
184     return 0;
185 }
186
187 static int get_desc_count(struct virtio_queue * q, int index) {
188     struct vring_desc * tmp_desc = &(q->desc[index]);
189     int cnt = 1;
190     
191     while (tmp_desc->flags & VIRTIO_NEXT_FLAG) {
192         tmp_desc = &(q->desc[tmp_desc->next]);
193         cnt++;
194     }
195
196     return cnt;
197 }
198
199
200
/* Drain the avail ring after a guest notification ("kick").
 *
 * Each request is a descriptor chain laid out as:
 *   desc[0]       : request header (struct blk_op_hdr)
 *   desc[1..n-2]  : one or more data buffers
 *   desc[n-1]     : one-byte status we write back to the guest
 *
 * Completed requests are posted to the used ring; an interrupt is raised
 * unless the guest set VIRTIO_NO_IRQ_FLAG.
 * Returns 0 on success, -1 on any translation or backend failure (in which
 * case the remaining queued requests are left unprocessed).
 */
static int handle_kick(struct guest_info * core, struct virtio_blk_state * blk_state) {  
    struct virtio_queue * q = &(blk_state->queue);

    PrintDebug("VIRTIO KICK: cur_index=%d (mod=%d), avail_index=%d\n", 
               q->cur_avail_idx, q->cur_avail_idx % QUEUE_SIZE, q->avail->index);

    // Process every entry the guest has published that we haven't consumed yet
    while (q->cur_avail_idx != q->avail->index) {
        struct vring_desc * hdr_desc = NULL;
        struct vring_desc * buf_desc = NULL;
        struct vring_desc * status_desc = NULL;
        struct blk_op_hdr hdr;
        addr_t hdr_addr = 0;
        uint16_t desc_idx = q->avail->ring[q->cur_avail_idx % QUEUE_SIZE];
        int desc_cnt = get_desc_count(q, desc_idx);
        int i = 0;
        uint8_t * status_ptr = NULL;
        uint8_t status = BLK_STATUS_OK;     // final status reported to the guest
        uint32_t req_len = 0;               // total bytes handled, for the used ring

        PrintDebug("Descriptor Count=%d, index=%d\n", desc_cnt, q->cur_avail_idx % QUEUE_SIZE);

        // header + at least one data buffer + status byte
        if (desc_cnt < 3) {
            PrintError("Block operations must include at least 3 descriptors\n");
            return -1;
        }

        hdr_desc = &(q->desc[desc_idx]);


        PrintDebug("Header Descriptor (ptr=%p) gpa=%p, len=%d, flags=%x, next=%d\n", hdr_desc, 
                   (void *)(hdr_desc->addr_gpa), hdr_desc->length, hdr_desc->flags, hdr_desc->next);    

        if (v3_gpa_to_hva(core, hdr_desc->addr_gpa, &(hdr_addr)) == -1) {
            PrintError("Could not translate block header address\n");
            return -1;
        }

        // We copy the block op header out because we are going to modify its contents
        // (handle_block_op advances hdr.sector as buffers are transferred)
        memcpy(&hdr, (void *)hdr_addr, sizeof(struct blk_op_hdr));
        
        PrintDebug("Blk Op Hdr (ptr=%p) type=%d, sector=%p\n", (void *)hdr_addr, hdr.type, (void *)hdr.sector);

        desc_idx = hdr_desc->next;

        // Transfer each data buffer; the last descriptor is the status byte
        for (i = 0; i < desc_cnt - 2; i++) {
            uint8_t tmp_status = BLK_STATUS_OK;

            buf_desc = &(q->desc[desc_idx]);

            PrintDebug("Buffer Descriptor (ptr=%p) gpa=%p, len=%d, flags=%x, next=%d\n", buf_desc, 
                       (void *)(buf_desc->addr_gpa), buf_desc->length, buf_desc->flags, buf_desc->next);

            if (handle_block_op(core, blk_state, &hdr, buf_desc, &tmp_status) == -1) {
                PrintError("Error handling block operation\n");
                return -1;
            }

            // Remember the first non-OK status without stopping the transfer
            if (tmp_status != BLK_STATUS_OK) {
                status = tmp_status;
            }

            req_len += buf_desc->length;
            desc_idx = buf_desc->next;
        }

        status_desc = &(q->desc[desc_idx]);

        PrintDebug("Status Descriptor (ptr=%p) gpa=%p, len=%d, flags=%x, next=%d\n", status_desc, 
                   (void *)(status_desc->addr_gpa), status_desc->length, status_desc->flags, status_desc->next);

        if (v3_gpa_to_hva(core, status_desc->addr_gpa, (addr_t *)&(status_ptr)) == -1) {
            PrintError("Could not translate status address\n");
            return -1;
        }

        req_len += status_desc->length;
        *status_ptr = status;

        // Post the completion to the used ring
        q->used->ring[q->used->index % QUEUE_SIZE].id = q->avail->ring[q->cur_avail_idx % QUEUE_SIZE];
        q->used->ring[q->used->index % QUEUE_SIZE].length = req_len; // What do we set this to????

        q->used->index++;
        q->cur_avail_idx++;
    }

    // Notify the guest unless it asked for interrupt suppression
    if (!(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) {
        PrintDebug("Raising IRQ %d\n",  blk_state->pci_dev->config_header.intr_line);
        v3_pci_raise_irq(blk_state->virtio_dev->pci_bus, 0, blk_state->pci_dev);
        blk_state->virtio_cfg.pci_isr = 1;
    }

    return 0;
}
294
/* I/O-port write handler for the device's PCI I/O BAR.
 *
 * port_idx is the offset of the write within the virtio register window.
 * Returns the number of bytes consumed (length) on success, -1 on error.
 */
static int virtio_io_write(struct guest_info * core, uint16_t port, void * src, uint_t length, void * private_data) {
    struct virtio_blk_state * blk_state = (struct virtio_blk_state *)private_data;
    int port_idx = port % blk_state->io_range_size;


    PrintDebug("VIRTIO BLOCK Write for port %d (index=%d) len=%d, value=%x\n", 
               port, port_idx,  length, *(uint32_t *)src);



    switch (port_idx) {
        case GUEST_FEATURES_PORT:
            // Guest acknowledges the feature bits it will use
            if (length != 4) {
                PrintError("Illegal write length for guest features\n");
                return -1;
            }
            
            blk_state->virtio_cfg.guest_features = *(uint32_t *)src;
            PrintDebug("Setting Guest Features to %x\n", blk_state->virtio_cfg.guest_features);

            break;
        case VRING_PG_NUM_PORT:
            // Guest supplies the page frame number of the virtqueue; derive
            // the descriptor/avail/used ring addresses and map them.
            if (length == 4) {
                addr_t pfn = *(uint32_t *)src;
                addr_t page_addr = (pfn << VIRTIO_PAGE_SHIFT);


                blk_state->queue.pfn = pfn;
                
                blk_state->queue.ring_desc_addr = page_addr ;
                blk_state->queue.ring_avail_addr = page_addr + (QUEUE_SIZE * sizeof(struct vring_desc));
                blk_state->queue.ring_used_addr = ( blk_state->queue.ring_avail_addr + \
                                                 sizeof(struct vring_avail)    + \
                                                 (QUEUE_SIZE * sizeof(uint16_t)));
                
                // round up to next page boundary.
                blk_state->queue.ring_used_addr = (blk_state->queue.ring_used_addr + 0xfff) & ~0xfff;

                if (v3_gpa_to_hva(core, blk_state->queue.ring_desc_addr, (addr_t *)&(blk_state->queue.desc)) == -1) {
                    PrintError("Could not translate ring descriptor address\n");
                    return -1;
                }


                if (v3_gpa_to_hva(core, blk_state->queue.ring_avail_addr, (addr_t *)&(blk_state->queue.avail)) == -1) {
                    PrintError("Could not translate ring available address\n");
                    return -1;
                }


                if (v3_gpa_to_hva(core, blk_state->queue.ring_used_addr, (addr_t *)&(blk_state->queue.used)) == -1) {
                    PrintError("Could not translate ring used address\n");
                    return -1;
                }

                PrintDebug("RingDesc_addr=%p, Avail_addr=%p, Used_addr=%p\n",
                           (void *)(blk_state->queue.ring_desc_addr),
                           (void *)(blk_state->queue.ring_avail_addr),
                           (void *)(blk_state->queue.ring_used_addr));

                PrintDebug("RingDesc=%p, Avail=%p, Used=%p\n", 
                           blk_state->queue.desc, blk_state->queue.avail, blk_state->queue.used);

            } else {
                PrintError("Illegal write length for page frame number\n");
                return -1;
            }
            break;
        case VRING_Q_SEL_PORT:
            // This device exposes exactly one queue (index 0)
            blk_state->virtio_cfg.vring_queue_selector = *(uint16_t *)src;

            if (blk_state->virtio_cfg.vring_queue_selector != 0) {
                PrintError("Virtio Block device only uses 1 queue, selected %d\n", 
                           blk_state->virtio_cfg.vring_queue_selector);
                return -1;
            }

            break;
        case VRING_Q_NOTIFY_PORT:
            // Guest kicked the queue: process pending requests now
            PrintDebug("Handling Kick\n");
            if (handle_kick(core, blk_state) == -1) {
                PrintError("Could not handle Block Notification\n");
                return -1;
            }
            break;
        case VIRTIO_STATUS_PORT:
            // Writing 0 to the status register requests a device reset
            blk_state->virtio_cfg.status = *(uint8_t *)src;

            if (blk_state->virtio_cfg.status == 0) {
                PrintDebug("Resetting device\n");
                blk_reset(blk_state);
            }

            break;

        case VIRTIO_ISR_PORT:
            blk_state->virtio_cfg.pci_isr = *(uint8_t *)src;
            break;
        default:
            // Writes to the device-specific config space are not supported
            return -1;
            break;
    }

    return length;
}
400
401
/* I/O-port read handler for the device's PCI I/O BAR.
 *
 * port_idx is the offset of the read within the virtio register window;
 * offsets past the generic virtio registers fall into the blk_config
 * structure (handled by the default case).
 * Returns the number of bytes produced (length) on success, -1 on error.
 */
static int virtio_io_read(struct guest_info * core, uint16_t port, void * dst, uint_t length, void * private_data) {
    struct virtio_blk_state * blk_state = (struct virtio_blk_state *)private_data;
    int port_idx = port % blk_state->io_range_size;


    PrintDebug("VIRTIO BLOCK Read  for port %d (index =%d), length=%d\n", 
               port, port_idx, length);

    switch (port_idx) {
        case HOST_FEATURES_PORT:
            if (length != 4) {
                PrintError("Illegal read length for host features\n");
                return -1;
            }

            *(uint32_t *)dst = blk_state->virtio_cfg.host_features;
        
            break;
        case VRING_PG_NUM_PORT:
            if (length != 4) {
                PrintError("Illegal read length for page frame number\n");
                return -1;
            }

            *(uint32_t *)dst = blk_state->queue.pfn;

            break;
        case VRING_SIZE_PORT:
            if (length != 2) {
                PrintError("Illegal read length for vring size\n");
                return -1;
            }
                
            *(uint16_t *)dst = blk_state->queue.queue_size;

            break;

        case VIRTIO_STATUS_PORT:
            if (length != 1) {
                PrintError("Illegal read length for status\n");
                return -1;
            }

            *(uint8_t *)dst = blk_state->virtio_cfg.status;
            break;

        case VIRTIO_ISR_PORT:
            // Reading the ISR acknowledges the interrupt: clear it and
            // lower the PCI line
            *(uint8_t *)dst = blk_state->virtio_cfg.pci_isr;
            blk_state->virtio_cfg.pci_isr = 0;
            v3_pci_lower_irq(blk_state->virtio_dev->pci_bus, 0, blk_state->pci_dev);
            break;

        default:
            // Reads past the generic registers index into blk_config
            if ( (port_idx >= sizeof(struct virtio_config)) && 
                 (port_idx < (sizeof(struct virtio_config) + sizeof(struct blk_config))) ) {
                int cfg_offset = port_idx - sizeof(struct virtio_config);
                uint8_t * cfg_ptr = (uint8_t *)&(blk_state->block_cfg);

                memcpy(dst, cfg_ptr + cfg_offset, length);
                
            } else {
                PrintError("Read of Unhandled Virtio Read\n");
                return -1;
            }
          
            break;
    }

    return length;
}
472
473
474 static int virtio_free(struct virtio_dev_state * virtio) {
475     struct virtio_blk_state * blk_state = NULL;
476     struct virtio_blk_state * tmp = NULL;
477
478     list_for_each_entry_safe(blk_state, tmp, &(virtio->dev_list), dev_link) {
479
480         // unregister from PCI
481
482         list_del(&(blk_state->dev_link));
483         V3_Free(blk_state);
484     }
485     
486
487     V3_Free(virtio);
488
489     return 0;
490 }
491
492
493
/* Device manager hooks for this device; only teardown is required */
static struct v3_device_ops dev_ops = {
    .free = (int (*)(void *))virtio_free,

};
498
499
500
501
502
/* Register one block device as a virtio PCI function on the bus.
 *
 * Sizes the I/O BAR (register window rounded up to a power of 2), registers
 * the PCI device with our read/write handlers, fills in the config header,
 * links blk_state into the frontend's device list, and resets the device.
 * Returns 0 on success, -1 if PCI registration fails.
 */
static int register_dev(struct virtio_dev_state * virtio, struct virtio_blk_state * blk_state) {
    // initialize PCI
    struct pci_device * pci_dev = NULL;
    struct v3_pci_bar bars[6];
    int num_ports = sizeof(struct virtio_config) + sizeof(struct blk_config);
    int tmp_ports = num_ports;
    int i;



    // This gets the number of ports, rounded up to a power of 2
    blk_state->io_range_size = 1; // must be a power of 2
    
    while (tmp_ports > 0) {
        tmp_ports >>= 1;
        blk_state->io_range_size <<= 1;
    }
        
    // this is to account for any low order bits being set in num_ports
    // if there are none, then num_ports was already a power of 2 so we shift right to reset it
    if ((num_ports & ((blk_state->io_range_size >> 1) - 1)) == 0) {
        blk_state->io_range_size >>= 1;
    }
    
    
    // Only BAR 0 is used; mark the rest unimplemented
    for (i = 0; i < 6; i++) {
        bars[i].type = PCI_BAR_NONE;
    }
    
    PrintDebug("Virtio-BLK io_range_size = %d\n", blk_state->io_range_size);
    
    bars[0].type = PCI_BAR_IO;
    bars[0].default_base_port = -1;
    bars[0].num_ports = blk_state->io_range_size;
    
    bars[0].io_read = virtio_io_read;
    bars[0].io_write = virtio_io_write;
    bars[0].private_data = blk_state;
    
    pci_dev = v3_pci_register_device(virtio->pci_bus, PCI_STD_DEVICE, 
                                     0, PCI_AUTO_DEV_NUM, 0,
                                     "LNX_VIRTIO_BLK", bars,
                                     NULL, NULL, NULL, blk_state);
    
    if (!pci_dev) {
        PrintError("Could not register PCI Device\n");
        return -1;
    }
    
    // Standard virtio block device identity
    pci_dev->config_header.vendor_id = VIRTIO_VENDOR_ID;
    pci_dev->config_header.subsystem_vendor_id = VIRTIO_SUBVENDOR_ID;
    
    
    pci_dev->config_header.device_id = VIRTIO_BLOCK_DEV_ID;
    pci_dev->config_header.class = PCI_CLASS_STORAGE;
    pci_dev->config_header.subclass = PCI_STORAGE_SUBCLASS_OTHER;
    
    pci_dev->config_header.subsystem_id = VIRTIO_BLOCK_SUBDEVICE_ID;
    
    
    pci_dev->config_header.intr_pin = 1;
    
    pci_dev->config_header.max_latency = 1; // ?? (qemu does it...)
    
    
    blk_state->pci_dev = pci_dev;


    /* Add backend to list of devices */
    list_add(&(blk_state->dev_link), &(virtio->dev_list));
    
    /* Block configuration */
    blk_state->virtio_cfg.host_features = VIRTIO_SEG_MAX;
    blk_state->block_cfg.max_seg = QUEUE_SIZE - 2;

    // Virtio Block only uses one queue
    blk_state->queue.queue_size = QUEUE_SIZE;

    blk_state->virtio_dev = virtio;

    blk_reset(blk_state);


    return 0;
}
588
589
590 static int connect_fn(struct v3_vm_info * vm, 
591                       void * frontend_data, 
592                       struct v3_dev_blk_ops * ops, 
593                       v3_cfg_tree_t * cfg, 
594                       void * private_data) {
595
596     struct virtio_dev_state * virtio = (struct virtio_dev_state *)frontend_data;
597
598     struct virtio_blk_state * blk_state  = (struct virtio_blk_state *)V3_Malloc(sizeof(struct virtio_blk_state));
599     memset(blk_state, 0, sizeof(struct virtio_blk_state));
600
601     register_dev(virtio, blk_state);
602
603     blk_state->ops = ops;
604     blk_state->backend_data = private_data;
605
606     blk_state->block_cfg.capacity = ops->get_capacity(private_data) / SECTOR_SIZE;
607
608     PrintDebug("Virtio Capacity = %d -- 0x%p\n", (int)(blk_state->block_cfg.capacity), 
609                (void *)(addr_t)(blk_state->block_cfg.capacity));
610
611     return 0;
612 }
613
614
615 static int virtio_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
616     struct vm_device * pci_bus = v3_find_dev(vm, v3_cfg_val(cfg, "bus"));
617     struct virtio_dev_state * virtio_state = NULL;
618     char * dev_id = v3_cfg_val(cfg, "ID");
619
620     PrintDebug("Initializing VIRTIO Block device\n");
621
622     if (pci_bus == NULL) {
623         PrintError("VirtIO devices require a PCI Bus");
624         return -1;
625     }
626
627
628     virtio_state  = (struct virtio_dev_state *)V3_Malloc(sizeof(struct virtio_dev_state));
629     memset(virtio_state, 0, sizeof(struct virtio_dev_state));
630
631     INIT_LIST_HEAD(&(virtio_state->dev_list));
632     virtio_state->pci_bus = pci_bus;
633
634
635     struct vm_device * dev = v3_add_device(vm, dev_id, &dev_ops, virtio_state);
636
637     if (dev == NULL) {
638         PrintError("Could not attach device %s\n", dev_id);
639         V3_Free(virtio_state);
640         return -1;
641     }
642
643     if (v3_dev_add_blk_frontend(vm, dev_id, connect_fn, (void *)virtio_state) == -1) {
644         PrintError("Could not register %s as block frontend\n", dev_id);
645         v3_remove_device(dev);
646         return -1;
647     }
648
649     return 0;
650 }
651
652
653 device_register("LNX_VIRTIO_BLK", virtio_init)