Palacios Public Git Repository

To check out Palacios, execute

  git clone http://v3vee.org/palacios/palacios.web/palacios.git

This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute

  cd palacios
  git checkout --track -b devel origin/devel

The other branches are handled the same way, as in the example below.
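For instance, tracking a release branch looks like this (the branch name is illustrative; run "git branch -r" to list the branches that actually exist):

  git checkout --track -b Release-1.2 origin/Release-1.2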


palacios/src/devices/lnx_virtio_blk.c (as of the commit "integrated new configuration system")
/* 
 * This file is part of the Palacios Virtual Machine Monitor developed
 * by the V3VEE Project with funding from the United States National 
 * Science Foundation and the Department of Energy.  
 *
 * The V3VEE Project is a joint project between Northwestern University
 * and the University of New Mexico.  You can find out more at 
 * http://www.v3vee.org
 *
 * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
 * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
 * All rights reserved.
 *
 * Author: Jack Lange <jarusl@cs.northwestern.edu>
 *
 * This is free software.  You are permitted to use,
 * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
 */

#include <palacios/vmm.h>
#include <palacios/vmm_dev_mgr.h>
#include <devices/lnx_virtio_pci.h>
#include <palacios/vm_guest_mem.h>

#include <devices/pci.h>



#ifndef CONFIG_DEBUG_VIRTIO_BLK
#undef PrintDebug
#define PrintDebug(fmt, args...)
#endif


#define BLK_CAPACITY_PORT     20
#define BLK_MAX_SIZE_PORT     28
#define BLK_MAX_SEG_PORT      32
#define BLK_CYLINDERS_PORT    36
#define BLK_HEADS_PORT        38
#define BLK_SECTS_PORT        39

#define BLK_IN_REQ            0
#define BLK_OUT_REQ           1
#define BLK_SCSI_CMD          2

#define BLK_BARRIER_FLAG     0x80000000

#define BLK_STATUS_OK             0
#define BLK_STATUS_ERR            1
#define BLK_STATUS_NOT_SUPPORTED  2


struct blk_config {
    uint64_t capacity;
    uint32_t max_size;
    uint32_t max_seg;
    uint16_t cylinders;
    uint8_t heads;
    uint8_t sectors;
} __attribute__((packed));



struct blk_op_hdr {
    uint32_t type;
    uint32_t prior;
    uint64_t sector;
} __attribute__((packed));

#define QUEUE_SIZE 128

/* Host Feature flags */
#define VIRTIO_BARRIER       0x01       /* Does host support barriers? */
#define VIRTIO_SIZE_MAX      0x02       /* Indicates maximum segment size */
#define VIRTIO_SEG_MAX       0x04       /* Indicates maximum # of segments */
#define VIRTIO_LEGACY_GEOM   0x10       /* Indicates support of legacy geometry */


struct virtio_dev_state {
    struct vm_device * pci_bus;
    struct list_head dev_list;
    struct guest_info * vm;
};

struct virtio_blk_state {

    struct pci_device * pci_dev;
    struct blk_config block_cfg;
    struct virtio_config virtio_cfg;

    
    struct virtio_queue queue;

    struct v3_dev_blk_ops * ops;

    void * backend_data;

    int io_range_size;

    struct virtio_dev_state * virtio_dev;

    struct list_head dev_link;
};


static int virtio_free(struct vm_device * dev) {
    return -1;
}

static int blk_reset(struct virtio_blk_state * virtio) {

    virtio->queue.ring_desc_addr = 0;
    virtio->queue.ring_avail_addr = 0;
    virtio->queue.ring_used_addr = 0;
    virtio->queue.pfn = 0;
    virtio->queue.cur_avail_idx = 0;

    virtio->virtio_cfg.status = 0;
    virtio->virtio_cfg.pci_isr = 0;
    return 0;
}


static int virtio_reset(struct vm_device * dev) {
    struct virtio_dev_state * dev_state = (struct virtio_dev_state *)(dev->private_data);
    struct virtio_blk_state * blk_state = NULL;

    list_for_each_entry(blk_state, &(dev_state->dev_list), dev_link) {
        blk_reset(blk_state);
    }

    return 0;
}

static int handle_read_op(struct virtio_blk_state * blk_state, uint8_t * buf, uint64_t * sector, uint64_t len) {
    int ret = -1;

    PrintDebug("Reading Disk\n");
    ret = blk_state->ops->read(buf, *sector, len, (void *)(blk_state->backend_data));
    *sector += len;

    return ret;
}


static int handle_write_op(struct virtio_blk_state * blk_state, uint8_t * buf, uint64_t * sector, uint64_t len) {
    int ret = -1;

    PrintDebug("Writing Disk\n");
    ret = blk_state->ops->write(buf, *sector, len, (void *)(blk_state->backend_data));
    *sector += len;

    return ret;
}



// multiple block operations need to increment the sector 

static int handle_block_op(struct virtio_blk_state * blk_state, struct blk_op_hdr * hdr, 
                           struct vring_desc * buf_desc, uint8_t * status) {
    uint8_t * buf = NULL;

    PrintDebug("Handling Block op\n");
    if (guest_pa_to_host_va(blk_state->virtio_dev->vm, buf_desc->addr_gpa, (addr_t *)&(buf)) == -1) {
        PrintError("Could not translate buffer address\n");
        return -1;
    }

    PrintDebug("Sector=%p Length=%d\n", (void *)(addr_t)(hdr->sector), buf_desc->length);

    if (hdr->type == BLK_IN_REQ) {
        if (handle_read_op(blk_state, buf, &(hdr->sector), buf_desc->length) == -1) {
            *status = BLK_STATUS_ERR;
            return -1;
        } else {
            *status = BLK_STATUS_OK;
        }
    } else if (hdr->type == BLK_OUT_REQ) {
        if (handle_write_op(blk_state, buf, &(hdr->sector), buf_desc->length) == -1) {
            *status = BLK_STATUS_ERR;
            return -1;
        } else {
            *status = BLK_STATUS_OK;
        }
    } else if (hdr->type == BLK_SCSI_CMD) {
        PrintError("VIRTIO: SCSI Command Not supported!!!\n");
        *status = BLK_STATUS_NOT_SUPPORTED;
        return -1;
    }

    PrintDebug("Returning Status: %d\n", *status);

    return 0;
}

static int get_desc_count(struct virtio_queue * q, int index) {
    struct vring_desc * tmp_desc = &(q->desc[index]);
    int cnt = 1;
    
    while (tmp_desc->flags & VIRTIO_NEXT_FLAG) {
        tmp_desc = &(q->desc[tmp_desc->next]);
        cnt++;
    }

    return cnt;
}



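/* Process all pending requests on the available ring.  Each virtio block
 * request arrives as a descriptor chain of at least three entries: a request
 * header (struct blk_op_hdr), one or more data buffers, and a final one-byte
 * status descriptor that we fill in for the guest. */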
static int handle_kick(struct virtio_blk_state * blk_state) {  
    struct virtio_queue * q = &(blk_state->queue);

    PrintDebug("VIRTIO KICK: cur_index=%d (mod=%d), avail_index=%d\n", 
               q->cur_avail_idx, q->cur_avail_idx % QUEUE_SIZE, q->avail->index);

    while (q->cur_avail_idx < q->avail->index) {
        struct vring_desc * hdr_desc = NULL;
        struct vring_desc * buf_desc = NULL;
        struct vring_desc * status_desc = NULL;
        struct blk_op_hdr hdr;
        addr_t hdr_addr = 0;
        uint16_t desc_idx = q->avail->ring[q->cur_avail_idx % QUEUE_SIZE];
        int desc_cnt = get_desc_count(q, desc_idx);
        int i = 0;
        uint8_t * status_ptr = NULL;
        uint8_t status = BLK_STATUS_OK;
        uint32_t req_len = 0;

        PrintDebug("Descriptor Count=%d, index=%d\n", desc_cnt, q->cur_avail_idx % QUEUE_SIZE);

        if (desc_cnt < 3) {
            PrintError("Block operations must include at least 3 descriptors\n");
            return -1;
        }

        hdr_desc = &(q->desc[desc_idx]);


        PrintDebug("Header Descriptor (ptr=%p) gpa=%p, len=%d, flags=%x, next=%d\n", hdr_desc, 
                   (void *)(hdr_desc->addr_gpa), hdr_desc->length, hdr_desc->flags, hdr_desc->next);    

        if (guest_pa_to_host_va(blk_state->virtio_dev->vm, hdr_desc->addr_gpa, &(hdr_addr)) == -1) {
            PrintError("Could not translate block header address\n");
            return -1;
        }

        // We copy the block op header out because we are going to modify its contents
        memcpy(&hdr, (void *)hdr_addr, sizeof(struct blk_op_hdr));
        
        PrintDebug("Blk Op Hdr (ptr=%p) type=%d, sector=%p\n", (void *)hdr_addr, hdr.type, (void *)hdr.sector);

        desc_idx = hdr_desc->next;

        for (i = 0; i < desc_cnt - 2; i++) {
            uint8_t tmp_status = BLK_STATUS_OK;

            buf_desc = &(q->desc[desc_idx]);

            PrintDebug("Buffer Descriptor (ptr=%p) gpa=%p, len=%d, flags=%x, next=%d\n", buf_desc, 
                       (void *)(buf_desc->addr_gpa), buf_desc->length, buf_desc->flags, buf_desc->next);

            if (handle_block_op(blk_state, &hdr, buf_desc, &tmp_status) == -1) {
                PrintError("Error handling block operation\n");
                return -1;
            }

            if (tmp_status != BLK_STATUS_OK) {
                status = tmp_status;
            }

            req_len += buf_desc->length;
            desc_idx = buf_desc->next;
        }

        status_desc = &(q->desc[desc_idx]);

        PrintDebug("Status Descriptor (ptr=%p) gpa=%p, len=%d, flags=%x, next=%d\n", status_desc, 
                   (void *)(status_desc->addr_gpa), status_desc->length, status_desc->flags, status_desc->next);

        if (guest_pa_to_host_va(blk_state->virtio_dev->vm, status_desc->addr_gpa, (addr_t *)&(status_ptr)) == -1) {
            PrintError("Could not translate status address\n");
            return -1;
        }

        req_len += status_desc->length;
        *status_ptr = status;

        q->used->ring[q->used->index % QUEUE_SIZE].id = q->avail->ring[q->cur_avail_idx % QUEUE_SIZE];
        q->used->ring[q->used->index % QUEUE_SIZE].length = req_len; // What do we set this to????

        q->used->index++;
        q->cur_avail_idx++;
    }

    if (!(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) {
        PrintDebug("Raising IRQ %d\n",  blk_state->pci_dev->config_header.intr_line);
        v3_pci_raise_irq(blk_state->virtio_dev->pci_bus, 0, blk_state->pci_dev);
        blk_state->virtio_cfg.pci_isr = 1;
    }

    return 0;
}

static int virtio_io_write(uint16_t port, void * src, uint_t length, void * private_data) {
    struct virtio_blk_state * blk_state = (struct virtio_blk_state *)private_data;
    int port_idx = port % blk_state->io_range_size;


    PrintDebug("VIRTIO BLOCK Write for port %d (index=%d) len=%d, value=%x\n", 
               port, port_idx,  length, *(uint32_t *)src);



    switch (port_idx) {
        case GUEST_FEATURES_PORT:
            if (length != 4) {
                PrintError("Illegal write length for guest features\n");
                return -1;
            }
            
            blk_state->virtio_cfg.guest_features = *(uint32_t *)src;
            PrintDebug("Setting Guest Features to %x\n", blk_state->virtio_cfg.guest_features);

            break;
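        /* The guest writes the page frame number of the ring here (legacy
         * virtio PCI transport).  The descriptor table, available ring, and
         * used ring live in one contiguous guest-physical region starting at
         * that page, with the used ring rounded up to the next page boundary,
         * so all three addresses are derived from the single PFN below. */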
        case VRING_PG_NUM_PORT:
            if (length == 4) {
                addr_t pfn = *(uint32_t *)src;
                addr_t page_addr = (pfn << VIRTIO_PAGE_SHIFT);


                blk_state->queue.pfn = pfn;
                
                blk_state->queue.ring_desc_addr = page_addr ;
                blk_state->queue.ring_avail_addr = page_addr + (QUEUE_SIZE * sizeof(struct vring_desc));
                blk_state->queue.ring_used_addr = ( blk_state->queue.ring_avail_addr + \
                                                 sizeof(struct vring_avail)    + \
                                                 (QUEUE_SIZE * sizeof(uint16_t)));
                
                // round up to next page boundary.
                blk_state->queue.ring_used_addr = (blk_state->queue.ring_used_addr + 0xfff) & ~0xfff;

                if (guest_pa_to_host_va(blk_state->virtio_dev->vm, blk_state->queue.ring_desc_addr, (addr_t *)&(blk_state->queue.desc)) == -1) {
                    PrintError("Could not translate ring descriptor address\n");
                    return -1;
                }


                if (guest_pa_to_host_va(blk_state->virtio_dev->vm, blk_state->queue.ring_avail_addr, (addr_t *)&(blk_state->queue.avail)) == -1) {
                    PrintError("Could not translate ring available address\n");
                    return -1;
                }


                if (guest_pa_to_host_va(blk_state->virtio_dev->vm, blk_state->queue.ring_used_addr, (addr_t *)&(blk_state->queue.used)) == -1) {
                    PrintError("Could not translate ring used address\n");
                    return -1;
                }

                PrintDebug("RingDesc_addr=%p, Avail_addr=%p, Used_addr=%p\n",
                           (void *)(blk_state->queue.ring_desc_addr),
                           (void *)(blk_state->queue.ring_avail_addr),
                           (void *)(blk_state->queue.ring_used_addr));

                PrintDebug("RingDesc=%p, Avail=%p, Used=%p\n", 
                           blk_state->queue.desc, blk_state->queue.avail, blk_state->queue.used);

            } else {
                PrintError("Illegal write length for page frame number\n");
                return -1;
            }
            break;
        case VRING_Q_SEL_PORT:
            blk_state->virtio_cfg.vring_queue_selector = *(uint16_t *)src;

            if (blk_state->virtio_cfg.vring_queue_selector != 0) {
                PrintError("Virtio Block device only uses 1 queue, selected %d\n", 
                           blk_state->virtio_cfg.vring_queue_selector);
                return -1;
            }

            break;
        case VRING_Q_NOTIFY_PORT:
            PrintDebug("Handling Kick\n");
            if (handle_kick(blk_state) == -1) {
                PrintError("Could not handle Block Notification\n");
                return -1;
            }
            break;
        case VIRTIO_STATUS_PORT:
            blk_state->virtio_cfg.status = *(uint8_t *)src;

            if (blk_state->virtio_cfg.status == 0) {
                PrintDebug("Resetting device\n");
                blk_reset(blk_state);
            }

            break;

        case VIRTIO_ISR_PORT:
            blk_state->virtio_cfg.pci_isr = *(uint8_t *)src;
            break;
        default:
            return -1;
            break;
    }

    return length;
}


static int virtio_io_read(uint16_t port, void * dst, uint_t length, void * private_data) {
    struct virtio_blk_state * blk_state = (struct virtio_blk_state *)private_data;
    int port_idx = port % blk_state->io_range_size;


    PrintDebug("VIRTIO BLOCK Read  for port %d (index =%d), length=%d\n", 
               port, port_idx, length);

    switch (port_idx) {
        case HOST_FEATURES_PORT:
            if (length != 4) {
                PrintError("Illegal read length for host features\n");
                return -1;
            }

            *(uint32_t *)dst = blk_state->virtio_cfg.host_features;
        
            break;
        case VRING_PG_NUM_PORT:
            if (length != 4) {
                PrintError("Illegal read length for page frame number\n");
                return -1;
            }

            *(uint32_t *)dst = blk_state->queue.pfn;

            break;
        case VRING_SIZE_PORT:
            if (length != 2) {
                PrintError("Illegal read length for vring size\n");
                return -1;
            }
                
            *(uint16_t *)dst = blk_state->queue.queue_size;

            break;

        case VIRTIO_STATUS_PORT:
            if (length != 1) {
                PrintError("Illegal read length for status\n");
                return -1;
            }

            *(uint8_t *)dst = blk_state->virtio_cfg.status;
            break;

        case VIRTIO_ISR_PORT:
            *(uint8_t *)dst = blk_state->virtio_cfg.pci_isr;
            blk_state->virtio_cfg.pci_isr = 0;
            v3_pci_lower_irq(blk_state->virtio_dev->pci_bus, 0, blk_state->pci_dev);
            break;

        default:
            if ( (port_idx >= sizeof(struct virtio_config)) && 
                 (port_idx < (sizeof(struct virtio_config) + sizeof(struct blk_config))) ) {
                int cfg_offset = port_idx - sizeof(struct virtio_config);
                uint8_t * cfg_ptr = (uint8_t *)&(blk_state->block_cfg);

                memcpy(dst, cfg_ptr + cfg_offset, length);
                
            } else {
                PrintError("Read of Unhandled Virtio Read\n");
                return -1;
            }
          
            break;
    }

    return length;
}




static struct v3_device_ops dev_ops = {
    .free = virtio_free,
    .reset = virtio_reset,
    .start = NULL,
    .stop = NULL,
};




static int register_dev(struct virtio_dev_state * virtio, struct virtio_blk_state * blk_state) {
    // initialize PCI
    struct pci_device * pci_dev = NULL;
    struct v3_pci_bar bars[6];
    int num_ports = sizeof(struct virtio_config) + sizeof(struct blk_config);
    int tmp_ports = num_ports;
    int i;



    // This gets the number of ports, rounded up to a power of 2
    blk_state->io_range_size = 1; // must be a power of 2
    
    while (tmp_ports > 0) {
        tmp_ports >>= 1;
        blk_state->io_range_size <<= 1;
    }
        
    // this is to account for any low order bits being set in num_ports
    // if there are none, then num_ports was already a power of 2 so we shift right to reset it
    if ((num_ports & ((blk_state->io_range_size >> 1) - 1)) == 0) {
        blk_state->io_range_size >>= 1;
    }
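    // Example: num_ports = 40 would give io_range_size = 64 (six shifts in the
    // loop, and 40 & 31 != 0 so the correction does not fire), while an exact
    // power of two such as num_ports = 32 would first over-shoot to 64 and then
    // be corrected back down to 32 by the check above.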
    
    
    for (i = 0; i < 6; i++) {
        bars[i].type = PCI_BAR_NONE;
    }
    
    PrintDebug("Virtio-BLK io_range_size = %d\n", blk_state->io_range_size);
    
    bars[0].type = PCI_BAR_IO;
    bars[0].default_base_port = -1;
    bars[0].num_ports = blk_state->io_range_size;
    
    bars[0].io_read = virtio_io_read;
    bars[0].io_write = virtio_io_write;
    bars[0].private_data = blk_state;
    
    pci_dev = v3_pci_register_device(virtio->pci_bus, PCI_STD_DEVICE, 
                                     0, PCI_AUTO_DEV_NUM, 0,
                                     "LNX_VIRTIO_BLK", bars,
                                     NULL, NULL, NULL, blk_state);
    
    if (!pci_dev) {
        PrintError("Could not register PCI Device\n");
        return -1;
    }
    
    pci_dev->config_header.vendor_id = VIRTIO_VENDOR_ID;
    pci_dev->config_header.subsystem_vendor_id = VIRTIO_SUBVENDOR_ID;
    
    
    pci_dev->config_header.device_id = VIRTIO_BLOCK_DEV_ID;
    pci_dev->config_header.class = PCI_CLASS_STORAGE;
    pci_dev->config_header.subclass = PCI_STORAGE_SUBCLASS_OTHER;
    
    pci_dev->config_header.subsystem_id = VIRTIO_BLOCK_SUBDEVICE_ID;
    
    
    pci_dev->config_header.intr_pin = 1;
    
    pci_dev->config_header.max_latency = 1; // ?? (qemu does it...)
    
    
    blk_state->pci_dev = pci_dev;
    
    /* Block configuration */
    blk_state->virtio_cfg.host_features = VIRTIO_SEG_MAX;
    blk_state->block_cfg.max_seg = QUEUE_SIZE - 2;

    // Virtio Block only uses one queue
    blk_state->queue.queue_size = QUEUE_SIZE;

    blk_state->virtio_dev = virtio;

    blk_reset(blk_state);


    return 0;
}


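/* Called back by the block frontend layer (registered below via
 * v3_dev_add_blk_frontend) when a storage backend attaches to this device:
 * it allocates per-disk state, registers a virtio-blk PCI function, and
 * records the backend's read/write/capacity ops. */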
static int connect_fn(struct guest_info * info, 
                      void * frontend_data, 
                      struct v3_dev_blk_ops * ops, 
                      v3_cfg_tree_t * cfg, 
                      void * private_data) {

    struct virtio_dev_state * virtio = (struct virtio_dev_state *)frontend_data;

    struct virtio_blk_state * blk_state  = (struct virtio_blk_state *)V3_Malloc(sizeof(struct virtio_blk_state));
    memset(blk_state, 0, sizeof(struct virtio_blk_state));

    register_dev(virtio, blk_state);

    blk_state->ops = ops;
    blk_state->backend_data = private_data;

    blk_state->block_cfg.capacity = ops->get_capacity(private_data);

    // Track this disk on the device list so virtio_reset() can find it
    list_add(&(blk_state->dev_link), &(virtio->dev_list));

    PrintDebug("Virtio Capacity = %d -- 0x%p\n", (int)(blk_state->block_cfg.capacity), 
               (void *)(addr_t)(blk_state->block_cfg.capacity));

    return 0;
}


static int virtio_init(struct guest_info * vm, v3_cfg_tree_t * cfg) {
    struct vm_device * pci_bus = v3_find_dev(vm, v3_cfg_val(cfg, "bus"));
    struct virtio_dev_state * virtio_state = NULL;
    char * name = v3_cfg_val(cfg, "name");

    PrintDebug("Initializing VIRTIO Block device\n");

    if (pci_bus == NULL) {
        PrintError("VirtIO devices require a PCI Bus");
        return -1;
    }


    virtio_state  = (struct virtio_dev_state *)V3_Malloc(sizeof(struct virtio_dev_state));
    memset(virtio_state, 0, sizeof(struct virtio_dev_state));

    INIT_LIST_HEAD(&(virtio_state->dev_list));
    virtio_state->pci_bus = pci_bus;
    virtio_state->vm = vm;

    struct vm_device * dev = v3_allocate_device(name, &dev_ops, virtio_state);
    if (v3_attach_device(vm, dev) == -1) {
        PrintError("Could not attach device %s\n", name);
        return -1;
    }

    if (v3_dev_add_blk_frontend(vm, name, connect_fn, (void *)virtio_state) == -1) {
        PrintError("Could not register %s as block frontend\n", name);
        return -1;
    }

    return 0;
}


device_register("LNX_VIRTIO_BLK", virtio_init)