Palacios Public Git Repository

To check out Palacios, execute:

  git clone http://v3vee.org/palacios/palacios.web/palacios.git
This will give you the master branch. You probably want the devel branch or one of the release branches. To switch to the devel branch, simply execute
  cd palacios
  git checkout --track -b devel origin/devel
The other branches are similar.


ffb8c24327e13d9dfbf2aa42d5675ab771157773
[palacios.git] / palacios / src / devices / lnx_virtio_blk.c
1 /* 
2  * This file is part of the Palacios Virtual Machine Monitor developed
3  * by the V3VEE Project with funding from the United States National 
4  * Science Foundation and the Department of Energy.  
5  *
6  * The V3VEE Project is a joint project between Northwestern University
7  * and the University of New Mexico.  You can find out more at 
8  * http://www.v3vee.org
9  *
10  * Copyright (c) 2008, Jack Lange <jarusl@cs.northwestern.edu>
11  * Copyright (c) 2008, The V3VEE Project <http://www.v3vee.org> 
12  * All rights reserved.
13  *
14  * Author: Jack Lange <jarusl@cs.northwestern.edu>
15  *
16  * This is free software.  You are permitted to use,
17  * redistribute, and modify it as specified in the file "V3VEE_LICENSE".
18  */
19
20 #include <palacios/vmm.h>
21 #include <palacios/vmm_dev_mgr.h>
22 #include <devices/lnx_virtio_pci.h>
23 #include <palacios/vm_guest_mem.h>
24
25 #include <devices/pci.h>
26
27
28
29 #ifndef CONFIG_DEBUG_VIRTIO_BLK
30 #undef PrintDebug
31 #define PrintDebug(fmt, args...)
32 #endif
33
34
35 #define SECTOR_SIZE 512
36
37 #define BLK_CAPACITY_PORT     20
38 #define BLK_MAX_SIZE_PORT     28
39 #define BLK_MAX_SEG_PORT      32
40 #define BLK_CYLINDERS_PORT    36
41 #define BLK_HEADS_PORT        38
42 #define BLK_SECTS_PORT        39
43
44 #define BLK_IN_REQ            0
45 #define BLK_OUT_REQ           1
46 #define BLK_SCSI_CMD          2
47
48 #define BLK_BARRIER_FLAG     0x80000000
49
50 #define BLK_STATUS_OK             0
51 #define BLK_STATUS_ERR            1
52 #define BLK_STATUS_NOT_SUPPORTED  2
53
54
/* Virtio block device configuration space, exposed to the guest via the
 * I/O ports that follow the common virtio config registers (see the
 * config-space fallthrough in virtio_io_read).  Packed so the field
 * offsets match the BLK_*_PORT constants above. */
struct blk_config {
    uint64_t capacity;    // disk size in 512-byte sectors (set in connect_fn)
    uint32_t max_size;    // max segment size (meaningful if VIRTIO_SIZE_MAX is offered)
    uint32_t max_seg;     // max number of segments (set to QUEUE_SIZE - 2 in register_dev)
    uint16_t cylinders;   // legacy geometry: cylinders
    uint8_t heads;        // legacy geometry: heads
    uint8_t sectors;      // legacy geometry: sectors per track
} __attribute__((packed));
63
64
65
/* Request header carried in the first descriptor of every guest block
 * request chain.  Packed to match the guest's in-memory layout. */
struct blk_op_hdr {
    uint32_t type;    // BLK_IN_REQ, BLK_OUT_REQ, or BLK_SCSI_CMD
    uint32_t prior;   // request priority (not used by this device)
    uint64_t sector;  // starting sector; advanced as each data buffer is processed
} __attribute__((packed));
71
72 #define QUEUE_SIZE 128
73
74 /* Host Feature flags */
75 #define VIRTIO_BARRIER       0x01       /* Does host support barriers? */
76 #define VIRTIO_SIZE_MAX      0x02       /* Indicates maximum segment size */
77 #define VIRTIO_SEG_MAX       0x04       /* Indicates maximum # of segments */
78 #define VIRTIO_LEGACY_GEOM   0x10       /* Indicates support of legacy geometry */
79
80
/* Frontend-wide state: one instance per LNX_VIRTIO_BLK device,
 * shared by all block backends that connect to it. */
struct virtio_dev_state {
    struct vm_device * pci_bus;   // PCI bus the per-backend virtio devices register on
    struct list_head dev_list;    // list of virtio_blk_state entries (linked via dev_link)
};
85
/* Per-backend device state: one instance per connected block backend. */
struct virtio_blk_state {

    struct pci_device * pci_dev;        // PCI function created in register_dev
    struct blk_config block_cfg;        // device-specific config space
    struct virtio_config virtio_cfg;    // common virtio config registers

    
    struct virtio_queue queue;          // single request queue (virtio-blk uses one)

    struct v3_dev_blk_ops * ops;        // backend read/write/get_capacity callbacks

    void * backend_data;                // opaque handle passed back to ops

    int io_range_size;                  // BAR 0 port count, rounded up to a power of 2

    struct virtio_dev_state * virtio_dev;   // owning frontend state

    struct list_head dev_link;          // membership in virtio_dev_state.dev_list
};
105
106
/* Device-manager free hook.  Teardown is not implemented, so this
 * always reports failure. */
static int virtio_free(struct vm_device * dev) {
    return -1;
}
110
111 static int blk_reset(struct virtio_blk_state * virtio) {
112
113     virtio->queue.ring_desc_addr = 0;
114     virtio->queue.ring_avail_addr = 0;
115     virtio->queue.ring_used_addr = 0;
116     virtio->queue.pfn = 0;
117     virtio->queue.cur_avail_idx = 0;
118
119     virtio->virtio_cfg.status = 0;
120     virtio->virtio_cfg.pci_isr = 0;
121     return 0;
122 }
123
124
/* Device-manager reset hook: reset every block device attached to this
 * frontend.
 * NOTE(review): nothing in this file visibly adds a blk_state to
 * dev_list, so as written this loop may iterate an empty list — verify
 * against register_dev/connect_fn. */
static int virtio_reset(struct vm_device * dev) {
    struct virtio_dev_state * dev_state = (struct virtio_dev_state *)(dev->private_data);
    struct virtio_blk_state * blk_state = NULL;

    list_for_each_entry(blk_state, &(dev_state->dev_list), dev_link) {
        blk_reset(blk_state);
    }

    return 0;
}
135
136 static int handle_read_op(struct virtio_blk_state * blk_state, uint8_t * buf, uint64_t * sector, uint64_t len) {
137     int ret = -1;
138
139     PrintDebug("Reading Disk\n");
140     ret = blk_state->ops->read(buf, (*sector) * SECTOR_SIZE, len, (void *)(blk_state->backend_data));
141     *sector += (len / SECTOR_SIZE);
142
143     return ret;
144 }
145
146
147 static int handle_write_op(struct virtio_blk_state * blk_state, uint8_t * buf, uint64_t * sector, uint64_t len) {
148     int ret = -1;
149
150     PrintDebug("Writing Disk\n");
151     ret = blk_state->ops->write(buf, (*sector) * SECTOR_SIZE, len, (void *)(blk_state->backend_data));
152     *sector += (len / SECTOR_SIZE);
153
154     return ret;
155 }
156
157
158
159 // multiple block operations need to increment the sector 
160
161 static int handle_block_op(struct guest_info * core, struct virtio_blk_state * blk_state, struct blk_op_hdr * hdr, 
162                            struct vring_desc * buf_desc, uint8_t * status) {
163     uint8_t * buf = NULL;
164
165     PrintDebug("Handling Block op\n");
166     if (v3_gpa_to_hva(core, buf_desc->addr_gpa, (addr_t *)&(buf)) == -1) {
167         PrintError("Could not translate buffer address\n");
168         return -1;
169     }
170
171     PrintDebug("Sector=%p Length=%d\n", (void *)(addr_t)(hdr->sector), buf_desc->length);
172
173     if (hdr->type == BLK_IN_REQ) {
174         if (handle_read_op(blk_state, buf, &(hdr->sector), buf_desc->length) == -1) {
175             *status = BLK_STATUS_ERR;
176             return -1;
177         } else {
178             *status = BLK_STATUS_OK;
179         }
180     } else if (hdr->type == BLK_OUT_REQ) {
181         if (handle_write_op(blk_state, buf, &(hdr->sector), buf_desc->length) == -1) {
182             *status = BLK_STATUS_ERR;
183             return -1;
184         } else {
185             *status = BLK_STATUS_OK;
186         }
187     } else if (hdr->type == BLK_SCSI_CMD) {
188         PrintError("VIRTIO: SCSI Command Not supported!!!\n");
189         *status = BLK_STATUS_NOT_SUPPORTED;
190         return -1;
191     }
192
193     PrintDebug("Returning Status: %d\n", *status);
194
195     return 0;
196 }
197
198 static int get_desc_count(struct virtio_queue * q, int index) {
199     struct vring_desc * tmp_desc = &(q->desc[index]);
200     int cnt = 1;
201     
202     while (tmp_desc->flags & VIRTIO_NEXT_FLAG) {
203         tmp_desc = &(q->desc[tmp_desc->next]);
204         cnt++;
205     }
206
207     return cnt;
208 }
209
210
211
/* Process all requests the guest has published on the avail ring since
 * our last pass (invoked on a write to VRING_Q_NOTIFY_PORT).
 * Each request is a descriptor chain laid out as:
 *   [0] block op header, [1 .. n-2] data buffers, [n-1] one-byte status.
 * Each completed request is posted to the used ring, and an interrupt
 * is raised unless the guest set VIRTIO_NO_IRQ_FLAG.
 * Returns 0 on success, -1 on any translation or backend error. */
static int handle_kick(struct guest_info * core, struct virtio_blk_state * blk_state) {  
    struct virtio_queue * q = &(blk_state->queue);

    PrintDebug("VIRTIO KICK: cur_index=%d (mod=%d), avail_index=%d\n", 
               q->cur_avail_idx, q->cur_avail_idx % QUEUE_SIZE, q->avail->index);

    // Drain every avail-ring entry we have not yet consumed
    while (q->cur_avail_idx != q->avail->index) {
        struct vring_desc * hdr_desc = NULL;
        struct vring_desc * buf_desc = NULL;
        struct vring_desc * status_desc = NULL;
        struct blk_op_hdr hdr;
        addr_t hdr_addr = 0;
        uint16_t desc_idx = q->avail->ring[q->cur_avail_idx % QUEUE_SIZE];
        int desc_cnt = get_desc_count(q, desc_idx);
        int i = 0;
        uint8_t * status_ptr = NULL;
        uint8_t status = BLK_STATUS_OK;
        uint32_t req_len = 0;   // total bytes handled; reported in the used ring

        PrintDebug("Descriptor Count=%d, index=%d\n", desc_cnt, q->cur_avail_idx % QUEUE_SIZE);

        // A valid request needs at least header + one data buffer + status
        if (desc_cnt < 3) {
            PrintError("Block operations must include at least 3 descriptors\n");
            return -1;
        }

        hdr_desc = &(q->desc[desc_idx]);


        PrintDebug("Header Descriptor (ptr=%p) gpa=%p, len=%d, flags=%x, next=%d\n", hdr_desc, 
                   (void *)(hdr_desc->addr_gpa), hdr_desc->length, hdr_desc->flags, hdr_desc->next);    

        if (v3_gpa_to_hva(core, hdr_desc->addr_gpa, &(hdr_addr)) == -1) {
            PrintError("Could not translate block header address\n");
            return -1;
        }

        // We copy the block op header out because we are going to modify its contents
        // (the sector field advances as each data buffer is processed)
        memcpy(&hdr, (void *)hdr_addr, sizeof(struct blk_op_hdr));
        
        PrintDebug("Blk Op Hdr (ptr=%p) type=%d, sector=%p\n", (void *)hdr_addr, hdr.type, (void *)hdr.sector);

        desc_idx = hdr_desc->next;

        // Walk the desc_cnt - 2 data buffers between header and status
        for (i = 0; i < desc_cnt - 2; i++) {
            uint8_t tmp_status = BLK_STATUS_OK;

            buf_desc = &(q->desc[desc_idx]);

            PrintDebug("Buffer Descriptor (ptr=%p) gpa=%p, len=%d, flags=%x, next=%d\n", buf_desc, 
                       (void *)(buf_desc->addr_gpa), buf_desc->length, buf_desc->flags, buf_desc->next);

            if (handle_block_op(core, blk_state, &hdr, buf_desc, &tmp_status) == -1) {
                PrintError("Error handling block operation\n");
                return -1;
            }

            // Remember the first/last non-OK status for the whole request
            if (tmp_status != BLK_STATUS_OK) {
                status = tmp_status;
            }

            req_len += buf_desc->length;
            desc_idx = buf_desc->next;
        }

        // Last descriptor holds the guest-visible completion status byte
        status_desc = &(q->desc[desc_idx]);

        PrintDebug("Status Descriptor (ptr=%p) gpa=%p, len=%d, flags=%x, next=%d\n", status_desc, 
                   (void *)(status_desc->addr_gpa), status_desc->length, status_desc->flags, status_desc->next);

        if (v3_gpa_to_hva(core, status_desc->addr_gpa, (addr_t *)&(status_ptr)) == -1) {
            PrintError("Could not translate status address\n");
            return -1;
        }

        req_len += status_desc->length;
        *status_ptr = status;

        // Publish completion in the used ring
        q->used->ring[q->used->index % QUEUE_SIZE].id = q->avail->ring[q->cur_avail_idx % QUEUE_SIZE];
        q->used->ring[q->used->index % QUEUE_SIZE].length = req_len; // What do we set this to????

        q->used->index++;
        q->cur_avail_idx++;
    }

    // Interrupt the guest unless it asked for notification suppression
    if (!(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) {
        PrintDebug("Raising IRQ %d\n",  blk_state->pci_dev->config_header.intr_line);
        v3_pci_raise_irq(blk_state->virtio_dev->pci_bus, 0, blk_state->pci_dev);
        blk_state->virtio_cfg.pci_isr = 1;
    }

    return 0;
}
305
/* Guest I/O-port write handler for this device's BAR 0 range.
 * port_idx is the offset into the virtio register block (the BAR base
 * is a multiple of io_range_size, so modulo recovers the offset).
 * Returns the number of bytes written, or -1 on error. */
static int virtio_io_write(struct guest_info * core, uint16_t port, void * src, uint_t length, void * private_data) {
    struct virtio_blk_state * blk_state = (struct virtio_blk_state *)private_data;
    int port_idx = port % blk_state->io_range_size;


    PrintDebug("VIRTIO BLOCK Write for port %d (index=%d) len=%d, value=%x\n", 
               port, port_idx,  length, *(uint32_t *)src);



    switch (port_idx) {
        case GUEST_FEATURES_PORT:
            // Guest acknowledges the feature bits it will use
            if (length != 4) {
                PrintError("Illegal write length for guest features\n");
                return -1;
            }
            
            blk_state->virtio_cfg.guest_features = *(uint32_t *)src;
            PrintDebug("Setting Guest Features to %x\n", blk_state->virtio_cfg.guest_features);

            break;
        case VRING_PG_NUM_PORT:
            // Guest supplies the page frame number of the vring; derive
            // the (legacy virtio) contiguous ring layout from it:
            // descriptor table, then avail ring, then used ring on the
            // next page boundary.
            if (length == 4) {
                addr_t pfn = *(uint32_t *)src;
                addr_t page_addr = (pfn << VIRTIO_PAGE_SHIFT);


                blk_state->queue.pfn = pfn;
                
                blk_state->queue.ring_desc_addr = page_addr ;
                blk_state->queue.ring_avail_addr = page_addr + (QUEUE_SIZE * sizeof(struct vring_desc));
                blk_state->queue.ring_used_addr = ( blk_state->queue.ring_avail_addr + \
                                                 sizeof(struct vring_avail)    + \
                                                 (QUEUE_SIZE * sizeof(uint16_t)));
                
                // round up to next page boundary.
                blk_state->queue.ring_used_addr = (blk_state->queue.ring_used_addr + 0xfff) & ~0xfff;

                // Translate each region once up front so the kick path
                // can use host pointers directly
                if (v3_gpa_to_hva(core, blk_state->queue.ring_desc_addr, (addr_t *)&(blk_state->queue.desc)) == -1) {
                    PrintError("Could not translate ring descriptor address\n");
                    return -1;
                }


                if (v3_gpa_to_hva(core, blk_state->queue.ring_avail_addr, (addr_t *)&(blk_state->queue.avail)) == -1) {
                    PrintError("Could not translate ring available address\n");
                    return -1;
                }


                if (v3_gpa_to_hva(core, blk_state->queue.ring_used_addr, (addr_t *)&(blk_state->queue.used)) == -1) {
                    PrintError("Could not translate ring used address\n");
                    return -1;
                }

                PrintDebug("RingDesc_addr=%p, Avail_addr=%p, Used_addr=%p\n",
                           (void *)(blk_state->queue.ring_desc_addr),
                           (void *)(blk_state->queue.ring_avail_addr),
                           (void *)(blk_state->queue.ring_used_addr));

                PrintDebug("RingDesc=%p, Avail=%p, Used=%p\n", 
                           blk_state->queue.desc, blk_state->queue.avail, blk_state->queue.used);

            } else {
                PrintError("Illegal write length for page frame number\n");
                return -1;
            }
            break;
        case VRING_Q_SEL_PORT:
            // Only queue 0 exists for virtio-blk
            blk_state->virtio_cfg.vring_queue_selector = *(uint16_t *)src;

            if (blk_state->virtio_cfg.vring_queue_selector != 0) {
                PrintError("Virtio Block device only uses 1 queue, selected %d\n", 
                           blk_state->virtio_cfg.vring_queue_selector);
                return -1;
            }

            break;
        case VRING_Q_NOTIFY_PORT:
            // Guest "kick": process all pending requests on the ring
            PrintDebug("Handling Kick\n");
            if (handle_kick(core, blk_state) == -1) {
                PrintError("Could not handle Block Notification\n");
                return -1;
            }
            break;
        case VIRTIO_STATUS_PORT:
            // Writing 0 to the status register resets the device
            blk_state->virtio_cfg.status = *(uint8_t *)src;

            if (blk_state->virtio_cfg.status == 0) {
                PrintDebug("Resetting device\n");
                blk_reset(blk_state);
            }

            break;

        case VIRTIO_ISR_PORT:
            blk_state->virtio_cfg.pci_isr = *(uint8_t *)src;
            break;
        default:
            return -1;
            break;
    }

    return length;
}
411
412
/* Guest I/O-port read handler for this device's BAR 0 range.
 * Reads past the common virtio registers fall through to the
 * device-specific block config space (struct blk_config).
 * Returns the number of bytes read, or -1 on error. */
static int virtio_io_read(struct guest_info * core, uint16_t port, void * dst, uint_t length, void * private_data) {
    struct virtio_blk_state * blk_state = (struct virtio_blk_state *)private_data;
    int port_idx = port % blk_state->io_range_size;


    PrintDebug("VIRTIO BLOCK Read  for port %d (index =%d), length=%d\n", 
               port, port_idx, length);

    switch (port_idx) {
        case HOST_FEATURES_PORT:
            // Feature bits the host offers (VIRTIO_SEG_MAX here)
            if (length != 4) {
                PrintError("Illegal read length for host features\n");
                return -1;
            }

            *(uint32_t *)dst = blk_state->virtio_cfg.host_features;
        
            break;
        case VRING_PG_NUM_PORT:
            // Page frame number the guest previously programmed (0 after reset)
            if (length != 4) {
                PrintError("Illegal read length for page frame number\n");
                return -1;
            }

            *(uint32_t *)dst = blk_state->queue.pfn;

            break;
        case VRING_SIZE_PORT:
            if (length != 2) {
                PrintError("Illegal read length for vring size\n");
                return -1;
            }
                
            *(uint16_t *)dst = blk_state->queue.queue_size;

            break;

        case VIRTIO_STATUS_PORT:
            if (length != 1) {
                PrintError("Illegal read length for status\n");
                return -1;
            }

            *(uint8_t *)dst = blk_state->virtio_cfg.status;
            break;

        case VIRTIO_ISR_PORT:
            // Reading the ISR acknowledges it: clear and deassert the IRQ
            *(uint8_t *)dst = blk_state->virtio_cfg.pci_isr;
            blk_state->virtio_cfg.pci_isr = 0;
            v3_pci_lower_irq(blk_state->virtio_dev->pci_bus, 0, blk_state->pci_dev);
            break;

        default:
            // Device-specific config space follows the common registers
            if ( (port_idx >= sizeof(struct virtio_config)) && 
                 (port_idx < (sizeof(struct virtio_config) + sizeof(struct blk_config))) ) {
                int cfg_offset = port_idx - sizeof(struct virtio_config);
                uint8_t * cfg_ptr = (uint8_t *)&(blk_state->block_cfg);

                memcpy(dst, cfg_ptr + cfg_offset, length);
                
            } else {
                PrintError("Read of Unhandled Virtio Read\n");
                return -1;
            }
          
            break;
    }

    return length;
}
483
484
485
486
/* Device-manager hooks.  free is a stub that always fails; reset
 * re-initializes every attached block device. */
static struct v3_device_ops dev_ops = {
    .free = virtio_free,
    .reset = virtio_reset,
    .start = NULL,
    .stop = NULL,
};
493
494
495
496
497
498 static int register_dev(struct virtio_dev_state * virtio, struct virtio_blk_state * blk_state) {
499     // initialize PCI
500     struct pci_device * pci_dev = NULL;
501     struct v3_pci_bar bars[6];
502     int num_ports = sizeof(struct virtio_config) + sizeof(struct blk_config);
503     int tmp_ports = num_ports;
504     int i;
505
506
507
508     // This gets the number of ports, rounded up to a power of 2
509     blk_state->io_range_size = 1; // must be a power of 2
510     
511     while (tmp_ports > 0) {
512         tmp_ports >>= 1;
513         blk_state->io_range_size <<= 1;
514     }
515         
516     // this is to account for any low order bits being set in num_ports
517     // if there are none, then num_ports was already a power of 2 so we shift right to reset it
518     if ((num_ports & ((blk_state->io_range_size >> 1) - 1)) == 0) {
519         blk_state->io_range_size >>= 1;
520     }
521     
522     
523     for (i = 0; i < 6; i++) {
524         bars[i].type = PCI_BAR_NONE;
525     }
526     
527     PrintDebug("Virtio-BLK io_range_size = %d\n", blk_state->io_range_size);
528     
529     bars[0].type = PCI_BAR_IO;
530     bars[0].default_base_port = -1;
531     bars[0].num_ports = blk_state->io_range_size;
532     
533     bars[0].io_read = virtio_io_read;
534     bars[0].io_write = virtio_io_write;
535     bars[0].private_data = blk_state;
536     
537     pci_dev = v3_pci_register_device(virtio->pci_bus, PCI_STD_DEVICE, 
538                                      0, PCI_AUTO_DEV_NUM, 0,
539                                      "LNX_VIRTIO_BLK", bars,
540                                      NULL, NULL, NULL, blk_state);
541     
542     if (!pci_dev) {
543         PrintError("Could not register PCI Device\n");
544         return -1;
545     }
546     
547     pci_dev->config_header.vendor_id = VIRTIO_VENDOR_ID;
548     pci_dev->config_header.subsystem_vendor_id = VIRTIO_SUBVENDOR_ID;
549     
550     
551     pci_dev->config_header.device_id = VIRTIO_BLOCK_DEV_ID;
552     pci_dev->config_header.class = PCI_CLASS_STORAGE;
553     pci_dev->config_header.subclass = PCI_STORAGE_SUBCLASS_OTHER;
554     
555     pci_dev->config_header.subsystem_id = VIRTIO_BLOCK_SUBDEVICE_ID;
556     
557     
558     pci_dev->config_header.intr_pin = 1;
559     
560     pci_dev->config_header.max_latency = 1; // ?? (qemu does it...)
561     
562     
563     blk_state->pci_dev = pci_dev;
564     
565     /* Block configuration */
566     blk_state->virtio_cfg.host_features = VIRTIO_SEG_MAX;
567     blk_state->block_cfg.max_seg = QUEUE_SIZE - 2;
568
569     // Virtio Block only uses one queue
570     blk_state->queue.queue_size = QUEUE_SIZE;
571
572     blk_state->virtio_dev = virtio;
573
574     blk_reset(blk_state);
575
576
577     return 0;
578 }
579
580
581 static int connect_fn(struct v3_vm_info * vm, 
582                       void * frontend_data, 
583                       struct v3_dev_blk_ops * ops, 
584                       v3_cfg_tree_t * cfg, 
585                       void * private_data) {
586
587     struct virtio_dev_state * virtio = (struct virtio_dev_state *)frontend_data;
588
589     struct virtio_blk_state * blk_state  = (struct virtio_blk_state *)V3_Malloc(sizeof(struct virtio_blk_state));
590     memset(blk_state, 0, sizeof(struct virtio_blk_state));
591
592     register_dev(virtio, blk_state);
593
594     blk_state->ops = ops;
595     blk_state->backend_data = private_data;
596
597     blk_state->block_cfg.capacity = ops->get_capacity(private_data) / SECTOR_SIZE;
598
599     PrintDebug("Virtio Capacity = %d -- 0x%p\n", (int)(blk_state->block_cfg.capacity), 
600                (void *)(addr_t)(blk_state->block_cfg.capacity));
601
602     return 0;
603 }
604
605
606 static int virtio_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) {
607     struct vm_device * pci_bus = v3_find_dev(vm, v3_cfg_val(cfg, "bus"));
608     struct virtio_dev_state * virtio_state = NULL;
609     char * dev_id = v3_cfg_val(cfg, "ID");
610
611     PrintDebug("Initializing VIRTIO Block device\n");
612
613     if (pci_bus == NULL) {
614         PrintError("VirtIO devices require a PCI Bus");
615         return -1;
616     }
617
618
619     virtio_state  = (struct virtio_dev_state *)V3_Malloc(sizeof(struct virtio_dev_state));
620     memset(virtio_state, 0, sizeof(struct virtio_dev_state));
621
622     INIT_LIST_HEAD(&(virtio_state->dev_list));
623     virtio_state->pci_bus = pci_bus;
624
625
626     struct vm_device * dev = v3_allocate_device(dev_id, &dev_ops, virtio_state);
627     if (v3_attach_device(vm, dev) == -1) {
628         PrintError("Could not attach device %s\n", dev_id);
629         return -1;
630     }
631
632     if (v3_dev_add_blk_frontend(vm, dev_id, connect_fn, (void *)virtio_state) == -1) {
633         PrintError("Could not register %s as block frontend\n", dev_id);
634         return -1;
635     }
636
637     return 0;
638 }
639
640
641 device_register("LNX_VIRTIO_BLK", virtio_init)