From: Jack Lange Date: Fri, 31 Jul 2009 05:15:56 +0000 (-0500) Subject: updated the block device interfaces to be more generic, and have the virtio block... X-Git-Url: http://v3vee.org/palacios/gitweb/gitweb.cgi?p=palacios.git;a=commitdiff_plain;h=942df9bb1e2570764d24c74f797247536639502e updated the block device interfaces to be more generic, and have the virtio block device just about working. --- diff --git a/palacios/include/devices/block_dev.h b/palacios/include/devices/block_dev.h new file mode 100644 index 0000000..574c5af --- /dev/null +++ b/palacios/include/devices/block_dev.h @@ -0,0 +1,66 @@ + +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2008, Jack Lange + * Copyright (c) 2008, The V3VEE Project + * All rights reserved. + * + * Author: Jack Lange + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". 
+ */ +#ifndef __DEVICES_BLOCK_DEV_H__ +#define __DEVICES_BLOCK_DEV_H__ + +#ifdef __V3VEE__ + + + +#define ATAPI_BLOCK_SIZE 2048 +#define HD_SECTOR_SIZE 512 + +/* Hard disk backend callbacks: lba, sector_count and capacity are in HD_SECTOR_SIZE units (as the ram_hd/net_hd backends implement them) */ +struct v3_hd_ops { + uint64_t (*get_capacity)(void * private_data); + // Reads and writes always operate on 512 byte sectors (HD_SECTOR_SIZE) + int (*read)(uint8_t * buf, int sector_count, uint64_t lba, void * private_data); + int (*write)(uint8_t * buf, int sector_count, uint64_t lba, void * private_data); +}; + + +/* CD-ROM backend callbacks (read only): block_count and capacity are in ATAPI_BLOCK_SIZE units */ +struct v3_cd_ops { + uint32_t (*get_capacity)(void * private_data); + // Reads always operate on 2048 byte blocks + int (*read)(uint8_t * buf, int block_count, uint64_t lba, void * private_data); +}; + + +typedef enum {BLOCK_NONE, BLOCK_DISK, BLOCK_CDROM} v3_block_type_t; + + +/* Printable names, indexed by v3_block_type_t */ +static const char * block_dev_type_strs[] = {"NONE", "HARDDISK", "CDROM" }; +/* Returns the printable name for type, or NULL when type is not a valid v3_block_type_t */ +static inline const char * v3_block_type_to_str(v3_block_type_t type) { + if ((unsigned int)type > BLOCK_CDROM) { // unsigned compare also rejects negative values + return NULL; + } + return block_dev_type_strs[type]; +} + + + +#endif + + +#endif diff --git a/palacios/include/devices/ide.h b/palacios/include/devices/ide.h index 88c593e..b778f1e 100644 --- a/palacios/include/devices/ide.h +++ b/palacios/include/devices/ide.h @@ -22,6 +22,7 @@ #ifdef __V3VEE__ +#include <devices/block_dev.h> struct ide_cfg { char pci[32]; @@ -29,38 +30,18 @@ struct ide_cfg { }; -#define ATAPI_BLOCK_SIZE 2048 -#define IDE_SECTOR_SIZE 512 - -typedef enum {IDE_NONE, IDE_DISK, IDE_CDROM} v3_ide_dev_type_t; - -struct v3_ide_cd_ops { - uint32_t (*get_capacity)(void * private_data); - // Reads always operate on 2048 byte blocks - int (*read)(uint8_t * buf, int block_count, uint64_t lba, void * private_data); -}; - - -struct v3_ide_hd_ops { - uint64_t (*get_capacity)(void * private_data); - // Reads always operate on 2048 byte blocks - int (*read)(uint8_t * buf, int sector_count, uint64_t lba, void * private_data); - int (*write)(uint8_t * buf, int sector_count, uint64_t lba, void * private_data); -}; - - int v3_ide_register_cdrom(struct vm_device * ide, uint_t bus_num, uint_t 
drive_num, char * drive_name, - struct v3_ide_cd_ops * ops, + struct v3_cd_ops * ops, void * private_data); int v3_ide_register_harddisk(struct vm_device * ide, uint_t bus_num, uint_t drive_num, char * drive_name, - struct v3_ide_hd_ops * ops, + struct v3_hd_ops * ops, void * private_data); diff --git a/palacios/include/devices/lnx_virtio_blk.h b/palacios/include/devices/lnx_virtio_blk.h new file mode 100644 index 0000000..1bcf08c --- /dev/null +++ b/palacios/include/devices/lnx_virtio_blk.h @@ -0,0 +1,40 @@ +/* + * This file is part of the Palacios Virtual Machine Monitor developed + * by the V3VEE Project with funding from the United States National + * Science Foundation and the Department of Energy. + * + * The V3VEE Project is a joint project between Northwestern University + * and the University of New Mexico. You can find out more at + * http://www.v3vee.org + * + * Copyright (c) 2008, Jack Lange + * Copyright (c) 2008, The V3VEE Project + * All rights reserved. + * + * Author: Jack Lange + * + * This is free software. You are permitted to use, + * redistribute, and modify it as specified in the file "V3VEE_LICENSE". + */ + +#ifndef __DEVICES_LNX_VIRTIO_BLK_H__ +#define __DEVICES_LNX_VIRTIO_BLK_H__ + +#ifdef __V3VEE__ + +#include + +int v3_virtio_register_cdrom(struct vm_device * dev, + struct v3_cd_ops * ops, + void * private_data); + + +int v3_virtio_register_harddisk(struct vm_device * dev, + struct v3_hd_ops * ops, + void * private_data); + + +#endif + + +#endif diff --git a/palacios/include/devices/lnx_virtio_pci.h b/palacios/include/devices/lnx_virtio_pci.h index d60f05d..b3bb6c8 100644 --- a/palacios/include/devices/lnx_virtio_pci.h +++ b/palacios/include/devices/lnx_virtio_pci.h @@ -48,8 +48,25 @@ #define VIRTIO_STATUS_PORT 18 #define VIRTIO_ISR_PORT 19 +#define VIRTIO_PAGE_SHIFT 12 +/* Descriptor flags */ +/* This marks a buffer as continuing via the next field. 
*/ +#define VIRTIO_NEXT_FLAG 0x1 +/* This marks a buffer as write-only (otherwise read-only). */ +#define VIRTIO_WR_ONLY_FLAG 0x2 + + +/* Used Flags */ +/* This means don't notify other side when buffer added. */ +#define VRING_NO_NOTIFY_FLAG 0x1 + + +/* Avail Flags */ +/* This means don't interrupt guest when buffer consumed. */ +#define VIRTIO_NO_IRQ_FLAG 0x1 + /* The virtio configuration space is a hybrid io/memory mapped model * All IO is done via IO port accesses * The IO ports access fields in a virtio data structure, and the base io port @@ -97,6 +114,23 @@ struct vring_used { +struct virtio_queue { + uint16_t queue_size; + + uint16_t cur_avail_idx; + + addr_t ring_desc_addr; + addr_t ring_avail_addr; + addr_t ring_used_addr; + + + struct vring_desc * desc; // We can treat this as an array... + struct vring_avail * avail; + struct vring_used * used; + + uint32_t pfn; +}; + #endif diff --git a/palacios/src/devices/ata.h b/palacios/src/devices/ata.h index 0bb0022..5cdb737 100644 --- a/palacios/src/devices/ata.h +++ b/palacios/src/devices/ata.h @@ -43,8 +43,8 @@ static void ata_identify_device(struct ide_drive * drive) { // Make it the simplest drive possible (1 head, 1 cyl, 1 sect/track) drive_id->num_cylinders = drive->num_cylinders; drive_id->num_heads = drive->num_heads; - drive_id->bytes_per_track = drive->num_sectors * IDE_SECTOR_SIZE; - drive_id->bytes_per_sector = IDE_SECTOR_SIZE; + drive_id->bytes_per_track = drive->num_sectors * HD_SECTOR_SIZE; + drive_id->bytes_per_sector = HD_SECTOR_SIZE; drive_id->sectors_per_track = drive->num_sectors; @@ -171,7 +171,7 @@ static int ata_get_lba(struct vm_device * dev, struct ide_channel * channel, uin drive->hd_ops->get_capacity(drive->private_data)) { PrintError("IDE: request size exceeds disk capacity (lba=%d) (sect_cnt=%d) (ReadEnd=%d) (capacity=%p)\n", lba_addr.addr, sect_cnt, - lba_addr.addr + (sect_cnt * IDE_SECTOR_SIZE), + lba_addr.addr + (sect_cnt * HD_SECTOR_SIZE), (void 
*)(addr_t)(drive->hd_ops->get_capacity(drive->private_data))); return -1; } @@ -198,7 +198,7 @@ static int ata_read_sectors(struct vm_device * dev, struct ide_channel * channel return -1; } - drive->transfer_length = sect_cnt * IDE_SECTOR_SIZE; + drive->transfer_length = sect_cnt * HD_SECTOR_SIZE; drive->transfer_index = 0; channel->status.busy = 0; diff --git a/palacios/src/devices/ide.c b/palacios/src/devices/ide.c index 23b2291..60be9de 100644 --- a/palacios/src/devices/ide.c +++ b/palacios/src/devices/ide.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "ide-types.h" #include "atapi-types.h" @@ -95,18 +96,6 @@ static inline const char * dma_port_to_str(uint16_t port) { } -static const char * ide_dev_type_strs[] = {"NONE", "HARDDISK", "CDROM" }; - - -static inline const char * device_type_to_str(v3_ide_dev_type_t type) { - if (type > 2) { - return NULL; - } - - return ide_dev_type_strs[type]; -} - - struct ide_cd_state { struct atapi_sense_data sense; @@ -131,11 +120,11 @@ struct ide_hd_state { struct ide_drive { // Command Registers - v3_ide_dev_type_t drive_type; + v3_block_type_t drive_type; union { - struct v3_ide_cd_ops * cd_ops; - struct v3_ide_hd_ops * hd_ops; + struct v3_cd_ops * cd_ops; + struct v3_hd_ops * hd_ops; }; @@ -304,7 +293,7 @@ static void drive_reset(struct ide_drive * drive) { PrintDebug("Resetting drive %s\n", drive->model); - if (drive->drive_type == IDE_CDROM) { + if (drive->drive_type == BLOCK_CDROM) { drive->cylinder = 0xeb14; } else { drive->cylinder = 0x0000; @@ -439,15 +428,15 @@ static int dma_read(struct vm_device * dev, struct ide_channel * channel) { while (prd_bytes_left > 0) { uint_t bytes_to_write = 0; - if (drive->drive_type == IDE_DISK) { - bytes_to_write = (prd_bytes_left > IDE_SECTOR_SIZE) ? IDE_SECTOR_SIZE : prd_bytes_left; + if (drive->drive_type == BLOCK_DISK) { + bytes_to_write = (prd_bytes_left > HD_SECTOR_SIZE) ? 
HD_SECTOR_SIZE : prd_bytes_left; if (ata_read(dev, channel, drive->data_buf, 1) == -1) { PrintError("Failed to read next disk sector\n"); return -1; } - } else if (drive->drive_type == IDE_CDROM) { + } else if (drive->drive_type == BLOCK_CDROM) { if (atapi_cmd_is_data_op(drive->cd_state.atapi_cmd)) { bytes_to_write = (prd_bytes_left > ATAPI_BLOCK_SIZE) ? ATAPI_BLOCK_SIZE : prd_bytes_left; @@ -486,12 +475,12 @@ static int dma_read(struct vm_device * dev, struct ide_channel * channel) { channel->dma_tbl_index++; - if (drive->drive_type == IDE_DISK) { - if (drive->transfer_index % IDE_SECTOR_SIZE) { + if (drive->drive_type == BLOCK_DISK) { + if (drive->transfer_index % HD_SECTOR_SIZE) { PrintError("We currently don't handle sectors that span PRD descriptors\n"); return -1; } - } else if (drive->drive_type == IDE_CDROM) { + } else if (drive->drive_type == BLOCK_CDROM) { if (atapi_cmd_is_data_op(drive->cd_state.atapi_cmd)) { if (drive->transfer_index % ATAPI_BLOCK_SIZE) { PrintError("We currently don't handle ATAPI BLOCKS that span PRD descriptors\n"); @@ -571,7 +560,7 @@ static int dma_write(struct vm_device * dev, struct ide_channel * channel) { uint_t bytes_to_write = 0; - bytes_to_write = (prd_bytes_left > IDE_SECTOR_SIZE) ? IDE_SECTOR_SIZE : prd_bytes_left; + bytes_to_write = (prd_bytes_left > HD_SECTOR_SIZE) ? 
HD_SECTOR_SIZE : prd_bytes_left; ret = read_guest_pa_memory(dev->vm, prd_entry.base_addr + prd_offset, bytes_to_write, drive->data_buf); @@ -599,7 +588,7 @@ static int dma_write(struct vm_device * dev, struct ide_channel * channel) { channel->dma_tbl_index++; - if (drive->transfer_index % IDE_SECTOR_SIZE) { + if (drive->transfer_index % HD_SECTOR_SIZE) { PrintError("We currently don't handle sectors that span PRD descriptors\n"); return -1; } @@ -789,7 +778,7 @@ static int write_cmd_port(ushort_t port, void * src, uint_t length, struct vm_de switch (channel->cmd_reg) { case 0xa1: // ATAPI Identify Device Packet - if (drive->drive_type != IDE_CDROM) { + if (drive->drive_type != BLOCK_CDROM) { drive_reset(drive); // JRL: Should we abort here? @@ -805,7 +794,7 @@ static int write_cmd_port(ushort_t port, void * src, uint_t length, struct vm_de } break; case 0xec: // Identify Device - if (drive->drive_type != IDE_DISK) { + if (drive->drive_type != BLOCK_DISK) { drive_reset(drive); // JRL: Should we abort here? 
@@ -821,7 +810,7 @@ static int write_cmd_port(ushort_t port, void * src, uint_t length, struct vm_de break; case 0xa0: // ATAPI Command Packet - if (drive->drive_type != IDE_CDROM) { + if (drive->drive_type != BLOCK_CDROM) { ide_abort_command(dev, channel); } @@ -868,7 +857,7 @@ static int write_cmd_port(ushort_t port, void * src, uint_t length, struct vm_de drive->hd_state.cur_sector_num = 1; - drive->transfer_length = sect_cnt * IDE_SECTOR_SIZE; + drive->transfer_length = sect_cnt * HD_SECTOR_SIZE; drive->transfer_index = 0; if (channel->dma_status.active == 1) { @@ -891,7 +880,7 @@ static int write_cmd_port(ushort_t port, void * src, uint_t length, struct vm_de drive->hd_state.cur_sector_num = 1; - drive->transfer_length = sect_cnt * IDE_SECTOR_SIZE; + drive->transfer_length = sect_cnt * HD_SECTOR_SIZE; drive->transfer_index = 0; if (channel->dma_status.active == 1) { @@ -1011,7 +1000,7 @@ static int write_data_port(ushort_t port, void * src, uint_t length, struct vm_d static int read_hd_data(uint8_t * dst, uint_t length, struct vm_device * dev, struct ide_channel * channel) { struct ide_drive * drive = get_selected_drive(channel); - int data_offset = drive->transfer_index % IDE_SECTOR_SIZE; + int data_offset = drive->transfer_index % HD_SECTOR_SIZE; @@ -1048,7 +1037,7 @@ static int read_hd_data(uint8_t * dst, uint_t length, struct vm_device * dev, st * cur_sector_num is configured depending on the operation we are currently running * We also trigger an interrupt if this is the last byte to transfer, regardless of sector count */ - if (((drive->transfer_index % (IDE_SECTOR_SIZE * drive->hd_state.cur_sector_num)) == 0) || + if (((drive->transfer_index % (HD_SECTOR_SIZE * drive->hd_state.cur_sector_num)) == 0) || (drive->transfer_index == drive->transfer_length)) { if (drive->transfer_index < drive->transfer_length) { // An increment is complete, but there is still more data to be transferred... 
@@ -1175,12 +1164,12 @@ static int ide_read_data_port(ushort_t port, void * dst, uint_t length, struct v return read_drive_id((uint8_t *)dst, length, dev, channel); } - if (drive->drive_type == IDE_CDROM) { + if (drive->drive_type == BLOCK_CDROM) { if (read_cd_data((uint8_t *)dst, length, dev, channel) == -1) { PrintError("IDE: Could not read CD Data\n"); return -1; } - } else if (drive->drive_type == IDE_DISK) { + } else if (drive->drive_type == BLOCK_DISK) { if (read_hd_data((uint8_t *)dst, length, dev, channel) == -1) { PrintError("IDE: Could not read HD Data\n"); return -1; @@ -1259,7 +1248,7 @@ static int write_port_std(ushort_t port, void * src, uint_t length, struct vm_de drive = get_selected_drive(channel); // Selecting a non-present device is a no-no - if (drive->drive_type == IDE_NONE) { + if (drive->drive_type == BLOCK_NONE) { PrintDebug("Attempting to select a non-present drive\n"); channel->error_reg.abort = 1; channel->status.error = 1; @@ -1296,7 +1285,7 @@ static int read_port_std(ushort_t port, void * dst, uint_t length, struct vm_dev // if no drive is present just return 0 + reserved bits - if (drive->drive_type == IDE_NONE) { + if (drive->drive_type == BLOCK_NONE) { if ((port == PRI_DRV_SEL_PORT) || (port == SEC_DRV_SEL_PORT)) { *(uint8_t *)dst = 0xa0; @@ -1367,7 +1356,7 @@ static void init_drive(struct ide_drive * drive) { drive->sector_num = 0x01; drive->cylinder = 0x0000; - drive->drive_type = IDE_NONE; + drive->drive_type = BLOCK_NONE; memset(drive->model, 0, sizeof(drive->model)); @@ -1625,7 +1614,7 @@ int v3_ide_get_geometry(struct vm_device * ide_dev, int channel_num, int drive_n struct ide_channel * channel = &(ide->channels[channel_num]); struct ide_drive * drive = &(channel->drives[drive_num]); - if (drive->drive_type == IDE_NONE) { + if (drive->drive_type == BLOCK_NONE) { return -1; } @@ -1643,7 +1632,7 @@ int v3_ide_register_cdrom(struct vm_device * ide_dev, uint_t bus_num, uint_t drive_num, char * dev_name, - struct v3_ide_cd_ops * 
ops, + struct v3_cd_ops * ops, void * private_data) { struct ide_internal * ide = (struct ide_internal *)(ide_dev->private_data); @@ -1656,7 +1645,7 @@ int v3_ide_register_cdrom(struct vm_device * ide_dev, channel = &(ide->channels[bus_num]); drive = &(channel->drives[drive_num]); - if (drive->drive_type != IDE_NONE) { + if (drive->drive_type != BLOCK_NONE) { PrintError("Device slot (bus=%d, drive=%d) already occupied\n", bus_num, drive_num); return -1; } @@ -1668,7 +1657,7 @@ int v3_ide_register_cdrom(struct vm_device * ide_dev, } - drive->drive_type = IDE_CDROM; + drive->drive_type = BLOCK_CDROM; drive->cd_ops = ops; @@ -1687,7 +1676,7 @@ int v3_ide_register_harddisk(struct vm_device * ide_dev, uint_t bus_num, uint_t drive_num, char * dev_name, - struct v3_ide_hd_ops * ops, + struct v3_hd_ops * ops, void * private_data) { struct ide_internal * ide = (struct ide_internal *)(ide_dev->private_data); @@ -1700,14 +1689,14 @@ int v3_ide_register_harddisk(struct vm_device * ide_dev, channel = &(ide->channels[bus_num]); drive = &(channel->drives[drive_num]); - if (drive->drive_type != IDE_NONE) { + if (drive->drive_type != BLOCK_NONE) { PrintError("Device slot (bus=%d, drive=%d) already occupied\n", bus_num, drive_num); return -1; } strncpy(drive->model, dev_name, sizeof(drive->model) - 1); - drive->drive_type = IDE_DISK; + drive->drive_type = BLOCK_DISK; drive->hd_state.accessed = 0; drive->hd_state.mult_sector_num = 1; diff --git a/palacios/src/devices/lnx_virtio_blk.c b/palacios/src/devices/lnx_virtio_blk.c index cdbad38..07870b2 100644 --- a/palacios/src/devices/lnx_virtio_blk.c +++ b/palacios/src/devices/lnx_virtio_blk.c @@ -20,11 +20,31 @@ #include #include #include - +#include +#include +#include #include +#define BLK_CAPACITY_PORT 20 +#define BLK_MAX_SIZE_PORT 28 +#define BLK_MAX_SEG_PORT 32 +#define BLK_CYLINDERS_PORT 36 +#define BLK_HEADS_PORT 38 +#define BLK_SECTS_PORT 39 + +#define BLK_IN_REQ 0 +#define BLK_OUT_REQ 1 +#define BLK_SCSI_CMD 2 + +#define 
BLK_BARRIER_FLAG 0x80000000 + +#define BLK_STATUS_OK 0 +#define BLK_STATUS_ERR 1 +#define BLK_STATUS_NOT_SUPPORTED 2 + + struct blk_config { uint64_t capacity; uint32_t max_size; @@ -36,6 +56,13 @@ struct blk_config { +struct blk_op_hdr { + uint32_t type; + uint32_t prior; + uint64_t sector; +} __attribute__((packed)); + +#define QUEUE_SIZE 128 /* Host Feature flags */ #define VIRTIO_BARRIER 0x01 /* Does host support barriers? */ @@ -45,22 +72,233 @@ struct blk_config { + struct virtio_blk_state { struct blk_config block_cfg; struct virtio_config virtio_cfg; struct vm_device * pci_bus; struct pci_device * pci_dev; - - struct virtio_device * virtio_dev; // the virtio device struction for _this_ device + struct virtio_queue queue; + + union { + struct v3_cd_ops * cd_ops; + struct v3_hd_ops * hd_ops; + }; + + v3_block_type_t block_type; + void * backend_data; int io_range_size; }; +static int virtio_free(struct vm_device * dev) { + return -1; +} + +static int virtio_reset(struct vm_device * dev) { + struct virtio_blk_state * virtio = (struct virtio_blk_state *)dev->private_data; + + virtio->queue.ring_desc_addr = 0; + virtio->queue.ring_avail_addr = 0; + virtio->queue.ring_used_addr = 0; + virtio->queue.pfn = 0; + virtio->queue.cur_avail_idx = 0; + + virtio->virtio_cfg.status = 0; + virtio->virtio_cfg.pci_isr = 0; + + return 0; +} +/* Reads len bytes into buf starting at the request's sector (the virtio block spec counts it in 512 byte units). Returns the backend's result, or -1 for a misaligned request or when no backend is attached */ +static int handle_read_op(struct vm_device * dev, uint8_t * buf, uint64_t sector, uint32_t len) { + struct virtio_blk_state * virtio = (struct virtio_blk_state *)dev->private_data; + + if (virtio->block_type == BLOCK_DISK) { + if (len % HD_SECTOR_SIZE) { + PrintError("Read of something that is not a sector len %d, mod=%d\n", len, len % HD_SECTOR_SIZE); + return -1; + } + + // The backend takes its lba in sectors and scales to bytes itself, so pass the sector number unscaled + PrintDebug("Reading Disk\n"); + + return virtio->hd_ops->read(buf, len / HD_SECTOR_SIZE, sector, virtio->backend_data); + } else if (virtio->block_type == BLOCK_CDROM) { + if ((len % ATAPI_BLOCK_SIZE) || (sector % (ATAPI_BLOCK_SIZE / HD_SECTOR_SIZE))) { + PrintError("Read of something that is not an 
ATAPI block len %d, mod=%d\n", len, len % ATAPI_BLOCK_SIZE); + return -1; + } + // Convert 512 byte request sectors into the 2048 byte blocks the CD backend addresses + return virtio->cd_ops->read(buf, len / ATAPI_BLOCK_SIZE, sector / (ATAPI_BLOCK_SIZE / HD_SECTOR_SIZE), virtio->backend_data); + + } + + return -1; +} + +/* Writes len bytes from buf starting at the request's sector; only hard disks are writable. Returns the backend's result, or -1 for a misaligned request or a non-disk device */ +static int handle_write_op(struct vm_device * dev, uint8_t * buf, uint64_t sector, uint32_t len) { + struct virtio_blk_state * virtio = (struct virtio_blk_state *)dev->private_data; + + + if (virtio->block_type == BLOCK_DISK) { + if (len % HD_SECTOR_SIZE) { + PrintError("Write of something that is not a sector len %d, mod=%d\n", len, len % HD_SECTOR_SIZE); + return -1; + } + + PrintDebug("Writing Disk\n"); + + return virtio->hd_ops->write(buf, len / HD_SECTOR_SIZE, sector, virtio->backend_data); + } + + return -1; +} + + +static int handle_block_op(struct vm_device * dev, struct vring_desc * hdr_desc, + struct vring_desc * buf_desc, struct vring_desc * status_desc) { + struct virtio_blk_state * virtio = (struct virtio_blk_state *)dev->private_data; + struct blk_op_hdr * hdr = NULL; + uint8_t * buf = NULL; + uint8_t * status = NULL; + + + PrintDebug("Handling Block op\n"); + + if (guest_pa_to_host_va(dev->vm, hdr_desc->addr_gpa, (addr_t *)&(hdr)) == -1) { + PrintError("Could not translate block header address\n"); + return -1; + } + + + if (guest_pa_to_host_va(dev->vm, buf_desc->addr_gpa, (addr_t *)&(buf)) == -1) { + PrintError("Could not translate buffer address\n"); + return -1; + } + + if (guest_pa_to_host_va(dev->vm, status_desc->addr_gpa, (addr_t *)&(status)) == -1) { + PrintError("Could not translate status address\n"); + return -1; + } + + if (hdr->type == BLK_IN_REQ) { + if (virtio->block_type != BLOCK_NONE) { + if (handle_read_op(dev, buf, hdr->sector, buf_desc->length) == -1) { + *status = BLK_STATUS_ERR; + } else { + *status = BLK_STATUS_OK; + } + } else { + *status = BLK_STATUS_NOT_SUPPORTED; + } + + } else if (hdr->type == BLK_OUT_REQ) { + if (virtio->block_type == BLOCK_DISK) { + if (handle_write_op(dev, buf, hdr->sector, 
buf_desc->length) == -1) { + *status = BLK_STATUS_ERR; + } else { + *status = BLK_STATUS_OK; + } + } else { + *status = BLK_STATUS_NOT_SUPPORTED; + } + } else { // BLK_SCSI_CMD and any unrecognized request type: always report a status back to the guest + *status = BLK_STATUS_NOT_SUPPORTED; + } + + + + + return 0; +} + + +/* Services every request posted on the avail ring since the last kick, publishes each completion on the used ring, then raises the device IRQ unless the guest set VIRTIO_NO_IRQ_FLAG. Returns 0 on success, -1 on a malformed descriptor chain or when handle_block_op fails */ +static int handle_kick(struct vm_device * dev) { + struct virtio_blk_state * virtio = (struct virtio_blk_state *)dev->private_data; + struct virtio_queue * q = &(virtio->queue); + + + PrintDebug("VIRTIO KICK: cur_index=%d, avail_index=%d\n", q->cur_avail_idx, q->avail->index); + // Ring indices are free-running 16 bit counters (per the virtio ring layout): compare for inequality and reduce modulo QUEUE_SIZE only when indexing + while (q->cur_avail_idx != q->avail->index) { + struct vring_desc * hdr_desc = NULL; + struct vring_desc * buf_desc = NULL; + struct vring_desc * status_desc = NULL; + uint16_t chain_idx = q->avail->ring[q->cur_avail_idx % QUEUE_SIZE]; + uint32_t req_len = 0; + int chained = 1; + + PrintDebug("chained=%d, Chain Index=%d\n", chained, chain_idx); + // Descriptor indices come from the guest; wrap them so they can never index outside the QUEUE_SIZE entry table + while (chained) { + hdr_desc = &(q->desc[chain_idx % QUEUE_SIZE]); + + PrintDebug("Header Descriptor gpa=%p, len=%d, flags=%x, next=%d\n", + (void *)(hdr_desc->addr_gpa), hdr_desc->length, hdr_desc->flags, hdr_desc->next); + + + if (!(hdr_desc->flags & VIRTIO_NEXT_FLAG)) { + PrintError("Block operations must chain a buffer descriptor\n"); + return -1; + } + + buf_desc = &(q->desc[hdr_desc->next % QUEUE_SIZE]); + + + PrintDebug("Buffer Descriptor gpa=%p, len=%d, flags=%x, next=%d\n", + (void *)(buf_desc->addr_gpa), buf_desc->length, buf_desc->flags, buf_desc->next); + + + if (!(buf_desc->flags & VIRTIO_NEXT_FLAG)) { + PrintError("Block operations must chain a status descriptor\n"); + return -1; + } + + status_desc = &(q->desc[buf_desc->next % QUEUE_SIZE]); + + // We detect whether we are chained here... 
+ if (status_desc->flags & VIRTIO_NEXT_FLAG) { + chained = 1; + chain_idx = status_desc->next; + } else { + chained = 0; + } + + PrintDebug("Status Descriptor gpa=%p, len=%d, flags=%x, next=%d\n", + (void *)(status_desc->addr_gpa), status_desc->length, status_desc->flags, status_desc->next); + + + if (handle_block_op(dev, hdr_desc, buf_desc, status_desc) == -1) { + PrintError("Error handling block operation\n"); + return -1; + } + + req_len += (buf_desc->length + status_desc->length); + + } + // The used element id is the head descriptor index the guest posted in the avail ring, not our ring cursor + q->used->ring[q->used->index % QUEUE_SIZE].id = q->avail->ring[q->cur_avail_idx % QUEUE_SIZE]; + q->used->ring[q->used->index % QUEUE_SIZE].length = req_len; // What do we set this to???? + + q->used->index = (uint16_t)(q->used->index + 1); + q->cur_avail_idx = (uint16_t)(q->cur_avail_idx + 1); + } + + if (!(q->avail->flags & VIRTIO_NO_IRQ_FLAG)) { + PrintDebug("Raising IRQ %d\n", virtio->pci_dev->config_header.intr_line); + v3_pci_raise_irq(virtio->pci_bus, 0, virtio->pci_dev); + } + + return 0; +} + static int virtio_io_write(uint16_t port, void * src, uint_t length, struct vm_device * dev) { struct virtio_blk_state * virtio = (struct virtio_blk_state *)dev->private_data; int port_idx = port % virtio->io_range_size; @@ -72,17 +310,84 @@ static int virtio_io_write(uint16_t port, void * src, uint_t length, struct vm_d switch (port_idx) { + case GUEST_FEATURES_PORT: + if (length != 4) { + PrintError("Illegal write length for guest features\n"); + return -1; + } + + virtio->virtio_cfg.guest_features = *(uint32_t *)src; + + break; + case VRING_PG_NUM_PORT: + if (length == 4) { + addr_t pfn = *(uint32_t *)src; + addr_t page_addr = (pfn << VIRTIO_PAGE_SHIFT); + + + virtio->queue.pfn = pfn; + + virtio->queue.ring_desc_addr = page_addr ; + virtio->queue.ring_avail_addr = page_addr + (QUEUE_SIZE * sizeof(struct vring_desc)); + virtio->queue.ring_used_addr = ( virtio->queue.ring_avail_addr + \ + sizeof(struct vring_avail) + \ + (QUEUE_SIZE * sizeof(uint16_t))); + + // 
round up to next page boundary. + virtio->queue.ring_used_addr = (virtio->queue.ring_used_addr + 0xfff) & ~0xfff; + + if (guest_pa_to_host_va(dev->vm, virtio->queue.ring_desc_addr, (addr_t *)&(virtio->queue.desc)) == -1) { + PrintError("Could not translate ring descriptor address\n"); + return -1; + } + + + if (guest_pa_to_host_va(dev->vm, virtio->queue.ring_avail_addr, (addr_t *)&(virtio->queue.avail)) == -1) { + PrintError("Could not translate ring available address\n"); + return -1; + } + + + if (guest_pa_to_host_va(dev->vm, virtio->queue.ring_used_addr, (addr_t *)&(virtio->queue.used)) == -1) { + PrintError("Could not translate ring used address\n"); + return -1; + } + + PrintDebug("RingDesc=%p, Avail=%p, Used=%p\n", + (void *)(virtio->queue.ring_desc_addr), + (void *)(virtio->queue.ring_avail_addr), + (void *)(virtio->queue.ring_used_addr)); + + } else { + PrintError("Illegal write length for page frame number\n"); + return -1; + } + break; + case VRING_Q_SEL_PORT: + virtio->virtio_cfg.vring_queue_selector = *(uint16_t *)src; + + if (virtio->virtio_cfg.vring_queue_selector != 0) { + PrintError("Virtio Block device only uses 1 queue, selected %d\n", + virtio->virtio_cfg.vring_queue_selector); + return -1; + } + + break; case VRING_Q_NOTIFY_PORT: - // handle output - PrintError("Notification\n"); - return -1; + PrintDebug("Handling Kick\n"); + if (handle_kick(dev) == -1) { + PrintError("Could not handle Block Notification\n"); + return -1; + } break; case VIRTIO_STATUS_PORT: + virtio->virtio_cfg.status = *(uint8_t *)src; + if (virtio->virtio_cfg.status == 0) { PrintDebug("Resetting device\n"); - return -1; - //reset + virtio_reset(dev); } + break; default: return -1; @@ -105,19 +410,62 @@ static int virtio_io_read(uint16_t port, void * dst, uint_t length, struct vm_de port, port_idx, length); switch (port_idx) { - // search for device.... 
- // call and return dev config read + case HOST_FEATURES_PORT: + if (length != 4) { + PrintError("Illegal read length for host features\n"); + return -1; + } + + *(uint32_t *)dst = virtio->virtio_cfg.host_features; + + break; + case VRING_PG_NUM_PORT: + if (length != 4) { + PrintError("Illegal read length for page frame number\n"); + return -1; + } + + *(uint32_t *)dst = virtio->queue.pfn; + + break; + case VRING_SIZE_PORT: + if (length != 2) { + PrintError("Illegal read length for vring size\n"); + return -1; + } + + *(uint16_t *)dst = virtio->queue.queue_size; + + break; + + case VIRTIO_STATUS_PORT: + if (length != 1) { + PrintError("Illegal read length for status\n"); + return -1; + } + + *(uint8_t *)dst = virtio->virtio_cfg.status; + break; + default: - return -1; + if ( (port_idx >= sizeof(struct virtio_config)) && + ((port_idx + length) <= (sizeof(struct virtio_config) + sizeof(struct blk_config))) ) { + // Block config follows the common virtio header: copy from the requested offset, and only if the whole access stays inside block_cfg + uint8_t * cfg_ptr = (uint8_t *)&(virtio->block_cfg) + (port_idx - sizeof(struct virtio_config)); + memcpy(dst, cfg_ptr, length); + + } else { + PrintError("Read of Unhandled Virtio Read\n"); + return -1; + } + + break; } - return length; -} -static int virtio_free(struct vm_device * dev) { - return -1; + return length; } @@ -133,6 +481,31 @@ static struct v3_device_ops dev_ops = { +int v3_virtio_register_cdrom(struct vm_device * dev, struct v3_cd_ops * ops, void * private_data) { + struct virtio_blk_state * virtio = (struct virtio_blk_state *)dev->private_data; + + virtio->block_type = BLOCK_CDROM; + virtio->cd_ops = ops; + virtio->backend_data = private_data; + // The virtio block spec counts capacity in 512 byte sectors; the CD backends report 2048 byte blocks, so scale (in 64 bits to avoid overflow) + virtio->block_cfg.capacity = (uint64_t)ops->get_capacity(private_data) * (ATAPI_BLOCK_SIZE / HD_SECTOR_SIZE); + + return 0; +} + + +int v3_virtio_register_harddisk(struct vm_device * dev, struct v3_hd_ops * ops, void * private_data) { + struct virtio_blk_state * virtio = (struct virtio_blk_state *)dev->private_data; + + virtio->block_type = BLOCK_DISK; + virtio->hd_ops = ops; + virtio->backend_data = private_data; + // Hard disk backends already report capacity in 512 byte sectors + virtio->block_cfg.capacity = ops->get_capacity(private_data); + + return 0; +} + 
static int virtio_init(struct guest_info * vm, void * cfg_data) { @@ -222,10 +595,20 @@ static int virtio_init(struct guest_info * vm, void * cfg_data) { virtio_state->pci_dev = pci_dev; virtio_state->pci_bus = pci_bus; - - /* Block configuration */ } - + + /* Block configuration */ + virtio_state->virtio_cfg.host_features = VIRTIO_SEG_MAX; + + // Virtio Block only uses one queue + virtio_state->queue.queue_size = QUEUE_SIZE; + + virtio_reset(dev); + + virtio_state->backend_data = NULL; + virtio_state->block_type = BLOCK_NONE; + virtio_state->hd_ops = NULL; + return 0; } diff --git a/palacios/src/devices/net_cd.c b/palacios/src/devices/net_cd.c index 26517bb..d8ea73e 100644 --- a/palacios/src/devices/net_cd.c +++ b/palacios/src/devices/net_cd.c @@ -158,7 +158,7 @@ static uint32_t cd_get_capacity(void * private_data) { return cd->capacity / ATAPI_BLOCK_SIZE; } -static struct v3_ide_cd_ops cd_ops = { +static struct v3_cd_ops cd_ops = { .read = cd_read, .get_capacity = cd_get_capacity, }; diff --git a/palacios/src/devices/net_hd.c b/palacios/src/devices/net_hd.c index bed3e42..8db6b97 100644 --- a/palacios/src/devices/net_hd.c +++ b/palacios/src/devices/net_hd.c @@ -97,8 +97,8 @@ static int recv_all(int socket, char * buf, int length) { static int hd_read(uint8_t * buf, int sector_count, uint64_t lba, void * private_data) { struct vm_device * hd_dev = (struct vm_device *)private_data; struct hd_state * hd = (struct hd_state *)(hd_dev->private_data); - int offset = lba * IDE_SECTOR_SIZE; - int length = sector_count * IDE_SECTOR_SIZE; + int offset = lba * HD_SECTOR_SIZE; + int length = sector_count * HD_SECTOR_SIZE; uint8_t status; uint32_t ret_len = 0; char nbd_cmd[4] = {0,0,0,0}; @@ -156,8 +156,8 @@ static int hd_read(uint8_t * buf, int sector_count, uint64_t lba, void * privat static int hd_write(uint8_t * buf, int sector_count, uint64_t lba, void * private_data) { struct vm_device * hd_dev = (struct vm_device *)private_data; struct hd_state * hd = (struct hd_state 
*)(hd_dev->private_data); - int offset = lba * IDE_SECTOR_SIZE; - int length = sector_count * IDE_SECTOR_SIZE; + int offset = lba * HD_SECTOR_SIZE; + int length = sector_count * HD_SECTOR_SIZE; uint8_t status; char nbd_cmd[4] = {0,0,0,0}; @@ -203,10 +203,10 @@ static uint64_t hd_get_capacity(void * private_data) { struct vm_device * hd_dev = (struct vm_device *)private_data; struct hd_state * hd = (struct hd_state *)(hd_dev->private_data); - return hd->capacity / IDE_SECTOR_SIZE; + return hd->capacity / HD_SECTOR_SIZE; } -static struct v3_ide_hd_ops hd_ops = { +static struct v3_hd_ops hd_ops = { .read = hd_read, .write = hd_write, .get_capacity = hd_get_capacity, diff --git a/palacios/src/devices/ram_cd.c b/palacios/src/devices/ram_cd.c index b6a1bf6..a139fc0 100644 --- a/palacios/src/devices/ram_cd.c +++ b/palacios/src/devices/ram_cd.c @@ -62,7 +62,7 @@ static uint32_t cd_get_capacity(void * private_data) { return cd->capacity / ATAPI_BLOCK_SIZE; } -static struct v3_ide_cd_ops cd_ops = { +static struct v3_cd_ops cd_ops = { .read = cd_read, .get_capacity = cd_get_capacity, }; diff --git a/palacios/src/devices/ram_hd.c b/palacios/src/devices/ram_hd.c index 49bf510..6c61ad4 100644 --- a/palacios/src/devices/ram_hd.c +++ b/palacios/src/devices/ram_hd.c @@ -43,8 +43,8 @@ struct hd_state { static int hd_read(uint8_t * buf, int sector_count, uint64_t lba, void * private_data) { struct vm_device * hd_dev = (struct vm_device *)private_data; struct hd_state * hd = (struct hd_state *)(hd_dev->private_data); - int offset = lba * IDE_SECTOR_SIZE; - int length = sector_count * IDE_SECTOR_SIZE; + int offset = lba * HD_SECTOR_SIZE; + int length = sector_count * HD_SECTOR_SIZE; // PrintDebug("Reading RAM HD at (LBA=%d) offset %d (length=%d)\n", (uint32_t)lba, offset, length); @@ -57,8 +57,8 @@ static int hd_read(uint8_t * buf, int sector_count, uint64_t lba, void * privat static int hd_write(uint8_t * buf, int sector_count, uint64_t lba, void * private_data) { struct vm_device * 
hd_dev = (struct vm_device *)private_data; struct hd_state * hd = (struct hd_state *)(hd_dev->private_data); - int offset = lba * IDE_SECTOR_SIZE; - int length = sector_count * IDE_SECTOR_SIZE; + int offset = lba * HD_SECTOR_SIZE; + int length = sector_count * HD_SECTOR_SIZE; memcpy((uint8_t *)(hd->disk_image + offset), buf, length); @@ -70,11 +70,11 @@ static uint64_t hd_get_capacity(void * private_data) { struct vm_device * hd_dev = (struct vm_device *)private_data; struct hd_state * hd = (struct hd_state *)(hd_dev->private_data); PrintDebug("Querying RAM HD capacity (bytes=%d) (ret = %d)\n", - hd->capacity, hd->capacity / IDE_SECTOR_SIZE); - return hd->capacity / IDE_SECTOR_SIZE; + hd->capacity, hd->capacity / HD_SECTOR_SIZE); + return hd->capacity / HD_SECTOR_SIZE; } -static struct v3_ide_hd_ops hd_ops = { +static struct v3_hd_ops hd_ops = { .read = hd_read, .write = hd_write, .get_capacity = hd_get_capacity, @@ -101,8 +101,8 @@ static int hd_init(struct guest_info * vm, void * cfg_data) { struct hd_state * hd = NULL; struct ram_hd_cfg * cfg = (struct ram_hd_cfg *)cfg_data; - if (cfg->size % IDE_SECTOR_SIZE) { - PrintError("HD image must be an integral of sector size (IDE_SECTOR_SIZE=%d)\n", IDE_SECTOR_SIZE); + if (cfg->size % HD_SECTOR_SIZE) { + PrintError("HD image must be an integral of sector size (HD_SECTOR_SIZE=%d)\n", HD_SECTOR_SIZE); return -1; } diff --git a/palacios/src/devices/sym_swap.c b/palacios/src/devices/sym_swap.c index 9bb795f..a3cfd77 100644 --- a/palacios/src/devices/sym_swap.c +++ b/palacios/src/devices/sym_swap.c @@ -19,24 +19,68 @@ #include #include +#include - - +#define SWAP_CAPACITY (4096 * HD_SECTOR_SIZE) struct swap_state { struct vm_device * blk_dev; + uint64_t capacity; + uint8_t * swap_space; + addr_t swap_base_addr; + }; +static uint64_t swap_get_capacity(void * private_data) { + struct vm_device * dev = (struct vm_device *)private_data; + struct swap_state * swap = (struct swap_state *)(dev->private_data); + + return 
swap->capacity / HD_SECTOR_SIZE; +} + +static int swap_read(uint8_t * buf, int sector_count, uint64_t lba, void * private_data) { + struct vm_device * dev = (struct vm_device *)private_data; + struct swap_state * swap = (struct swap_state *)(dev->private_data); + int offset = lba * HD_SECTOR_SIZE; + int length = sector_count * HD_SECTOR_SIZE; + + PrintDebug("SymSwap: Reading %d bytes\n", length); + + memcpy(buf, swap->swap_space + offset, length); + + return 0; +} + +static int swap_write(uint8_t * buf, int sector_count, uint64_t lba, void * private_data) { + struct vm_device * dev = (struct vm_device *)private_data; + struct swap_state * swap = (struct swap_state *)(dev->private_data); + int offset = lba * HD_SECTOR_SIZE; + int length = sector_count * HD_SECTOR_SIZE; + + PrintDebug("SymSwap: Writing %d bytes\n", length); + + memcpy(swap->swap_space + offset, buf, length); + + return 0; +} + static int swap_free(struct vm_device * dev) { return -1; } +static struct v3_hd_ops hd_ops = { + .read = swap_read, + .write = swap_write, + .get_capacity = swap_get_capacity, +}; + + static struct v3_device_ops dev_ops = { .free = swap_free, @@ -67,6 +111,10 @@ static int swap_init(struct guest_info * vm, void * cfg_data) { swap = (struct swap_state *)V3_Malloc(sizeof(struct swap_state)); swap->blk_dev = virtio_blk; + swap->capacity = SWAP_CAPACITY; + + swap->swap_base_addr = (addr_t)V3_AllocPages(swap->capacity / 4096); + swap->swap_space = (uint8_t *)V3_VAddr((void *)(swap->swap_base_addr)); struct vm_device * dev = v3_allocate_device("SYM_SWAP", &dev_ops, swap); @@ -75,6 +123,9 @@ static int swap_init(struct guest_info * vm, void * cfg_data) { return -1; } + + v3_virtio_register_harddisk(virtio_blk, &hd_ops, dev); + return 0; } diff --git a/palacios/src/palacios/vmm_config.c b/palacios/src/palacios/vmm_config.c index deddd18..00123f3 100644 --- a/palacios/src/palacios/vmm_config.c +++ b/palacios/src/palacios/vmm_config.c @@ -219,6 +219,8 @@ static int setup_devices(struct 
guest_info * info, struct v3_vm_config * config_ v3_create_device(info, "LNX_VIRTIO_BLK", "PCI"); + v3_create_device(info, "SYM_SWAP", "LNX_VIRTIO_BLK"); + v3_create_device(info, "IDE", &ide_config); } else { v3_create_device(info, "IDE", NULL);