/* * This file is part of the Palacios Virtual Machine Monitor developed * by the V3VEE Project with funding from the United States National * Science Foundation and the Department of Energy. * * The V3VEE Project is a joint project between Northwestern University * and the University of New Mexico. You can find out more at * http://www.v3vee.org * * Copyright (c) 2008, The V3VEE Project * All rights reserved. * * Author: Yang Yang * Weixiao Fu * * This is free software. You are permitted to use, * redistribute, and modify it as specified in the file "V3VEE_LICENSE". */ #include #include #include #include #ifndef V3_CONFIG_DEBUG_QCOWDISK #undef PrintDebug #define PrintDebug(fmt, args...) #endif #define V3_PACKED __attribute__((packed)) #define QCOW2_MAGIC (('Q'<<24) | ('F'<<16) | ('I'<<8) | (0xfb)) #define QCOW2_COPIED (1ULL<<63) #define QCOW2_COMPRESSED (1ULL<<62) #define INIT_BUFF_SIZE (512) #define ERROR(...) PrintError(VM_NONE,VCORE_NONE,"qcow2: " __VA_ARGS__) #define DEBUG(...) PrintDebug(VM_NONE,VCORE_NONE,"qcow2: " __VA_ARGS__) #define INFO(...) V3_Print(VM_NONE,VCORE_NONE,"qcow2: " __VA_ARGS__) // the header structure for QCOW2 typedef struct v3_qcow2_header { uint32_t magic; uint32_t version; uint64_t backing_file_offset; uint32_t backing_file_size; uint32_t cluster_bits; uint64_t size; uint32_t crypt_method; uint32_t l1_size; uint64_t l1_table_offset; uint64_t refcount_table_offset; uint32_t refcount_table_clusters; uint32_t nb_snapshots; uint64_t snapshots_offset; } V3_PACKED v3_qcow2_header_t; // the header structure for each QCOW2 snapshot typedef struct v3_qcow2_snapshot_header { uint64_t l1_table_offset; uint32_t l1_size; uint16_t id_str_size; uint16_t name_size; uint32_t date_sec; uint32_t date_nsec; uint64_t vm_clock_nsec; uint32_t vm_state_size; uint32_t extra_data_size; } V3_PACKED v3_qcow2_snapshot_header_t; // the private structure used by QCOW2 implementation typedef struct v3_qcow2 { v3_file_t fd; struct v3_qcow2 *backing_qcow2; char *backing_file_name; uint64_t cluster_size; uint32_t l1_bits; uint64_t l1_mask; uint32_t l2_bits; uint64_t l2_mask; uint32_t refcount_block_bits; uint64_t refcount_block_mask; uint32_t refcount_table_bits; uint64_t refcount_table_mask; uint64_t free_cluster_index; v3_qcow2_header_t header; } v3_qcow2_t; typedef struct v3_qcow2_table_entry { uint64_t offset: 62; uint8_t compressed: 1; uint8_t copied: 1; } v3_qcow2_table_entry_t; // our implementations for Big/Little Endian conversion static inline uint16_t be16toh(uint16_t v) { return ((v&0xff)<<8) | ((v&0xff00)>>8); } static inline uint32_t be32toh(uint32_t v) { return (((uint32_t)be16toh(v&0x0000ffffU))<<16) | (uint32_t)be16toh((v&0xffff0000U)>>16); } static inline uint64_t be64toh(uint64_t v) { return (((uint64_t)be32toh(v&0x00000000ffffffffU))<<32) | (uint64_t)be32toh((v&0xffffffff00000000U)>>32); } static inline uint16_t htobe16(uint16_t v) { return be16toh(v); } static inline uint64_t htobe64(uint64_t v) { return be64toh(v); } uint64_t v3_qcow2_get_capacity(v3_qcow2_t *pf) { return pf ? pf->header.size : 0; } static inline uint64_t v3_qcow2_get_cluster_index(v3_qcow2_t *pf, uint64_t file_pos) { if (!pf) { return 0; } return file_pos >> pf->header.cluster_bits; } static int v3_qcow2_get_refcount(v3_qcow2_t *pf, uint64_t idx) { int res = -1, ret = 0; uint16_t val = 0; uint64_t table_idx = 0, block_idx = 0, block_offset = 0; if (!pf) { return res; } block_idx = idx & pf->refcount_block_mask; idx >>= pf->refcount_block_bits; table_idx = idx & pf->refcount_table_mask; ret = v3_file_read(pf->fd, (uint8_t*)&block_offset, sizeof(uint64_t), pf->header.refcount_table_offset + table_idx * sizeof(uint64_t)); // FIXME: how to deal with the wrong position if (ret != sizeof(uint64_t)) { ERROR("read failed\n"); return 0; } block_offset = be64toh(block_offset); // if cluster is not yet allocated, return 0 if (!block_offset) { return 0; } ret = v3_file_read(pf->fd, (uint8_t*)&val, sizeof(uint16_t), block_offset + block_idx * sizeof(uint16_t)); if (ret != sizeof(uint16_t)) { ERROR("read failed\n"); return 0; } val = be16toh(val); return val; } /* * this function is a wrapper of v3_qcow2_get_refcount * takes file offset and returns the reference count * it is commented to avoid compile warning * */ __attribute__((unused)) static int v3_qcow2_get_refcount_by_file_position(v3_qcow2_t *pf, uint64_t file_pos) { int res = -1; uint64_t idx = 0; if (!pf) { return res; } idx = v3_qcow2_get_cluster_index(pf, file_pos); return v3_qcow2_get_refcount(pf, idx); } /* * to allocate the contiguous clusters * return the cluster index in the QCOW2 file * return positive if successfully, otherwise zero(0) */ static uint64_t v3_qcow2_alloc_clusters(v3_qcow2_t *pf, uint32_t nb_clusters) { uint32_t i; int refcount = 0; uint64_t idx = 0, ret_idx = 0; if(!nb_clusters) { return 0; } if(!pf) { return 0; } /* * referenced the algorithm from Qemu */ retry: ret_idx = pf->free_cluster_index; for (i = 0; i < nb_clusters; i++) { idx = pf->free_cluster_index++; refcount = v3_qcow2_get_refcount(pf, idx); if(refcount < 0) { return 0; } else if(refcount) { goto retry; } } return ret_idx; } static int v3_qcow2_addr_split(v3_qcow2_t *qc2, uint64_t addr, uint64_t *l1_idx, uint64_t *l2_idx, uint64_t *offset) { if (!qc2 || !l1_idx || !l2_idx || !offset) { return -1; } *offset = addr & (qc2->cluster_size - 1); addr = addr >> qc2->header.cluster_bits; *l2_idx = addr & qc2->l2_mask; addr = addr >> qc2->l2_bits; *l1_idx = addr * qc2->l1_mask; return 0; } static v3_qcow2_t *v3_qcow2_open(struct v3_vm_info* vm, char *path, int flags) { int ret = 0; if(!path) { return NULL; } v3_qcow2_t *res = (v3_qcow2_t*)V3_Malloc(sizeof(v3_qcow2_t)); if (!res) { ERROR("failed to allocate\n"); goto failed; } memset(res, 0, sizeof(v3_qcow2_t)); res->fd = v3_file_open(vm, path, flags); if (res->fd < 0) { ERROR("failed to open underlying file\n"); goto clean_mem; } ret = v3_file_read(res->fd, (uint8_t*)&res->header, sizeof(res->header), 0); if (ret != sizeof(res->header)) { ERROR("failed to read header\n"); goto clean_mem; } res->header.magic = be32toh(res->header.magic); if (res->header.magic != QCOW2_MAGIC) { ERROR("wrong magic in header\n"); goto clean_file; } #ifdef __DEBUG__ else { DEBUG("right magic\n"); } #endif res->header.version = be32toh(res->header.version); if (res->header.version < 2) { ERROR("unsupported version: %d\n", res->header.version); goto clean_file; } #ifdef __DEBUG__ else { DEBUG("supported version: %d\n", res->header.version); } #endif res->header.backing_file_offset = be64toh(res->header.backing_file_offset); res->header.backing_file_size = be32toh(res->header.backing_file_size); if (res->header.backing_file_size) { #ifdef __DEBUG__ DEBUG("backing file size is larger than zero: %d\n", res->header.backing_file_size); #endif res->backing_file_name = (char*)V3_Malloc(res->header.backing_file_size + 1); if (!res->backing_file_name) { ERROR("failed to allocate memory for backing file name\n"); goto clean_file; } res->backing_file_name[res->header.backing_file_size] = 0; ret = v3_file_read(res->fd, (void*)res->backing_file_name, res->header.backing_file_size, res->header.backing_file_offset); if(ret != res->header.backing_file_size) { ERROR("failed to read backing file name from %s\n", path); V3_Free(res->backing_file_name); goto clean_file; } res->backing_qcow2 = v3_qcow2_open(vm, res->backing_file_name, flags); if(res->backing_qcow2) { DEBUG("load backing file successfully\n"); } else { ERROR("failed to load backing file, exit\n"); return NULL; } DEBUG("successfully read the backing file name: %s\n", res->backing_file_name); } else { // no backing file res->backing_qcow2 = NULL; DEBUG("read no backing file name since size == %d\n", res->header.backing_file_size); } res->header.cluster_bits = be32toh(res->header.cluster_bits); res->cluster_size = 1 << res->header.cluster_bits; res->l2_bits = res->header.cluster_bits - 3; res->l2_mask = (((uint64_t)1)<l2_bits) - 1; res->l1_bits = sizeof(uint64_t) * 8 - res->l2_bits - res->header.cluster_bits; res->l1_mask = (((uint64_t)1)<l1_bits) - 1; DEBUG("cluster_bits: %d\n", res->header.cluster_bits); res->header.size = be64toh(res->header.size); DEBUG("size: %llu\n", res->header.size); res->header.crypt_method = be32toh(res->header.crypt_method); if (res->header.crypt_method) { DEBUG("AES encryption\n"); } else { DEBUG("no encryption\n"); } res->header.l1_size = be32toh(res->header.l1_size); res->header.l1_table_offset = be64toh(res->header.l1_table_offset); res->header.refcount_table_offset = be64toh(res->header.refcount_table_offset); res->header.refcount_table_clusters = be32toh(res->header.refcount_table_clusters); res->refcount_block_bits = res->header.cluster_bits - 1; res->refcount_block_mask = (1LL<refcount_block_bits) - 1; res->refcount_table_bits = 8 * sizeof(uint64_t) - res->refcount_block_bits; res->refcount_table_mask = (1LL<refcount_table_bits) - 1; res->header.nb_snapshots = be32toh(res->header.nb_snapshots); res->header.snapshots_offset = be64toh(res->header.snapshots_offset); DEBUG("l1 size: %d\n", res->header.l1_size); DEBUG("l1 table offset: %llu\n", res->header.l1_table_offset); DEBUG("refcount_table_offset: %llu\n", res->header.refcount_table_offset); DEBUG("refcount_table_clusters: %d\n", res->header.refcount_table_clusters); DEBUG("nb_snapshots: %d\n", res->header.nb_snapshots); DEBUG("snapshots_offset: %llu\n", res->header.snapshots_offset); res->free_cluster_index = 1; // TODO: initialize the free cluster index to a reasonable value while (1) { if (v3_qcow2_get_refcount(res, res->free_cluster_index)) { res->free_cluster_index++; } else { break; } } return res; clean_file: v3_file_close(res->fd); clean_mem: V3_Free(res); failed: return NULL; } static void v3_qcow2_close(v3_qcow2_t *pf) { if(!pf) { return; } v3_file_close(pf->fd); if (pf->backing_file_name) { V3_Free(pf->backing_file_name); } if (pf->backing_qcow2) { v3_qcow2_close(pf->backing_qcow2); } V3_Free(pf); } static uint64_t v3_qcow2_get_cluster_offset(v3_qcow2_t *qc, uint64_t l1_idx, uint64_t l2_idx, uint64_t offset) { uint64_t res = 0; uint64_t l1_val = 0, l2_val = 0; v3_qcow2_table_entry_t *ent = NULL; int ret = 0; if (!qc) { goto done; } if (l1_idx >= qc->header.l1_size) { return 0ULL; } ret = v3_file_read(qc->fd, (void*)&l1_val, sizeof(uint64_t), l1_idx * sizeof(uint64_t) + qc->header.l1_table_offset); if (ret != sizeof(uint64_t)) { ERROR("Failed to read L1\n"); goto done; } l1_val = be64toh(l1_val); ent = (v3_qcow2_table_entry_t*)&l1_val; if (!ent->offset) { goto done; } ret = v3_file_read(qc->fd, (void*)&l2_val, sizeof(uint64_t), l2_idx * sizeof(uint64_t) + ent->offset); if (ret != sizeof(uint64_t)) { ERROR("Failed to read L2\n"); goto done; } l2_val = be64toh(l2_val); ent = (v3_qcow2_table_entry_t*)&l2_val; res = ent->offset; done: return res; } static int v3_qcow2_read_cluster(v3_qcow2_t *pf, uint8_t *buff, uint64_t pos, int len) { int ret = 0; uint64_t l1_idx = 0, l2_idx = 0, offset = 0; uint64_t file_offset = 0; if(!pf || !buff || !len) { return -1; } ret = v3_qcow2_addr_split(pf, pos, &l1_idx, &l2_idx, &offset); if (ret) { ERROR("failed to split address\n"); return -1; } file_offset = v3_qcow2_get_cluster_offset(pf, l1_idx, l2_idx, offset); if (file_offset) { ret = v3_file_read(pf->fd, buff, len, file_offset + (pos & (pf->cluster_size - 1))); // it is possible to get a negative value because of the hole if (ret < 0) { return -1; } } else if(pf->backing_qcow2) { return v3_qcow2_read_cluster(pf->backing_qcow2, buff, pos, len); } else { memset(buff, 0, len); } return 0; } static int v3_qcow2_read(v3_qcow2_t *pf, uint8_t *buff, uint64_t pos, int len) { if(!pf || !buff || !len) { return -1; } uint64_t next_addr, cur_len; int ret = 0; while (len) { next_addr = (pos + pf->cluster_size) & ~(pf->cluster_size - 1); cur_len = next_addr - pos; cur_len = cur_len < len ? cur_len : len; //DEBUG("pos=%lu, len=%lu\n", pos, cur_len); ret = v3_qcow2_read_cluster(pf, buff, pos, cur_len); if (ret) { return -1; } buff += cur_len; pos += cur_len; len -= cur_len; } return 0; } // in this function, we assmue we must have the corresponding refcount block // so we will not allocate the refcount block here static int v3_qcow2_update_refcount(v3_qcow2_t *pf, uint64_t cluster_idx, int count) { uint64_t table_idx = 0, block_idx = 0, block_offset = 0, idx = cluster_idx; int ret = 0; uint16_t val = count; if (!pf) { return -1; } block_idx = idx & pf->refcount_block_mask; idx >>= pf->refcount_block_bits; table_idx = idx & pf->refcount_table_mask; ret = v3_file_read(pf->fd, (uint8_t*)&block_offset, sizeof(uint64_t), pf->header.refcount_table_offset + table_idx * sizeof(uint64_t)); if (ret != sizeof(uint64_t) || !block_offset) { ERROR("something wrong with update refcount, exit\n"); return -1; } block_offset = be64toh(block_offset); val = htobe16(val); ret = v3_file_write(pf->fd, (uint8_t*)&val, sizeof(uint16_t), block_offset + block_idx * sizeof(uint16_t)); if (ret != sizeof(uint16_t)) { ERROR("write failed when update refcount, exit\n"); return -1; } return 0; } // in this function, we need to resolve the circular dependency when the refcount itself is not allocated // since for cluster_bits==16, it needs 65536G to use more than one cluster to contain all the refcount // table, we don't handle that case // of course, we can handle this case if we have enough time static int v3_qcow2_alloc_refcount(v3_qcow2_t *pf, uint64_t cluster_idx) { int res = -1, ret; uint8_t zero_buff[INIT_BUFF_SIZE]; uint16_t val = 0; uint64_t idx = cluster_idx, table_idx = 0, block_idx = 0, block_offset = 0; uint64_t new_cluster_idx, new_table_idx = 0, new_block_idx = 0, write_value; uint64_t left_size, start_offset, buf_length; if (!pf) { return -1; } block_idx = idx & pf->refcount_block_mask; idx >>= pf->refcount_block_bits; table_idx = idx & pf->refcount_table_mask; // TODO: re-allocate larger refcount table if needed retry: ret = v3_file_read(pf->fd, (uint8_t*)&block_offset, sizeof(uint64_t), pf->header.refcount_table_offset + table_idx * sizeof(uint64_t)); if (ret != sizeof(uint64_t) || table_idx > pf->header.refcount_table_clusters) { ERROR("read failed, exit!\n"); return -1; } block_offset = be64toh(block_offset); if (!block_offset) { // allocate a cluster as a new refcount block // and also initialize this cluster with zeros new_cluster_idx = v3_qcow2_alloc_clusters(pf, 1); if (new_cluster_idx <= 0) { ERROR("failed to allocate new cluster, exit!\n"); return -1; } idx = new_cluster_idx; new_block_idx = idx & pf->refcount_block_mask; idx >>= pf->refcount_block_bits; new_table_idx = idx & pf->refcount_table_mask; // initialize with zeros start_offset = new_cluster_idx << pf->header.cluster_bits; left_size = pf->cluster_size; memset(zero_buff, 0, INIT_BUFF_SIZE); while (left_size > 0) { buf_length = INIT_BUFF_SIZE < left_size ? INIT_BUFF_SIZE : left_size; ret = v3_file_write(pf->fd, zero_buff, buf_length, start_offset); if (ret != buf_length) { ERROR("something wrong with write, exit\n"); return -1; } start_offset += buf_length; left_size -= buf_length; } // update the refcount table with the new refcount block write_value = htobe64(new_table_idx << pf->header.cluster_bits); ret = v3_file_write(pf->fd, (uint8_t*)&write_value, sizeof(uint64_t), pf->header.refcount_table_offset + table_idx * sizeof(uint64_t)); if (ret != sizeof(uint64_t) ) { ERROR("write of data failed\n"); return -1; } if (new_table_idx == table_idx) { // in the same refcount block, increase its refcount here val = htobe16(1); ret = v3_file_write(pf->fd, (uint8_t*)&val, sizeof(uint16_t), (new_table_idx << pf->header.cluster_bits) + sizeof(uint16_t) * new_block_idx); if (ret != sizeof(uint16_t)) { ERROR("write failed\n"); return -1; } } else { v3_qcow2_alloc_refcount(pf, new_cluster_idx); v3_qcow2_update_refcount(pf, new_cluster_idx, 1); } goto retry; } res = 0; return res; } static int v3_qcow2_increase_refcount(v3_qcow2_t *pf, uint64_t cluster_idx) { int refcount = 0; if (!pf) { return -1; } refcount = v3_qcow2_get_refcount(pf, cluster_idx); if (refcount <= 0) { // execute to here means that no cluster block entry is allocated // we need to allocate the entry here refcount = v3_qcow2_alloc_refcount(pf, cluster_idx); if (refcount) { ERROR("something wrong when allocate refcount entry, exit!\n"); return -1; } refcount = 1; } else { refcount++; } // write the refcount back to the file return v3_qcow2_update_refcount(pf, cluster_idx, refcount); } /* * this function is to decrease the reference count of a cluster * since the snapshot is not implemented, */ __attribute__ ((unused)) static int v3_qcow2_decrease_refcount(v3_qcow2_t *pf, uint64_t cluster_idx) { int refcount = 0; if (!pf) { return -1; } refcount = v3_qcow2_get_refcount(pf, cluster_idx); if(refcount <= 0) { ERROR("attempt to decrease the refcount for a cluster, exit\n"); return -1; } refcount--; return v3_qcow2_update_refcount(pf, cluster_idx, refcount); } /* * do nothing but return if no need to allocate new cluster * only allocate one cluster if necessary */ static uint64_t v3_qcow2_alloc_cluster_offset(v3_qcow2_t *pf, uint64_t pos) { uint64_t res = 0, l1_idx = 0, l2_idx = 0, offset = 0, l2_cluster_offset, done_bytes; uint64_t l2_cluster_idx; uint64_t cluster_offset = 0; int ret = 0; uint8_t init_buff[INIT_BUFF_SIZE], *data_buff; if (!pf) { return res; } ret = v3_qcow2_addr_split(pf, pos, &l1_idx, &l2_idx, &offset); if (ret) { ERROR("cannot split address\n"); return res; } // FIXME: in fact, we should check the refcount to be 1, // otherwise we should copy // do it later res = v3_qcow2_get_cluster_offset(pf, l1_idx, l2_idx, offset); if (res) { cluster_offset = res; goto done; } /* * need to allocate a new cluster for write * also need to update the l1 and l2 table */ ret = v3_file_read(pf->fd, (uint8_t*)&l2_cluster_offset, sizeof(uint64_t), pf->header.l1_table_offset + sizeof(uint64_t) * l1_idx); if (ret != sizeof(uint64_t)) { ERROR("read failed\n"); return res; } l2_cluster_offset = be64toh(l2_cluster_offset) & ~(QCOW2_COPIED | QCOW2_COMPRESSED); if (!l2_cluster_offset/*l1_idx >= pf->header.l1_size*/) { // huh? /* * need to allocate a new l1 entry * for simplicity, only allow 2^(cluster_bits-3) entry in l1 table */ l2_cluster_idx = v3_qcow2_alloc_clusters(pf, 1); l2_cluster_offset = (l2_cluster_idx << pf->header.cluster_bits); // increase the reference count for this cluster v3_qcow2_increase_refcount(pf, l2_cluster_idx); memset(init_buff, 0, INIT_BUFF_SIZE); for (done_bytes = 0; done_bytes < pf->cluster_size; done_bytes += INIT_BUFF_SIZE) { ret = v3_file_write(pf->fd, init_buff, INIT_BUFF_SIZE, l2_cluster_offset + done_bytes); if (ret != INIT_BUFF_SIZE) { ERROR("write failed\n"); return res; } } /* * set the copied bit */ l2_cluster_offset |= QCOW2_COPIED; l2_cluster_offset = htobe64(l2_cluster_offset); ret = v3_file_write(pf->fd, (uint8_t*)&l2_cluster_offset, sizeof(uint64_t), pf->header.l1_table_offset + sizeof(uint64_t) * l1_idx); if (ret != sizeof(uint64_t)) { ERROR("write failed\n"); return res; } } ret = v3_file_read(pf->fd, (uint8_t*)&l2_cluster_offset, sizeof(uint64_t), pf->header.l1_table_offset + sizeof(uint64_t) * l1_idx); if (ret != sizeof(uint64_t)) { ERROR("read failed\n"); return res; } l2_cluster_offset = be64toh(l2_cluster_offset) & ~(QCOW2_COPIED | QCOW2_COMPRESSED); /* * begin to retrieve cluster_offset */ // adjust the l2_cluster_offset to the right entry address l2_cluster_offset += sizeof(uint64_t) * l2_idx; ret = v3_file_read(pf->fd, (uint8_t*)&cluster_offset, sizeof(uint64_t), l2_cluster_offset); if (ret!=sizeof(uint64_t)) { ERROR("read failed\n"); return res; } cluster_offset = be64toh(cluster_offset) & ~(QCOW2_COPIED | QCOW2_COMPRESSED); if (!cluster_offset) { /* * if the cluster_offset is not allocated */ l2_cluster_idx = v3_qcow2_alloc_clusters(pf, 1); cluster_offset = (l2_cluster_idx << pf->header.cluster_bits); // TODO: initialization // initialize the cluster with the original data data_buff = (uint8_t*)V3_Malloc(pf->cluster_size); if (data_buff) { pos = (pos >> pf->header.cluster_bits) << pf->header.cluster_bits; v3_qcow2_read_cluster(pf, data_buff, pos, pf->cluster_size); ret = v3_file_write(pf->fd, data_buff, pf->cluster_size, cluster_offset); if (ret!=pf->cluster_size) { ERROR("write failed\n"); return res; } V3_Free(data_buff); } else { ERROR("failed to initialize the original data\n"); } offset = htobe64(cluster_offset | QCOW2_COPIED); ret = v3_file_write(pf->fd, (uint8_t*)&offset, sizeof(uint64_t), l2_cluster_offset); if (ret!=sizeof(uint64_t)) { ERROR("write failed\n"); return res; } v3_qcow2_increase_refcount(pf, l2_cluster_idx); } done: return cluster_offset; } static int v3_qcow2_write_cluster(v3_qcow2_t *pf, uint8_t *buff, uint64_t pos, int len) { if(!pf || !buff) { return -1; } uint64_t cluster_addr, cluster_offset; int ret = 0; cluster_addr = v3_qcow2_alloc_cluster_offset(pf, pos); if (!cluster_addr) { ERROR("zero cluster address\n"); return -1; } cluster_offset = pos & (pf->cluster_size - 1); ret = v3_file_write(pf->fd, buff, len, cluster_addr + cluster_offset); if (ret != len) { ERROR("write failed\n"); return -1; } return 0; } static int v3_qcow2_write(v3_qcow2_t *pf, uint8_t *buff, uint64_t pos, int len) { if (!pf || !buff || !len) { return -1; } uint64_t next_addr, cur_len; int ret = 0; while (len) { next_addr = (pos + pf->cluster_size) & ~(pf->cluster_size - 1); cur_len = next_addr - pos; cur_len = cur_len < len ? cur_len : len; DEBUG("pos=%llu, len=%llu\n", pos, cur_len); ret = v3_qcow2_write_cluster(pf, buff, pos, cur_len); if (ret) { return -1; } buff += cur_len; pos += cur_len; len -= cur_len; } return ret; } static int read(uint8_t * buf, uint64_t lba, uint64_t num_bytes, void * private_data) { v3_qcow2_t * disk = (v3_qcow2_t *) private_data; DEBUG("QCOW Reading %llu bytes from %llu to 0x%p\n", num_bytes, lba, buf); if (lba + num_bytes > disk->header.size) { ERROR("Out of bounds read: lba=%llu, num_bytes=%llu, capacity=%llu\n", lba, num_bytes, disk->header.size); return -1; } return v3_qcow2_read(disk, buf, lba, num_bytes); } static int write(uint8_t * buf, uint64_t lba, uint64_t num_bytes, void * private_data) { v3_qcow2_t * disk = (v3_qcow2_t *) private_data; DEBUG("QCOW Writing %llu bytes from 0x%p to %llu\n", num_bytes, buf, lba); if (lba + num_bytes > disk->header.size) { ERROR("Out of bounds read: lba=%llu, num_bytes=%llu, capacity=%llu\n", lba, num_bytes, disk->header.size); return -1; } return v3_qcow2_write(disk, buf, lba, num_bytes); } static uint64_t get_capacity(void * private_data) { v3_qcow2_t * disk = (v3_qcow2_t *)private_data; DEBUG("Querying QCOWDISK capacity %llu\n", v3_qcow2_get_capacity(disk)); return v3_qcow2_get_capacity(disk); } static struct v3_dev_blk_ops blk_ops = { .read = read, .write = write, .get_capacity = get_capacity, }; static int disk_free(v3_qcow2_t * disk) { v3_qcow2_close(disk); return 0; } static struct v3_device_ops dev_ops = { .free = (int (*)(void *))disk_free, }; static int disk_init(struct v3_vm_info * vm, v3_cfg_tree_t * cfg) { v3_qcow2_t * disk = NULL; char * path = v3_cfg_val(cfg, "path"); char * dev_id = v3_cfg_val(cfg, "ID"); char * writable = v3_cfg_val(cfg, "writable"); char * writeable = v3_cfg_val(cfg, "writeable"); v3_cfg_tree_t * frontend_cfg = v3_cfg_subtree(cfg, "frontend"); int flags = FILE_OPEN_MODE_READ; PrintDebug(vm,VCORE_NONE,"Welcome to the QCOWDISK Implementation!\n"); if ( ((writable) && (writable[0] == '1')) || ((writeable) && (writeable[0] == '1')) ) { flags |= FILE_OPEN_MODE_WRITE; } if (path == NULL) { PrintError(vm, VCORE_NONE, "Missing path (%s) for %s\n", path, dev_id); return -1; } disk = v3_qcow2_open(vm, path, flags); if (disk == NULL) { PrintError(vm, VCORE_NONE, "Could not open file disk:%s\n", path); return -1; } struct vm_device * dev = v3_add_device(vm, dev_id, &dev_ops, disk); if (dev == NULL) { PrintError(vm, VCORE_NONE, "Could not attach device %s\n", dev_id); V3_Free(disk); return -1; } if (v3_dev_connect_blk(vm, v3_cfg_val(frontend_cfg, "tag"), &blk_ops, frontend_cfg, disk) == -1) { PrintError(vm, VCORE_NONE, "Could not connect %s to frontend %s\n", dev_id, v3_cfg_val(frontend_cfg, "tag")); v3_remove_device(dev); return -1; } return 0; } device_register("QCOWDISK", disk_init)