10 #include <sys/ioctl.h>
12 #include <sys/types.h>
20 #define SYS_PATH "/sys/devices/system/memory/"
// Parallel arrays describing the regions recorded in the offline-memory
// file: start_offline[k] is the start of region k and len_offline[k] is its
// length. Both are allocated and filled by read_offlined().
28 unsigned long long *start_offline;
29 unsigned long long *len_offline;
// Helpers that maintain the in-memory and on-disk record of offlined regions.
31 static int read_offlined();
32 static int write_offlined();
33 static int find_offlined(unsigned long long base_addr);
34 static int clear_offlined();
// Hot-remove path: find and offline memory blocks, reporting the resulting
// region through num_bytes and base_addr.
37 static int offline_memory(unsigned long long mem_size_bytes,
38 unsigned long long mem_min_start,
40 unsigned long long *num_bytes,
41 unsigned long long *base_addr);
// NOTE(review): the definition of online_memory later in this file names its
// parameters (base_addr, num_bytes), the reverse of this prototype. Both are
// unsigned long long, so this compiles; the call in main passes
// (start, length), which matches the definition rather than this prototype.
43 static int online_memory(unsigned long long num_bytes,
44 unsigned long long base_addr);
// Entry point for the v3_mem tool. Parses options with getopt, then either
// adds memory to Palacios (ADD) or removes it (REMOVE) through ioctls on the
// V3Vee control device. Regions obtained by offlining are recorded in the
// file .v3offlinedmem under PALACIOS_DIR so they can be onlined on removal.
49 int main(int argc, char * argv[]) {
50 unsigned long long mem_size_bytes = 0;
51 unsigned long long mem_min_start = 0;
// Requested operation; its initial value is not visible in this excerpt.
57 enum {NONE, ADD, REMOVE} op;
60 unsigned long long num_bytes, base_addr;
61 struct v3_mem_region mem;
// Flags h, a, r, k and l take no argument; m and n each require one.
63 while ((c=getopt(argc,argv,"harklm:n:"))!=-1) {
// The -m value is given in MB and converted to bytes. atoll cannot report
// a parse error, so malformed input silently becomes 0.
81 mem_min_start = atoll(optarg) * (1024*1024);
88 printf("-n requires the numa node...\n");
90 } else if (optopt=='m') {
91 printf("-m requires the minimum starting address (in MB)...\n");
94 printf("Unknown option %c\n",optopt);
99 printf("Unknown option %c\n",optopt);
// Usage is shown when no operation was selected, when no positional
// argument remains, or when help was requested.
105 if (op==NONE || optind==argc || help) {
106 printf("usage: v3_mem [ [-k] [-l] [-n k] [-m n] -a <memory size (MB)>] | [-r <hexaddr> | offline]\n\n"
107 "Palacios Memory Management\n\nMemory Addition\n"
108 " -a <mem> Allocate memory for use by Palacios (MB).\n\n"
109 " With -k this requests in-kernel allocation\n"
110 " Without -k this attempts to offline memory via hot remove\n\n"
111 " With -l the request or offlining is limited to first 4 GB\n"
112 " Without -l the request or offlining has no limits\n\n"
113 " With -m n the search for offlineable memory starts at n MB\n"
114 " Without -m n the search for offlineable memory starts at 0 MB\n\n"
115 " With -n i the request is for numa node i\n"
116 " Without -n i the request can be satified on any numa node\n\n"
118 " -r <hexaddr> Free Palacios memory containing hexaddr, online it if needed\n"
119 " -r offline Free all offline Palacios memory and online it\n"
// The positional size argument is in MB and is converted to bytes.
126 mem_size_bytes = atoll(argv[optind]) * (1024 * 1024);
127 } else if (op==REMOVE) {
// The literal word offline (case-insensitive) selects the remove-all case;
// anything else is parsed as a hexadecimal address.
128 if (!strcasecmp(argv[optind],"offline")) {
131 base_addr=strtoll(argv[optind],NULL,16);
135 if (!getenv("PALACIOS_DIR")) {
136 printf("Please set the PALACIOS_DIR variable\n");
// NOTE(review): strcpy and strcat are unbounded. The declaration of offname
// is not visible in this excerpt; confirm it can hold the PALACIOS_DIR value
// plus the file-name suffix.
140 strcpy(offname,getenv("PALACIOS_DIR"));
141 strcat(offname,"/.v3offlinedmem");
// Mode a+ opens for reading and appending and creates the file if absent.
143 if (!(off=fopen(offname,"a+"))) {
// NOTE(review): this message has no trailing newline, unlike the others.
144 printf("Cannot open or create offline memory file %s",offname);
148 // removing all offlined memory we added is a special case
149 if (op==REMOVE && alloffline) {
153 // we just need to reinvoke ourselves
// One command line is built per recorded region, addressed by its start in
// hex. NOTE(review): the size of cmd and how it is executed are not visible
// in this excerpt.
155 for (i=0;i<num_offline;i++) {
157 sprintf(cmd,"v3_mem -r %llx", start_offline[i]);
165 v3_fd = open(v3_dev, O_RDONLY);
168 printf("Error opening V3Vee control device\n");
// Hot-remove path: a nonzero return from offline_memory is a failure.
176 printf("Trying to offline memory (size=%llu, min_start=%llu, limit32=%d)\n",mem_size_bytes,mem_min_start,limit32);
177 if (offline_memory(mem_size_bytes,mem_min_start,limit32, &num_bytes, &base_addr)) {
178 printf("Could not offline memory\n");
// Append the region as start<TAB>length in hex, the same record format that
// read_offlined parses.
184 fprintf(off,"%llx\t%llx\n",base_addr, num_bytes);
// Describe the offlined region to Palacios. The page count assumes
// 4096-byte pages.
186 mem.type=PREALLOCATED;
188 mem.base_addr=base_addr;
189 mem.num_pages=num_bytes/4096;
// In-kernel allocation path: only a size (and 32-bit limit) is requested.
192 printf("Generating memory allocation request (size=%llu, limit32=%d)\n", mem_size_bytes, limit32);
193 mem.type = limit32 ? REQUESTED32 : REQUESTED;
196 mem.num_pages = mem_size_bytes / 4096;
199 printf("Allocation request is: type=%d, node=%d, base_addr=0x%llx, num_pages=%llu\n",
200 mem.type, mem.node, mem.base_addr, mem.num_pages);
// A negative ioctl result means the request was rejected.
202 if (ioctl(v3_fd, V3_ADD_MEMORY, &mem)<0) {
203 printf("Request rejected by Palacios\n");
208 printf("Request accepted by Palacios\n");
214 } else if (op==REMOVE) {
// Look up the recorded region, if any, that contains base_addr.
219 entry=find_offlined(base_addr);
222 // no need to offline
225 mem.type=PREALLOCATED;
228 mem.base_addr=base_addr;
230 // now remove it from palacios
231 printf("Deallocation request is: type=%d, base_addr=0x%llx\n",
232 mem.type, mem.base_addr);
234 if (ioctl(v3_fd, V3_REMOVE_MEMORY, &mem)<0) {
235 printf("Request rejected by Palacios\n");
241 printf("Request accepted by Palacios\n");
244 printf("Onlining the memory to make it available to the kernel\n");
// Arguments are (start, length), matching the parameter order of the
// online_memory definition (base_addr, num_bytes). The prototype near the
// top of the file names them in the opposite order.
245 online_memory(start_offline[entry],len_offline[entry]);
// A zero length marks the entry as released; write_offlined skips entries
// whose length is zero.
247 len_offline[entry] = 0;
253 printf("Memory was deallocated in the kernel\n");
// scandir filter used by offline_memory: tests whether a directory entry
// name begins with the six characters of the word memory.
// NOTE(review): the returned values are not visible in this excerpt.
266 static int dir_filter(const struct dirent * dir) {
267 if (strncmp("memory", dir->d_name, 6) == 0) {
// scandir comparator used by offline_memory: extracts the integer that
// follows the six-character memory prefix of each entry name.
// NOTE(review): the comparison and return are not visible in this excerpt,
// so the resulting sort direction should be confirmed. atoi gives no error
// indication for names without a numeric suffix.
275 static int dir_cmp(const struct dirent **dir1, const struct dirent ** dir2) {
276 int num1 = atoi((*dir1)->d_name + 6);
277 int num2 = atoi((*dir2)->d_name + 6);
// Re-onlines memory blocks first through last, inclusive, by opening each
// block state file under SYS_PATH in r+ mode and writing the word online.
// Used by offline_memory to undo a partially completed offlining attempt.
// NOTE(review): the macro body uses i, name and f; whether it declares them
// itself or relies on the expansion site is not visible in this excerpt.
284 #define UNWIND(first,last) \
287 for (i = first; i <= last; i++) { \
290 snprintf(name,256,"%smemory%d/state",SYS_PATH,i); \
291 f=fopen(name,"r+"); \
293 perror("Cannot open state file\n"); \
296 printf("Re-onlining block %d (%s)\n",i,name); \
297 fprintf(f,"online\n"); \
// Finds a run of consecutive memory blocks under SYS_PATH that are both
// removable and online, writes the word offline to each block state file,
// and then re-reads the state files to verify the result. On success the
// region size is stored in *num_bytes and its physical start in *base_addr,
// both computed from block counts multiplied by the block size.
// mem_size_bytes is rounded up to whole blocks and mem_min_start is rounded
// up to a block boundary.
// NOTE(review): the return statements are not visible in this excerpt; main
// treats a nonzero return as failure.
303 static int offline_memory(unsigned long long mem_size_bytes,
304 unsigned long long mem_min_start,
306 unsigned long long *num_bytes,
307 unsigned long long *base_addr)
310 unsigned int block_size_bytes = 0;
311 int bitmap_entries = 0;
312 unsigned char * bitmap = NULL;
// NOTE(review): the first two conversions are %d but the matching arguments
// are unsigned long long expressions; that mismatch is undefined behavior
// and %llu would match the argument types.
319 printf("Trying to find %dMB (%d bytes) of memory above %llu with limit32=%d\n", mem_size_bytes/(1024*1024), mem_size_bytes, mem_min_start, limit32);
321 /* Figure out the block size */
324 char tmp_buf[BUF_SIZE];
326 tmp_fd = open(SYS_PATH "block_size_bytes", O_RDONLY);
329 perror("Could not open block size file: " SYS_PATH "block_size_bytes");
333 if (read(tmp_fd, tmp_buf, BUF_SIZE) <= 0) {
334 perror("Could not read block size file: " SYS_PATH "block_size_bytes");
// The sysfs value is parsed as hexadecimal.
// NOTE(review): block_size_bytes is an unsigned int, so a block size of
// 4 GB or more would be truncated. No NUL termination of tmp_buf and no
// check for a zero result are visible before the divisions below.
340 block_size_bytes = strtoll(tmp_buf, NULL, 16);
342 printf("Memory block size is %dMB (%d bytes)\n", block_size_bytes / (1024 * 1024), block_size_bytes);
// Round the request up to a whole number of blocks.
347 num_blocks = mem_size_bytes / block_size_bytes;
348 if (mem_size_bytes % block_size_bytes) num_blocks++;
// Round the minimum start address up to the next block boundary.
350 mem_min_start = block_size_bytes *
351 ((mem_min_start / block_size_bytes) + (!!(mem_min_start % block_size_bytes)));
353 printf("Looking for %d blocks of memory starting at %p (block %llu) with limit32=%d\n", num_blocks, (void*)mem_min_start, mem_min_start/block_size_bytes,limit32);
356 // We now need to find <num_blocks> consecutive offlinable memory blocks
358 /* Scan the memory directories */
360 struct dirent ** namelist = NULL;
365 int first_block = mem_min_start/block_size_bytes;
// Block index that contains the last byte below 4 GB.
366 int limit_block = 0xffffffff / block_size_bytes; // for 32 bit limiting
// scandir returns the number of matching entries. bitmap_entries is taken
// from the numeric suffix of the last entry plus one, which is the highest
// block number plus one provided dir_cmp sorts ascending (TODO confirm).
// NOTE(review): no check for a failed (-1) or empty (0) scandir result is
// visible before namelist[last_block - 1] is dereferenced.
368 last_block = scandir(SYS_PATH, &namelist, dir_filter, dir_cmp);
369 bitmap_entries = atoi(namelist[last_block - 1]->d_name + 6) + 1;
// One bit per block, rounded up to whole bytes.
371 size = bitmap_entries / 8;
372 if (bitmap_entries % 8) size++;
// NOTE(review): alloca does not report failure by returning NULL (stack
// exhaustion is undefined behavior), so the error branch below is unlikely
// to ever run. The buffer lives on the stack of this function.
374 bitmap = alloca(size);
377 printf("ERROR: could not allocate space for bitmap\n");
381 memset(bitmap, 0, size);
// NOTE(review): the loop steps i, the index into namelist, but terminates
// on j, the block number parsed inside the body. The initial value of j is
// not visible in this excerpt; confirm the bound is intended.
383 for (i = 0 ; j < bitmap_entries - 1; i++) {
384 struct dirent * tmp_dir = namelist[i];
386 char status_str[BUF_SIZE];
387 char fname[BUF_SIZE];
389 memset(status_str, 0, BUF_SIZE);
390 memset(fname, 0, BUF_SIZE);
392 snprintf(fname, BUF_SIZE, "%s%s/removable", SYS_PATH, tmp_dir->d_name);
// j is the block number taken from the directory name suffix.
394 j = atoi(tmp_dir->d_name + 6);
400 printf("Skipping %s due to minimum start constraint\n",fname);
// NOTE(review): this compares i, the namelist index, against limit_block,
// which is a block number. If block numbering has gaps the two differ;
// presumably j was intended - confirm.
404 if (limit32 && i>limit_block) {
405 printf("Skipping %s due to 32 bit constraint\n",fname);
410 // The prospective block must be (a) removable, and (b) currently online
412 printf("Checking %s...", fname);
414 block_fd = open(fname, O_RDONLY);
416 if (block_fd == -1) {
417 printf("Hotpluggable memory not supported or could not determine if block is removable...\n");
421 if (read(block_fd, status_str, BUF_SIZE) <= 0) {
422 perror("Could not read block removability information\n");
// Force termination in case the read filled the whole buffer.
426 status_str[BUF_SIZE-1]=0;
// The removable file is treated as removable only when it parses to 1.
430 if (atoi(status_str) == 1) {
431 printf("Removable ");
433 printf("Not removable\n");
// fname is reused for the state file of the same block.
437 snprintf(fname, BUF_SIZE, "%s%s/state", SYS_PATH, tmp_dir->d_name);
439 block_fd = open(fname, O_RDONLY);
442 perror("Could not open block state\n");
446 if (read(block_fd, status_str, BUF_SIZE) <=0) {
447 perror("Could not read block state information\n");
451 status_str[BUF_SIZE-1]=0;
// Only blocks whose state begins with online are marked usable.
455 if (!strncasecmp(status_str,"offline",7)) {
456 printf("and Already Offline (unusable)\n");
457 } else if (!strncasecmp(status_str,"online",6)) {
458 printf("and Online (usable)\n");
// The computation of major and minor is not visible in this excerpt;
// presumably byte index and bit index of block j - TODO confirm.
459 bitmap[major] |= (0x1 << minor);
461 printf("and in Unknown State '%s' (unusable)\n",status_str);
470 /* Scan bitmap for enough consecutive space */
472 // num_blocks: The number of blocks we need to find
473 // bitmap: bitmap of blocks (1 == allocatable)
474 // bitmap_entries: number of blocks in the system/number of bits in bitmap
475 // reg_start: The block index where our allocation will start
480 for (i = 0; i < bitmap_entries; i++) {
// A clear bit breaks the current run, so the candidate start moves past it.
484 if (!(bitmap[i_major] & (0x1 << i_minor))) {
485 reg_start = i + 1; // skip the region start to next entry
492 if (run_len >= num_blocks) {
498 if (run_len < num_blocks) {
499 fprintf(stderr, "Could not find enough consecutive memory blocks... (found %d)\n", run_len);
500 // no offlining yet, so no need to unwind here
506 /* Offline memory blocks starting at reg_start */
510 for (i = 0; i < num_blocks; i++) {
511 FILE * block_file = NULL;
514 memset(fname, 0, 256);
516 snprintf(fname, 256, "%smemory%d/state", SYS_PATH, i + reg_start);
518 block_file = fopen(fname, "r+");
520 if (block_file == NULL) {
521 perror("Could not open block file");
// Re-online the blocks already offlined in this attempt (indices before i).
522 UNWIND(reg_start, i+reg_start-1);
527 printf("Offlining block %d (%s)\n", i + reg_start, fname);
// The result of this write is not checked here; the verification pass
// below re-reads each state file instead.
528 fprintf(block_file, "offline\n");
536 /* We asked to offline set of blocks, but Linux could have lied.
537 * To be safe, check whether blocks were offlined and start again if not
543 mem_ready = 1; // Hopefully we are ok...
546 for (i = 0; i < num_blocks; i++) {
548 char fname[BUF_SIZE];
549 char status_buf[BUF_SIZE];
552 memset(fname, 0, BUF_SIZE);
553 memset(status_buf, 0, BUF_SIZE);
555 snprintf(fname, BUF_SIZE, "%smemory%d/state", SYS_PATH, i + reg_start);
558 block_fd = open(fname, O_RDONLY);
560 if (block_fd == -1) {
561 perror("Could not open block state file");
565 if (read(block_fd, status_buf, BUF_SIZE) <= 0) {
566 perror("Could not read block state");
// NOTE(review): status_buf has BUF_SIZE elements, so index BUF_SIZE is one
// past the end of the array. The scan loop earlier in this function uses
// BUF_SIZE-1 for the same purpose; this write looks like an off-by-one.
570 status_buf[BUF_SIZE]=0;
572 printf("Checking offlined block %d (%s)...", i + reg_start, fname);
574 int ret = strncmp(status_buf, "offline", strlen("offline"));
576 if (ret != 0) { // uh oh
578 int major = (i + reg_start) / 8;
579 int minor = (i + reg_start) % 8;
// Clearing the bit keeps a repeated search from choosing this block again.
581 bitmap[major] &= ~(0x1 << minor); // mark the block as not removable in bitmap
583 mem_ready = 0; // Keep searching
585 printf("ERROR - block status is '%s'\n", status_buf);
// Undo the whole attempt: every block in the chosen region is re-onlined.
588 UNWIND(reg_start,reg_start+num_blocks-1);
594 printf("Offlined Memory OK\n");
599 /* Memory is offlined. Calculate size and phys start addr to send to Palacios */
600 *num_bytes = (unsigned long long)(num_blocks) * (unsigned long long)(block_size_bytes);
601 *base_addr = (unsigned long long)(reg_start) * (unsigned long long)(block_size_bytes);
// Brings the memory blocks covering the range starting at base_addr and
// spanning num_bytes back online by writing the word online to each block
// state file under SYS_PATH.
// NOTE(review): the parameter order here is (base_addr, num_bytes), while
// the prototype near the top of the file names them (num_bytes, base_addr).
// The call in main passes (start, length), which matches this definition.
// Return statements are not visible in this excerpt.
607 static int online_memory(unsigned long long base_addr,
608 unsigned long long num_bytes)
611 unsigned int block_size_bytes = 0;
// No use of bitmap_entries or bitmap is visible in this excerpt.
612 int bitmap_entries = 0;
613 unsigned char * bitmap = NULL;
620 printf("Trying to online memory from %llu to %llu\n",base_addr,base_addr+num_bytes-1);
622 /* Figure out the block size */
625 char tmp_buf[BUF_SIZE];
627 tmp_fd = open(SYS_PATH "block_size_bytes", O_RDONLY);
630 perror("Could not open block size file: " SYS_PATH "block_size_bytes");
634 if (read(tmp_fd, tmp_buf, BUF_SIZE) <= 0) {
635 perror("Could not read block size file: " SYS_PATH "block_size_bytes");
// The sysfs value is parsed as hexadecimal and stored in an unsigned int.
// No zero check is visible before the divisions below.
641 block_size_bytes = strtoll(tmp_buf, NULL, 16);
643 printf("Memory block size is %dMB (%d bytes)\n", block_size_bytes / (1024 * 1024), block_size_bytes);
// Round the length up to whole blocks; the start block index is rounded down.
647 num_blocks = num_bytes / block_size_bytes;
648 if (num_bytes % block_size_bytes) num_blocks++;
650 reg_start = base_addr / block_size_bytes;
// NOTE(review): %llu is paired with block_size_bytes, which is an unsigned
// int, so the conversion does not match its argument. The declared types of
// num_blocks (%lu) and reg_start (%d) are not visible here - confirm those.
652 printf("That is %lu blocks of size %llu starting at block %d\n", num_blocks, block_size_bytes, reg_start);
656 /* Online memory blocks starting at reg_start */
660 for (i = 0; i < num_blocks; i++) {
661 FILE * block_file = NULL;
664 memset(fname, 0, 256);
666 snprintf(fname, 256, "%smemory%d/state", SYS_PATH, i + reg_start);
668 block_file = fopen(fname, "r+");
670 if (block_file == NULL) {
671 perror("Could not open block file");
676 printf("Onlining block %d (%s)\n", i + reg_start, fname);
677 fprintf(block_file, "online\n");
// Loads the offline-memory record file into start_offline and len_offline.
// A first pass over the stream counts well-formed records (two hex fields
// separated by a tab); the arrays are then allocated with calloc and a
// second pass fills them.
// NOTE(review): return statements are not visible in this excerpt.
690 static int read_offlined()
693 unsigned long long base, len;
// Counting pass: base and len are scratch targets and are otherwise unused.
697 while (fscanf(off,"%llx\t%llx\n",&base,&len)==2) { num_offline++; }
700 start_offline=(unsigned long long *)calloc(num_offline, sizeof(unsigned long long));
701 len_offline=(unsigned long long *)calloc(num_offline, sizeof(unsigned long long));
703 if (!start_offline || !len_offline) {
704 printf("Cannot allocate space to load offline map\n");
// NOTE(review): the counting pass leaves the stream at end of file. A
// rewind before this loop is not visible in this excerpt - confirm it
// exists. The fscanf result below is also not checked.
709 for (i=0;i<num_offline;i++) {
710 fscanf(off,"%llx\t%llx",&(start_offline[i]),&(len_offline[i]));
712 // we are now back to the end, and can keep appending
// Rewrites the offline-memory record file from the in-memory arrays,
// dropping every entry whose length is zero (main zeroes the length of a
// region once it has been released).
// NOTE(review): return statements, and whether the previous off stream is
// closed before being reassigned, are not visible in this excerpt.
717 static int write_offlined()
// Mode w+ truncates the existing file before writing.
722 if (!(off=fopen(offname,"w+"))) { // truncate
// NOTE(review): the format contains %s but no argument is supplied, which
// is undefined behavior; offname is presumably the intended argument.
723 printf("Cannot open %s for writing!\n");
727 for (i=0;i<num_offline;i++) {
728 if (len_offline[i]) {
// Same start<TAB>length hex record format that read_offlined parses.
729 fprintf(off,"%llx\t%llx\n",start_offline[i],len_offline[i]);
732 // we are now back to the end, and can keep appending
737 static int clear_offlined()
744 static int find_offlined(unsigned long long base_addr)
748 for (i=0;i<num_offline;i++) {
749 if (base_addr>=start_offline[i] &&
750 base_addr<(start_offline[i]+len_offline[i])) {