10 #include <sys/ioctl.h>
12 #include <sys/types.h>
20 #define SYS_PATH "/sys/devices/system/memory/"
/*
 * Forward declaration of offline_memory(), which is defined later in this
 * file and called from main().
 *
 *   mem_size_bytes - amount of memory to take offline, in bytes
 *   mem_min_start  - lowest physical address at which the search may start
 *   num_bytes      - out: size in bytes of the region that was offlined
 *   base_addr      - out: physical start address of that region
 *
 * main() treats any nonzero return value as failure.
 */
27 static int offline_memory(unsigned long long mem_size_bytes,
28 unsigned long long mem_min_start,
30 unsigned long long *num_bytes,
31 unsigned long long *base_addr);
/*
 * Entry point of the v3_mem utility.
 *
 * Parses the command-line options with getopt(), converts the trailing
 * <memory size (MB)> argument to bytes, opens the V3Vee control device and
 * then builds a struct v3_mem_region describing the memory to hand over:
 *   - offline path: offline_memory() takes blocks offline and the resulting
 *     base address / size are submitted as a PREALLOCATED region;
 *   - request path: a REQUESTED (or REQUESTED32 when limit32 is set) region
 *     of mem_size_bytes is submitted and the allocation is left to the
 *     kernel side.
 * The region is passed to Palacios with the V3_ADD_MEMORY ioctl and the
 * outcome is reported on stdout.
 *
 * Sizes are expressed to Palacios in 4096-byte pages.
 */
33 int main(int argc, char * argv[]) {
34 unsigned long long mem_size_bytes = 0;
35 unsigned long long mem_min_start = 0;
41 unsigned long long num_bytes, base_addr;
42 struct v3_mem_region mem;
44 if (argc<2 || argc>8) {
// The synopsis lists exactly the options accepted by getopt() below and
// uses the same argument names as the per-option descriptions.
45 printf("usage: v3_mem [-k] [-l] [-n i] [-m n] <memory size (MB)>\n\n"
46 "Allocate memory for use by Palacios.\n\n"
47 "With -k this requests in-kernel allocation.\n"
48 "Without -k this attempts to offline memory via hot remove\n\n"
49 "With -l the request or offlining is limited to first 4 GB\n"
50 "Without -l the request or offlining has no limits\n\n"
51 "With -m n the offline memory search starts at n MB\n"
52 "Without -m n the offline memory search starts at 0 MB\n\n"
53 "With -n i the request is for numa node i\n"
54 "Without -n i the request can be on any numa node\n\n");
// -m and -n each take an argument, so each needs its own ':' in the option
// string; without it optarg is NULL when -m is handled and the missing
// argument diagnostic for -m can never be reached.
58 while ((c=getopt(argc,argv,"klm:n:"))!=-1) {
// -m is given in MB; convert to bytes.
67 mem_min_start = atoll(optarg) * (1024*1024);
74 printf("-n requires the numa node...\n");
75 } else if (optopt=='m') {
76 printf("-m requires the minimum starting address (in MB)...\n");
78 printf("Unknown option %c\n",optopt);
82 printf("Unknown option %c\n",optopt);
// First non-option argument is the requested size in MB; convert to bytes.
87 mem_size_bytes = atoll(argv[optind]) * (1024 * 1024);
89 v3_fd = open(v3_dev, O_RDONLY);
92 printf("Error opening V3Vee control device\n");
97 printf("Trying to offline memory (size=%llu, min_start=%llu, limit32=%d)\n",mem_size_bytes,mem_min_start,limit32);
// offline_memory() reports failure with a nonzero return value.
98 if (offline_memory(mem_size_bytes,mem_min_start,limit32, &num_bytes, &base_addr)) {
99 printf("Could not offline memory\n");
102 mem.type=PREALLOCATED;
104 mem.base_addr=base_addr;
105 mem.num_pages=num_bytes/4096;
107 printf("Generating memory allocation request (size=%llu, limit32=%d)\n", mem_size_bytes, limit32);
108 mem.type = limit32 ? REQUESTED32 : REQUESTED;
111 mem.num_pages = mem_size_bytes / 4096;
114 printf("Allocation request is: type=%d, node=%d, base_addr=0x%llx, num_pages=%llu\n",
115 mem.type, mem.node, mem.base_addr, mem.num_pages);
117 if (ioctl(v3_fd, V3_ADD_MEMORY, &mem)<0) {
118 printf("Request rejected by Palacios\n");
122 printf("Request accepted by Palacios\n");
/*
 * Entry filter passed to scandir() in offline_memory(): tests whether the
 * directory entry's name begins with the 6 characters "memory".
 */
129 static int dir_filter(const struct dirent * dir) {
130 if (strncmp("memory", dir->d_name, 6) == 0) {
/*
 * Comparator passed to scandir() in offline_memory(). Parses the integer
 * that follows the first 6 characters of each entry name (the "memory"
 * prefix accepted by dir_filter) for both entries.
 * NOTE(review): the return statement is not visible here; presumably the
 * entries are ordered by these numbers - confirm.
 */
138 static int dir_cmp(const struct dirent **dir1, const struct dirent ** dir2) {
139 int num1 = atoi((*dir1)->d_name + 6);
140 int num2 = atoi((*dir2)->d_name + 6);
/*
 * UNWIND(first,last): roll back an offlining attempt by bringing blocks
 * first..last (inclusive) online again. For each block number i it builds
 * the path SYS_PATH "memory<i>/state", opens it with mode "r+", and writes
 * "online\n" to it; a failure to open the state file is reported via
 * perror(). Used by offline_memory() both when a state file cannot be
 * opened part-way through offlining and when verification finds a block
 * that did not actually go offline.
 * NOTE(review): the macro uses the identifiers i, name and f; where they
 * are declared is not visible here - confirm they do not clash with the
 * caller's own loop variable.
 */
147 #define UNWIND(first,last) \
150 for (i = first; i <= last; i++) { \
153 snprintf(name,256,"%smemory%d/state",SYS_PATH,i); \
154 f=fopen(name,"r+"); \
156 perror("Cannot open state file\n"); \
159 printf("Re-onlining block %d (%s)\n",i,name); \
160 fprintf(f,"online\n"); \
/*
 * Take a run of consecutive memory blocks offline through the sysfs memory
 * hotplug interface under SYS_PATH, so the region can be handed to Palacios.
 *
 * Steps:
 *   1. Read the memory block size (hexadecimal) from
 *      SYS_PATH "block_size_bytes".
 *   2. Round the request up to a whole number of blocks and round
 *      mem_min_start up to a block boundary.
 *   3. Scan the memory<N> directories and build a bitmap with one bit per
 *      block number; a bit is set when the block is removable and online.
 *   4. Search the bitmap for num_blocks consecutive set bits.
 *   5. Write "offline" to the state file of every block in the run.
 *   6. Re-read every state file; if a block is not offline its bit is
 *      cleared, the run is re-onlined with UNWIND and the search continues.
 *
 *   mem_size_bytes - amount of memory to offline, in bytes
 *   mem_min_start  - lowest physical address to consider
 *   limit32        - when nonzero, only blocks below 4 GB are considered
 *   num_bytes      - out: size of the offlined region in bytes
 *   base_addr      - out: physical start address of the offlined region
 *
 * The caller treats any nonzero return value as failure.
 */
166 static int offline_memory(unsigned long long mem_size_bytes,
167 unsigned long long mem_min_start,
169 unsigned long long *num_bytes,
170 unsigned long long *base_addr)
173 unsigned int block_size_bytes = 0;
174 int bitmap_entries = 0;
175 unsigned char * bitmap = NULL;
// mem_size_bytes is unsigned long long, so it must be printed with %llu.
182 printf("Trying to find %lluMB (%llu bytes) of memory above %llu with limit32=%d\n", mem_size_bytes/(1024*1024), mem_size_bytes, mem_min_start, limit32);
184 /* Figure out the block size */
187 char tmp_buf[BUF_SIZE];
189 tmp_fd = open(SYS_PATH "block_size_bytes", O_RDONLY);
192 perror("Could not open block size file: " SYS_PATH "block_size_bytes");
196 if (read(tmp_fd, tmp_buf, BUF_SIZE) <= 0) {
197 perror("Could not read block size file: " SYS_PATH "block_size_bytes");
// The file content is parsed as a hexadecimal number.
203 block_size_bytes = strtoll(tmp_buf, NULL, 16);
// block_size_bytes is unsigned int, so it must be printed with %u.
205 printf("Memory block size is %uMB (%u bytes)\n", block_size_bytes / (1024 * 1024), block_size_bytes);
// Round the request up to a whole number of blocks.
210 num_blocks = mem_size_bytes / block_size_bytes;
211 if (mem_size_bytes % block_size_bytes) num_blocks++;
// Round the minimum start address up to the next block boundary.
213 mem_min_start = block_size_bytes *
214 ((mem_min_start / block_size_bytes) + (!!(mem_min_start % block_size_bytes)));
216 printf("Looking for %d blocks of memory starting at %p (block %llu) with limit32=%d\n", num_blocks, (void*)mem_min_start, mem_min_start/block_size_bytes,limit32);
219 // We now need to find <num_blocks> consecutive offlinable memory blocks
221 /* Scan the memory directories */
223 struct dirent ** namelist = NULL;
228 int first_block = mem_min_start/block_size_bytes;
229 int limit_block = 0xffffffff / block_size_bytes; // for 32 bit limiting
231 last_block = scandir(SYS_PATH, &namelist, dir_filter, dir_cmp);
// scandir() returns -1 on error and 0 when nothing matched; in both cases
// namelist[last_block - 1] below would be an out-of-bounds access.
if (last_block <= 0) {
printf("ERROR: could not find any memory blocks in " SYS_PATH "\n");
return -1;
}
// One bitmap bit per block number, up to the number of the last entry.
232 bitmap_entries = atoi(namelist[last_block - 1]->d_name + 6) + 1;
234 size = bitmap_entries / 8;
235 if (bitmap_entries % 8) size++;
237 bitmap = alloca(size);
240 printf("ERROR: could not allocate space for bitmap\n");
244 memset(bitmap, 0, size);
// NOTE(review): the loop ends once an entry numbered bitmap_entries-1 has
// been handled (j is taken from the entry name below); this relies on that
// entry being last in namelist - confirm against dir_cmp.
246 for (i = 0 ; j < bitmap_entries - 1; i++) {
247 struct dirent * tmp_dir = namelist[i];
249 char status_str[BUF_SIZE];
250 char fname[BUF_SIZE];
252 memset(status_str, 0, BUF_SIZE);
253 memset(fname, 0, BUF_SIZE);
255 snprintf(fname, BUF_SIZE, "%s%s/removable", SYS_PATH, tmp_dir->d_name);
// j is the block number encoded in the directory name ("memory<j>").
257 j = atoi(tmp_dir->d_name + 6);
263 printf("Skipping %s due to minimum start constraint\n",fname);
// limit_block is a block number, so compare it with the block number j and
// not with the directory index i; the two differ when block numbering has
// holes.
// NOTE(review): the minimum-start test just above is not visible in this
// excerpt; confirm it also compares the block number.
267 if (limit32 && j>limit_block) {
268 printf("Skipping %s due to 32 bit constraint\n",fname);
273 // The prospective block must be (a) removable, and (b) currently online
275 printf("Checking %s...", fname);
277 block_fd = open(fname, O_RDONLY);
279 if (block_fd == -1) {
280 printf("Hotpluggable memory not supported or could not determine if block is removable...\n");
284 if (read(block_fd, status_str, BUF_SIZE) <= 0) {
285 perror("Could not read block removability information\n");
289 status_str[BUF_SIZE-1]=0;
293 if (atoi(status_str) == 1) {
294 printf("Removable ");
296 printf("Not removable\n");
300 snprintf(fname, BUF_SIZE, "%s%s/state", SYS_PATH, tmp_dir->d_name);
302 block_fd = open(fname, O_RDONLY);
305 perror("Could not open block state\n");
309 if (read(block_fd, status_str, BUF_SIZE) <=0) {
310 perror("Could not read block state information\n");
314 status_str[BUF_SIZE-1]=0;
318 if (!strncasecmp(status_str,"offline",7)) {
319 printf("and Already Offline (unusable)\n");
320 } else if (!strncasecmp(status_str,"online",6)) {
321 printf("and Online (usable)\n");
// Only blocks that are removable and currently online are marked usable.
322 bitmap[major] |= (0x1 << minor);
324 printf("and in Unknown State '%s' (unusable)\n",status_str);
333 /* Scan bitmap for enough consecutive space */
335 // num_blocks: The number of blocks we need to find
336 // bitmap: bitmap of blocks (1 == allocatable)
337 // bitmap_entries: number of blocks in the system/number of bits in bitmap
338 // reg_start: The block index where our allocation will start
343 for (i = 0; i < bitmap_entries; i++) {
347 if (!(bitmap[i_major] & (0x1 << i_minor))) {
348 reg_start = i + 1; // skip the region start to next entry
355 if (run_len >= num_blocks) {
361 if (run_len < num_blocks) {
362 fprintf(stderr, "Could not find enough consecutive memory blocks... (found %d)\n", run_len);
363 // no offlining yet, so no need to unwind here
369 /* Offline memory blocks starting at reg_start */
373 for (i = 0; i < num_blocks; i++) {
374 FILE * block_file = NULL;
377 memset(fname, 0, 256);
379 snprintf(fname, 256, "%smemory%d/state", SYS_PATH, i + reg_start);
381 block_file = fopen(fname, "r+");
383 if (block_file == NULL) {
384 perror("Could not open block file");
// Re-online the blocks that were already offlined in this attempt.
385 UNWIND(reg_start, i+reg_start-1);
390 printf("Offlining block %d (%s)\n", i + reg_start, fname);
391 fprintf(block_file, "offline\n");
399 /* We asked to offline set of blocks, but Linux could have lied.
400 * To be safe, check whether blocks were offlined and start again if not
406 mem_ready = 1; // Hopefully we are ok...
409 for (i = 0; i < num_blocks; i++) {
411 char fname[BUF_SIZE];
412 char status_buf[BUF_SIZE];
415 memset(fname, 0, BUF_SIZE);
416 memset(status_buf, 0, BUF_SIZE);
418 snprintf(fname, BUF_SIZE, "%smemory%d/state", SYS_PATH, i + reg_start);
421 block_fd = open(fname, O_RDONLY);
423 if (block_fd == -1) {
424 perror("Could not open block state file");
428 if (read(block_fd, status_buf, BUF_SIZE) <= 0) {
429 perror("Could not read block state");
// Terminate inside the buffer: the last valid index is BUF_SIZE-1.
433 status_buf[BUF_SIZE-1]=0;
435 printf("Checking offlined block %d (%s)...", i + reg_start, fname);
437 int ret = strncmp(status_buf, "offline", strlen("offline"));
439 if (ret != 0) { // uh oh
441 int major = (i + reg_start) / 8;
442 int minor = (i + reg_start) % 8;
444 bitmap[major] &= ~(0x1 << minor); // mark the block as not removable in bitmap
446 mem_ready = 0; // Keep searching
448 printf("ERROR - block status is '%s'\n", status_buf);
// Put the whole run back online before searching again.
451 UNWIND(reg_start,reg_start+num_blocks-1);
457 printf("Offlined Memory OK\n");
462 /* Memory is offlined. Calculate size and phys start addr to send to Palacios */
463 *num_bytes = (unsigned long long)(num_blocks) * (unsigned long long)(block_size_bytes);
464 *base_addr = (unsigned long long)(reg_start) * (unsigned long long)(block_size_bytes);