Created
June 17, 2011 23:08
-
-
Save karthick18/1032553 to your computer and use it in GitHub Desktop.
Just an example to remind that it's futile for the child to free memory allocated by the parent; not freeing avoids taking a break-COW perf. hit. Check the header comments for more details.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Just an example to remind that it's futile for the child to free memory
 * allocated by the parent; not freeing avoids taking a break-COW perf. hit.
 * It only makes sense to free large chunk sizes in the child. Smaller chunk sizes
 * aren't really trimmed by malloc and only end up causing perf. hits with
 * break-COW pages (copy-on-write) when freed in the child.
 * A break COW is when free results in the malloc library touching the freed chunk
 * of memory, causing a write-protection page fault in the child that ends up
 * unmapping the shared page table entry and then mapping a writable copy of the
 * page into the child.
 * The net effect on RSS (resident set size) is the same as in the parent, but
 * with an additional page-fault overhead caused by freeing the chunks.
 * Hence there is no point in freeing the memory, assuming we aren't dealing with
 * large chunk sizes (the common scenario).
 * Chunk sizes preferably > 64k/128k are unmapped or trimmed by malloc, which
 * ends up reducing the RSS of the child on free. That gives a slight benefit
 * over not freeing the chunk in the child, even accounting for the break-COW
 * overhead, as shown by an example run with:
 *   ./brk_cow -s 1m
 *
 * Special thanks to Gopal.V the great, a.k.a. (@t3rmin4t0r), for pointing out
 * that it's futile to issue free in the child.
 *
 * Compile with: gcc -o brk_cow brk_cow.c -g -Wall
 * Example usage for testing: ./brk_cow -h
 */
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <limits.h>
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/wait.h>
/* Defaults substituted by main() when the matching option is absent or zero. */
#define CHUNK_SIZE_DEFAULT (32 << 10) /* 32 KiB per chunk */
#define NUM_CHILDS_DEFAULT (8)
#define NUM_ALLOCS_DEFAULT (1024)

/*
 * Aliases for the page geometry computed by get_page_shift().
 * NOTE(review): identifiers beginning with "__" are reserved for the
 * implementation; the names are kept because other functions in this file
 * refer to them.
 */
#define __PAGE_SIZE (page_size)
#define __PAGE_SHIFT (page_shift)

/* Table of chunk pointers: filled by test_malloc(), released by the children. */
static void **mem_pool;

/* System page size in bytes and its base-2 logarithm (see get_page_shift()). */
static int page_size;
static int page_shift;
/* Run-time configuration, populated from the command line by main(). */
struct memconfig
{
    int allocs;     /* number of chunks the parent allocates (-a) */
    int chunk_size; /* size of every chunk in bytes (-s) */
    int childs;     /* number of child processes to fork (-c) */
    int verbose;    /* non-zero: each child also calls malloc_stats() (-v) */
    int dont_free;  /* non-zero: children skip the free() calls (-d) */
};

/* Zero-initialised; main() replaces fields still at 0 with the defaults. */
static struct memconfig memconfig;
#ifdef USE_MMAP
/*
 * Optional build mode (-DUSE_MMAP): route every malloc/calloc/free call that
 * follows in this file through anonymous private mappings instead of the
 * C library heap.
 */
#undef malloc
#undef calloc
#undef free
#define malloc xmalloc
#define calloc xcalloc
#define free xfree

/*
 * Map `size` bytes of anonymous, private, read/write memory.
 * Returns the mapping, or NULL when size is not positive or mmap fails.
 */
static void *xmalloc(int size)
{
    void *addr;

    if (size <= 0)
        return NULL;
    addr = mmap(NULL, (size_t)size, PROT_READ | PROT_WRITE,
                MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    return addr == MAP_FAILED ? NULL : addr;
}

/*
 * calloc replacement: maps num * size bytes via xmalloc().
 * Returns NULL for non-positive arguments or when the product would not
 * fit in an int (the original multiplied unchecked).
 */
static void *xcalloc(int num, int size)
{
    if (num <= 0 || size <= 0 || num > INT_MAX / size)
        return NULL;
    return xmalloc(num * size);
}

/*
 * Unmap a block obtained from xmalloc().
 * The length passed to munmap is always memconfig.chunk_size, so this is
 * only correct for blocks of exactly that size (the mem_pool[i] chunks).
 */
static void xfree(void *addr)
{
    if (addr != NULL)
        munmap(addr, (size_t)memconfig.chunk_size);
}
#endif
#define GET_PAGE_FAULTS (1)
#define GET_RSS (2)

/*
 * Read one counter from /proc/<pid>/stat.
 *
 *   pid    process whose stat file is read
 *   flags  GET_PAGE_FAULTS -> field 10 (minflt, minor page faults)
 *          GET_RSS         -> field 24 (rss, in pages)
 *
 * Returns the requested value, or 0 when the file cannot be opened or read,
 * the line cannot be parsed, or `flags` is not one of the two selectors.
 *
 * Raw open/read is used instead of stdio, and all locals are static, so that
 * repeated calls do not themselves add page faults to the measurement.
 */
static long int get_pid_stat(int pid, int flags)
{
    static char path[80], data[512];
    static int fd;
    static ssize_t bytes;
    static long int min_flt, rss;
    static const char *fields;

    snprintf(path, sizeof(path), "/proc/%d/stat", pid);

    if ((fd = open(path, O_RDONLY)) < 0)
        return 0;

    /* Leave room for the terminator: a full-size read must not overflow. */
    bytes = read(fd, data, sizeof(data) - 1);
    close(fd);
    if (bytes <= 0)
        return 0; /* never parse stale contents from a previous call */
    data[bytes] = '\0';

    /*
     * Field 2 (comm) is "(name)" and the name may contain spaces or
     * parentheses, so start parsing after the LAST ')' in the line.
     */
    fields = strrchr(data, ')');
    if (fields == NULL)
        return 0;
    ++fields;

    switch (flags)
    {
    case GET_PAGE_FAULTS:
        /* state, ppid, pgrp, session, tty_nr, tpgid, flags, then minflt */
        if (sscanf(fields,
                   " %*c %*d %*d %*d %*d %*d %*u %ld",
                   &min_flt) != 1)
        {
            fprintf(stderr, "get_pid_stat: cannot parse minflt from %s\n", path);
            return 0;
        }
        return min_flt;

    case GET_RSS:
        /* fields 3..23 are skipped, field 24 is rss */
        if (sscanf(fields,
                   " %*c %*d %*d %*d %*d %*d %*u"
                   " %*u %*u %*u %*u %*u %*u"
                   " %*d %*d %*d %*d %*d %*d"
                   " %*u %*u %ld",
                   &rss) != 1)
        {
            fprintf(stderr, "get_pid_stat: cannot parse rss from %s\n", path);
            return 0;
        }
        return rss;

    default:
        break;
    }
    return 0;
}
static void free_chunk(int start, int count) | |
{ | |
register int i; | |
int pid = getpid(); | |
long int min_flt = 0, min_flt2 = 0, rss = 0; | |
if(start + count > memconfig.allocs) return; | |
printf("Child [%d] freeing chunk [%d - %d], %d bytes\n", pid, start, start+count, | |
count * memconfig.chunk_size); | |
int c = 0; | |
min_flt = get_pid_stat(pid, GET_PAGE_FAULTS); | |
while(c++ < 2) | |
{ | |
for(i = start; i < start+count; ++i) | |
{ | |
if(c == 2 && !memconfig.dont_free) free(mem_pool[i]); | |
} | |
min_flt = min_flt2; | |
min_flt2 = get_pid_stat(pid, GET_PAGE_FAULTS); | |
} | |
printf("Faults on free mem for child [%d] = [%ld]\n", pid, min_flt2 - min_flt); | |
rss = get_pid_stat(pid, GET_RSS); | |
printf("IN-memory consumption for child [%d] = [%ld] kb\n", pid, rss << (__PAGE_SHIFT - 10)); | |
if(memconfig.verbose) | |
malloc_stats(); | |
} | |
static void spawn_children(void) | |
{ | |
register int i; | |
int objs_per_child = memconfig.allocs/memconfig.childs; | |
int rem_objs = memconfig.allocs % objs_per_child; | |
for(i = 0; i < memconfig.childs; ++i) | |
{ | |
int pid; | |
int start = i*objs_per_child; | |
int count = objs_per_child; | |
if(i+1 == memconfig.childs) | |
{ | |
count += rem_objs; | |
} | |
switch( pid = fork()) | |
{ | |
case 0: | |
{ | |
free_chunk(start, count); | |
exit(0); | |
} | |
break; | |
case -1: | |
continue; | |
default: | |
break; | |
} | |
} | |
while(wait(NULL) != -1 ); | |
} | |
static void test_malloc(void) | |
{ | |
register int i; | |
if(!mem_pool) | |
{ | |
mem_pool = calloc(memconfig.allocs, sizeof(*mem_pool)); | |
assert(mem_pool); | |
} | |
for(i = 0; i < memconfig.allocs; ++i) | |
{ | |
mem_pool[i] = malloc(memconfig.chunk_size); | |
assert(mem_pool[i]); | |
memset(mem_pool[i], 0, memconfig.chunk_size); /*touch*/ | |
} | |
printf("RSS for parent [%d] = [%ld] Kb\n", getpid(), get_pid_stat(getpid(), GET_RSS) << (__PAGE_SHIFT - 10)); | |
spawn_children(); | |
} | |
static void get_page_shift(void) | |
{ | |
int i; | |
page_size = sysconf(_SC_PAGESIZE); | |
for(i = 0; (1 << i) < page_size; ++i); | |
page_shift = i; | |
} | |
static int get_chunk_size(const char *s) | |
{ | |
char *e = NULL; | |
int chunk_size = (int)strtol(s, &e, 10); | |
int shift = 0; | |
switch(tolower(*e)) | |
{ | |
case 'k': | |
shift = 10; | |
break; | |
case 'm': | |
shift = 20; | |
break; | |
default: | |
break; | |
} | |
chunk_size <<= shift; | |
chunk_size += (page_size-1); | |
chunk_size &= ~(page_size-1); | |
return chunk_size; | |
} | |
/* Program name shown in the usage text; set by main() from argv[0]. */
static char *prog;

/* Print the option summary to stderr and terminate with exit status 1. */
static void usage(void)
{
    static const char option_help[] =
        "-h\t\t usage\n"
        "-a\t\t number of allocations\n"
        "-s\t\t chunk size to allocate (examples: 32k, 512k, 1m)\n"
        "-c\t\t children to spawn for freeing chunks\n"
        "-v\t\t verbose mode that dumps malloc_stats\n"
        "-d\t\t dont free the chunks in the child\n";

    fprintf(stderr, "%s [OPTION]\n%s", prog, option_help);
    exit(1);
}
int main(int argc, char **argv) | |
{ | |
int c; | |
opterr = 0; | |
get_page_shift(); | |
if( (prog = strrchr(argv[0], '/') ) ) | |
++prog; | |
else prog = argv[0]; | |
while ( ( c = getopt(argc, argv, "c:a:s:vdh") ) != EOF ) | |
switch(c) | |
{ | |
case 'c': | |
memconfig.childs = atoi(optarg); | |
break; | |
case 's': | |
memconfig.chunk_size = get_chunk_size(optarg); | |
break; | |
case 'a': | |
memconfig.allocs = atoi(optarg); | |
break; | |
case 'v': | |
memconfig.verbose = 1; | |
break; | |
case 'd': | |
memconfig.dont_free = 1; | |
break; | |
case '?': | |
case 'h': | |
default: | |
usage(); | |
} | |
if(optind != argc) usage(); | |
if(!memconfig.allocs) | |
memconfig.allocs = NUM_ALLOCS_DEFAULT; | |
if(!memconfig.chunk_size) | |
memconfig.chunk_size = CHUNK_SIZE_DEFAULT; | |
if(!memconfig.childs) | |
memconfig.childs = NUM_CHILDS_DEFAULT; | |
printf("Page size [%d], childs [%d], chunk size [%d], allocs [%d], memory allocated [%d] bytes\n", | |
page_size, memconfig.childs, memconfig.chunk_size, memconfig.allocs, memconfig.allocs * memconfig.chunk_size); | |
test_malloc(); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment