From ilya Thu Mar 12 22:09:46 1998 Subject: Re: "newer" malloc [PATCH 5.004_62] To: lukka@fas.harvard.edu (Tuomas Lukka) Date: Thu, 12 Mar 1998 22:09:46 -0500 (EST) Cc: koenig@kulturbox.de (Andreas J. Koenig), perl5-porters@perl.org (Mailing list Perl5) In-Reply-To: <Pine.OSF.3.96.980312091901.16862B-100000@login5.fas.harvard.edu> from "Tuomas Lukka" at Mar 12, 1998 09:19:26 AM X-Mailer: ELM [version 2.5 PL0b1] Content-Length: 46996 Status: OR Tuomas Lukka writes: > > On 12 Mar 1998, Andreas J. Koenig wrote: > > > >>>>> On Wed, 11 Mar 1998 23:06:13 -0500 (EST), Ilya Zakharevich <ilya@math.ohio-state.edu> said: > > > > ilya> If there is any interest in merging it, I may try to build _62 and > > ilya> merge with the changes 56--->62, > > > > Yes, I'm in favor of malloc patches. Personally I'm quite unable to > > keep track of your patches and only use them occasionally. But I > > believe, I could really benefit from them. > > Another vote in favor... OK, OK, the benchmark results and the patch is below. Note that the slightly slower find-bucket is competing with significantly-improved sbrk()-algorithm (and lower memory footprint?) in the examples below. Otherwise I cannot explain why the results are varying so widely... > Of course, I'm also dreaming of lvalue subs... Well, if malloc patch goes in (hint hint), and if subroutines attributes are allowed now (are they?) I may even do it some time soon... The older way is frowned upon now, so without working sub attributes I see no way to redo this patch... Ilya A) perl-5.00462 path = ../perl5.004_62/perl cc = cc optimize = -O ccflags = -I/opt/local/include -I/opt/gnu/include usemymalloc = y B) perl-5.00462 path = ../perl5.004_62/perl-mymalloc cc = cc optimize = -O ccflags = -I/opt/local/include -I/opt/gnu/include usemymalloc = y A B ---- ---- arith/mixed 100 121 arith/trig 100 99 array/copy 100 98 array/foreach 100 96 array/index 100 87 array/pop 100 98 array/shift 100 100 call/0arg-noamp 100 93 call/0arg 100 97 call/1arg 100 104 call/2arg 100 103 call/9arg 100 101 call/empty 100 88 call/fib 100 103 call/method 100 101 call/wantarray 100 103 hash/copy 100 89 hash/each 100 89 hash/foreach-sort 100 82 hash/foreach 100 101 hash/get 100 111 hash/set 100 103 loop/for-c 100 103 loop/for-interval 100 106 loop/getline 100 96 loop/while-my 100 113 loop/while 100 107 re/const 100 102 re/w 100 96 startup/fewmod 100 108 startup/lotsofsub 100 111 startup/noprog 100 101 string/base64 100 97 string/htmlparser 100 98 string/index-const 100 98 string/index-var 100 92 string/ipol 100 86 AVERAGE 100 99 --- ./malloc.c Thu Mar 12 21:00:44 1998 +++ ./malloc.c-mymalloc Thu Mar 12 21:00:03 1998 @@ -2,14 +2,52 @@ * */ -#if defined(PERL_CORE) && !defined(DEBUGGING_MSTATS) -# define DEBUGGING_MSTATS +#ifndef NO_FANCY_MALLOC +# ifndef SMALL_BUCKET_VIA_TABLE +# define SMALL_BUCKET_VIA_TABLE +# endif +# ifndef BUCKETS_ROOT2 +# define BUCKETS_ROOT2 +# endif +# ifndef IGNORE_SMALL_BAD_FREE +# define IGNORE_SMALL_BAD_FREE +# endif #endif +#ifndef PLAIN_MALLOC /* Bulk enable features */ +# ifndef PACK_MALLOC +# define PACK_MALLOC +# endif +# ifndef TWO_POT_OPTIMIZE +# define TWO_POT_OPTIMIZE +# endif +# if defined(PERL_CORE) && !defined(EMERGENCY_SBRK) +# define EMERGENCY_SBRK +# endif +# if defined(PERL_CORE) && !defined(DEBUGGING_MSTATS) +# define DEBUGGING_MSTATS +# endif +#endif + +#define MIN_BUC_POW2 (sizeof(void*) > 4 ? 3 : 2) /* Allow for 4-byte arena. */ +#define MIN_BUCKET (MIN_BUC_POW2 * BUCKETS_PER_POW2) + +#if !(defined(I286) || defined(atarist)) + /* take 2k unless the block is bigger than that */ +# define LOG_OF_MIN_ARENA 11 +#else + /* take 16k unless the block is bigger than that + (80286s like large segments!), probably good on the atari too */ +# define LOG_OF_MIN_ARENA 14 +#endif + #ifndef lint # if defined(DEBUGGING) && !defined(NO_RCHECK) # define RCHECK # endif +# if defined(RCHECK) && defined(IGNORE_SMALL_BAD_FREE) +# undef IGNORE_SMALL_BAD_FREE +# endif /* * malloc.c (Caltech) 2/21/82 * Chris Kingsley, kingsley@cit-20. @@ -26,23 +64,50 @@ #include "EXTERN.h" #include "perl.h" +#ifndef PERL_CORE +# ifndef croak /* make depend */ +# define croak(mess) fprintf(stderr,mess); exit(1); +# endif +# ifdef DEBUG_m +# undef DEBUG_m +# endif +# define DEBUG_m(a) +# ifdef DEBUGGING +# undef DEBUGGING +# endif +#endif + +#ifndef MUTEX_LOCK +# define MUTEX_LOCK(l) +#endif + +#ifndef MUTEX_UNLOCK +# define MUTEX_UNLOCK(l) +#endif + #ifdef DEBUGGING -#undef DEBUG_m -#define DEBUG_m(a) if (debug & 128) a +# undef DEBUG_m +# define DEBUG_m(a) if (debug & 128) a #endif /* I don't much care whether these are defined in sys/types.h--LAW */ #define u_char unsigned char #define u_int unsigned int + +#ifdef HAS_QUAD +# define u_bigint UV /* Needs to eat *void. */ +#else /* needed? */ +# define u_bigint unsigned long /* Needs to eat *void. */ +#endif + #define u_short unsigned short /* 286 and atarist like big chunks, which gives too much overhead. */ #if (defined(RCHECK) || defined(I286) || defined(atarist)) && defined(PACK_MALLOC) -#undef PACK_MALLOC +# undef PACK_MALLOC #endif - /* * The description below is applicable if PACK_MALLOC is not defined. * @@ -81,72 +146,205 @@ static int findbucket _((union overhead #define MAGIC 0xff /* magic # on accounting info */ #define RMAGIC 0x55555555 /* magic # on range info */ +#define RMAGIC_C 0x55 /* magic # on range info */ + #ifdef RCHECK # define RSLOP sizeof (u_int) # ifdef TWO_POT_OPTIMIZE -# define MAX_SHORT_BUCKET 12 +# define MAX_SHORT_BUCKET (12 * BUCKETS_PER_POW2) # else -# define MAX_SHORT_BUCKET 13 +# define MAX_SHORT_BUCKET (13 * BUCKETS_PER_POW2) # endif #else # define RSLOP 0 #endif +#if !defined(PACK_MALLOC) && defined(BUCKETS_ROOT2) +# undef BUCKETS_ROOT2 +#endif + +#ifdef BUCKETS_ROOT2 +# define BUCKET_TABLE_SHIFT 2 +# define BUCKET_POW2_SHIFT 1 +# define BUCKETS_PER_POW2 2 +#else +# define BUCKET_TABLE_SHIFT MIN_BUC_POW2 +# define BUCKET_POW2_SHIFT 0 +# define BUCKETS_PER_POW2 1 +#endif + +#ifdef BUCKETS_ROOT2 +# define MAX_BUCKET_BY_TABLE 13 +static u_short buck_size[MAX_BUCKET_BY_TABLE + 1] = + { + 0, 0, 0, 0, 4, 4, 8, 12, 16, 24, 32, 48, 64, 80, + }; +# define BUCKET_SIZE(i) ((i) % 2 ? buck_size[i] : (1 << ((i) >> BUCKET_POW2_SHIFT))) +# define BUCKET_SIZE_REAL(i) ((i) <= MAX_BUCKET_BY_TABLE \ + ? buck_size[i] \ + : ((1 << ((i) >> BUCKET_POW2_SHIFT)) \ + - MEM_OVERHEAD(i) \ + + POW2_OPTIMIZE_SURPLUS(i))) +#else +# define BUCKET_SIZE(i) (1 << ((i) >> BUCKET_POW2_SHIFT)) +# define BUCKET_SIZE_REAL(i) (BUCKET_SIZE(i) - MEM_OVERHEAD(i) + POW2_OPTIMIZE_SURPLUS(i)) +#endif + + #ifdef PACK_MALLOC -/* - * In this case it is assumed that if we do sbrk() in 2K units, we - * will get 2K aligned blocks. The bucket number of the given subblock is - * on the boundary of 2K block which contains the subblock. - * Several following bytes contain the magic numbers for the subblocks - * in the block. +/* In this case it is assumed that if we do sbrk() in 2K units, we + * will get 2K aligned arenas (at least after some initial + * alignment). The bucket number of the given subblock is on the start + * of 2K arena which contains the subblock. Several following bytes + * contain the magic numbers for the subblocks in the block. * * Sizes of chunks are powers of 2 for chunks in buckets <= * MAX_PACKED, after this they are (2^n - sizeof(union overhead)) (to * get alignment right). * - * We suppose that starts of all the chunks in a 2K block are in - * different 2^n-byte-long chunks. If the top of the last chunk is - * aligned on a boundary of 2K block, this means that - * sizeof(union overhead)*"number of chunks" < 2^n, or - * sizeof(union overhead)*2K < 4^n, or n > 6 + log2(sizeof()/2)/2, if a - * chunk of size 2^n - overhead is used. Since this rules out n = 7 - * for 8 byte alignment, we specialcase allocation of the first of 16 - * 128-byte-long chunks. + * Consider an arena for 2^n with n>MAX_PACKED. We suppose that + * starts of all the chunks in a 2K arena are in different + * 2^n-byte-long chunks. If the top of the last chunk is aligned on a + * boundary of 2K block, this means that sizeof(union + * overhead)*"number of chunks" < 2^n, or sizeof(union overhead)*2K < + * 4^n, or n > 6 + log2(sizeof()/2)/2, since a chunk of size 2^n - + * overhead is used. Since this rules out n = 7 for 8 byte alignment, + * we specialcase allocation of the first of 16 128-byte-long chunks. * * Note that with the above assumption we automatically have enough * place for MAGIC at the start of 2K block. Note also that we - * overlay union overhead over the chunk, thus the start of the chunk - * is immediately overwritten after freeing. - */ -# define MAX_PACKED 6 -# define MAX_2_POT_ALGO ((1<<(MAX_PACKED + 1)) - M_OVERHEAD) -# define TWOK_MASK ((1<<11) - 1) -# define TWOK_MASKED(x) ((u_int)(x) & ~TWOK_MASK) -# define TWOK_SHIFT(x) ((u_int)(x) & TWOK_MASK) + * overlay union overhead over the chunk, thus the start of small chunks + * is immediately overwritten after freeing. */ +# define MAX_PACKED_POW2 6 +# define MAX_PACKED (MAX_PACKED_POW2 * BUCKETS_PER_POW2 + BUCKET_POW2_SHIFT) +# define MAX_POW2_ALGO ((1<<(MAX_PACKED_POW2 + 1)) - M_OVERHEAD) +# define TWOK_MASK ((1<<LOG_OF_MIN_ARENA) - 1) +# define TWOK_MASKED(x) ((u_bigint)(x) & ~TWOK_MASK) +# define TWOK_SHIFT(x) ((u_bigint)(x) & TWOK_MASK) # define OV_INDEXp(block) ((u_char*)(TWOK_MASKED(block))) # define OV_INDEX(block) (*OV_INDEXp(block)) # define OV_MAGIC(block,bucket) (*(OV_INDEXp(block) + \ - (TWOK_SHIFT(block)>>(bucket + 3)) + \ - (bucket > MAX_NONSHIFT ? 1 : 0))) + (TWOK_SHIFT(block)>> \ + (bucket>>BUCKET_POW2_SHIFT)) + \ + (bucket >= MIN_NEEDS_SHIFT ? 1 : 0))) + /* A bucket can have a shift smaller than it size, we need to + shift its magic number so it will not overwrite index: */ +# ifdef BUCKETS_ROOT2 +# define MIN_NEEDS_SHIFT (7*BUCKETS_PER_POW2 - 1) /* Shift 80 greater than chunk 64. */ +# else +# define MIN_NEEDS_SHIFT (7*BUCKETS_PER_POW2) /* Shift 128 greater than chunk 32. */ +# endif # define CHUNK_SHIFT 0 -static u_char n_blks[11 - 3] = {224, 120, 62, 31, 16, 8, 4, 2}; -static u_short blk_shift[11 - 3] = {256, 128, 64, 32, - 16*sizeof(union overhead), - 8*sizeof(union overhead), - 4*sizeof(union overhead), - 2*sizeof(union overhead), -# define MAX_NONSHIFT 2 /* Shift 64 greater than chunk 32. */ -}; +/* Number of active buckets of given ordinal. */ +#ifdef IGNORE_SMALL_BAD_FREE +#define FIRST_BUCKET_WITH_CHECK (6 * BUCKETS_PER_POW2) /* 64 */ +# define N_BLKS(bucket) ( (bucket) < FIRST_BUCKET_WITH_CHECK \ + ? ((1<<LOG_OF_MIN_ARENA) - 1)/BUCKET_SIZE(bucket) \ + : n_blks[bucket] ) +#else +# define N_BLKS(bucket) n_blks[bucket] +#endif + +static u_short n_blks[LOG_OF_MIN_ARENA * BUCKETS_PER_POW2] = + { +# if BUCKETS_PER_POW2==1 + 0, 0, + (MIN_BUC_POW2==2 ? 384 : 0), + 224, 120, 62, 31, 16, 8, 4, 2 +# else + 0, 0, 0, 0, + (MIN_BUC_POW2==2 ? 384 : 0), (MIN_BUC_POW2==2 ? 384 : 0), /* 4, 4 */ + 224, 149, 120, 80, 62, 41, 31, 25, 16, 16, 8, 8, 4, 4, 2, 2 +# endif + }; + +/* Shift of the first bucket with the given ordinal inside 2K chunk. */ +#ifdef IGNORE_SMALL_BAD_FREE +# define BLK_SHIFT(bucket) ( (bucket) < FIRST_BUCKET_WITH_CHECK \ + ? ((1<<LOG_OF_MIN_ARENA) \ + - BUCKET_SIZE(bucket) * N_BLKS(bucket)) \ + : blk_shift[bucket]) +#else +# define BLK_SHIFT(bucket) blk_shift[bucket] +#endif + +static u_short blk_shift[LOG_OF_MIN_ARENA * BUCKETS_PER_POW2] = + { +# if BUCKETS_PER_POW2==1 + 0, 0, + (MIN_BUC_POW2==2 ? 512 : 0), + 256, 128, 64, 64, /* 8 to 64 */ + 16*sizeof(union overhead), + 8*sizeof(union overhead), + 4*sizeof(union overhead), + 2*sizeof(union overhead), +# else + 0, 0, 0, 0, + (MIN_BUC_POW2==2 ? 512 : 0), (MIN_BUC_POW2==2 ? 512 : 0), + 256, 260, 128, 128, 64, 80, 64, 48, /* 8 to 96 */ + 16*sizeof(union overhead), 16*sizeof(union overhead), + 8*sizeof(union overhead), 8*sizeof(union overhead), + 4*sizeof(union overhead), 4*sizeof(union overhead), + 2*sizeof(union overhead), 2*sizeof(union overhead), +# endif + }; #else /* !PACK_MALLOC */ # define OV_MAGIC(block,bucket) (block)->ov_magic # define OV_INDEX(block) (block)->ov_index # define CHUNK_SHIFT 1 +# define MAX_PACKED -1 #endif /* !PACK_MALLOC */ -# define M_OVERHEAD (sizeof(union overhead) + RSLOP) +#define M_OVERHEAD (sizeof(union overhead) + RSLOP) + +#ifdef PACK_MALLOC +# define MEM_OVERHEAD(bucket) \ + (bucket <= MAX_PACKED ? 0 : M_OVERHEAD) +# ifdef SMALL_BUCKET_VIA_TABLE +# define START_SHIFTS_BUCKET ((MAX_PACKED_POW2 + 1) * BUCKETS_PER_POW2) +# define START_SHIFT MAX_PACKED_POW2 +# ifdef BUCKETS_ROOT2 /* Chunks of size 3*2^n. */ +# define SIZE_TABLE_MAX 80 +# else +# define SIZE_TABLE_MAX 64 +# endif +static char bucket_of[] = + { +# ifdef BUCKETS_ROOT2 /* Chunks of size 3*2^n. */ + /* 0 to 15 in 4-byte increments. */ + (sizeof(void*) > 4 ? 6 : 5), /* 4/8, 5-th bucket for better reports */ + 6, /* 8 */ + 7, 8, /* 12, 16 */ + 9, 9, 10, 10, /* 24, 32 */ + 11, 11, 11, 11, /* 48 */ + 12, 12, 12, 12, /* 64 */ + 13, 13, 13, 13, /* 80 */ + 13, 13, 13, 13 /* 80 */ +# else /* !BUCKETS_ROOT2 */ + /* 0 to 15 in 4-byte increments. */ + (sizeof(void*) > 4 ? 3 : 2), + 3, + 4, 4, + 5, 5, 5, 5, + 6, 6, 6, 6, + 6, 6, 6, 6 +# endif /* !BUCKETS_ROOT2 */ + }; +# else /* !SMALL_BUCKET_VIA_TABLE */ +# define START_SHIFTS_BUCKET MIN_BUCKET +# define START_SHIFT (MIN_BUC_POW2 - 1) +# endif /* !SMALL_BUCKET_VIA_TABLE */ +#else /* !PACK_MALLOC */ +# define MEM_OVERHEAD(bucket) M_OVERHEAD +# ifdef SMALL_BUCKET_VIA_TABLE +# undef SMALL_BUCKET_VIA_TABLE +# endif +# define START_SHIFTS_BUCKET MIN_BUCKET +# define START_SHIFT (MIN_BUC_POW2 - 1) +#endif /* !PACK_MALLOC */ /* * Big allocations are often of the size 2^n bytes. To make them a @@ -158,23 +356,63 @@ static u_short blk_shift[11 - 3] = {256, # ifndef PERL_PAGESIZE # define PERL_PAGESIZE 4096 # endif -# ifndef FIRST_BIG_TWO_POT -# define FIRST_BIG_TWO_POT 14 /* 16K */ +# ifndef FIRST_BIG_POW2 +# define FIRST_BIG_POW2 15 /* 32K, 16K is used too often. */ # endif -# define FIRST_BIG_BLOCK (1<<FIRST_BIG_TWO_POT) /* 16K */ +# define FIRST_BIG_BLOCK (1<<FIRST_BIG_POW2) /* If this value or more, check against bigger blocks. */ # define FIRST_BIG_BOUND (FIRST_BIG_BLOCK - M_OVERHEAD) /* If less than this value, goes into 2^n-overhead-block. */ # define LAST_SMALL_BOUND ((FIRST_BIG_BLOCK>>1) - M_OVERHEAD) -#endif /* TWO_POT_OPTIMIZE */ +# define POW2_OPTIMIZE_ADJUST(nbytes) \ + ((nbytes >= FIRST_BIG_BOUND) ? nbytes -= PERL_PAGESIZE : 0) +# define POW2_OPTIMIZE_SURPLUS(bucket) \ + ((bucket >= FIRST_BIG_POW2 * BUCKETS_PER_POW2) ? PERL_PAGESIZE : 0) + +#else /* !TWO_POT_OPTIMIZE */ +# define POW2_OPTIMIZE_ADJUST(nbytes) +# define POW2_OPTIMIZE_SURPLUS(bucket) 0 +#endif /* !TWO_POT_OPTIMIZE */ + +#if defined(HAS_64K_LIMIT) && defined(PERL_CORE) +# define BARK_64K_LIMIT(what,nbytes,size) \ + if (nbytes > 0xffff) { \ + PerlIO_printf(PerlIO_stderr(), \ + "%s too large: %lx\n", what, size); \ + my_exit(1); \ + } +#else /* !HAS_64K_LIMIT || !PERL_CORE */ +# define BARK_64K_LIMIT(what,nbytes,size) +#endif /* !HAS_64K_LIMIT || !PERL_CORE */ + +#ifndef MIN_SBRK +# define MIN_SBRK 2048 +#endif -#if defined(PERL_EMERGENCY_SBRK) && defined(PERL_CORE) +#ifndef FIRST_SBRK +# define FIRST_SBRK (32*1024) +#endif + +/* Minimal sbrk in percents of what is already alloced. */ +#ifndef MIN_SBRK_FRAC +# define MIN_SBRK_FRAC 3 +#endif + +#ifndef SBRK_ALLOW_FAILURES +# define SBRK_ALLOW_FAILURES 3 +#endif -#ifndef BIG_SIZE -# define BIG_SIZE (1<<16) /* 64K */ +#ifndef SBRK_FAILURE_PRICE +# define SBRK_FAILURE_PRICE 50 #endif +#if defined(PERL_EMERGENCY_SBRK) && defined(PERL_CORE) + +# ifndef BIG_SIZE +# define BIG_SIZE (1<<16) /* 64K */ +# endif + static char *emergency_buffer; static MEM_SIZE emergency_buffer_size; @@ -198,13 +436,13 @@ emergency_sbrk(size) if (!gvp) gvp = (GV**)hv_fetch(defstash, "\015", 1, 0); if (!gvp || !(sv = GvSV(*gvp)) || !SvPOK(sv) - || (SvLEN(sv) < (1<<11) - M_OVERHEAD)) + || (SvLEN(sv) < (1<<LOG_OF_MIN_ARENA) - M_OVERHEAD)) return (char *)-1; /* Now die die die... */ /* Got it, now detach SvPV: */ pv = SvPV(sv, na); /* Check alignment: */ - if (((u_int)(pv - M_OVERHEAD)) & ((1<<11) - 1)) { + if (((u_bigint)(pv - M_OVERHEAD)) & ((1<<LOG_OF_MIN_ARENA) - 1)) { PerlIO_puts(PerlIO_stderr(),"Bad alignment of $^M!\n"); return (char *)-1; /* die die die */ } @@ -223,16 +461,16 @@ emergency_sbrk(size) return (char *)-1; /* poor guy... */ } -#else /* !(defined(TWO_POT_OPTIMIZE) && defined(PERL_CORE)) */ +#else /* !(defined(PERL_EMERGENCY_SBRK) && defined(PERL_CORE)) */ # define emergency_sbrk(size) -1 -#endif /* !(defined(TWO_POT_OPTIMIZE) && defined(PERL_CORE)) */ +#endif /* !(defined(PERL_EMERGENCY_SBRK) && defined(PERL_CORE)) */ /* - * nextf[i] is the pointer to the next free block of size 2^(i+3). The + * nextf[i] is the pointer to the next free block of size 2^i. The * smallest allocatable block is 8 bytes. The overhead information * precedes the data area returned to the user. */ -#define NBUCKETS 30 +#define NBUCKETS (32*BUCKETS_PER_POW2 + 1) static union overhead *nextf[NBUCKETS]; #ifdef USE_PERL_SBRK @@ -254,11 +492,12 @@ extern Malloc_t sbrk(int); * for a given block size. */ static u_int nmalloc[NBUCKETS]; -static u_int goodsbrk; static u_int sbrk_slack; static u_int start_slack; #endif +static u_int goodsbrk; + #ifdef DEBUGGING #define ASSERT(p) if (!(p)) botch(STRINGIFY(p)); else static void @@ -275,26 +514,18 @@ Malloc_t malloc(register size_t nbytes) { register union overhead *p; - register int bucket = 0; + register int bucket; register MEM_SIZE shiftr; #if defined(DEBUGGING) || defined(RCHECK) MEM_SIZE size = nbytes; #endif -#ifdef PERL_CORE -#ifdef HAS_64K_LIMIT - if (nbytes > 0xffff) { - PerlIO_printf(PerlIO_stderr(), - "Allocation too large: %lx\n", (long)nbytes); - my_exit(1); - } -#endif /* HAS_64K_LIMIT */ + BARK_64K_LIMIT("Allocation",nbytes,nbytes); #ifdef DEBUGGING if ((long)nbytes < 0) croak("panic: malloc"); #endif -#endif /* PERL_CORE */ MUTEX_LOCK(&malloc_mutex); /* @@ -304,29 +535,37 @@ malloc(register size_t nbytes) * space used per block for accounting. */ #ifdef PACK_MALLOC +# ifdef SMALL_BUCKET_VIA_TABLE + if (nbytes == 0) + bucket = MIN_BUCKET; + else if (nbytes <= SIZE_TABLE_MAX) { + bucket = bucket_of[(nbytes - 1) >> BUCKET_TABLE_SHIFT]; + } else +# else if (nbytes == 0) nbytes = 1; - else if (nbytes > MAX_2_POT_ALGO) -#endif - { -#ifdef TWO_POT_OPTIMIZE - if (nbytes >= FIRST_BIG_BOUND) - nbytes -= PERL_PAGESIZE; + if (nbytes <= MAX_POW2_ALGO) goto do_shifts; + else +# endif #endif - nbytes += M_OVERHEAD; - nbytes = (nbytes + 3) &~ 3; + { + POW2_OPTIMIZE_ADJUST(nbytes); + nbytes += M_OVERHEAD; + nbytes = (nbytes + 3) &~ 3; + do_shifts: + shiftr = (nbytes - 1) >> START_SHIFT; + bucket = START_SHIFTS_BUCKET; + /* apart from this loop, this is O(1) */ + while (shiftr >>= 1) + bucket += BUCKETS_PER_POW2; } - shiftr = (nbytes - 1) >> 2; - /* apart from this loop, this is O(1) */ - while (shiftr >>= 1) - bucket++; /* * If nothing in hash bucket right now, * request more memory from the system. */ if (nextf[bucket] == NULL) morecore(bucket); - if ((p = (union overhead *)nextf[bucket]) == NULL) { + if ((p = nextf[bucket]) == NULL) { MUTEX_UNLOCK(&malloc_mutex); #ifdef PERL_CORE if (!nomemok) { @@ -338,10 +577,10 @@ malloc(register size_t nbytes) #endif } -#ifdef PERL_CORE - DEBUG_m(PerlIO_printf(Perl_debug_log, "0x%lx: (%05lu) malloc %ld bytes\n", - (unsigned long)(p+1),(unsigned long)(an++),(long)size)); -#endif /* PERL_CORE */ + DEBUG_m(PerlIO_printf(Perl_debug_log, + "0x%lx: (%05lu) malloc %ld bytes\n", + (unsigned long)(p+1), (unsigned long)(an++), + (long)size)); /* remove from linked list */ #ifdef RCHECK @@ -350,7 +589,10 @@ malloc(register size_t nbytes) (unsigned long)*((int*)p),(unsigned long)p); #endif nextf[bucket] = p->ov_next; - OV_MAGIC(p, bucket) = MAGIC; +#ifdef IGNORE_SMALL_BAD_FREE + if (bucket >= FIRST_BUCKET_WITH_CHECK) +#endif + OV_MAGIC(p, bucket) = MAGIC; #ifndef PACK_MALLOC OV_INDEX(p) = bucket; #endif @@ -359,16 +601,124 @@ malloc(register size_t nbytes) * Record allocated size of block and * bound space with magic numbers. */ - nbytes = (size + M_OVERHEAD + 3) &~ 3; - if (nbytes <= 0x10000) - p->ov_size = nbytes - 1; p->ov_rmagic = RMAGIC; - *((u_int *)((caddr_t)p + nbytes - RSLOP)) = RMAGIC; + if (bucket <= MAX_SHORT_BUCKET) { + int i; + + nbytes = size + M_OVERHEAD; + p->ov_size = nbytes - 1; + if ((i = nbytes & 3)) { + i = 4 - i; + while (i--) + *((char *)((caddr_t)p + nbytes - RSLOP + i)) = RMAGIC_C; + } + nbytes = (nbytes + 3) &~ 3; + *((u_int *)((caddr_t)p + nbytes - RSLOP)) = RMAGIC; + } #endif MUTEX_UNLOCK(&malloc_mutex); return ((Malloc_t)(p + CHUNK_SHIFT)); } +static char *last_sbrk_top; +static char *last_op; /* This arena can be easily extended. */ +static int sbrked_remains; +static int sbrk_good = SBRK_ALLOW_FAILURES * SBRK_FAILURE_PRICE; + +#ifdef DEBUGGING_MSTATS +static int sbrks; +#endif + +struct chunk_chain_s { + struct chunk_chain_s *next; + MEM_SIZE size; +}; +static struct chunk_chain_s *chunk_chain; +static int n_chunks; +static char max_bucket; + +/* Cutoff a piece of one of the chunks in the chain. Prefer smaller chunk. */ +static void * +get_from_chain(MEM_SIZE size) +{ + struct chunk_chain_s *elt = chunk_chain, **oldp = &chunk_chain; + struct chunk_chain_s **oldgoodp = NULL; + long min_remain = LONG_MAX; + + while (elt) { + if (elt->size >= size) { + long remains = elt->size - size; + if (remains >= 0 && remains < min_remain) { + oldgoodp = oldp; + min_remain = remains; + } + if (remains == 0) { + break; + } + } + oldp = &( elt->next ); + elt = elt->next; + } + if (!oldgoodp) return NULL; + if (min_remain) { + void *ret = *oldgoodp; + struct chunk_chain_s *next = (*oldgoodp)->next; + + *oldgoodp = (struct chunk_chain_s *)((char*)ret + size); + (*oldgoodp)->size = min_remain; + (*oldgoodp)->next = next; + return ret; + } else { + void *ret = *oldgoodp; + *oldgoodp = (*oldgoodp)->next; + n_chunks--; + return ret; + } +} + +static void +add_to_chain(void *p, MEM_SIZE size, MEM_SIZE chip) +{ + struct chunk_chain_s *next = chunk_chain; + char *cp = (char*)p; + + cp += chip; + chunk_chain = (struct chunk_chain_s *)cp; + chunk_chain->size = size - chip; + chunk_chain->next = next; + n_chunks++; +} + +static void * +get_from_bigger_buckets(int bucket, MEM_SIZE size) +{ + int price = 1; + static int bucketprice[NBUCKETS]; + while (bucket <= max_bucket) { + /* We postpone stealing from bigger buckets until we want it + often enough. */ + if (nextf[bucket] && bucketprice[bucket]++ >= price) { + /* Steal it! */ + void *ret = (void*)(nextf[bucket] - 1 + CHUNK_SHIFT); + bucketprice[bucket] = 0; + if (((char*)nextf[bucket]) - M_OVERHEAD == last_op) { + last_op = NULL; /* Disable optimization */ + } + nextf[bucket] = nextf[bucket]->ov_next; +#ifdef DEBUGGING_MSTATS + nmalloc[bucket]--; + start_slack -= M_OVERHEAD; +#endif + add_to_chain(ret, (BUCKET_SIZE(bucket) + + POW2_OPTIMIZE_SURPLUS(bucket)), + size); + return ret; + } + bucket++; + } + return NULL; +} + /* * Allocate more memory to the indicated bucket. */ @@ -383,90 +733,206 @@ morecore(register int bucket) if (nextf[bucket]) return; - if (bucket == (sizeof(MEM_SIZE)*8 - 3)) { + if (bucket == sizeof(MEM_SIZE)*8*BUCKETS_PER_POW2) { croak("Allocation too large"); } - /* - * Insure memory is allocated - * on a page boundary. Should - * make getpageize call? - */ -#ifndef atarist /* on the atari we dont have to worry about this */ - ovp = (union overhead *)sbrk(0); -# ifndef I286 - if ((UV)ovp & (0x7FF >> CHUNK_SHIFT)) { - slack = (0x800 >> CHUNK_SHIFT) - ((UV)ovp & (0x7FF >> CHUNK_SHIFT)); - (void)sbrk(slack); -# if defined(DEBUGGING_MSTATS) - sbrk_slack += slack; -# endif + + if (bucket > max_bucket) { + max_bucket = bucket; } -# else - /* The sbrk(0) call on the I286 always returns the next segment */ + rnu = ( (bucket <= (LOG_OF_MIN_ARENA << BUCKET_POW2_SHIFT)) + ? LOG_OF_MIN_ARENA + : (bucket >> BUCKET_POW2_SHIFT) ); + /* This may be overwritten later: */ + nblks = 1 << (rnu - (bucket >> BUCKET_POW2_SHIFT)); /* how many blocks to get */ + needed = ((MEM_SIZE)1 << rnu) + POW2_OPTIMIZE_SURPLUS(bucket); + if (nextf[rnu << BUCKET_POW2_SHIFT]) { /* 2048b bucket. */ + ovp = nextf[rnu << BUCKET_POW2_SHIFT] - 1 + CHUNK_SHIFT; + nextf[rnu << BUCKET_POW2_SHIFT] + = nextf[rnu << BUCKET_POW2_SHIFT]->ov_next; +#ifdef DEBUGGING_MSTATS + nmalloc[rnu << BUCKET_POW2_SHIFT]--; + start_slack -= M_OVERHEAD; +#endif + DEBUG_m(PerlIO_printf(Perl_debug_log, + "stealing %ld bytes from %ld arena\n", + (long) needed, (long) rnu << BUCKET_POW2_SHIFT)); + } else if (chunk_chain + && (ovp = (union overhead*) get_from_chain(needed))) { + DEBUG_m(PerlIO_printf(Perl_debug_log, + "stealing %ld bytes from chain\n", + (long) needed)); + } else if (ovp = (union overhead*) + get_from_bigger_buckets((rnu << BUCKET_POW2_SHIFT) + 1, + needed)) { + DEBUG_m(PerlIO_printf(Perl_debug_log, + "stealing %ld bytes from bigger buckets\n", + (long) needed)); + } else if (needed <= sbrked_remains) { + ovp = (union overhead *)(last_sbrk_top - sbrked_remains); + sbrked_remains -= needed; + last_op = (char*)ovp; + } else { + /* Need to do (possibly expensive) system call. Try to + optimize it for rare calling. */ + MEM_SIZE require = needed - sbrked_remains; + char *cp; + + if (sbrk_good > 0) { + if (!last_sbrk_top && require < FIRST_SBRK) + require = FIRST_SBRK; + else if (require < MIN_SBRK) require = MIN_SBRK; + + if (require < goodsbrk * MIN_SBRK_FRAC / 100) + require = goodsbrk * MIN_SBRK_FRAC / 100; + require = ((require - 1 + MIN_SBRK) / MIN_SBRK) * MIN_SBRK; + } else { + require = needed; + last_sbrk_top = 0; + sbrked_remains = 0; + } + + DEBUG_m(PerlIO_printf(Perl_debug_log, + "sbrk(%ld) for %ld-byte-long arena\n", + (long)require, (long) needed)); + cp = (char *)sbrk(require); +#ifdef DEBUGGING_MSTATS + sbrks++; +#endif + if (cp == last_sbrk_top) { + /* Common case, anything is fine. */ + sbrk_good++; + ovp = (union overhead *) (cp - sbrked_remains); + sbrked_remains = require - (needed - sbrked_remains); + } else if (cp == (char *)-1) { /* no more room! */ + ovp = (union overhead *)emergency_sbrk(needed); + if (ovp == (union overhead *)-1) + return; + goto gotit; + } else { /* Non-continuous or first sbrk(). */ + long add = sbrked_remains; + char *newcp; + + if (sbrked_remains) { /* Put rest into chain, we + cannot use it right now. */ + add_to_chain((void*)(last_sbrk_top - sbrked_remains), + sbrked_remains, 0); + } + + /* Second, check alignment. */ + slack = 0; + +#ifndef atarist /* on the atari we dont have to worry about this */ +# ifndef I286 /* The sbrk(0) call on the I286 always returns the next segment */ + + /* CHUNK_SHIFT is 1 for PACK_MALLOC, 0 otherwise. */ + if ((UV)cp & (0x7FF >> CHUNK_SHIFT)) { /* Not aligned. */ + slack = (0x800 >> CHUNK_SHIFT) + - ((UV)cp & (0x7FF >> CHUNK_SHIFT)); + add += slack; + } # endif #endif /* atarist */ - -#if !(defined(I286) || defined(atarist)) - /* take 2k unless the block is bigger than that */ - rnu = (bucket <= 8) ? 11 : bucket + 3; -#else - /* take 16k unless the block is bigger than that - (80286s like large segments!), probably good on the atari too */ - rnu = (bucket <= 11) ? 14 : bucket + 3; + + if (add) { + DEBUG_m(PerlIO_printf(Perl_debug_log, +"sbrk(%ld) to fix non-continuous/off-page sbrk:\n\t%ld for alignement,\t%ld were assumed to come from the tail of the previous sbrk\n", + (long)add, (long) slack, + (long) sbrked_remains)); + newcp = (char *)sbrk(add); +#if defined(DEBUGGING_MSTATS) + sbrks++; + sbrk_slack += add; +#endif + if (newcp != cp + require) { + /* Too bad: even rounding sbrk() is not continuous.*/ + DEBUG_m(PerlIO_printf(Perl_debug_log, + "failed to fix bad sbrk()\n")); +#ifdef PACK_MALLOC + if (slack) + croak("panic: Off-page sbrk"); #endif - nblks = 1 << (rnu - (bucket + 3)); /* how many blocks to get */ - needed = (MEM_SIZE)1 << rnu; -#ifdef TWO_POT_OPTIMIZE - needed += (bucket >= (FIRST_BIG_TWO_POT - 3) ? PERL_PAGESIZE : 0); + if (sbrked_remains) { + /* Try again. */ +#if defined(DEBUGGING_MSTATS) + sbrk_slack += require; +#endif + require = needed; + DEBUG_m(PerlIO_printf(Perl_debug_log, + "straight sbrk(%ld)\n", + (long)require)); + cp = (char *)sbrk(require); +#ifdef DEBUGGING_MSTATS + sbrks++; #endif - ovp = (union overhead *)sbrk(needed); - /* no more room! */ - if (ovp == (union overhead *)-1) { - ovp = (union overhead *)emergency_sbrk(needed); - if (ovp == (union overhead *)-1) - return; - } + if (cp == (char *)-1) + return; + } + sbrk_good = -1; /* Disable optimization! + Continue with not-aligned... */ + } else { + cp += slack; + require += sbrked_remains; + } + } + + if (last_sbrk_top) { + sbrk_good -= SBRK_FAILURE_PRICE; + } + + ovp = (union overhead *) cp; + /* + * Round up to minimum allocation size boundary + * and deduct from block count to reflect. + */ + +#ifndef I286 /* Again, this should always be ok on an 80286 */ + if ((UV)ovp & 7) { + ovp = (union overhead *)(((UV)ovp + 8) & ~7); + DEBUG_m(PerlIO_printf(Perl_debug_log, + "fixing sbrk(): %d bytes off machine alignement\n", + (int)((UV)ovp & 7))); + nblks--; +# if defined(DEBUGGING_MSTATS) + /* This is only approx. if TWO_POT_OPTIMIZE: */ + sbrk_slack += (1 << bucket); +# endif + } +#endif + sbrked_remains = require - needed; + } + last_sbrk_top = cp + require; + last_op = (char*) cp; #ifdef DEBUGGING_MSTATS - goodsbrk += needed; + goodsbrk += require; #endif - /* - * Round up to minimum allocation size boundary - * and deduct from block count to reflect. - */ -#ifndef I286 -# ifdef PACK_MALLOC - if ((UV)ovp & 0x7FF) - croak("panic: Off-page sbrk"); -# endif - if ((UV)ovp & 7) { - ovp = (union overhead *)(((UV)ovp + 8) & ~7); - nblks--; - } -#else - /* Again, this should always be ok on an 80286 */ -#endif + } + + gotit: /* * Add new memory allocated to that on * free list for this hash bucket. */ - siz = 1 << (bucket + 3); + siz = BUCKET_SIZE(bucket); #ifdef PACK_MALLOC *(u_char*)ovp = bucket; /* Fill index. */ - if (bucket <= MAX_PACKED - 3) { - ovp = (union overhead *) ((char*)ovp + blk_shift[bucket]); - nblks = n_blks[bucket]; + if (bucket <= MAX_PACKED) { + ovp = (union overhead *) ((char*)ovp + BLK_SHIFT(bucket)); + nblks = N_BLKS(bucket); # ifdef DEBUGGING_MSTATS - start_slack += blk_shift[bucket]; + start_slack += BLK_SHIFT(bucket); # endif - } else if (bucket <= 11 - 1 - 3) { - ovp = (union overhead *) ((char*)ovp + blk_shift[bucket]); - /* nblks = n_blks[bucket]; */ + } else if (bucket < LOG_OF_MIN_ARENA * BUCKETS_PER_POW2) { + ovp = (union overhead *) ((char*)ovp + BLK_SHIFT(bucket)); siz -= sizeof(union overhead); } else ovp++; /* One chunk per block. */ -#endif /* !PACK_MALLOC */ +#endif /* PACK_MALLOC */ nextf[bucket] = ovp; #ifdef DEBUGGING_MSTATS nmalloc[bucket] += nblks; + if (bucket > MAX_PACKED) { + start_slack += M_OVERHEAD * nblks; + } #endif while (--nblks > 0) { ovp->ov_next = (union overhead *)((caddr_t)ovp + siz); @@ -475,11 +941,12 @@ morecore(register int bucket) /* Not all sbrks return zeroed memory.*/ ovp->ov_next = (union overhead *)NULL; #ifdef PACK_MALLOC - if (bucket == 7 - 3) { /* Special case, explanation is above. */ - union overhead *n_op = nextf[7 - 3]->ov_next; - nextf[7 - 3] = (union overhead *)((caddr_t)nextf[7 - 3] - - sizeof(union overhead)); - nextf[7 - 3]->ov_next = n_op; + if (bucket == 7*BUCKETS_PER_POW2) { /* Special case, explanation is above. */ + union overhead *n_op = nextf[7*BUCKETS_PER_POW2]->ov_next; + nextf[7*BUCKETS_PER_POW2] = + (union overhead *)((caddr_t)nextf[7*BUCKETS_PER_POW2] + - sizeof(union overhead)); + nextf[7*BUCKETS_PER_POW2]->ov_next = n_op; } #endif /* !PACK_MALLOC */ } @@ -494,18 +961,24 @@ free(void *mp) u_char bucket; #endif -#ifdef PERL_CORE - DEBUG_m(PerlIO_printf(Perl_debug_log, "0x%lx: (%05lu) free\n",(unsigned long)cp,(unsigned long)(an++))); -#endif /* PERL_CORE */ + DEBUG_m(PerlIO_printf(Perl_debug_log, + "0x%lx: (%05lu) free\n", + (unsigned long)cp, (unsigned long)(an++))); if (cp == NULL) return; ovp = (union overhead *)((caddr_t)cp - - sizeof (union overhead) * CHUNK_SHIFT); + - sizeof (union overhead) * CHUNK_SHIFT); #ifdef PACK_MALLOC bucket = OV_INDEX(ovp); #endif - if (OV_MAGIC(ovp, bucket) != MAGIC) { +#ifdef IGNORE_SMALL_BAD_FREE + if ((bucket >= FIRST_BUCKET_WITH_CHECK) + && (OV_MAGIC(ovp, bucket) != MAGIC)) +#else + if (OV_MAGIC(ovp, bucket) != MAGIC) +#endif + { static int bad_free_warn = -1; if (bad_free_warn == -1) { char *pbf = PerlEnv_getenv("PERL_BADFREE"); @@ -520,12 +993,24 @@ free(void *mp) warn("Bad free() ignored"); #endif return; /* sanity */ - } + } MUTEX_LOCK(&malloc_mutex); #ifdef RCHECK ASSERT(ovp->ov_rmagic == RMAGIC); - if (OV_INDEX(ovp) <= MAX_SHORT_BUCKET) - ASSERT(*(u_int *)((caddr_t)ovp + ovp->ov_size + 1 - RSLOP) == RMAGIC); + if (OV_INDEX(ovp) <= MAX_SHORT_BUCKET) { + int i; + MEM_SIZE nbytes = ovp->ov_size + 1; + + if ((i = nbytes & 3)) { + i = 4 - i; + while (i--) { + ASSERT(*((char *)((caddr_t)ovp + nbytes - RSLOP + i)) + == RMAGIC_C); + } + } + nbytes = (nbytes + 3) &~ 3; + ASSERT(*(u_int *)((caddr_t)ovp + nbytes - RSLOP) == RMAGIC); + } ovp->ov_rmagic = RMAGIC - 1; #endif ASSERT(OV_INDEX(ovp) < NBUCKETS); @@ -553,36 +1038,33 @@ realloc(void *mp, size_t nbytes) { register MEM_SIZE onb; union overhead *ovp; - char *res; - register int i; - int was_alloced = 0; + char *res, prev_bucket; + register int bucket; + int was_alloced = 0, incr; char *cp = (char*)mp; -#ifdef DEBUGGING +#if defined(DEBUGGING) || !defined(PERL_CORE) MEM_SIZE size = nbytes; -#endif -#ifdef PERL_CORE -#ifdef HAS_64K_LIMIT - if (nbytes > 0xffff) { - PerlIO_printf(PerlIO_stderr(), - "Reallocation too large: %lx\n", size); - my_exit(1); - } -#endif /* HAS_64K_LIMIT */ - if (!cp) - return malloc(nbytes); -#ifdef DEBUGGING if ((long)nbytes < 0) croak("panic: realloc"); #endif -#endif /* PERL_CORE */ + + BARK_64K_LIMIT("Reallocation",nbytes,size); + if (!cp) + return malloc(nbytes); MUTEX_LOCK(&malloc_mutex); ovp = (union overhead *)((caddr_t)cp - - sizeof (union overhead) * CHUNK_SHIFT); - i = OV_INDEX(ovp); - if (OV_MAGIC(ovp, i) == MAGIC) { + - sizeof (union overhead) * CHUNK_SHIFT); + bucket = OV_INDEX(ovp); +#ifdef IGNORE_SMALL_BAD_FREE + if ((bucket < FIRST_BUCKET_WITH_CHECK) + || (OV_MAGIC(ovp, bucket) == MAGIC)) +#else + if (OV_MAGIC(ovp, bucket) == MAGIC) +#endif + { was_alloced = 1; } else { /* @@ -596,40 +1078,57 @@ realloc(void *mp, size_t nbytes) * the memory block being realloc'd is the * smallest possible. */ - if ((i = findbucket(ovp, 1)) < 0 && - (i = findbucket(ovp, reall_srchlen)) < 0) - i = 0; + if ((bucket = findbucket(ovp, 1)) < 0 && + (bucket = findbucket(ovp, reall_srchlen)) < 0) + bucket = 0; } - onb = (1L << (i + 3)) - -#ifdef PACK_MALLOC - (i <= (MAX_PACKED - 3) ? 0 : M_OVERHEAD) -#else - M_OVERHEAD -#endif -#ifdef TWO_POT_OPTIMIZE - + (i >= (FIRST_BIG_TWO_POT - 3) ? PERL_PAGESIZE : 0) -#endif - ; + onb = BUCKET_SIZE_REAL(bucket); /* * avoid the copy if same size block. - * We are not agressive with boundary cases. Note that it is - * possible for small number of cases give false negative if + * We are not agressive with boundary cases. Note that it might + * (for a small number of cases) give false negative if * both new size and old one are in the bucket for - * FIRST_BIG_TWO_POT, but the new one is near the lower end. + * FIRST_BIG_POW2, but the new one is near the lower end. + * + * We do not try to go to 1.5 times smaller bucket so far. */ - if (was_alloced && - nbytes <= onb && (nbytes > ( (onb >> 1) - M_OVERHEAD ) -#ifdef TWO_POT_OPTIMIZE - || (i == (FIRST_BIG_TWO_POT - 3) - && nbytes >= LAST_SMALL_BOUND ) -#endif - )) { + if (nbytes > onb) incr = 1; + else { +#ifdef DO_NOT_TRY_HARDER_WHEN_SHRINKING + if ( /* This is a little bit pessimal if PACK_MALLOC: */ + nbytes > ( (onb >> 1) - M_OVERHEAD ) +# ifdef TWO_POT_OPTIMIZE + || (bucket == FIRST_BIG_POW2 && nbytes >= LAST_SMALL_BOUND ) +# endif + ) +#else /* !DO_NOT_TRY_HARDER_WHEN_SHRINKING */ + prev_bucket = ( (bucket > MAX_PACKED + 1) + ? bucket - BUCKETS_PER_POW2 + : bucket - 1); + if (nbytes > BUCKET_SIZE_REAL(prev_bucket)) +#endif /* !DO_NOT_TRY_HARDER_WHEN_SHRINKING */ + incr = 0; + else incr = -1; + } + if (!was_alloced) goto hard_way; + else if (incr == 0) { + inplace: #ifdef RCHECK /* * Record new allocated size of block and * bound space with magic numbers. */ if (OV_INDEX(ovp) <= MAX_SHORT_BUCKET) { + int i, nb = ovp->ov_size + 1; + + if ((i = nb & 3)) { + i = 4 - i; + while (i--) { + ASSERT(*((char *)((caddr_t)ovp + nb - RSLOP + i)) == RMAGIC_C); + } + } + nb = (nb + 3) &~ 3; + ASSERT(*(u_int *)((caddr_t)ovp + nb - RSLOP) == RMAGIC); /* * Convert amount of memory requested into * closest block size stored in hash buckets @@ -637,33 +1136,87 @@ realloc(void *mp, size_t nbytes) * space used per block for accounting. */ nbytes += M_OVERHEAD; - nbytes = (nbytes + 3) &~ 3; ovp->ov_size = nbytes - 1; + if ((i = nbytes & 3)) { + i = 4 - i; + while (i--) + *((char *)((caddr_t)ovp + nbytes - RSLOP + i)) + = RMAGIC_C; + } + nbytes = (nbytes + 3) &~ 3; *((u_int *)((caddr_t)ovp + nbytes - RSLOP)) = RMAGIC; } #endif res = cp; MUTEX_UNLOCK(&malloc_mutex); - } - else { - MUTEX_UNLOCK(&malloc_mutex); - if ((res = (char*)malloc(nbytes)) == NULL) - return (NULL); - if (cp != res) /* common optimization */ - Copy(cp, res, (MEM_SIZE)(nbytes<onb?nbytes:onb), char); - if (was_alloced) - free(cp); + } else if (incr == 1 && (cp - M_OVERHEAD == last_op) + && (onb > (1 << LOG_OF_MIN_ARENA))) { + MEM_SIZE require, newarena = nbytes, pow; + int shiftr; + + POW2_OPTIMIZE_ADJUST(newarena); + newarena = newarena + M_OVERHEAD; + /* newarena = (newarena + 3) &~ 3; */ + shiftr = (newarena - 1) >> LOG_OF_MIN_ARENA; + pow = LOG_OF_MIN_ARENA + 1; + /* apart from this loop, this is O(1) */ + while (shiftr >>= 1) + pow++; + newarena = (1 << pow) + POW2_OPTIMIZE_SURPLUS(pow * BUCKETS_PER_POW2); + require = newarena - onb - M_OVERHEAD; + + if (require <= sbrked_remains) { + sbrked_remains -= require; + } else { + char *cp; + + require -= sbrked_remains; + /* We do not try to optimize sbrks here, we go for place. */ + cp = (char*) sbrk(require); +#ifdef DEBUGGING_MSTATS + sbrks++; + goodsbrk += require; +#endif + if (cp == last_sbrk_top) { + sbrked_remains = 0; + last_sbrk_top = cp + require; + } else { + /* Report the failure: */ + if (sbrked_remains) + add_to_chain((void*)(last_sbrk_top - sbrked_remains), + sbrked_remains, 0); + add_to_chain((void*)cp, require, 0); + sbrk_good -= SBRK_FAILURE_PRICE; + sbrked_remains = 0; + last_sbrk_top = 0; + last_op = 0; + goto hard_way; + } + } + +#ifdef DEBUGGING_MSTATS + nmalloc[bucket]--; + nmalloc[pow * BUCKETS_PER_POW2]++; +#endif + *(cp - M_OVERHEAD) = pow * BUCKETS_PER_POW2; /* Fill index. */ + goto inplace; + } else { + hard_way: + MUTEX_UNLOCK(&malloc_mutex); + if ((res = (char*)malloc(nbytes)) == NULL) + return (NULL); + if (cp != res) /* common optimization */ + Copy(cp, res, (MEM_SIZE)(nbytes<onb?nbytes:onb), char); + if (was_alloced) + free(cp); } -#ifdef PERL_CORE -#ifdef DEBUGGING - if (debug & 128) { - PerlIO_printf(Perl_debug_log, "0x%lx: (%05lu) rfree\n",(unsigned long)res,(unsigned long)(an++)); - PerlIO_printf(Perl_debug_log, "0x%lx: (%05lu) realloc %ld bytes\n", - (unsigned long)res,(unsigned long)(an++),(long)size); - } -#endif -#endif /* PERL_CORE */ + DEBUG_m(PerlIO_printf(Perl_debug_log, "0x%lu: (%05lu) rfree\n", + (unsigned long)res,(unsigned long)(an++))); + DEBUG_m(PerlIO_printf(Perl_debug_log, + "0x%lx: (%05lu) realloc %ld bytes\n", + (unsigned long)res,(unsigned long)(an++), + (long)size)); return ((Malloc_t)res); } @@ -701,7 +1254,21 @@ calloc(register size_t elements, registe return p; } +MEM_SIZE +malloced_size(void *p) +{ + int bucket = OV_INDEX((union overhead *)p); + + return BUCKET_SIZE_REAL(bucket); +} + #ifdef DEBUGGING_MSTATS + +# ifdef BUCKETS_ROOT2 +# define MIN_EVEN_REPORT 6 +# else +# define MIN_EVEN_REPORT MIN_BUCKET +# endif /* * mstats - print out statistics about malloc * @@ -714,31 +1281,72 @@ dump_mstats(char *s) { register int i, j; register union overhead *p; - int topbucket=0, totfree=0, total=0; + int topbucket=0, topbucket_ev=0, topbucket_odd=0, totfree=0, total=0; u_int nfree[NBUCKETS]; + int total_chain = 0; + struct chunk_chain_s* nextchain = chunk_chain; - for (i=0; i < NBUCKETS; i++) { + for (i = MIN_BUCKET ; i < NBUCKETS; i++) { for (j = 0, p = nextf[i]; p; p = p->ov_next, j++) ; nfree[i] = j; - totfree += nfree[i] * (1 << (i + 3)); - total += nmalloc[i] * (1 << (i + 3)); - if (nmalloc[i]) - topbucket = i; + totfree += nfree[i] * BUCKET_SIZE_REAL(i); + total += nmalloc[i] * BUCKET_SIZE_REAL(i); + if (nmalloc[i]) { + i % 2 ? (topbucket_odd = i) : (topbucket_ev = i); + topbucket = i; + } } if (s) - PerlIO_printf(PerlIO_stderr(), "Memory allocation statistics %s (buckets 8..%d)\n", - s, (1 << (topbucket + 3)) ); + PerlIO_printf(PerlIO_stderr(), + "Memory allocation statistics %s (buckets %d(%d)..%d(%d)\n", + s, + BUCKET_SIZE_REAL(MIN_BUCKET), + BUCKET_SIZE(MIN_BUCKET), + BUCKET_SIZE_REAL(topbucket), BUCKET_SIZE(topbucket)); PerlIO_printf(PerlIO_stderr(), "%8d free:", totfree); - for (i=0; i <= topbucket; i++) { - PerlIO_printf(PerlIO_stderr(), (i<5 || i==7)?" %5d": (i<9)?" %3d":" %d", nfree[i]); + for (i = MIN_EVEN_REPORT; i <= topbucket; i += BUCKETS_PER_POW2) { + PerlIO_printf(PerlIO_stderr(), + ((i < 8*BUCKETS_PER_POW2 || i == 10*BUCKETS_PER_POW2) + ? " %5d" + : ((i < 12*BUCKETS_PER_POW2) ? " %3d" : " %d")), + nfree[i]); + } +#ifdef BUCKETS_ROOT2 + PerlIO_printf(PerlIO_stderr(), "\n\t "); + for (i = MIN_BUCKET + 1; i <= topbucket_odd; i += BUCKETS_PER_POW2) { + PerlIO_printf(PerlIO_stderr(), + ((i < 8*BUCKETS_PER_POW2 || i == 10*BUCKETS_PER_POW2) + ? " %5d" + : ((i < 12*BUCKETS_PER_POW2) ? " %3d" : " %d")), + nfree[i]); } +#endif PerlIO_printf(PerlIO_stderr(), "\n%8d used:", total - totfree); - for (i=0; i <= topbucket; i++) { - PerlIO_printf(PerlIO_stderr(), (i<5 || i==7)?" %5d": (i<9)?" %3d":" %d", nmalloc[i] - nfree[i]); + for (i = MIN_EVEN_REPORT; i <= topbucket; i += BUCKETS_PER_POW2) { + PerlIO_printf(PerlIO_stderr(), + ((i < 8*BUCKETS_PER_POW2 || i == 10*BUCKETS_PER_POW2) + ? " %5d" + : ((i < 12*BUCKETS_PER_POW2) ? " %3d" : " %d")), + nmalloc[i] - nfree[i]); } - PerlIO_printf(PerlIO_stderr(), "\nTotal sbrk(): %8d. Odd ends: sbrk(): %7d, malloc(): %7d bytes.\n", - goodsbrk + sbrk_slack, sbrk_slack, start_slack); +#ifdef BUCKETS_ROOT2 + PerlIO_printf(PerlIO_stderr(), "\n\t "); + for (i = MIN_BUCKET + 1; i <= topbucket_odd; i += BUCKETS_PER_POW2) { + PerlIO_printf(PerlIO_stderr(), + ((i < 8*BUCKETS_PER_POW2 || i == 10*BUCKETS_PER_POW2) + ? " %5d" + : ((i < 12*BUCKETS_PER_POW2) ? " %3d" : " %d")), + nmalloc[i] - nfree[i]); + } +#endif + while (nextchain) { + total_chain += nextchain->size; + nextchain = nextchain->next; + } + PerlIO_printf(PerlIO_stderr(), "\nTotal sbrk(): %d/%d:%d. Odd ends: pad+heads+chain+tail: %d+%d+%d+%d.\n", + goodsbrk + sbrk_slack, sbrks, sbrk_good, sbrk_slack, + start_slack, total_chain, sbrked_remains); } #else void @@ -799,9 +1407,6 @@ int size; if (size >= PERLSBRK_32_K) { small = 0; } else { -#ifndef PERL_CORE - reqsize = size; -#endif size = PERLSBRK_64_K; small = 1; } @@ -816,10 +1421,8 @@ int size; } } -#ifdef PERL_CORE DEBUG_m(PerlIO_printf(Perl_debug_log, "sbrk malloc size %ld (reqsize %ld), left size %ld, give addr 0x%lx\n", size, reqsize, Perl_sbrk_oldsize, got)); -#endif return (void *)got; }