75 files changed, 3571 insertions, 2069 deletions
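The bulk of this change is a mechanical conversion from the zone allocator (zinit/zalloc/zfree) to the new slab allocator's kmem_cache interface, repeated hunk after hunk below. Here is a minimal sketch of that recurring pattern, distilled from the hunks themselves; struct foo, FOO_MAX and the function names are illustrative only, and the three NULL arguments to kmem_cache_init() are assumed to select default constructor and slab backing behaviour (the prototype itself is not part of this excerpt).

/* Illustrative conversion pattern only; none of these identifiers exist
 * in the tree.  "Before" is the old zone style, "after" the new cache
 * style used throughout this commit. */

#include <mach/vm_param.h>	/* PAGE_SIZE, vm_offset_t */
#include <kern/zalloc.h>	/* old allocator, removed by this commit */
#include <kern/slab.h>		/* new allocator */

struct foo {
	int bar;
};

#define FOO_MAX 1024

/* Before: a zone with a hard maximum and an explicit growth size. */
zone_t foo_zone;

void foo_zone_init(void)
{
	foo_zone = zinit(sizeof(struct foo), 0,
			 FOO_MAX * sizeof(struct foo),	/* max memory */
			 PAGE_SIZE,			/* growth increment */
			 FALSE, "foo objects");
}

struct foo *foo_zone_get(void)
{
	return (struct foo *) zalloc(foo_zone);
}

void foo_zone_put(struct foo *fp)
{
	zfree(foo_zone, (vm_offset_t) fp);
}

/* After: a statically allocated cache, with no arbitrary maximum to tune. */
struct kmem_cache foo_cache;

void foo_cache_init(void)
{
	kmem_cache_init(&foo_cache, "foo", sizeof(struct foo), 0,
			NULL, NULL, NULL, 0);
}

struct foo *foo_cache_get(void)
{
	return (struct foo *) kmem_cache_alloc(&foo_cache);
}

void foo_cache_put(struct foo *fp)
{
	kmem_cache_free(&foo_cache, (vm_offset_t) fp);
}

This is the shape applied to dev_hdr_zone, dev_pager_zone, io_inband_zone, io_trap_zone, the IPC object zones, ifps_zone, pcb_zone, pmap_zone, pv_list_zone and the other zones touched in the hunks that follow; the per-zone size caps simply disappear.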
diff --git a/Makefrag.am b/Makefrag.am index a80cd24..7180093 100644 --- a/Makefrag.am +++ b/Makefrag.am @@ -151,9 +151,9 @@ libkernel_a_SOURCES += \ kern/ipc_sched.h \ kern/ipc_tt.c \ kern/ipc_tt.h \ - kern/kalloc.c \ kern/kalloc.h \ kern/kern_types.h \ + kern/list.h \ kern/lock.c \ kern/lock.h \ kern/lock_mon.c \ @@ -161,7 +161,6 @@ libkernel_a_SOURCES += \ kern/mach_clock.h \ kern/mach_factor.c \ kern/mach_factor.h \ - kern/mach_param.h \ kern/machine.c \ kern/machine.h \ kern/macro_help.h \ @@ -175,7 +174,12 @@ libkernel_a_SOURCES += \ kern/profile.c \ kern/queue.c \ kern/queue.h \ + kern/rbtree.c \ + kern/rbtree.h \ + kern/rbtree_i.h \ kern/refcount.h \ + kern/slab.c \ + kern/slab.h \ kern/sched.h \ kern/sched_prim.c \ kern/sched_prim.h \ @@ -200,8 +204,6 @@ libkernel_a_SOURCES += \ kern/timer.h \ kern/xpr.c \ kern/xpr.h \ - kern/zalloc.c \ - kern/zalloc.h \ kern/elf-load.c \ kern/boot_script.c EXTRA_DIST += \ @@ -402,7 +404,7 @@ include_mach_eXec_HEADERS = \ # mach-debug-headers:= $(addprefix mach_debug/, hash_info.h ipc_info.h \ # mach_debug.defs mach_debug_types.defs mach_debug_types.h \ -# pc_info.h vm_info.h zone_info.h) +# pc_info.h vm_info.h slab_info.h) # Other headers for the distribution. We don't install these, because the # GNU C library has correct versions for users to use. diff --git a/configfrag.ac b/configfrag.ac index 77b0024..5f13b63 100644 --- a/configfrag.ac +++ b/configfrag.ac @@ -102,6 +102,12 @@ AC_DEFINE([STAT_TIME], [1], [STAT_TIME]) # Kernel tracing. AC_DEFINE([XPR_DEBUG], [1], [XPR_DEBUG]) + +# Slab allocator debugging facilities. +AC_DEFINE([SLAB_VERIFY], [0], [SLAB_VERIFY]) + +# Enable the CPU pool layer in the slab allocator. +AC_DEFINE([SLAB_USE_CPU_POOLS], [0], [SLAB_USE_CPU_POOLS]) # # Options. diff --git a/device/dev_lookup.c b/device/dev_lookup.c index 2391e8d..98a2d02 100644 --- a/device/dev_lookup.c +++ b/device/dev_lookup.c @@ -32,7 +32,7 @@ #include <mach/vm_param.h> #include <kern/queue.h> -#include <kern/zalloc.h> +#include <kern/slab.h> #include <device/device_types.h> #include <device/dev_hdr.h> @@ -62,7 +62,7 @@ queue_head_t dev_number_hash_table[NDEVHASH]; decl_simple_lock_data(, dev_number_lock) -zone_t dev_hdr_zone; +struct kmem_cache dev_hdr_cache; /* * Enter device in the number lookup table. 
@@ -151,7 +151,7 @@ device_lookup(name) simple_unlock(&dev_number_lock); - new_device = (mach_device_t) zalloc(dev_hdr_zone); + new_device = (mach_device_t) kmem_cache_alloc(&dev_hdr_cache); simple_lock_init(&new_device->ref_lock); new_device->ref_count = 1; simple_lock_init(&new_device->lock); @@ -187,7 +187,7 @@ device_lookup(name) simple_unlock(&dev_number_lock); if (new_device != MACH_DEVICE_NULL) - zfree(dev_hdr_zone, (vm_offset_t)new_device); + kmem_cache_free(&dev_hdr_cache, (vm_offset_t)new_device); } return (device); @@ -233,7 +233,7 @@ mach_device_deallocate(device) simple_unlock(&device->ref_lock); simple_unlock(&dev_number_lock); - zfree(dev_hdr_zone, (vm_offset_t)device); + kmem_cache_free(&dev_hdr_cache, (vm_offset_t)device); } /* @@ -376,9 +376,6 @@ dev_lookup_init() simple_lock_init(&dev_port_lock); - dev_hdr_zone = zinit(sizeof(struct mach_device), 0, - sizeof(struct mach_device) * NDEVICES, - PAGE_SIZE, - FALSE, - "open device entry"); + kmem_cache_init(&dev_hdr_cache, "mach_device", + sizeof(struct mach_device), 0, NULL, NULL, NULL, 0); } diff --git a/device/dev_pager.c b/device/dev_pager.c index 447781e..bc58a15 100644 --- a/device/dev_pager.c +++ b/device/dev_pager.c @@ -44,8 +44,7 @@ #include <kern/debug.h> #include <kern/printf.h> #include <kern/queue.h> -#include <kern/zalloc.h> -#include <kern/kalloc.h> +#include <kern/slab.h> #include <vm/vm_page.h> #include <vm/vm_kern.h> @@ -126,7 +125,7 @@ typedef struct dev_pager *dev_pager_t; #define DEV_PAGER_NULL ((dev_pager_t)0) -zone_t dev_pager_zone; +struct kmem_cache dev_pager_cache; void dev_pager_reference(register dev_pager_t ds) { @@ -144,7 +143,7 @@ void dev_pager_deallocate(register dev_pager_t ds) } simple_unlock(&ds->lock); - zfree(dev_pager_zone, (vm_offset_t)ds); + kmem_cache_free(&dev_pager_cache, (vm_offset_t)ds); } /* @@ -161,7 +160,7 @@ struct dev_pager_entry { typedef struct dev_pager_entry *dev_pager_entry_t; queue_head_t dev_pager_hashtable[DEV_PAGER_HASH_COUNT]; -zone_t dev_pager_hash_zone; +struct kmem_cache dev_pager_hash_cache; decl_simple_lock_data(, dev_pager_hash_lock) @@ -174,13 +173,8 @@ void dev_pager_hash_init(void) register vm_size_t size; size = sizeof(struct dev_pager_entry); - dev_pager_hash_zone = zinit( - size, - 0, - size * 1000, - PAGE_SIZE, - FALSE, - "dev_pager port hash"); + kmem_cache_init(&dev_pager_hash_cache, "dev_pager_entry", size, 0, + NULL, NULL, NULL, 0); for (i = 0; i < DEV_PAGER_HASH_COUNT; i++) queue_init(&dev_pager_hashtable[i]); simple_lock_init(&dev_pager_hash_lock); @@ -192,7 +186,7 @@ void dev_pager_hash_insert( { register dev_pager_entry_t new_entry; - new_entry = (dev_pager_entry_t) zalloc(dev_pager_hash_zone); + new_entry = (dev_pager_entry_t) kmem_cache_alloc(&dev_pager_hash_cache); new_entry->name = name_port; new_entry->pager_rec = rec; @@ -220,7 +214,7 @@ void dev_pager_hash_delete(ipc_port_t name_port) } simple_unlock(&dev_pager_hash_lock); if (entry) - zfree(dev_pager_hash_zone, (vm_offset_t)entry); + kmem_cache_free(&dev_pager_hash_cache, (vm_offset_t)entry); } dev_pager_t dev_pager_hash_lookup(ipc_port_t name_port) @@ -273,7 +267,7 @@ kern_return_t device_pager_setup( return (D_SUCCESS); } - d = (dev_pager_t) zalloc(dev_pager_zone); + d = (dev_pager_t) kmem_cache_alloc(&dev_pager_cache); if (d == DEV_PAGER_NULL) return (KERN_RESOURCE_SHORTAGE); @@ -726,15 +720,11 @@ void device_pager_init(void) register vm_size_t size; /* - * Initialize zone of paging structures. + * Initialize cache of paging structures. 
*/ size = sizeof(struct dev_pager); - dev_pager_zone = zinit(size, - 0, - (vm_size_t) size * 1000, - PAGE_SIZE, - FALSE, - "device pager structures"); + kmem_cache_init(&dev_pager_cache, "dev_pager", size, 0, + NULL, NULL, NULL, 0); /* * Initialize the name port hashing stuff. diff --git a/device/ds_routines.c b/device/ds_routines.c index d4a08fb..5a6fdd2 100644 --- a/device/ds_routines.c +++ b/device/ds_routines.c @@ -73,7 +73,7 @@ #include <kern/debug.h> #include <kern/printf.h> #include <kern/queue.h> -#include <kern/zalloc.h> +#include <kern/slab.h> #include <kern/thread.h> #include <kern/task.h> #include <kern/sched_prim.h> @@ -130,7 +130,8 @@ static struct device_emulation_ops *emulation_list[] = &mach_device_emulation_ops, }; -vm_map_t device_io_map; +static struct vm_map device_io_map_store; +vm_map_t device_io_map = &device_io_map_store; #define NUM_EMULATION (sizeof (emulation_list) / sizeof (emulation_list[0])) @@ -855,7 +856,7 @@ device_write_get(ior, wait) */ if (ior->io_op & IO_INBAND) { assert(ior->io_count <= sizeof (io_buf_ptr_inband_t)); - new_addr = zalloc(io_inband_zone); + new_addr = kmem_cache_alloc(&io_inband_cache); memcpy((void*)new_addr, ior->io_data, ior->io_count); ior->io_data = (io_buf_ptr_t)new_addr; ior->io_alloc_size = sizeof (io_buf_ptr_inband_t); @@ -935,7 +936,7 @@ device_write_dealloc(ior) * Inband case. */ if (ior->io_op & IO_INBAND) { - zfree(io_inband_zone, (vm_offset_t)ior->io_data); + kmem_cache_free(&io_inband_cache, (vm_offset_t)ior->io_data); return (TRUE); } @@ -1245,7 +1246,7 @@ kern_return_t device_read_alloc(ior, size) return (KERN_SUCCESS); if (ior->io_op & IO_INBAND) { - ior->io_data = (io_buf_ptr_t) zalloc(io_inband_zone); + ior->io_data = (io_buf_ptr_t) kmem_cache_alloc(&io_inband_cache); ior->io_alloc_size = sizeof(io_buf_ptr_inband_t); } else { size = round_page(size); @@ -1338,7 +1339,7 @@ boolean_t ds_read_done(ior) if (ior->io_count != 0) { if (ior->io_op & IO_INBAND) { if (ior->io_alloc_size > 0) - zfree(io_inband_zone, (vm_offset_t)ior->io_data); + kmem_cache_free(&io_inband_cache, (vm_offset_t)ior->io_data); } else { register vm_offset_t end_alloc; @@ -1551,11 +1552,9 @@ void mach_device_init() queue_init(&io_done_list); simple_lock_init(&io_done_list_lock); - device_io_map = kmem_suballoc(kernel_map, - &device_io_min, - &device_io_max, - DEVICE_IO_MAP_SIZE, - FALSE); + kmem_submap(device_io_map, kernel_map, &device_io_min, &device_io_max, + DEVICE_IO_MAP_SIZE, FALSE); + /* * If the kernel receives many device_write requests, the * device_io_map might run out of space. To prevent @@ -1575,11 +1574,8 @@ void mach_device_init() */ device_io_map->wait_for_space = TRUE; - io_inband_zone = zinit(sizeof(io_buf_ptr_inband_t), 0, - 1000 * sizeof(io_buf_ptr_inband_t), - 10 * sizeof(io_buf_ptr_inband_t), - FALSE, - "io inband read buffers"); + kmem_cache_init(&io_inband_cache, "io_buf_ptr_inband", + sizeof(io_buf_ptr_inband_t), 0, NULL, NULL, NULL, 0); mach_device_trap_init(); } @@ -1615,7 +1611,7 @@ void iowait(ior) */ #define IOTRAP_REQSIZE 2048 -zone_t io_trap_zone; +struct kmem_cache io_trap_cache; /* * Initialization. Called from mach_device_init(). @@ -1623,24 +1619,21 @@ zone_t io_trap_zone; static void mach_device_trap_init(void) { - io_trap_zone = zinit(IOTRAP_REQSIZE, 0, - 256 * IOTRAP_REQSIZE, - 16 * IOTRAP_REQSIZE, - FALSE, - "wired device trap buffers"); + kmem_cache_init(&io_trap_cache, "io_req", IOTRAP_REQSIZE, 0, + NULL, NULL, NULL, 0); } /* * Allocate an io_req_t. - * Currently zalloc's from io_trap_zone. 
+ * Currently allocates from io_trap_cache. * - * Could have lists of different size zones. + * Could have lists of different size caches. * Could call a device-specific routine. */ io_req_t ds_trap_req_alloc(mach_device_t device, vm_size_t data_size) { - return (io_req_t) zalloc(io_trap_zone); + return (io_req_t) kmem_cache_alloc(&io_trap_cache); } /* @@ -1656,7 +1649,7 @@ ds_trap_write_done(io_req_t ior) /* * Should look at reply port and maybe send a message. */ - zfree(io_trap_zone, (vm_offset_t) ior); + kmem_cache_free(&io_trap_cache, (vm_offset_t) ior); /* * Give up device reference from ds_write_trap. @@ -1732,7 +1725,7 @@ device_write_trap (mach_device_t device, dev_mode_t mode, */ mach_device_deallocate(device); - zfree(io_trap_zone, (vm_offset_t) ior); + kmem_cache_free(&io_trap_cache, (vm_offset_t) ior); return (result); } @@ -1823,7 +1816,7 @@ device_writev_trap (mach_device_t device, dev_mode_t mode, */ mach_device_deallocate(device); - zfree(io_trap_zone, (vm_offset_t) ior); + kmem_cache_free(&io_trap_cache, (vm_offset_t) ior); return (result); } diff --git a/device/io_req.h b/device/io_req.h index 162524d..65e23e6 100644 --- a/device/io_req.h +++ b/device/io_req.h @@ -35,6 +35,7 @@ #include <mach/port.h> #include <mach/message.h> #include <mach/vm_param.h> +#include <kern/slab.h> #include <kern/kalloc.h> #include <kern/lock.h> #include <vm/vm_page.h> @@ -124,7 +125,7 @@ struct io_req { void iodone(io_req_t); /* - * Macros to allocate and free IORs - will convert to zones later. + * Macros to allocate and free IORs - will convert to caches later. */ #define io_req_alloc(ior,size) \ MACRO_BEGIN \ @@ -136,6 +137,6 @@ void iodone(io_req_t); (kfree((vm_offset_t)(ior), sizeof(struct io_req))) -zone_t io_inband_zone; /* for inband reads */ +struct kmem_cache io_inband_cache; /* for inband reads */ #endif /* _IO_REQ_ */ diff --git a/device/net_io.c b/device/net_io.c index 8446395..52a0716 100644 --- a/device/net_io.c +++ b/device/net_io.c @@ -61,6 +61,7 @@ #include <kern/printf.h> #include <kern/queue.h> #include <kern/sched_prim.h> +#include <kern/slab.h> #include <kern/thread.h> #include <machine/machspl.h> @@ -302,7 +303,7 @@ struct net_rcv_port { }; typedef struct net_rcv_port *net_rcv_port_t; -zone_t net_rcv_zone; /* zone of net_rcv_port structs */ +struct kmem_cache net_rcv_cache; /* cache of net_rcv_port structs */ #define NET_HASH_SIZE 256 @@ -324,7 +325,7 @@ struct net_hash_entry { }; typedef struct net_hash_entry *net_hash_entry_t; -zone_t net_hash_entry_zone; +struct kmem_cache net_hash_entry_cache; /* * This structure represents a packet filter with multiple sessions. @@ -1195,7 +1196,7 @@ net_set_filter(ifp, rcv_port, priority, filter, filter_count) * If there is no match instruction, we allocate * a normal packet filter structure. */ - my_infp = (net_rcv_port_t) zalloc(net_rcv_zone); + my_infp = (net_rcv_port_t) kmem_cache_alloc(&net_rcv_cache); my_infp->rcv_port = rcv_port; is_new_infp = TRUE; } else { @@ -1205,7 +1206,7 @@ net_set_filter(ifp, rcv_port, priority, filter, filter_count) * a hash table to deal with them. 
*/ my_infp = 0; - hash_entp = (net_hash_entry_t) zalloc(net_hash_entry_zone); + hash_entp = (net_hash_entry_t) kmem_cache_alloc(&net_hash_entry_cache); is_new_infp = FALSE; } @@ -1310,7 +1311,8 @@ net_set_filter(ifp, rcv_port, priority, filter, filter_count) ipc_port_release_send(rcv_port); if (match != 0) - zfree (net_hash_entry_zone, (vm_offset_t)hash_entp); + kmem_cache_free(&net_hash_entry_cache, + (vm_offset_t)hash_entp); rval = D_NO_MEMORY; goto clean_and_return; @@ -1526,20 +1528,12 @@ net_io_init() register vm_size_t size; size = sizeof(struct net_rcv_port); - net_rcv_zone = zinit(size, - 0, - size * 1000, - PAGE_SIZE, - FALSE, - "net_rcv_port"); + kmem_cache_init(&net_rcv_cache, "net_rcv_port", size, 0, + NULL, NULL, NULL, 0); size = sizeof(struct net_hash_entry); - net_hash_entry_zone = zinit(size, - 0, - size * 100, - PAGE_SIZE, - FALSE, - "net_hash_entry"); + kmem_cache_init(&net_hash_entry_cache, "net_hash_entry", size, 0, + NULL, NULL, NULL, 0); size = ikm_plus_overhead(sizeof(struct net_rcv_msg)); net_kmsg_size = round_page(size); @@ -2167,7 +2161,7 @@ net_free_dead_infp (dead_infp) nextfp = (net_rcv_port_t) queue_next(&infp->input); ipc_port_release_send(infp->rcv_port); net_del_q_info(infp->rcv_qlimit); - zfree(net_rcv_zone, (vm_offset_t) infp); + kmem_cache_free(&net_rcv_cache, (vm_offset_t) infp); } } @@ -2190,7 +2184,7 @@ net_free_dead_entp (dead_entp) ipc_port_release_send(entp->rcv_port); net_del_q_info(entp->rcv_qlimit); - zfree(net_hash_entry_zone, (vm_offset_t) entp); + kmem_cache_free(&net_hash_entry_cache, (vm_offset_t) entp); } } diff --git a/i386/Makefrag.am b/i386/Makefrag.am index ab3502a..aca4215 100644 --- a/i386/Makefrag.am +++ b/i386/Makefrag.am @@ -136,7 +136,6 @@ libkernel_a_SOURCES += \ i386/i386/vm_param.h \ i386/i386/vm_tuning.h \ i386/i386/xpr.h \ - i386/i386/zalloc.h \ i386/intel/pmap.c \ i386/intel/pmap.h \ i386/intel/read_fault.c \ diff --git a/i386/configfrag.ac b/i386/configfrag.ac index e4ce97e..77f66af 100644 --- a/i386/configfrag.ac +++ b/i386/configfrag.ac @@ -25,6 +25,9 @@ dnl USE OF THIS SOFTWARE. AC_DEFINE([__ELF__], [1], [__ELF__]) AC_DEFINE([i386], [1], [i386]) + # Determines the size of the CPU cache line. + AC_DEFINE([CPU_L1_SHIFT], [6], [CPU_L1_SHIFT]) + [# Does the architecture provide machine-specific interfaces? 
mach_machine_routines=1;; *)] diff --git a/i386/i386/fpu.c b/i386/i386/fpu.c index 2626a38..f2c8124 100644 --- a/i386/i386/fpu.c +++ b/i386/i386/fpu.c @@ -44,10 +44,9 @@ #include <kern/debug.h> #include <machine/machspl.h> /* spls */ -#include <kern/mach_param.h> #include <kern/printf.h> #include <kern/thread.h> -#include <kern/zalloc.h> +#include <kern/slab.h> #include <i386/thread.h> #include <i386/fpu.h> @@ -72,7 +71,7 @@ extern void i386_exception(); int fp_kind = FP_387; /* 80387 present */ -zone_t ifps_zone; /* zone for FPU save area */ +struct kmem_cache ifps_cache; /* cache for FPU save area */ static unsigned long mxcsr_feature_mask = 0xffffffff; /* Always AND user-provided mxcsr with this security mask */ void fp_save(thread_t thread); @@ -193,10 +192,9 @@ init_fpu() void fpu_module_init() { - ifps_zone = zinit(sizeof(struct i386_fpsave_state), 16, - THREAD_MAX * sizeof(struct i386_fpsave_state), - THREAD_CHUNK * sizeof(struct i386_fpsave_state), - 0, "i386 fpsave state"); + kmem_cache_init(&ifps_cache, "i386_fpsave_state", + sizeof(struct i386_fpsave_state), 16, + NULL, NULL, NULL, 0); } /* @@ -221,7 +219,7 @@ ASSERT_IPL(SPL0); clear_fpu(); } #endif /* NCPUS == 1 */ - zfree(ifps_zone, (vm_offset_t) fps); + kmem_cache_free(&ifps_cache, (vm_offset_t) fps); } /* The two following functions were stolen from Linux's i387.c */ @@ -335,7 +333,7 @@ ASSERT_IPL(SPL0); simple_unlock(&pcb->lock); if (ifps != 0) { - zfree(ifps_zone, (vm_offset_t) ifps); + kmem_cache_free(&ifps_cache, (vm_offset_t) ifps); } } else { @@ -356,7 +354,7 @@ ASSERT_IPL(SPL0); if (ifps == 0) { if (new_ifps == 0) { simple_unlock(&pcb->lock); - new_ifps = (struct i386_fpsave_state *) zalloc(ifps_zone); + new_ifps = (struct i386_fpsave_state *) kmem_cache_alloc(&ifps_cache); goto Retry; } ifps = new_ifps; @@ -396,7 +394,7 @@ ASSERT_IPL(SPL0); simple_unlock(&pcb->lock); if (new_ifps != 0) - zfree(ifps_zone, (vm_offset_t) new_ifps); + kmem_cache_free(&ifps_cache, (vm_offset_t) new_ifps); } return KERN_SUCCESS; @@ -609,7 +607,7 @@ fpextovrflt() clear_fpu(); if (ifps) - zfree(ifps_zone, (vm_offset_t) ifps); + kmem_cache_free(&ifps_cache, (vm_offset_t) ifps); /* * Raise exception. 
@@ -785,7 +783,7 @@ fp_load(thread) ASSERT_IPL(SPL0); ifps = pcb->ims.ifps; if (ifps == 0) { - ifps = (struct i386_fpsave_state *) zalloc(ifps_zone); + ifps = (struct i386_fpsave_state *) kmem_cache_alloc(&ifps_cache); memset(ifps, 0, sizeof *ifps); pcb->ims.ifps = ifps; fpinit(); @@ -836,7 +834,7 @@ fp_state_alloc() pcb_t pcb = current_thread()->pcb; struct i386_fpsave_state *ifps; - ifps = (struct i386_fpsave_state *)zalloc(ifps_zone); + ifps = (struct i386_fpsave_state *)kmem_cache_alloc(&ifps_cache); memset(ifps, 0, sizeof *ifps); pcb->ims.ifps = ifps; diff --git a/i386/i386/io_perm.c b/i386/i386/io_perm.c index df25cc6..8bacb8d 100644 --- a/i386/i386/io_perm.c +++ b/i386/i386/io_perm.c @@ -54,7 +54,8 @@ #include <ipc/ipc_port.h> #include <ipc/ipc_space.h> -#include <kern/zalloc.h> +#include <kern/slab.h> +#include <kern/kalloc.h> #include <kern/lock.h> #include <kern/queue.h> #include <kern/thread.h> @@ -257,12 +258,12 @@ i386_io_perm_modify (task_t target_task, io_perm_t io_perm, boolean_t enable) if (!iopb) { simple_unlock (&target_task->machine.iopb_lock); - iopb = (unsigned char *) zalloc (machine_task_iopb_zone); + iopb = (unsigned char *) kmem_cache_alloc (&machine_task_iopb_cache); simple_lock (&target_task->machine.iopb_lock); if (target_task->machine.iopb) { if (iopb) - zfree (machine_task_iopb_zone, (vm_offset_t) iopb); + kmem_cache_free (&machine_task_iopb_cache, (vm_offset_t) iopb); iopb = target_task->machine.iopb; iopb_size = target_task->machine.iopb_size; } diff --git a/i386/i386/machine_task.c b/i386/i386/machine_task.c index 35b89e0..62b22e3 100644 --- a/i386/i386/machine_task.c +++ b/i386/i386/machine_task.c @@ -22,15 +22,14 @@ #include <kern/lock.h> #include <mach/mach_types.h> -#include <kern/zalloc.h> -#include <kern/mach_param.h> +#include <kern/slab.h> #include <machine/task.h> #include <machine/io_perm.h> -/* The zone which holds our IO permission bitmaps. */ -zone_t machine_task_iopb_zone; +/* The cache which holds our IO permission bitmaps. */ +struct kmem_cache machine_task_iopb_cache; /* Initialize the machine task module. 
The function is called once at @@ -38,11 +37,8 @@ zone_t machine_task_iopb_zone; void machine_task_module_init (void) { - machine_task_iopb_zone = zinit (IOPB_BYTES, 0, - TASK_MAX * IOPB_BYTES, - IOPB_BYTES, - ZONE_COLLECTABLE | ZONE_EXHAUSTIBLE, - "i386 machine task iopb"); + kmem_cache_init (&machine_task_iopb_cache, "i386_task_iopb", IOPB_BYTES, 0, + NULL, NULL, NULL, 0); } @@ -62,7 +58,8 @@ void machine_task_terminate (task_t task) { if (task->machine.iopb) - zfree (machine_task_iopb_zone, (vm_offset_t) task->machine.iopb); + kmem_cache_free (&machine_task_iopb_cache, + (vm_offset_t) task->machine.iopb); } @@ -74,7 +71,8 @@ machine_task_collect (task_t task) simple_lock (&task->machine.iopb_lock); if (task->machine.iopb_size == 0 && task->machine.iopb) { - zfree (machine_task_iopb_zone, (vm_offset_t) task->machine.iopb); + kmem_cache_free (&machine_task_iopb_cache, + (vm_offset_t) task->machine.iopb); task->machine.iopb = 0; } simple_unlock (&task->machine.iopb_lock); diff --git a/i386/i386/pcb.c b/i386/i386/pcb.c index fffa92a..e065dbb 100644 --- a/i386/i386/pcb.c +++ b/i386/i386/pcb.c @@ -36,9 +36,9 @@ #include "vm_param.h" #include <kern/counters.h> #include <kern/debug.h> -#include <kern/mach_param.h> #include <kern/thread.h> #include <kern/sched_prim.h> +#include <kern/slab.h> #include <vm/vm_kern.h> #include <vm/pmap.h> @@ -65,7 +65,7 @@ extern void Thread_continue(); extern void user_ldt_free(); -zone_t pcb_zone; +struct kmem_cache pcb_cache; vm_offset_t kernel_stack[NCPUS]; /* top of active_stack */ @@ -369,10 +369,8 @@ thread_t switch_context(old, continuation, new) void pcb_module_init() { - pcb_zone = zinit(sizeof(struct pcb), 0, - THREAD_MAX * sizeof(struct pcb), - THREAD_CHUNK * sizeof(struct pcb), - 0, "i386 pcb state"); + kmem_cache_init(&pcb_cache, "pcb", sizeof(struct pcb), 0, + NULL, NULL, NULL, 0); fpu_module_init(); } @@ -382,7 +380,7 @@ void pcb_init(thread) { register pcb_t pcb; - pcb = (pcb_t) zalloc(pcb_zone); + pcb = (pcb_t) kmem_cache_alloc(&pcb_cache); if (pcb == 0) panic("pcb_init"); @@ -422,7 +420,7 @@ void pcb_terminate(thread) fp_free(pcb->ims.ifps); if (pcb->ims.ldt != 0) user_ldt_free(pcb->ims.ldt); - zfree(pcb_zone, (vm_offset_t) pcb); + kmem_cache_free(&pcb_cache, (vm_offset_t) pcb); thread->pcb = 0; } diff --git a/i386/i386/task.h b/i386/i386/task.h index ca8de04..0060ad4 100644 --- a/i386/i386/task.h +++ b/i386/i386/task.h @@ -24,7 +24,7 @@ #define _I386_TASK_H_ #include <kern/kern_types.h> -#include <kern/zalloc.h> +#include <kern/slab.h> /* The machine specific data of a task. */ struct machine_task @@ -41,7 +41,7 @@ struct machine_task typedef struct machine_task machine_task_t; -extern zone_t machine_task_iopb_zone; +extern struct kmem_cache machine_task_iopb_cache; /* Initialize the machine task module. The function is called once at start up by task_init in kern/task.c. */ diff --git a/i386/i386/zalloc.h b/i386/i386/zalloc.h deleted file mode 100644 index bf7cf6b..0000000 --- a/i386/i386/zalloc.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 1996-1994 The University of Utah and - * the Computer Systems Laboratory (CSL). All rights reserved. 
- * - * Permission to use, copy, modify and distribute this software is hereby - * granted provided that (1) source code retains these copyright, permission, - * and disclaimer notices, and (2) redistributions including binaries - * reproduce the notices in supporting documentation, and (3) all advertising - * materials mentioning features or use of this software display the following - * acknowledgement: ``This product includes software developed by the - * Computer Systems Laboratory at the University of Utah.'' - * - * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS - * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF - * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. - * - * CSL requests users of this software to return to csl-dist@cs.utah.edu any - * improvements that they make and grant CSL redistribution rights. - * - * Utah $Hdr: zalloc.h 1.4 94/12/16$ - * Author: Bryan Ford - */ - -#ifndef _I386_ZALLOC_H_ -#define _I386_ZALLOC_H_ - -#include <kern/zalloc.h> - -#endif /* _I386_ZALLOC_H_ */ diff --git a/i386/intel/pmap.c b/i386/intel/pmap.c index c448e57..d6e18e5 100644 --- a/i386/intel/pmap.c +++ b/i386/intel/pmap.c @@ -63,7 +63,7 @@ #include <kern/debug.h> #include <kern/printf.h> #include <kern/thread.h> -#include <kern/zalloc.h> +#include <kern/slab.h> #include <kern/lock.h> @@ -113,7 +113,7 @@ pv_entry_t pv_head_table; /* array of entries, one per page */ /* * pv_list entries are kept on a list that can only be accessed * with the pmap system locked (at SPLVM, not in the cpus_active set). - * The list is refilled from the pv_list_zone if it becomes empty. + * The list is refilled from the pv_list_cache if it becomes empty. */ pv_entry_t pv_free_list; /* free list at SPLVM */ decl_simple_lock_data(, pv_free_list_lock) @@ -133,7 +133,7 @@ decl_simple_lock_data(, pv_free_list_lock) simple_unlock(&pv_free_list_lock); \ } -zone_t pv_list_zone; /* zone of pv_entry structures */ +struct kmem_cache pv_list_cache; /* cache of pv_entry structures */ /* * Each entry in the pv_head_table is locked by a bit in the @@ -400,7 +400,7 @@ struct pmap_update_list cpu_update_list[NCPUS]; struct pmap kernel_pmap_store; pmap_t kernel_pmap; -struct zone *pmap_zone; /* zone of pmap structures */ +struct kmem_cache pmap_cache; /* cache of pmap structures */ int pmap_debug = 0; /* flag for debugging prints */ @@ -937,13 +937,13 @@ void pmap_init() pmap_phys_attributes = (char *) addr; /* - * Create the zone of physical maps, + * Create the cache of physical maps, * and of the physical-to-virtual entries. */ s = (vm_size_t) sizeof(struct pmap); - pmap_zone = zinit(s, 0, 400*s, 4096, 0, "pmap"); /* XXX */ + kmem_cache_init(&pmap_cache, "pmap", s, 0, NULL, NULL, NULL, 0); s = (vm_size_t) sizeof(struct pv_entry); - pv_list_zone = zinit(s, 0, 10000*s, 4096, 0, "pv_list"); /* XXX */ + kmem_cache_init(&pv_list_cache, "pv_entry", s, 0, NULL, NULL, NULL, 0); #if NCPUS > 1 /* @@ -1009,7 +1009,7 @@ pmap_page_table_page_alloc() /* * We cannot allocate the pmap_object in pmap_init, - * because it is called before the zone package is up. + * because it is called before the cache package is up. * Allocate it now if it is missing. */ if (pmap_object == VM_OBJECT_NULL) @@ -1113,11 +1113,11 @@ pmap_t pmap_create(size) } /* - * Allocate a pmap struct from the pmap_zone. Then allocate - * the page descriptor table from the pd_zone. + * Allocate a pmap struct from the pmap_cache. Then allocate + * the page descriptor table. 
*/ - p = (pmap_t) zalloc(pmap_zone); + p = (pmap_t) kmem_cache_alloc(&pmap_cache); if (p == PMAP_NULL) panic("pmap_create"); @@ -1232,7 +1232,7 @@ void pmap_destroy(p) #endif /* MACH_XEN */ kmem_free(kernel_map, (vm_offset_t)p->pdpbase, INTEL_PGBYTES); #endif /* PAE */ - zfree(pmap_zone, (vm_offset_t) p); + kmem_cache_free(&pmap_cache, (vm_offset_t) p); } /* @@ -1782,7 +1782,7 @@ if (pmap_debug) printf("pmap(%x, %x)\n", v, pa); /* * Must allocate a new pvlist entry while we're unlocked; - * zalloc may cause pageout (which will lock the pmap system). + * Allocating may cause pageout (which will lock the pmap system). * If we determine we need a pvlist entry, we will unlock * and allocate one. Then we will retry, throughing away * the allocated entry later (if we no longer need it). @@ -1966,9 +1966,9 @@ Retry: PMAP_READ_UNLOCK(pmap, spl); /* - * Refill from zone. + * Refill from cache. */ - pv_e = (pv_entry_t) zalloc(pv_list_zone); + pv_e = (pv_entry_t) kmem_cache_alloc(&pv_list_cache); goto Retry; } } diff --git a/i386/intel/pmap.h b/i386/intel/pmap.h index 7ba7d2c..e02ad36 100644 --- a/i386/intel/pmap.h +++ b/i386/intel/pmap.h @@ -37,7 +37,6 @@ #ifndef __ASSEMBLER__ -#include <kern/zalloc.h> #include <kern/lock.h> #include <mach/machine/vm_param.h> #include <mach/vm_statistics.h> diff --git a/include/mach_debug/mach_debug.defs b/include/mach_debug/mach_debug.defs index 2a58dc4..053c3fe 100644 --- a/include/mach_debug/mach_debug.defs +++ b/include/mach_debug/mach_debug.defs @@ -42,17 +42,7 @@ skip; /* host_ipc_statistics_reset */ skip; /* host_callout_info */ skip; /* host_callout_statistics */ skip; /* host_callout_statistics_reset */ - -/* - * Returns information about the memory allocation zones. - */ -routine host_zone_info( - host : host_t; - out names : zone_name_array_t, - CountInOut, Dealloc; - out info : zone_info_array_t, - CountInOut, Dealloc); - +skip; /* host_zone_info */ skip; /* host_ipc_bucket_info */ #if !defined(MACH_IPC_DEBUG) || MACH_IPC_DEBUG @@ -228,6 +218,14 @@ routine mach_vm_object_pages( out pages : vm_page_info_array_t, CountInOut, Dealloc); +/* + * Returns information about the memory allocation caches. 
+ */ +routine host_slab_info( + host : host_t; + out info : cache_info_array_t, + CountInOut, Dealloc); + #else /* !defined(MACH_VM_DEBUG) || MACH_VM_DEBUG */ skip; /* mach_vm_region_info */ skip; /* mach_vm_object_info */ diff --git a/include/mach_debug/mach_debug_types.defs b/include/mach_debug/mach_debug_types.defs index 9f1976f..f60125a 100644 --- a/include/mach_debug/mach_debug_types.defs +++ b/include/mach_debug/mach_debug_types.defs @@ -32,11 +32,8 @@ #include <mach/std_types.defs> -type zone_name_t = struct[80] of char; -type zone_name_array_t = array[] of zone_name_t; - -type zone_info_t = struct[9] of integer_t; -type zone_info_array_t = array[] of zone_info_t; +type cache_info_t = struct[19] of integer_t; +type cache_info_array_t = array[] of cache_info_t; type hash_info_bucket_t = struct[1] of natural_t; type hash_info_bucket_array_t = array[] of hash_info_bucket_t; diff --git a/include/mach_debug/mach_debug_types.h b/include/mach_debug/mach_debug_types.h index 2ba0cb1..5d4efcd 100644 --- a/include/mach_debug/mach_debug_types.h +++ b/include/mach_debug/mach_debug_types.h @@ -32,7 +32,7 @@ #include <mach_debug/ipc_info.h> #include <mach_debug/vm_info.h> -#include <mach_debug/zone_info.h> +#include <mach_debug/slab_info.h> #include <mach_debug/hash_info.h> typedef char symtab_name_t[32]; diff --git a/include/mach_debug/zone_info.h b/include/mach_debug/slab_info.h index 1b36fe0..37dcb8c 100644 --- a/include/mach_debug/zone_info.h +++ b/include/mach_debug/slab_info.h @@ -24,38 +24,39 @@ * the rights to redistribute these changes. */ -#ifndef _MACH_DEBUG_ZONE_INFO_H_ -#define _MACH_DEBUG_ZONE_INFO_H_ +#ifndef _MACH_DEBUG_SLAB_INFO_H_ +#define _MACH_DEBUG_SLAB_INFO_H_ -#include <mach/boolean.h> -#include <mach/machine/vm_types.h> +#include <sys/types.h> /* * Remember to update the mig type definitions * in mach_debug_types.defs when adding/removing fields. */ -#define ZONE_NAME_MAX_LEN 80 - -typedef struct zone_name { - char zn_name[ZONE_NAME_MAX_LEN]; -} zone_name_t; - -typedef zone_name_t *zone_name_array_t; - - -typedef struct zone_info { - integer_t zi_count; /* Number of elements used now */ - vm_size_t zi_cur_size; /* current memory utilization */ - vm_size_t zi_max_size; /* how large can this zone grow */ - vm_size_t zi_elem_size; /* size of an element */ - vm_size_t zi_alloc_size; /* size used for more memory */ -/*boolean_t*/integer_t zi_pageable; /* zone pageable? */ -/*boolean_t*/integer_t zi_sleepable; /* sleep if empty? */ -/*boolean_t*/integer_t zi_exhaustible; /* merely return if empty? */ -/*boolean_t*/integer_t zi_collectable; /* garbage collect elements? 
*/ -} zone_info_t; - -typedef zone_info_t *zone_info_array_t; - -#endif /* _MACH_DEBUG_ZONE_INFO_H_ */ +#define CACHE_NAME_MAX_LEN 32 + +#define CACHE_FLAGS_NO_CPU_POOL 0x01 +#define CACHE_FLAGS_SLAB_EXTERNAL 0x02 +#define CACHE_FLAGS_NO_RECLAIM 0x04 +#define CACHE_FLAGS_VERIFY 0x08 +#define CACHE_FLAGS_DIRECT 0x10 + +typedef struct cache_info { + int flags; + size_t cpu_pool_size; + size_t obj_size; + size_t align; + size_t buf_size; + size_t slab_size; + unsigned long bufs_per_slab; + unsigned long nr_objs; + unsigned long nr_bufs; + unsigned long nr_slabs; + unsigned long nr_free_slabs; + char name[CACHE_NAME_MAX_LEN]; +} cache_info_t; + +typedef cache_info_t *cache_info_array_t; + +#endif /* _MACH_DEBUG_SLAB_INFO_H_ */ diff --git a/ipc/ipc_entry.c b/ipc/ipc_entry.c index 42e8dd8..3a06244 100644 --- a/ipc/ipc_entry.c +++ b/ipc/ipc_entry.c @@ -41,7 +41,7 @@ #include <mach/port.h> #include <kern/assert.h> #include <kern/sched_prim.h> -#include <kern/zalloc.h> +#include <kern/slab.h> #include <ipc/port.h> #include <ipc/ipc_types.h> #include <ipc/ipc_entry.h> @@ -51,7 +51,7 @@ #include <ipc/ipc_table.h> #include <ipc/ipc_object.h> -zone_t ipc_tree_entry_zone; +struct kmem_cache ipc_tree_entry_cache; /* * Routine: ipc_entry_tree_collision diff --git a/ipc/ipc_entry.h b/ipc/ipc_entry.h index a577cf0..6afa4f6 100644 --- a/ipc/ipc_entry.h +++ b/ipc/ipc_entry.h @@ -41,7 +41,7 @@ #include <mach/mach_types.h> #include <mach/port.h> #include <mach/kern_return.h> -#include <kern/zalloc.h> +#include <kern/slab.h> #include <ipc/port.h> #include <ipc/ipc_table.h> #include <ipc/ipc_types.h> @@ -129,10 +129,10 @@ typedef struct ipc_tree_entry { #define ite_request ite_entry.ie_request #define ite_next ite_entry.hash.tree -extern zone_t ipc_tree_entry_zone; +extern struct kmem_cache ipc_tree_entry_cache; -#define ite_alloc() ((ipc_tree_entry_t) zalloc(ipc_tree_entry_zone)) -#define ite_free(ite) zfree(ipc_tree_entry_zone, (vm_offset_t) (ite)) +#define ite_alloc() ((ipc_tree_entry_t) kmem_cache_alloc(&ipc_tree_entry_cache)) +#define ite_free(ite) kmem_cache_free(&ipc_tree_entry_cache, (vm_offset_t) (ite)) extern ipc_entry_t diff --git a/ipc/ipc_hash.c b/ipc/ipc_hash.c index c241289..ad05016 100644 --- a/ipc/ipc_hash.c +++ b/ipc/ipc_hash.c @@ -535,13 +535,9 @@ ipc_hash_init(void) { ipc_hash_index_t i; - /* if not configured, initialize ipc_hash_global_size */ + /* initialize ipc_hash_global_size */ - if (ipc_hash_global_size == 0) { - ipc_hash_global_size = ipc_tree_entry_max >> 8; - if (ipc_hash_global_size < 32) - ipc_hash_global_size = 32; - } + ipc_hash_global_size = IPC_HASH_GLOBAL_SIZE; /* make sure it is a power of two */ diff --git a/ipc/ipc_hash.h b/ipc/ipc_hash.h index 58c56ca..929ba77 100644 --- a/ipc/ipc_hash.h +++ b/ipc/ipc_hash.h @@ -67,6 +67,8 @@ ipc_hash_delete(ipc_space_t space, ipc_object_t obj, * and the local primitives, for table entries. 
*/ +#define IPC_HASH_GLOBAL_SIZE 256 + extern boolean_t ipc_hash_global_lookup(ipc_space_t space, ipc_object_t obj, mach_port_t *namep, ipc_tree_entry_t *entryp); diff --git a/ipc/ipc_init.c b/ipc/ipc_init.c index e9ca64d..ca7e791 100644 --- a/ipc/ipc_init.c +++ b/ipc/ipc_init.c @@ -35,8 +35,8 @@ */ #include <mach/kern_return.h> -#include <kern/mach_param.h> #include <kern/ipc_host.h> +#include <kern/slab.h> #include <vm/vm_map.h> #include <vm/vm_kern.h> #include <ipc/ipc_entry.h> @@ -52,14 +52,10 @@ -vm_map_t ipc_kernel_map; +static struct vm_map ipc_kernel_map_store; +vm_map_t ipc_kernel_map = &ipc_kernel_map_store; vm_size_t ipc_kernel_map_size = 8 * 1024 * 1024; -int ipc_space_max = SPACE_MAX; -int ipc_tree_entry_max = ITE_MAX; -int ipc_port_max = PORT_MAX; -int ipc_pset_max = SET_MAX; - /* * Routine: ipc_bootstrap * Purpose: @@ -77,28 +73,17 @@ ipc_bootstrap(void) ipc_port_timestamp_lock_init(); ipc_port_timestamp_data = 0; - ipc_space_zone = zinit(sizeof(struct ipc_space), 0, - ipc_space_max * sizeof(struct ipc_space), - sizeof(struct ipc_space), - 0, "ipc spaces"); - - ipc_tree_entry_zone = - zinit(sizeof(struct ipc_tree_entry), 0, - ipc_tree_entry_max * sizeof(struct ipc_tree_entry), - sizeof(struct ipc_tree_entry), - IPC_ZONE_TYPE, "ipc tree entries"); - - ipc_object_zones[IOT_PORT] = - zinit(sizeof(struct ipc_port), 0, - ipc_port_max * sizeof(struct ipc_port), - sizeof(struct ipc_port), - 0, "ipc ports"); - - ipc_object_zones[IOT_PORT_SET] = - zinit(sizeof(struct ipc_pset), 0, - ipc_pset_max * sizeof(struct ipc_pset), - sizeof(struct ipc_pset), - IPC_ZONE_TYPE, "ipc port sets"); + kmem_cache_init(&ipc_space_cache, "ipc_space", + sizeof(struct ipc_space), 0, NULL, NULL, NULL, 0); + + kmem_cache_init(&ipc_tree_entry_cache, "ipc_tree_entry", + sizeof(struct ipc_tree_entry), 0, NULL, NULL, NULL, 0); + + kmem_cache_init(&ipc_object_caches[IOT_PORT], "ipc_port", + sizeof(struct ipc_port), 0, NULL, NULL, NULL, 0); + + kmem_cache_init(&ipc_object_caches[IOT_PORT_SET], "ipc_pset", + sizeof(struct ipc_pset), 0, NULL, NULL, NULL, 0); /* create special spaces */ @@ -127,8 +112,8 @@ ipc_init() { vm_offset_t min, max; - ipc_kernel_map = kmem_suballoc(kernel_map, &min, &max, - ipc_kernel_map_size, TRUE); + kmem_submap(ipc_kernel_map, kernel_map, &min, &max, + ipc_kernel_map_size, TRUE); ipc_host_init(); } diff --git a/ipc/ipc_init.h b/ipc/ipc_init.h index b2f1dd4..8dd64bb 100644 --- a/ipc/ipc_init.h +++ b/ipc/ipc_init.h @@ -37,14 +37,6 @@ #ifndef _IPC_IPC_INIT_H_ #define _IPC_IPC_INIT_H_ -/* all IPC zones should be exhaustible */ -#define IPC_ZONE_TYPE ZONE_EXHAUSTIBLE - -extern int ipc_space_max; -extern int ipc_tree_entry_max; -extern int ipc_port_max; -extern int ipc_pset_max; - /* * Exported interfaces */ diff --git a/ipc/ipc_marequest.c b/ipc/ipc_marequest.c index 540382a..06c53eb 100644 --- a/ipc/ipc_marequest.c +++ b/ipc/ipc_marequest.c @@ -37,9 +37,8 @@ #include <mach/message.h> #include <mach/port.h> #include <kern/lock.h> -#include <kern/mach_param.h> #include <kern/kalloc.h> -#include <kern/zalloc.h> +#include <kern/slab.h> #include <ipc/port.h> #include <ipc/ipc_init.h> #include <ipc/ipc_space.h> @@ -58,11 +57,10 @@ #endif -zone_t ipc_marequest_zone; -int ipc_marequest_max = IMAR_MAX; +struct kmem_cache ipc_marequest_cache; -#define imar_alloc() ((ipc_marequest_t) zalloc(ipc_marequest_zone)) -#define imar_free(imar) zfree(ipc_marequest_zone, (vm_offset_t) (imar)) +#define imar_alloc() ((ipc_marequest_t) kmem_cache_alloc(&ipc_marequest_cache)) +#define imar_free(imar) 
kmem_cache_free(&ipc_marequest_cache, (vm_offset_t) (imar)) typedef unsigned int ipc_marequest_index_t; @@ -100,13 +98,9 @@ ipc_marequest_init(void) { ipc_marequest_index_t i; - /* if not configured, initialize ipc_marequest_size */ + /* initialize ipc_marequest_size */ - if (ipc_marequest_size == 0) { - ipc_marequest_size = ipc_marequest_max >> 8; - if (ipc_marequest_size < 16) - ipc_marequest_size = 16; - } + ipc_marequest_size = IPC_MAREQUEST_SIZE; /* make sure it is a power of two */ @@ -142,11 +136,8 @@ ipc_marequest_init(void) bucket->imarb_head = IMAR_NULL; } - ipc_marequest_zone = - zinit(sizeof(struct ipc_marequest), 0, - ipc_marequest_max * sizeof(struct ipc_marequest), - sizeof(struct ipc_marequest), - IPC_ZONE_TYPE, "ipc msg-accepted requests"); + kmem_cache_init(&ipc_marequest_cache, "ipc_marequest", + sizeof(struct ipc_marequest), 0, NULL, NULL, NULL, 0); } /* @@ -439,7 +430,7 @@ ipc_marequest_info(maxp, info, count) info[i].hib_count = bucket_count; } - *maxp = ipc_marequest_max; + *maxp = (unsigned int)-1; return ipc_marequest_size; } diff --git a/ipc/ipc_marequest.h b/ipc/ipc_marequest.h index eb3746d..4f6f758 100644 --- a/ipc/ipc_marequest.h +++ b/ipc/ipc_marequest.h @@ -70,6 +70,7 @@ typedef struct ipc_marequest { #define IMAR_NULL ((ipc_marequest_t) 0) +#define IPC_MAREQUEST_SIZE 16 extern void ipc_marequest_init(void); diff --git a/ipc/ipc_object.c b/ipc/ipc_object.c index a7a7ddb..4850fb1 100644 --- a/ipc/ipc_object.c +++ b/ipc/ipc_object.c @@ -47,8 +47,9 @@ #include <ipc/ipc_pset.h> #include <kern/debug.h> #include <kern/printf.h> +#include <kern/slab.h> -zone_t ipc_object_zones[IOT_NUMBER]; +struct kmem_cache ipc_object_caches[IOT_NUMBER]; diff --git a/ipc/ipc_object.h b/ipc/ipc_object.h index 2bbf8bd..adf5bca 100644 --- a/ipc/ipc_object.h +++ b/ipc/ipc_object.h @@ -39,7 +39,7 @@ #include <ipc/ipc_types.h> #include <kern/lock.h> #include <kern/macro_help.h> -#include <kern/zalloc.h> +#include <kern/slab.h> typedef unsigned int ipc_object_refs_t; typedef unsigned int ipc_object_bits_t; @@ -57,7 +57,7 @@ typedef struct ipc_object { #define IO_VALID(io) (((io) != IO_NULL) && ((io) != IO_DEAD)) #define IO_BITS_KOTYPE 0x0000ffff /* used by the object */ -#define IO_BITS_OTYPE 0x7fff0000 /* determines a zone */ +#define IO_BITS_OTYPE 0x7fff0000 /* determines a cache */ #define IO_BITS_ACTIVE 0x80000000U /* is object alive? 
*/ #define io_active(io) ((int)(io)->io_bits < 0) /* hack */ @@ -75,13 +75,13 @@ typedef struct ipc_object { #define IOT_PORT_SET 1 #define IOT_NUMBER 2 /* number of types used */ -extern zone_t ipc_object_zones[IOT_NUMBER]; +extern struct kmem_cache ipc_object_caches[IOT_NUMBER]; #define io_alloc(otype) \ - ((ipc_object_t) zalloc(ipc_object_zones[(otype)])) + ((ipc_object_t) kmem_cache_alloc(&ipc_object_caches[(otype)])) #define io_free(otype, io) \ - zfree(ipc_object_zones[(otype)], (vm_offset_t) (io)) + kmem_cache_free(&ipc_object_caches[(otype)], (vm_offset_t) (io)) #define io_lock_init(io) simple_lock_init(&(io)->io_lock_data) #define io_lock(io) simple_lock(&(io)->io_lock_data) diff --git a/ipc/ipc_space.c b/ipc/ipc_space.c index 0f50f15..ab55e83 100644 --- a/ipc/ipc_space.c +++ b/ipc/ipc_space.c @@ -43,7 +43,7 @@ #include <mach/port.h> #include <kern/assert.h> #include <kern/sched_prim.h> -#include <kern/zalloc.h> +#include <kern/slab.h> #include <ipc/port.h> #include <ipc/ipc_entry.h> #include <ipc/ipc_splay.h> @@ -55,7 +55,7 @@ -zone_t ipc_space_zone; +struct kmem_cache ipc_space_cache; ipc_space_t ipc_space_kernel; ipc_space_t ipc_space_reply; diff --git a/ipc/ipc_space.h b/ipc/ipc_space.h index d030bf7..c4683d2 100644 --- a/ipc/ipc_space.h +++ b/ipc/ipc_space.h @@ -44,6 +44,7 @@ #include <mach/mach_types.h> #include <kern/macro_help.h> #include <kern/lock.h> +#include <kern/slab.h> #include <ipc/ipc_splay.h> #include <ipc/ipc_types.h> @@ -82,10 +83,10 @@ struct ipc_space { #define IS_NULL ((ipc_space_t) 0) -extern zone_t ipc_space_zone; +extern struct kmem_cache ipc_space_cache; -#define is_alloc() ((ipc_space_t) zalloc(ipc_space_zone)) -#define is_free(is) zfree(ipc_space_zone, (vm_offset_t) (is)) +#define is_alloc() ((ipc_space_t) kmem_cache_alloc(&ipc_space_cache)) +#define is_free(is) kmem_cache_free(&ipc_space_cache, (vm_offset_t) (is)) extern struct ipc_space *ipc_space_kernel; extern struct ipc_space *ipc_space_reply; diff --git a/ipc/ipc_table.c b/ipc/ipc_table.c index e572358..d5b7904 100644 --- a/ipc/ipc_table.c +++ b/ipc/ipc_table.c @@ -50,13 +50,6 @@ void ipc_table_fill( unsigned int min, vm_size_t elemsize); -/* - * We borrow the kalloc map, rather than creating - * yet another submap of the kernel map. - */ - -extern vm_map_t kalloc_map; - ipc_table_size_t ipc_table_entries; unsigned int ipc_table_entries_size = 512; @@ -30,8 +30,7 @@ #include <mach/kern_return.h> #include <mach/alert.h> -#include <kern/mach_param.h> /* XXX INCALL_... */ -#include <kern/zalloc.h> +#include <kern/slab.h> #include <kern/thread.h> #include <kern/task.h> #include <kern/debug.h> @@ -47,7 +46,7 @@ static void special_handler(ReturnHandler *rh, struct Act *act); #endif #ifndef ACT_STATIC_KLUDGE -static zone_t act_zone; +static struct kmem_cache act_cache; #else static Act *act_freelist; static Act free_acts[ACT_STATIC_KLUDGE]; @@ -68,11 +67,8 @@ void global_act_init() { #ifndef ACT_STATIC_KLUDGE - act_zone = zinit( - sizeof(struct Act), 0, - ACT_MAX * sizeof(struct Act), /* XXX */ - ACT_CHUNK * sizeof(struct Act), - 0, "activations"); + kmem_cache_init(&act_cache, "Act", sizeof(struct Act), 0, + NULL, NULL, NULL, 0); #else int i; @@ -104,7 +100,7 @@ kern_return_t act_create(task_t task, vm_offset_t user_stack, int rc; #ifndef ACT_STATIC_KLUDGE - act = (Act*)zalloc(act_zone); + act = (Act*)kmem_cache_alloc(&act_cache); if (act == 0) return(KERN_RESOURCE_SHORTAGE); #else @@ -170,9 +166,9 @@ static void act_free(Act *inc) /* Drop the task reference. 
*/ task_deallocate(inc->task); - /* Put the act back on the act zone */ + /* Put the act back on the act cache */ #ifndef ACT_STATIC_KLUDGE - zfree(act_zone, (vm_offset_t)inc); + kmem_cache_free(&act_cache, (vm_offset_t)inc); #else /* XXX ipt_lock(act_freelist); */ inc->ipt_next = act_freelist; diff --git a/kern/bootstrap.c b/kern/bootstrap.c index 2c63df4..68f40b4 100644 --- a/kern/bootstrap.c +++ b/kern/bootstrap.c @@ -42,6 +42,7 @@ #include <kern/debug.h> #include <kern/host.h> #include <kern/printf.h> +#include <kern/kalloc.h> #include <kern/task.h> #include <kern/thread.h> #include <kern/lock.h> diff --git a/kern/ipc_tt.c b/kern/ipc_tt.c index de4edc6..6d32e5b 100644 --- a/kern/ipc_tt.c +++ b/kern/ipc_tt.c @@ -36,6 +36,7 @@ #include <mach/thread_special_ports.h> #include <vm/vm_kern.h> #include <kern/debug.h> +#include <kern/kalloc.h> #include <kern/task.h> #include <kern/thread.h> #include <kern/ipc_kobject.h> diff --git a/kern/kalloc.c b/kern/kalloc.c deleted file mode 100644 index 8256305..0000000 --- a/kern/kalloc.c +++ /dev/null @@ -1,254 +0,0 @@ -/* - * Mach Operating System - * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University. - * Copyright (c) 1993,1994 The University of Utah and - * the Computer Systems Laboratory (CSL). - * All rights reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF - * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY - * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF - * THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - * File: kern/kalloc.c - * Author: Avadis Tevanian, Jr. - * Date: 1985 - * - * General kernel memory allocator. This allocator is designed - * to be used by the kernel to manage dynamic memory fast. - */ - -#include <mach/machine/vm_types.h> -#include <mach/vm_param.h> - -#include <kern/debug.h> -#include <kern/zalloc.h> -#include <kern/kalloc.h> -#include <vm/vm_kern.h> -#include <vm/vm_object.h> -#include <vm/vm_map.h> - - - -vm_map_t kalloc_map; -vm_size_t kalloc_map_size = 64 * 1024 * 1024; -vm_size_t kalloc_max; - -/* - * All allocations of size less than kalloc_max are rounded to the - * next highest power of 2. This allocator is built on top of - * the zone allocator. A zone is created for each potential size - * that we are willing to get in small blocks. - * - * We assume that kalloc_max is not greater than 64K; - * thus 16 is a safe array size for k_zone and k_zone_name. - */ - -int first_k_zone = -1; -struct zone *k_zone[16]; -static char *k_zone_name[16] = { - "kalloc.1", "kalloc.2", - "kalloc.4", "kalloc.8", - "kalloc.16", "kalloc.32", - "kalloc.64", "kalloc.128", - "kalloc.256", "kalloc.512", - "kalloc.1024", "kalloc.2048", - "kalloc.4096", "kalloc.8192", - "kalloc.16384", "kalloc.32768" -}; - -/* - * Max number of elements per zone. 
zinit rounds things up correctly - * Doing things this way permits each zone to have a different maximum size - * based on need, rather than just guessing; it also - * means its patchable in case you're wrong! - */ -unsigned long k_zone_max[16] = { - 1024, /* 1 Byte */ - 1024, /* 2 Byte */ - 1024, /* 4 Byte */ - 1024, /* 8 Byte */ - 1024, /* 16 Byte */ - 4096, /* 32 Byte */ - 4096, /* 64 Byte */ - 4096, /* 128 Byte */ - 4096, /* 256 Byte */ - 1024, /* 512 Byte */ - 1024, /* 1024 Byte */ - 1024, /* 2048 Byte */ - 1024, /* 4096 Byte */ - 4096, /* 8192 Byte */ - 64, /* 16384 Byte */ - 64, /* 32768 Byte */ -}; - -/* - * Initialize the memory allocator. This should be called only - * once on a system wide basis (i.e. first processor to get here - * does the initialization). - * - * This initializes all of the zones. - */ - -#ifndef NDEBUG -static int kalloc_init_called; -#endif - -void kalloc_init() -{ - vm_offset_t min, max; - vm_size_t size; - register int i; - - assert (! kalloc_init_called); - - kalloc_map = kmem_suballoc(kernel_map, &min, &max, - kalloc_map_size, FALSE); - - /* - * Ensure that zones up to size 8192 bytes exist. - * This is desirable because messages are allocated - * with kalloc, and messages up through size 8192 are common. - */ - - if (PAGE_SIZE < 16*1024) - kalloc_max = 16*1024; - else - kalloc_max = PAGE_SIZE; - - /* - * Allocate a zone for each size we are going to handle. - * We specify non-paged memory. - */ - for (i = 0, size = 1; size < kalloc_max; i++, size <<= 1) { - if (size < MINSIZE) { - k_zone[i] = 0; - continue; - } - if (size == MINSIZE) { - first_k_zone = i; - } - k_zone[i] = zinit(size, 0, k_zone_max[i] * size, size, - size >= PAGE_SIZE ? ZONE_COLLECTABLE : 0, - k_zone_name[i]); - } - -#ifndef NDEBUG - kalloc_init_called = 1; -#endif -} - -vm_offset_t kalloc(size) - vm_size_t size; -{ - register int zindex; - register vm_size_t allocsize; - vm_offset_t addr; - - /* compute the size of the block that we will actually allocate */ - - assert (kalloc_init_called); - - allocsize = size; - if (size < kalloc_max) { - allocsize = MINSIZE; - zindex = first_k_zone; - while (allocsize < size) { - allocsize <<= 1; - zindex++; - } - } - - /* - * If our size is still small enough, check the queue for that size - * and allocate. - */ - - if (allocsize < kalloc_max) { - addr = zalloc(k_zone[zindex]); - } else { - if (kmem_alloc_wired(kalloc_map, &addr, allocsize) - != KERN_SUCCESS) - addr = 0; - } - return(addr); -} - -vm_offset_t kget(size) - vm_size_t size; -{ - register int zindex; - register vm_size_t allocsize; - vm_offset_t addr; - - assert (kalloc_init_called); - - /* compute the size of the block that we will actually allocate */ - - allocsize = size; - if (size < kalloc_max) { - allocsize = MINSIZE; - zindex = first_k_zone; - while (allocsize < size) { - allocsize <<= 1; - zindex++; - } - } - - /* - * If our size is still small enough, check the queue for that size - * and allocate. 
- */ - - if (allocsize < kalloc_max) { - addr = zget(k_zone[zindex]); - } else { - /* This will never work, so we might as well panic */ - panic("kget"); - } - return(addr); -} - -void -kfree(data, size) - vm_offset_t data; - vm_size_t size; -{ - register int zindex; - register vm_size_t freesize; - - assert (kalloc_init_called); - - freesize = size; - if (size < kalloc_max) { - freesize = MINSIZE; - zindex = first_k_zone; - while (freesize < size) { - freesize <<= 1; - zindex++; - } - } - - if (freesize < kalloc_max) { - zfree(k_zone[zindex], data); - } else { - kmem_free(kalloc_map, data, freesize); - } -} diff --git a/kern/kalloc.h b/kern/kalloc.h index a80f6db..1330b54 100644 --- a/kern/kalloc.h +++ b/kern/kalloc.h @@ -28,11 +28,11 @@ #define _KERN_KALLOC_H_ #include <mach/machine/vm_types.h> +#include <vm/vm_types.h> -#define MINSIZE 16 +extern vm_map_t kalloc_map; extern vm_offset_t kalloc (vm_size_t size); -extern vm_offset_t kget (vm_size_t size); extern void kfree (vm_offset_t data, vm_size_t size); extern void kalloc_init (void); diff --git a/kern/list.h b/kern/list.h new file mode 100644 index 0000000..de7d115 --- /dev/null +++ b/kern/list.h @@ -0,0 +1,349 @@ +/* + * Copyright (c) 2009, 2010 Richard Braun. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef _KERN_LIST_H +#define _KERN_LIST_H + +#include <stddef.h> +#include <sys/types.h> + +#define structof(ptr, type, member) \ + ((type *)((char *)ptr - offsetof(type, member))) + +/* + * Structure used as both head and node. + * + * This implementation relies on using the same type for both heads and nodes. + * + * It is recommended to encode the use of struct list variables in their names, + * e.g. struct list free_list or struct list free_objects is a good hint for a + * list of free objects. A declaration like struct list free_node clearly + * indicates it is used as part of a node in the free list. + */ +struct list { + struct list *prev; + struct list *next; +}; + +/* + * Static list initializer. + */ +#define LIST_INITIALIZER(list) { &(list), &(list) } + +/* + * Initialize a list. + */ +static inline void list_init(struct list *list) +{ + list->prev = list; + list->next = list; +} + +/* + * Initialize a list node. + * + * An entry is in no list when its node members point to NULL. + */ +static inline void list_node_init(struct list *node) +{ + node->prev = NULL; + node->next = NULL; +} + +/* + * Return true if node is in no list. + */ +static inline int list_node_unlinked(const struct list *node) +{ + return node->prev == NULL; +} + +/* + * Macro that evaluates to the address of the structure containing the + * given node based on the given type and member. + */ +#define list_entry(node, type, member) structof(node, type, member) + +/* + * Return the first node of a list. 
+ */ +static inline struct list * list_first(const struct list *list) +{ + return list->next; +} + +/* + * Return the last node of a list. + */ +static inline struct list * list_last(const struct list *list) +{ + return list->prev; +} + +/* + * Return the node next to the given node. + */ +static inline struct list * list_next(const struct list *node) +{ + return node->next; +} + +/* + * Return the node previous to the given node. + */ +static inline struct list * list_prev(const struct list *node) +{ + return node->prev; +} + +/* + * Get the first entry of a list. + */ +#define list_first_entry(list, type, member) \ + list_entry(list_first(list), type, member) + +/* + * Get the last entry of a list. + */ +#define list_last_entry(list, type, member) \ + list_entry(list_last(list), type, member) + +/* + * Return true if node is after the last or before the first node of the list. + */ +static inline int list_end(const struct list *list, const struct list *node) +{ + return list == node; +} + +/* + * Return true if list is empty. + */ +static inline int list_empty(const struct list *list) +{ + return list == list->next; +} + +/* + * Return true if list contains exactly one node. + */ +static inline int list_singular(const struct list *list) +{ + return (list != list->next) && (list->next == list->prev); +} + +/* + * Split list2 by moving its nodes up to (but not including) the given + * node into list1 (which can be in a stale state). + * + * If list2 is empty, or node is list2 or list2->next, nothing is done. + */ +static inline void list_split(struct list *list1, struct list *list2, + struct list *node) +{ + if (list_empty(list2) || (list2->next == node) || list_end(list2, node)) + return; + + list1->next = list2->next; + list1->next->prev = list1; + + list1->prev = node->prev; + node->prev->next = list1; + + list2->next = node; + node->prev = list2; +} + +/* + * Append the nodes of list2 at the end of list1. + * + * After completion, list2 is stale. + */ +static inline void list_concat(struct list *list1, const struct list *list2) +{ + struct list *last1, *first2, *last2; + + if (list_empty(list2)) + return; + + last1 = list1->prev; + first2 = list2->next; + last2 = list2->prev; + + last1->next = first2; + first2->prev = last1; + + last2->next = list1; + list1->prev = last2; +} + +/* + * Set the new head of a list. + * + * This function is an optimized version of : + * list_init(&new_list); + * list_concat(&new_list, &old_list); + * + * After completion, old_head is stale. + */ +static inline void list_set_head(struct list *new_head, + const struct list *old_head) +{ + if (list_empty(old_head)) { + list_init(new_head); + return; + } + + *new_head = *old_head; + new_head->next->prev = new_head; + new_head->prev->next = new_head; +} + +/* + * Add a node between two nodes. + */ +static inline void list_add(struct list *prev, struct list *next, + struct list *node) +{ + next->prev = node; + node->next = next; + + prev->next = node; + node->prev = prev; +} + +/* + * Insert a node at the head of a list. + */ +static inline void list_insert(struct list *list, struct list *node) +{ + list_add(list, list->next, node); +} + +/* + * Insert a node at the tail of a list. + */ +static inline void list_insert_tail(struct list *list, struct list *node) +{ + list_add(list->prev, list, node); +} + +/* + * Insert a node before another node. + */ +static inline void list_insert_before(struct list *next, struct list *node) +{ + list_add(next->prev, next, node); +} + +/* + * Insert a node after another node. 
+ */ +static inline void list_insert_after(struct list *prev, struct list *node) +{ + list_add(prev, prev->next, node); +} + +/* + * Remove a node from a list. + * + * After completion, the node is stale. + */ +static inline void list_remove(struct list *node) +{ + node->prev->next = node->next; + node->next->prev = node->prev; +} + +/* + * Forge a loop to process all nodes of a list. + * + * The node must not be altered during the loop. + */ +#define list_for_each(list, node) \ +for (node = list_first(list); \ + !list_end(list, node); \ + node = list_next(node)) + +/* + * Forge a loop to process all nodes of a list. + */ +#define list_for_each_safe(list, node, tmp) \ +for (node = list_first(list), tmp = list_next(node); \ + !list_end(list, node); \ + node = tmp, tmp = list_next(node)) + +/* + * Version of list_for_each() that processes nodes backward. + */ +#define list_for_each_reverse(list, node) \ +for (node = list_last(list); \ + !list_end(list, node); \ + node = list_prev(node)) + +/* + * Version of list_for_each_safe() that processes nodes backward. + */ +#define list_for_each_reverse_safe(list, node, tmp) \ +for (node = list_last(list), tmp = list_prev(node); \ + !list_end(list, node); \ + node = tmp, tmp = list_prev(node)) + +/* + * Forge a loop to process all entries of a list. + * + * The entry node must not be altered during the loop. + */ +#define list_for_each_entry(list, entry, member) \ +for (entry = list_entry(list_first(list), typeof(*entry), member); \ + !list_end(list, &entry->member); \ + entry = list_entry(list_next(&entry->member), typeof(*entry), \ + member)) + +/* + * Forge a loop to process all entries of a list. + */ +#define list_for_each_entry_safe(list, entry, tmp, member) \ +for (entry = list_entry(list_first(list), typeof(*entry), member), \ + tmp = list_entry(list_next(&entry->member), typeof(*entry), \ + member); \ + !list_end(list, &entry->member); \ + entry = tmp, tmp = list_entry(list_next(&entry->member), \ + typeof(*entry), member)) + +/* + * Version of list_for_each_entry() that processes entries backward. + */ +#define list_for_each_entry_reverse(list, entry, member) \ +for (entry = list_entry(list_last(list), typeof(*entry), member); \ + !list_end(list, &entry->member); \ + entry = list_entry(list_prev(&entry->member), typeof(*entry), \ + member)) + +/* + * Version of list_for_each_entry_safe() that processes entries backward. + */ +#define list_for_each_entry_reverse_safe(list, entry, tmp, member) \ +for (entry = list_entry(list_last(list), typeof(*entry), member), \ + tmp = list_entry(list_prev(&entry->member), typeof(*entry), \ + member); \ + !list_end(list, &entry->member); \ + entry = tmp, tmp = list_entry(list_prev(&entry->member), \ + typeof(*entry), member)) + +#endif /* _KERN_LIST_H */ diff --git a/kern/mach_clock.c b/kern/mach_clock.c index 4ba7c08..edf87f0 100644 --- a/kern/mach_clock.c +++ b/kern/mach_clock.c @@ -47,7 +47,6 @@ #include <kern/host.h> #include <kern/lock.h> #include <kern/mach_clock.h> -#include <kern/mach_param.h> #include <kern/processor.h> #include <kern/queue.h> #include <kern/sched.h> @@ -514,7 +513,7 @@ int timeclose() /* * Compatibility for device drivers. * New code should use set_timeout/reset_timeout and private timers. - * These code can't use a zone to allocate timers, because + * These code can't use a cache to allocate timers, because * it can be called from interrupt handlers. 
*/ diff --git a/kern/mach_param.h b/kern/mach_param.h deleted file mode 100644 index 10376d8..0000000 --- a/kern/mach_param.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Mach Operating System - * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University. - * Copyright (c) 1993,1994 The University of Utah and - * the Computer Systems Laboratory (CSL). - * All rights reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF - * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY - * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF - * THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - * File: kern/mach_param.h - * Author: Avadis Tevanian, Jr., Michael Wayne Young - * Date: 1986 - * - * Mach system sizing parameters - * - */ - -#ifndef _KERN_MACH_PARAM_H_ -#define _KERN_MACH_PARAM_H_ - -#define THREAD_MAX 1024 /* Max number of threads */ -#define THREAD_CHUNK 64 /* Allocation chunk */ - -#define TASK_MAX 1024 /* Max number of tasks */ -#define TASK_CHUNK 64 /* Allocation chunk */ - -#define ACT_MAX 1024 /* Max number of acts */ -#define ACT_CHUNK 64 /* Allocation chunk */ - -#define ACTPOOL_MAX 1024 -#define ACTPOOL_CHUNK 64 - -#define PORT_MAX ((TASK_MAX * 3 + THREAD_MAX) /* kernel */ \ - + (THREAD_MAX * 2) /* user */ \ - + 40000) /* slop for objects */ - /* Number of ports, system-wide */ - -#define SET_MAX (TASK_MAX + THREAD_MAX + 200) - /* Max number of port sets */ - -#define ITE_MAX (1 << 16) /* Max number of splay tree entries */ - -#define SPACE_MAX (TASK_MAX + 5) /* Max number of IPC spaces */ - -#define IMAR_MAX (1 << 10) /* Max number of msg-accepted reqs */ - -#endif /* _KERN_MACH_PARAM_H_ */ diff --git a/kern/macro_help.h b/kern/macro_help.h index e13b01d..a3d156b 100644 --- a/kern/macro_help.h +++ b/kern/macro_help.h @@ -45,8 +45,8 @@ boolean_t ALWAYS; #define ALWAYS TRUE #endif /* lint */ -#define MACRO_BEGIN do { -#define MACRO_END } while (NEVER) +#define MACRO_BEGIN ({ +#define MACRO_END }) #define MACRO_RETURN if (ALWAYS) return diff --git a/kern/pc_sample.c b/kern/pc_sample.c index c82707b..2cec907 100644 --- a/kern/pc_sample.c +++ b/kern/pc_sample.c @@ -31,6 +31,7 @@ #include <mach/std_types.h> /* pointer_t */ #include <mach/pc_sample.h> #include <machine/trap.h> +#include <kern/kalloc.h> #include <kern/host.h> #include <kern/thread.h> #include <kern/pc_sample.h> diff --git a/kern/priority.c b/kern/priority.c index feddd8e..17541b8 100644 --- a/kern/priority.c +++ b/kern/priority.c @@ -39,7 +39,6 @@ #include <mach/machine.h> #include <kern/host.h> #include <kern/mach_clock.h> -#include <kern/mach_param.h> #include <kern/sched.h> #include <kern/sched_prim.h> #include <kern/thread.h> diff --git a/kern/processor.c b/kern/processor.c index 718ff3a..1986860 100644 --- a/kern/processor.c +++ b/kern/processor.c @@ -35,6 +35,7 @@ #include 
<mach/vm_param.h> #include <kern/cpu_number.h> #include <kern/debug.h> +#include <kern/kalloc.h> #include <kern/lock.h> #include <kern/host.h> #include <kern/ipc_tt.h> @@ -46,8 +47,8 @@ #include <ipc/ipc_port.h> #if MACH_HOST -#include <kern/zalloc.h> -zone_t pset_zone; +#include <kern/slab.h> +struct kmem_cache pset_cache; #endif /* MACH_HOST */ @@ -112,10 +113,10 @@ void pset_sys_init(void) register processor_t processor; /* - * Allocate the zone for processor sets. + * Allocate the cache for processor sets. */ - pset_zone = zinit(sizeof(struct processor_set), 0, 128*PAGE_SIZE, - PAGE_SIZE, 0, "processor sets"); + kmem_cache_init(&pset_cache, "processor_set", + sizeof(struct processor_set), 0, NULL, NULL, NULL, 0); /* * Give each processor a control port. @@ -394,7 +395,7 @@ void pset_deallocate( /* * That's it, free data structure. */ - zfree(pset_zone, (vm_offset_t)pset); + kmem_cache_free(&pset_cache, (vm_offset_t)pset); #endif /* MACH_HOST */ } @@ -538,7 +539,7 @@ processor_set_create( if (host == HOST_NULL) return KERN_INVALID_ARGUMENT; - pset = (processor_set_t) zalloc(pset_zone); + pset = (processor_set_t) kmem_cache_alloc(&pset_cache); pset_init(pset); pset_reference(pset); /* for new_set out argument */ pset_reference(pset); /* for new_name out argument */ diff --git a/kern/rbtree.c b/kern/rbtree.c new file mode 100644 index 0000000..1c04c5c --- /dev/null +++ b/kern/rbtree.c @@ -0,0 +1,478 @@ +/* + * Copyright (c) 2010 Richard Braun. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <kern/assert.h> +#include <kern/rbtree.h> +#include <kern/rbtree_i.h> +#include <sys/types.h> + +#define unlikely(expr) __builtin_expect(!!(expr), 0) + +/* + * Return the index of a node in the children array of its parent. + * + * The parent parameter must not be null, and must be the parent of the + * given node. + */ +static inline int rbtree_index(const struct rbtree_node *node, + const struct rbtree_node *parent) +{ + assert(parent != NULL); + assert((node == NULL) || (rbtree_parent(node) == parent)); + + if (parent->children[RBTREE_LEFT] == node) + return RBTREE_LEFT; + + assert(parent->children[RBTREE_RIGHT] == node); + + return RBTREE_RIGHT; +} + +/* + * Return the color of a node. + */ +static inline int rbtree_color(const struct rbtree_node *node) +{ + return node->parent & RBTREE_COLOR_MASK; +} + +/* + * Return true if the node is red. + */ +static inline int rbtree_is_red(const struct rbtree_node *node) +{ + return rbtree_color(node) == RBTREE_COLOR_RED; +} + +/* + * Return true if the node is black. + */ +static inline int rbtree_is_black(const struct rbtree_node *node) +{ + return rbtree_color(node) == RBTREE_COLOR_BLACK; +} + +/* + * Set the parent of a node, retaining its current color. 
+ */ +static inline void rbtree_set_parent(struct rbtree_node *node, + struct rbtree_node *parent) +{ + assert(rbtree_check_alignment(node)); + assert(rbtree_check_alignment(parent)); + + node->parent = (unsigned long)parent | (node->parent & RBTREE_COLOR_MASK); +} + +/* + * Set the color of a node, retaining its current parent. + */ +static inline void rbtree_set_color(struct rbtree_node *node, int color) +{ + assert((color & ~RBTREE_COLOR_MASK) == 0); + node->parent = (node->parent & RBTREE_PARENT_MASK) | color; +} + +/* + * Set the color of a node to red, retaining its current parent. + */ +static inline void rbtree_set_red(struct rbtree_node *node) +{ + rbtree_set_color(node, RBTREE_COLOR_RED); +} + +/* + * Set the color of a node to black, retaining its current parent. + */ +static inline void rbtree_set_black(struct rbtree_node *node) +{ + rbtree_set_color(node, RBTREE_COLOR_BLACK); +} + +/* + * Perform a tree rotation, rooted at the given node. + * + * The direction parameter defines the rotation direction and is either + * RBTREE_LEFT or RBTREE_RIGHT. + */ +static void rbtree_rotate(struct rbtree *tree, struct rbtree_node *node, + int direction) +{ + struct rbtree_node *parent, *rnode; + int left, right; + + left = direction; + right = 1 - left; + parent = rbtree_parent(node); + rnode = node->children[right]; + + node->children[right] = rnode->children[left]; + + if (rnode->children[left] != NULL) + rbtree_set_parent(rnode->children[left], node); + + rnode->children[left] = node; + rbtree_set_parent(rnode, parent); + + if (unlikely(parent == NULL)) + tree->root = rnode; + else + parent->children[rbtree_index(node, parent)] = rnode; + + rbtree_set_parent(node, rnode); +} + +void rbtree_insert_rebalance(struct rbtree *tree, struct rbtree_node *parent, + int index, struct rbtree_node *node) +{ + struct rbtree_node *grand_parent, *uncle, *tmp; + int left, right; + + assert(rbtree_check_alignment(parent)); + assert(rbtree_check_alignment(node)); + + node->parent = (unsigned long)parent | RBTREE_COLOR_RED; + node->children[RBTREE_LEFT] = NULL; + node->children[RBTREE_RIGHT] = NULL; + + if (unlikely(parent == NULL)) + tree->root = node; + else + parent->children[index] = node; + + for (;;) { + if (parent == NULL) { + rbtree_set_black(node); + break; + } + + if (rbtree_is_black(parent)) + break; + + grand_parent = rbtree_parent(parent); + assert(grand_parent != NULL); + + left = rbtree_index(parent, grand_parent); + right = 1 - left; + + uncle = grand_parent->children[right]; + + /* + * Case 1: uncle is red. Flip colors and repeat at grand parent. + */ + if ((uncle != NULL) && rbtree_is_red(uncle)) { + rbtree_set_black(uncle); + rbtree_set_black(parent); + rbtree_set_red(grand_parent); + node = grand_parent; + parent = rbtree_parent(node); + continue; + } + + /* + * Case 2: node is the right child of its parent. Rotate left at parent + * to reduce to case 3. + */ + if (parent->children[right] == node) { + rbtree_rotate(tree, parent, left); + tmp = node; + node = parent; + parent = tmp; + } + + /* + * Case 3: node is the left child of its parent. Handle colors, rotate + * right at grand parent, and leave. 
+ */ + rbtree_set_black(parent); + rbtree_set_red(grand_parent); + rbtree_rotate(tree, grand_parent, right); + break; + } + + assert(rbtree_is_black(tree->root)); +} + +void rbtree_remove(struct rbtree *tree, struct rbtree_node *node) +{ + struct rbtree_node *child, *parent, *brother; + int color, left, right; + + if (node->children[RBTREE_LEFT] == NULL) + child = node->children[RBTREE_RIGHT]; + else if (node->children[RBTREE_RIGHT] == NULL) + child = node->children[RBTREE_LEFT]; + else { + struct rbtree_node *successor; + + /* + * Two-children case: replace the node with its successor. + */ + + successor = node->children[RBTREE_RIGHT]; + + while (successor->children[RBTREE_LEFT] != NULL) + successor = successor->children[RBTREE_LEFT]; + + color = rbtree_color(successor); + child = successor->children[RBTREE_RIGHT]; + parent = rbtree_parent(node); + + if (unlikely(parent == NULL)) + tree->root = successor; + else + parent->children[rbtree_index(node, parent)] = successor; + + parent = rbtree_parent(successor); + + /* + * Set parent directly to keep the original color. + */ + successor->parent = node->parent; + successor->children[RBTREE_LEFT] = node->children[RBTREE_LEFT]; + rbtree_set_parent(successor->children[RBTREE_LEFT], successor); + + if (node == parent) + parent = successor; + else { + successor->children[RBTREE_RIGHT] = node->children[RBTREE_RIGHT]; + rbtree_set_parent(successor->children[RBTREE_RIGHT], successor); + parent->children[RBTREE_LEFT] = child; + + if (child != NULL) + rbtree_set_parent(child, parent); + } + + goto update_color; + } + + /* + * Node has at most one child. + */ + + color = rbtree_color(node); + parent = rbtree_parent(node); + + if (child != NULL) + rbtree_set_parent(child, parent); + + if (unlikely(parent == NULL)) + tree->root = child; + else + parent->children[rbtree_index(node, parent)] = child; + + /* + * The node has been removed, update the colors. The child pointer can + * be null, in which case it is considered a black leaf. + */ +update_color: + if (color == RBTREE_COLOR_RED) + return; + + for (;;) { + if ((child != NULL) && rbtree_is_red(child)) { + rbtree_set_black(child); + break; + } + + if (parent == NULL) + break; + + left = rbtree_index(child, parent); + right = 1 - left; + + brother = parent->children[right]; + + /* + * Case 1: brother is red. Recolor and rotate left at parent so that + * brother becomes black. + */ + if (rbtree_is_red(brother)) { + rbtree_set_black(brother); + rbtree_set_red(parent); + rbtree_rotate(tree, parent, left); + brother = parent->children[right]; + } + + /* + * Case 2: brother has no red child. Recolor and repeat at parent. + */ + if (((brother->children[RBTREE_LEFT] == NULL) + || rbtree_is_black(brother->children[RBTREE_LEFT])) + && ((brother->children[RBTREE_RIGHT] == NULL) + || rbtree_is_black(brother->children[RBTREE_RIGHT]))) { + rbtree_set_red(brother); + child = parent; + parent = rbtree_parent(child); + continue; + } + + /* + * Case 3: brother's right child is black. Recolor and rotate right + * at brother to reduce to case 4. + */ + if ((brother->children[right] == NULL) + || rbtree_is_black(brother->children[right])) { + rbtree_set_black(brother->children[left]); + rbtree_set_red(brother); + rbtree_rotate(tree, brother, right); + brother = parent->children[right]; + } + + /* + * Case 4: brother's left child is black. Exchange parent and brother + * colors (we already know brother is black), set brother's right child + * black, rotate left at parent and leave. 
+ */ + rbtree_set_color(brother, rbtree_color(parent)); + rbtree_set_black(parent); + rbtree_set_black(brother->children[right]); + rbtree_rotate(tree, parent, left); + break; + } + + assert((tree->root == NULL) || rbtree_is_black(tree->root)); +} + +struct rbtree_node * rbtree_nearest(struct rbtree_node *parent, int index, + int direction) +{ + assert(rbtree_check_index(direction)); + + if (parent == NULL) + return NULL; + + assert(rbtree_check_index(index)); + + if (index != direction) + return parent; + + return rbtree_walk(parent, direction); +} + +struct rbtree_node * rbtree_firstlast(const struct rbtree *tree, int direction) +{ + struct rbtree_node *prev, *cur; + + assert(rbtree_check_index(direction)); + + prev = NULL; + + for (cur = tree->root; cur != NULL; cur = cur->children[direction]) + prev = cur; + + return prev; +} + +struct rbtree_node * rbtree_walk(struct rbtree_node *node, int direction) +{ + int left, right; + + assert(rbtree_check_index(direction)); + + left = direction; + right = 1 - left; + + if (node == NULL) + return NULL; + + if (node->children[left] != NULL) { + node = node->children[left]; + + while (node->children[right] != NULL) + node = node->children[right]; + } else { + struct rbtree_node *parent; + int index; + + for (;;) { + parent = rbtree_parent(node); + + if (parent == NULL) + return NULL; + + index = rbtree_index(node, parent); + node = parent; + + if (index == right) + break; + } + } + + return node; +} + +/* + * Return the left-most deepest child node of the given node. + */ +static struct rbtree_node * rbtree_find_deepest(struct rbtree_node *node) +{ + struct rbtree_node *parent; + + assert(node != NULL); + + for (;;) { + parent = node; + node = node->children[RBTREE_LEFT]; + + if (node == NULL) { + node = parent->children[RBTREE_RIGHT]; + + if (node == NULL) + return parent; + } + } +} + +struct rbtree_node * rbtree_postwalk_deepest(const struct rbtree *tree) +{ + struct rbtree_node *node; + + node = tree->root; + + if (node == NULL) + return NULL; + + return rbtree_find_deepest(node); +} + +struct rbtree_node * rbtree_postwalk_unlink(struct rbtree_node *node) +{ + struct rbtree_node *parent; + int index; + + if (node == NULL) + return NULL; + + assert(node->children[RBTREE_LEFT] == NULL); + assert(node->children[RBTREE_RIGHT] == NULL); + + parent = rbtree_parent(node); + + if (parent == NULL) + return NULL; + + index = rbtree_index(node, parent); + parent->children[index] = NULL; + node = parent->children[RBTREE_RIGHT]; + + if (node == NULL) + return parent; + + return rbtree_find_deepest(node); +} diff --git a/kern/rbtree.h b/kern/rbtree.h new file mode 100644 index 0000000..b6d62bf --- /dev/null +++ b/kern/rbtree.h @@ -0,0 +1,298 @@ +/* + * Copyright (c) 2010, 2011 Richard Braun. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#ifndef _KERN_RBTREE_H +#define _KERN_RBTREE_H + +#include <stddef.h> +#include <kern/assert.h> +#include <kern/macro_help.h> +#include <kern/rbtree.h> +#include <sys/types.h> + +#define structof(ptr, type, member) \ + ((type *)((char *)ptr - offsetof(type, member))) + +/* + * Indexes of the left and right nodes in the children array of a node. + */ +#define RBTREE_LEFT 0 +#define RBTREE_RIGHT 1 + +/* + * Red-black node. + */ +struct rbtree_node; + +/* + * Red-black tree. + */ +struct rbtree; + +/* + * Static tree initializer. + */ +#define RBTREE_INITIALIZER { NULL } + +#include "rbtree_i.h" + +/* + * Initialize a tree. + */ +static inline void rbtree_init(struct rbtree *tree) +{ + tree->root = NULL; +} + +/* + * Initialize a node. + * + * A node is in no tree when its parent points to itself. + */ +static inline void rbtree_node_init(struct rbtree_node *node) +{ + assert(rbtree_check_alignment(node)); + + node->parent = (unsigned long)node | RBTREE_COLOR_RED; + node->children[RBTREE_LEFT] = NULL; + node->children[RBTREE_RIGHT] = NULL; +} + +/* + * Return true if node is in no tree. + */ +static inline int rbtree_node_unlinked(const struct rbtree_node *node) +{ + return rbtree_parent(node) == node; +} + +/* + * Macro that evaluates to the address of the structure containing the + * given node based on the given type and member. + */ +#define rbtree_entry(node, type, member) structof(node, type, member) + +/* + * Return true if tree is empty. + */ +static inline int rbtree_empty(const struct rbtree *tree) +{ + return tree->root == NULL; +} + +/* + * Look up a node in a tree. + * + * Note that implementing the lookup algorithm as a macro gives two benefits: + * First, it avoids the overhead of a callback function. Next, the type of the + * cmp_fn parameter isn't rigid. The only guarantee offered by this + * implementation is that the key parameter is the first parameter given to + * cmp_fn. This way, users can pass only the value they need for comparison + * instead of e.g. allocating a full structure on the stack. + * + * See rbtree_insert(). + */ +#define rbtree_lookup(tree, key, cmp_fn) \ +MACRO_BEGIN \ + struct rbtree_node *cur; \ + int diff; \ + \ + cur = (tree)->root; \ + \ + while (cur != NULL) { \ + diff = cmp_fn(key, cur); \ + \ + if (diff == 0) \ + break; \ + \ + cur = cur->children[rbtree_d2i(diff)]; \ + } \ + \ + cur; \ +MACRO_END + +/* + * Look up a node or one of its nearest nodes in a tree. + * + * This macro essentially acts as rbtree_lookup() but if no entry matched + * the key, an additional step is performed to obtain the next or previous + * node, depending on the direction (left or right). + * + * The constraints that apply to the key parameter are the same as for + * rbtree_lookup(). + */ +#define rbtree_lookup_nearest(tree, key, cmp_fn, dir) \ +MACRO_BEGIN \ + struct rbtree_node *cur, *prev; \ + int diff, index; \ + \ + prev = NULL; \ + index = -1; \ + cur = (tree)->root; \ + \ + while (cur != NULL) { \ + diff = cmp_fn(key, cur); \ + \ + if (diff == 0) \ + break; \ + \ + prev = cur; \ + index = rbtree_d2i(diff); \ + cur = cur->children[index]; \ + } \ + \ + if (cur == NULL) \ + cur = rbtree_nearest(prev, index, dir); \ + \ + cur; \ +MACRO_END + +/* + * Insert a node in a tree. + * + * This macro performs a standard lookup to obtain the insertion point of + * the given node in the tree (it is assumed that the inserted node never + * compares equal to any other entry in the tree) and links the node. 
It + * then It then checks red-black rules violations, and rebalances the tree + * if necessary. + * + * Unlike rbtree_lookup(), the cmp_fn parameter must compare two complete + * entries, so it is suggested to use two different comparison inline + * functions, such as myobj_cmp_lookup() and myobj_cmp_insert(). There is no + * guarantee about the order of the nodes given to the comparison function. + * + * See rbtree_lookup(). + */ +#define rbtree_insert(tree, node, cmp_fn) \ +MACRO_BEGIN \ + struct rbtree_node *cur, *prev; \ + int diff, index; \ + \ + prev = NULL; \ + index = -1; \ + cur = (tree)->root; \ + \ + while (cur != NULL) { \ + diff = cmp_fn(node, cur); \ + assert(diff != 0); \ + prev = cur; \ + index = rbtree_d2i(diff); \ + cur = cur->children[index]; \ + } \ + \ + rbtree_insert_rebalance(tree, prev, index, node); \ +MACRO_END + +/* + * Look up a node/slot pair in a tree. + * + * This macro essentially acts as rbtree_lookup() but in addition to a node, + * it also returns a slot, which identifies an insertion point in the tree. + * If the returned node is null, the slot can be used by rbtree_insert_slot() + * to insert without the overhead of an additional lookup. The slot is a + * simple unsigned long integer. + * + * The constraints that apply to the key parameter are the same as for + * rbtree_lookup(). + */ +#define rbtree_lookup_slot(tree, key, cmp_fn, slot) \ +MACRO_BEGIN \ + struct rbtree_node *cur, *prev; \ + int diff, index; \ + \ + prev = NULL; \ + index = 0; \ + cur = (tree)->root; \ + \ + while (cur != NULL) { \ + diff = cmp_fn(key, cur); \ + \ + if (diff == 0) \ + break; \ + \ + prev = cur; \ + index = rbtree_d2i(diff); \ + cur = cur->children[index]; \ + } \ + \ + (slot) = rbtree_slot(prev, index); \ + cur; \ +MACRO_END + +/* + * Insert a node at an insertion point in a tree. + * + * This macro essentially acts as rbtree_insert() except that it doesn't + * obtain the insertion point with a standard lookup. The insertion point + * is obtained by calling rbtree_lookup_slot(). In addition, the new node + * must not compare equal to an existing node in the tree (i.e. the slot + * must denote a null node). + */ +#define rbtree_insert_slot(tree, slot, node) \ +MACRO_BEGIN \ + struct rbtree_node *parent; \ + int index; \ + \ + parent = rbtree_slot_parent(slot); \ + index = rbtree_slot_index(slot); \ + rbtree_insert_rebalance(tree, parent, index, node); \ +MACRO_END + +/* + * Remove a node from a tree. + * + * After completion, the node is stale. + */ +void rbtree_remove(struct rbtree *tree, struct rbtree_node *node); + +/* + * Return the first node of a tree. + */ +#define rbtree_first(tree) rbtree_firstlast(tree, RBTREE_LEFT) + +/* + * Return the last node of a tree. + */ +#define rbtree_last(tree) rbtree_firstlast(tree, RBTREE_RIGHT) + +/* + * Return the node previous to the given node. + */ +#define rbtree_prev(node) rbtree_walk(node, RBTREE_LEFT) + +/* + * Return the node next to the given node. + */ +#define rbtree_next(node) rbtree_walk(node, RBTREE_RIGHT) + +/* + * Forge a loop to process all nodes of a tree, removing them when visited. + * + * This macro can only be used to destroy a tree, so that the resources used + * by the entries can be released by the user. It basically removes all nodes + * without doing any color checking. + * + * After completion, all nodes and the tree root member are stale. 
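A minimal usage sketch of this interface (struct region, region_tree and the comparison helpers are hypothetical). As described above, the lookup comparison function receives the caller's key as its first argument, while the insertion comparison function receives two full entries:

#include <kern/rbtree.h>

struct region {
    struct rbtree_node tree_node;    /* embedded, keeps 4-byte alignment */
    unsigned long start;
};

static struct rbtree region_tree = RBTREE_INITIALIZER;

static inline int region_cmp_lookup(unsigned long addr,
                                    const struct rbtree_node *node)
{
    const struct region *region;

    region = rbtree_entry(node, struct region, tree_node);

    if (addr == region->start)
        return 0;

    return (addr < region->start) ? -1 : 1;
}

static inline int region_cmp_insert(const struct rbtree_node *a,
                                    const struct rbtree_node *b)
{
    return region_cmp_lookup(rbtree_entry(a, struct region, tree_node)->start, b);
}

static void region_example(struct region *new_region, unsigned long addr)
{
    struct rbtree_node *node;
    struct region *region;

    /* new_region->start is assumed unique in the tree. */
    rbtree_insert(&region_tree, &new_region->tree_node, region_cmp_insert);

    node = rbtree_lookup(&region_tree, addr, region_cmp_lookup);

    if (node != NULL) {
        region = rbtree_entry(node, struct region, tree_node);
        rbtree_remove(&region_tree, &region->tree_node);
    }
}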
+ */ +#define rbtree_for_each_remove(tree, node, tmp) \ +for (node = rbtree_postwalk_deepest(tree), \ + tmp = rbtree_postwalk_unlink(node); \ + node != NULL; \ + node = tmp, tmp = rbtree_postwalk_unlink(node)) \ + +#endif /* _KERN_RBTREE_H */ diff --git a/kern/rbtree_i.h b/kern/rbtree_i.h new file mode 100644 index 0000000..9befc92 --- /dev/null +++ b/kern/rbtree_i.h @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2010, 2011 Richard Braun. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef _KERN_RBTREE_I_H +#define _KERN_RBTREE_I_H + +#include <kern/assert.h> + +/* + * Red-black node structure. + * + * To reduce the number of branches and the instruction cache footprint, + * the left and right child pointers are stored in an array, and the symmetry + * of most tree operations is exploited by using left/right variables when + * referring to children. + * + * In addition, this implementation assumes that all nodes are 4-byte aligned, + * so that the least significant bit of the parent member can be used to store + * the color of the node. This is true for all modern 32 and 64 bits + * architectures, as long as the nodes aren't embedded in structures with + * special alignment constraints such as member packing. + */ +struct rbtree_node { + unsigned long parent; + struct rbtree_node *children[2]; +}; + +/* + * Red-black tree structure. + */ +struct rbtree { + struct rbtree_node *root; +}; + +/* + * Masks applied on the parent member of a node to obtain either the + * color or the parent address. + */ +#define RBTREE_COLOR_MASK 0x1UL +#define RBTREE_PARENT_MASK (~0x3UL) + +/* + * Node colors. + */ +#define RBTREE_COLOR_RED 0 +#define RBTREE_COLOR_BLACK 1 + +/* + * Masks applied on slots to obtain either the child index or the parent + * address. + */ +#define RBTREE_SLOT_INDEX_MASK 0x1UL +#define RBTREE_SLOT_PARENT_MASK (~RBTREE_SLOT_INDEX_MASK) + +/* + * Return true if the given pointer is suitably aligned. + */ +static inline int rbtree_check_alignment(const struct rbtree_node *node) +{ + return ((unsigned long)node & (~RBTREE_PARENT_MASK)) == 0; +} + +/* + * Return true if the given index is a valid child index. + */ +static inline int rbtree_check_index(int index) +{ + return index == (index & 1); +} + +/* + * Convert the result of a comparison into an index in the children array + * (0 or 1). + * + * This function is mostly used when looking up a node. + */ +static inline int rbtree_d2i(int diff) +{ + return !(diff <= 0); +} + +/* + * Return the parent of a node. + */ +static inline struct rbtree_node * rbtree_parent(const struct rbtree_node *node) +{ + return (struct rbtree_node *)(node->parent & RBTREE_PARENT_MASK); +} + +/* + * Translate an insertion point into a slot. 
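The slot variants documented in rbtree.h above avoid a second descent when a failed lookup is followed by an insertion. A hedged sketch reusing the hypothetical struct region and comparison helpers from the previous example:

static struct region * region_find_or_insert(struct region *new_region)
{
    struct rbtree_node *node;
    unsigned long slot;

    node = rbtree_lookup_slot(&region_tree, new_region->start,
                              region_cmp_lookup, slot);

    if (node != NULL)
        return rbtree_entry(node, struct region, tree_node);

    /* The slot encodes the parent node and the child index of the miss. */
    rbtree_insert_slot(&region_tree, slot, &new_region->tree_node);

    return new_region;
}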
+ */ +static inline unsigned long rbtree_slot(struct rbtree_node *parent, int index) +{ + assert(rbtree_check_alignment(parent)); + assert(rbtree_check_index(index)); + return (unsigned long)parent | index; +} + +/* + * Extract the parent address from a slot. + */ +static inline struct rbtree_node * rbtree_slot_parent(unsigned long slot) +{ + return (struct rbtree_node *)(slot & RBTREE_SLOT_PARENT_MASK); +} + +/* + * Extract the index from a slot. + */ +static inline int rbtree_slot_index(unsigned long slot) +{ + return slot & RBTREE_SLOT_INDEX_MASK; +} + +/* + * Insert a node in a tree, rebalancing it if necessary. + * + * The index parameter is the index in the children array of the parent where + * the new node is to be inserted. It is ignored if the parent is null. + * + * This function is intended to be used by the rbtree_insert() macro only. + */ +void rbtree_insert_rebalance(struct rbtree *tree, struct rbtree_node *parent, + int index, struct rbtree_node *node); + +/* + * Return the previous or next node relative to a location in a tree. + * + * The parent and index parameters define the location, which can be empty. + * The direction parameter is either RBTREE_LEFT (to obtain the previous + * node) or RBTREE_RIGHT (to obtain the next one). + */ +struct rbtree_node * rbtree_nearest(struct rbtree_node *parent, int index, + int direction); + +/* + * Return the first or last node of a tree. + * + * The direction parameter is either RBTREE_LEFT (to obtain the first node) + * or RBTREE_RIGHT (to obtain the last one). + */ +struct rbtree_node * rbtree_firstlast(const struct rbtree *tree, int direction); + +/* + * Return the node next to, or previous to the given node. + * + * The direction parameter is either RBTREE_LEFT (to obtain the previous node) + * or RBTREE_RIGHT (to obtain the next one). + */ +struct rbtree_node * rbtree_walk(struct rbtree_node *node, int direction); + +/* + * Return the left-most deepest node of a tree, which is the starting point of + * the postorder traversal performed by rbtree_for_each_remove(). + */ +struct rbtree_node * rbtree_postwalk_deepest(const struct rbtree *tree); + +/* + * Unlink a node from its tree and return the next (right) node in postorder. + */ +struct rbtree_node * rbtree_postwalk_unlink(struct rbtree_node *node); + +#endif /* _KERN_RBTREE_I_H */ diff --git a/kern/server_loop.ch b/kern/server_loop.ch index 1aa7edb..409e013 100644 --- a/kern/server_loop.ch +++ b/kern/server_loop.ch @@ -39,6 +39,7 @@ */ #include <kern/debug.h> +#include <kern/kalloc.h> #include <mach/port.h> #include <mach/message.h> #include <vm/vm_kern.h> /* for kernel_map */ diff --git a/kern/slab.c b/kern/slab.c new file mode 100644 index 0000000..38413e8 --- /dev/null +++ b/kern/slab.c @@ -0,0 +1,1576 @@ +/* + * Copyright (c) 2009, 2010, 2011 Richard Braun. + * Copyright (c) 2011 Maksym Planeta. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* + * Object caching and general purpose memory allocator. + * + * This allocator is based on the paper "The Slab Allocator: An Object-Caching + * Kernel Memory Allocator" by Jeff Bonwick. + * + * It allows the allocation of objects (i.e. fixed-size typed buffers) from + * caches and is efficient in both space and time. This implementation follows + * many of the indications from the paper mentioned. The most notable + * differences are outlined below. + * + * The per-cache self-scaling hash table for buffer-to-bufctl conversion, + * described in 3.2.3 "Slab Layout for Large Objects", has been replaced by + * a red-black tree storing slabs, sorted by address. The use of a + * self-balancing tree for buffer-to-slab conversions provides a few advantages + * over a hash table. Unlike a hash table, a BST provides a "lookup nearest" + * operation, so obtaining the slab data (whether it is embedded in the slab or + * off slab) from a buffer address simply consists of a "lookup nearest towards + * 0" tree search. Storing slabs instead of buffers also considerably reduces + * the number of elements to retain. Finally, a self-balancing tree is a true + * self-scaling data structure, whereas a hash table requires periodic + * maintenance and complete resizing, which is expensive. The only drawback is + * that releasing a buffer to the slab layer takes logarithmic time instead of + * constant time. But as the data set size is kept reasonable (because slabs + * are stored instead of buffers) and because the CPU pool layer services most + * requests, avoiding many accesses to the slab layer, it is considered an + * acceptable tradeoff. + * + * This implementation uses per-cpu pools of objects, which service most + * allocation requests. These pools act as caches (but are named differently + * to avoid confusion with CPU caches) that reduce contention on multiprocessor + * systems. When a pool is empty and cannot provide an object, it is filled by + * transferring multiple objects from the slab layer. The symmetric case is + * handled likewise. + */ + +#include <string.h> +#include <kern/assert.h> +#include <kern/mach_clock.h> +#include <kern/printf.h> +#include <kern/slab.h> +#include <kern/kalloc.h> +#include <kern/cpu_number.h> +#include <mach/vm_param.h> +#include <mach/machine/vm_types.h> +#include <vm/vm_kern.h> +#include <vm/vm_types.h> +#include <sys/types.h> + +#ifdef MACH_DEBUG +#include <mach_debug/slab_info.h> +#endif + +/* + * Utility macros. + */ +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#define P2ALIGNED(x, a) (((x) & ((a) - 1)) == 0) +#define ISP2(x) P2ALIGNED(x, x) +#define P2ALIGN(x, a) ((x) & -(a)) +#define P2ROUND(x, a) (-(-(x) & -(a))) +#define P2END(x, a) (-(~(x) & -(a))) +#define likely(expr) __builtin_expect(!!(expr), 1) +#define unlikely(expr) __builtin_expect(!!(expr), 0) + +/* + * Minimum required alignment. + */ +#define KMEM_ALIGN_MIN 8 + +/* + * Minimum number of buffers per slab. + * + * This value is ignored when the slab size exceeds a threshold. + */ +#define KMEM_MIN_BUFS_PER_SLAB 8 + +/* + * Special slab size beyond which the minimum number of buffers per slab is + * ignored when computing the slab size of a cache. 
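As a hedged illustration of the "lookup nearest towards 0" search described above (the real uses are in kmem_cache_free_to_slab() and kmem_cache_free_verify() further down, and kmem_slab_cmp_lookup() is defined below), resolving a buffer address to its slab amounts to:

/* Sketch only: find the slab whose address is the highest one that does
 * not exceed the buffer address. */
static struct kmem_slab * kmem_buf_to_slab_sketch(struct kmem_cache *cache,
                                                  void *buf)
{
    struct rbtree_node *node;

    node = rbtree_lookup_nearest(&cache->active_slabs, buf,
                                 kmem_slab_cmp_lookup, RBTREE_LEFT);
    assert(node != NULL);

    return rbtree_entry(node, struct kmem_slab, tree_node);
}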
+ */ +#define KMEM_SLAB_SIZE_THRESHOLD (8 * PAGE_SIZE) + +/* + * Special buffer size under which slab data is unconditionnally allocated + * from its associated slab. + */ +#define KMEM_BUF_SIZE_THRESHOLD (PAGE_SIZE / 8) + +/* + * Time (in seconds) between two garbage collection operations. + */ +#define KMEM_GC_INTERVAL (1 * hz) + +/* + * The transfer size of a CPU pool is computed by dividing the pool size by + * this value. + */ +#define KMEM_CPU_POOL_TRANSFER_RATIO 2 + +/* + * Redzone guard word. + */ +#ifdef __LP64__ +#if _HOST_BIG_ENDIAN +#define KMEM_REDZONE_WORD 0xfeedfacefeedfaceUL +#else /* _HOST_BIG_ENDIAN */ +#define KMEM_REDZONE_WORD 0xcefaedfecefaedfeUL +#endif /* _HOST_BIG_ENDIAN */ +#else /* __LP64__ */ +#if _HOST_BIG_ENDIAN +#define KMEM_REDZONE_WORD 0xfeedfaceUL +#else /* _HOST_BIG_ENDIAN */ +#define KMEM_REDZONE_WORD 0xcefaedfeUL +#endif /* _HOST_BIG_ENDIAN */ +#endif /* __LP64__ */ + +/* + * Redzone byte for padding. + */ +#define KMEM_REDZONE_BYTE 0xbb + +/* + * Size of the VM submap from which default backend functions allocate. + */ +#define KMEM_MAP_SIZE (64 * 1024 * 1024) + +/* + * Shift for the first kalloc cache size. + */ +#define KALLOC_FIRST_SHIFT 5 + +/* + * Number of caches backing general purpose allocations. + */ +#define KALLOC_NR_CACHES 13 + +/* + * Size of the VM submap for general purpose allocations. + */ +#define KALLOC_MAP_SIZE (64 * 1024 * 1024) + +/* + * Values the buftag state member can take. + */ +#ifdef __LP64__ +#if _HOST_BIG_ENDIAN +#define KMEM_BUFTAG_ALLOC 0xa110c8eda110c8edUL +#define KMEM_BUFTAG_FREE 0xf4eeb10cf4eeb10cUL +#else /* _HOST_BIG_ENDIAN */ +#define KMEM_BUFTAG_ALLOC 0xedc810a1edc810a1UL +#define KMEM_BUFTAG_FREE 0x0cb1eef40cb1eef4UL +#endif /* _HOST_BIG_ENDIAN */ +#else /* __LP64__ */ +#if _HOST_BIG_ENDIAN +#define KMEM_BUFTAG_ALLOC 0xa110c8edUL +#define KMEM_BUFTAG_FREE 0xf4eeb10cUL +#else /* _HOST_BIG_ENDIAN */ +#define KMEM_BUFTAG_ALLOC 0xedc810a1UL +#define KMEM_BUFTAG_FREE 0x0cb1eef4UL +#endif /* _HOST_BIG_ENDIAN */ +#endif /* __LP64__ */ + +/* + * Free and uninitialized patterns. + * + * These values are unconditionnally 64-bit wide since buffers are at least + * 8-byte aligned. + */ +#if _HOST_BIG_ENDIAN +#define KMEM_FREE_PATTERN 0xdeadbeefdeadbeefULL +#define KMEM_UNINIT_PATTERN 0xbaddcafebaddcafeULL +#else /* _HOST_BIG_ENDIAN */ +#define KMEM_FREE_PATTERN 0xefbeaddeefbeaddeULL +#define KMEM_UNINIT_PATTERN 0xfecaddbafecaddbaULL +#endif /* _HOST_BIG_ENDIAN */ + +/* + * Cache flags. + * + * The flags don't change once set and can be tested without locking. + */ +#define KMEM_CF_NO_CPU_POOL 0x01 /* CPU pool layer disabled */ +#define KMEM_CF_SLAB_EXTERNAL 0x02 /* Slab data is off slab */ +#define KMEM_CF_NO_RECLAIM 0x04 /* Slabs are not reclaimable */ +#define KMEM_CF_VERIFY 0x08 /* Debugging facilities enabled */ +#define KMEM_CF_DIRECT 0x10 /* No buf-to-slab tree lookup */ + +/* + * Options for kmem_cache_alloc_verify(). + */ +#define KMEM_AV_NOCONSTRUCT 0 +#define KMEM_AV_CONSTRUCT 1 + +/* + * Error codes for kmem_cache_error(). + */ +#define KMEM_ERR_INVALID 0 /* Invalid address being freed */ +#define KMEM_ERR_DOUBLEFREE 1 /* Freeing already free address */ +#define KMEM_ERR_BUFTAG 2 /* Invalid buftag content */ +#define KMEM_ERR_MODIFIED 3 /* Buffer modified while free */ +#define KMEM_ERR_REDZONE 4 /* Redzone violation */ + +#if SLAB_USE_CPU_POOLS +/* + * Available CPU pool types. 
+ * + * For each entry, the CPU pool size applies from the entry buf_size + * (excluded) up to (and including) the buf_size of the preceding entry. + * + * See struct kmem_cpu_pool_type for a description of the values. + */ +static struct kmem_cpu_pool_type kmem_cpu_pool_types[] = { + { 32768, 1, 0, NULL }, + { 4096, 8, CPU_L1_SIZE, NULL }, + { 256, 64, CPU_L1_SIZE, NULL }, + { 0, 128, CPU_L1_SIZE, NULL } +}; + +/* + * Caches where CPU pool arrays are allocated from. + */ +static struct kmem_cache kmem_cpu_array_caches[ARRAY_SIZE(kmem_cpu_pool_types)]; +#endif /* SLAB_USE_CPU_POOLS */ + +/* + * Cache for off slab data. + */ +static struct kmem_cache kmem_slab_cache; + +/* + * General purpose caches array. + */ +static struct kmem_cache kalloc_caches[KALLOC_NR_CACHES]; + +/* + * List of all caches managed by the allocator. + */ +static struct list kmem_cache_list; +static unsigned int kmem_nr_caches; +static simple_lock_data_t __attribute__((used)) kmem_cache_list_lock; + +/* + * VM submap for slab caches (except general purpose allocations). + */ +static struct vm_map kmem_map_store; +vm_map_t kmem_map = &kmem_map_store; + +/* + * VM submap for general purpose allocations. + */ +static struct vm_map kalloc_map_store; +vm_map_t kalloc_map = &kalloc_map_store; + +/* + * Time of the last memory reclaim, in clock ticks. + */ +static unsigned int kmem_gc_last_tick; + +#define kmem_error(format, ...) \ + printf("mem: error: %s(): " format "\n", __func__, \ + ## __VA_ARGS__) + +#define kmem_warn(format, ...) \ + printf("mem: warning: %s(): " format "\n", __func__, \ + ## __VA_ARGS__) + +#define kmem_print(format, ...) \ + printf(format "\n", ## __VA_ARGS__) + +static void kmem_cache_error(struct kmem_cache *cache, void *buf, int error, + void *arg); +static void * kmem_cache_alloc_from_slab(struct kmem_cache *cache); +static void kmem_cache_free_to_slab(struct kmem_cache *cache, void *buf); + +static void * kmem_buf_verify_bytes(void *buf, void *pattern, size_t size) +{ + char *ptr, *pattern_ptr, *end; + + end = buf + size; + + for (ptr = buf, pattern_ptr = pattern; ptr < end; ptr++, pattern_ptr++) + if (*ptr != *pattern_ptr) + return ptr; + + return NULL; +} + +static void * kmem_buf_verify(void *buf, uint64_t pattern, vm_size_t size) +{ + uint64_t *ptr, *end; + + assert(P2ALIGNED((unsigned long)buf, sizeof(uint64_t))); + assert(P2ALIGNED(size, sizeof(uint64_t))); + + end = buf + size; + + for (ptr = buf; ptr < end; ptr++) + if (*ptr != pattern) + return kmem_buf_verify_bytes(ptr, &pattern, sizeof(pattern)); + + return NULL; +} + +static void kmem_buf_fill(void *buf, uint64_t pattern, size_t size) +{ + uint64_t *ptr, *end; + + assert(P2ALIGNED((unsigned long)buf, sizeof(uint64_t))); + assert(P2ALIGNED(size, sizeof(uint64_t))); + + end = buf + size; + + for (ptr = buf; ptr < end; ptr++) + *ptr = pattern; +} + +static void * kmem_buf_verify_fill(void *buf, uint64_t old, uint64_t new, + size_t size) +{ + uint64_t *ptr, *end; + + assert(P2ALIGNED((unsigned long)buf, sizeof(uint64_t))); + assert(P2ALIGNED(size, sizeof(uint64_t))); + + end = buf + size; + + for (ptr = buf; ptr < end; ptr++) { + if (*ptr != old) + return kmem_buf_verify_bytes(ptr, &old, sizeof(old)); + + *ptr = new; + } + + return NULL; +} + +static inline union kmem_bufctl * +kmem_buf_to_bufctl(void *buf, struct kmem_cache *cache) +{ + return (union kmem_bufctl *)(buf + cache->bufctl_dist); +} + +static inline struct kmem_buftag * +kmem_buf_to_buftag(void *buf, struct kmem_cache *cache) +{ + return (struct kmem_buftag *)(buf + 
cache->buftag_dist); +} + +static inline void * kmem_bufctl_to_buf(union kmem_bufctl *bufctl, + struct kmem_cache *cache) +{ + return (void *)bufctl - cache->bufctl_dist; +} + +static vm_offset_t kmem_pagealloc(vm_size_t size) +{ + vm_offset_t addr; + kern_return_t kr; + + kr = kmem_alloc_wired(kmem_map, &addr, size); + + if (kr != KERN_SUCCESS) + return 0; + + return addr; +} + +static void kmem_pagefree(vm_offset_t ptr, vm_size_t size) +{ + kmem_free(kmem_map, ptr, size); +} + +static void kmem_slab_create_verify(struct kmem_slab *slab, + struct kmem_cache *cache) +{ + struct kmem_buftag *buftag; + size_t buf_size; + unsigned long buffers; + void *buf; + + buf_size = cache->buf_size; + buf = slab->addr; + buftag = kmem_buf_to_buftag(buf, cache); + + for (buffers = cache->bufs_per_slab; buffers != 0; buffers--) { + kmem_buf_fill(buf, KMEM_FREE_PATTERN, cache->bufctl_dist); + buftag->state = KMEM_BUFTAG_FREE; + buf += buf_size; + buftag = kmem_buf_to_buftag(buf, cache); + } +} + +/* + * Create an empty slab for a cache. + * + * The caller must drop all locks before calling this function. + */ +static struct kmem_slab * kmem_slab_create(struct kmem_cache *cache, + size_t color) +{ + struct kmem_slab *slab; + union kmem_bufctl *bufctl; + size_t buf_size; + unsigned long buffers; + void *slab_buf; + + if (cache->slab_alloc_fn == NULL) + slab_buf = (void *)kmem_pagealloc(cache->slab_size); + else + slab_buf = (void *)cache->slab_alloc_fn(cache->slab_size); + + if (slab_buf == NULL) + return NULL; + + if (cache->flags & KMEM_CF_SLAB_EXTERNAL) { + assert(!(cache->flags & KMEM_CF_NO_RECLAIM)); + slab = (struct kmem_slab *)kmem_cache_alloc(&kmem_slab_cache); + + if (slab == NULL) { + if (cache->slab_free_fn == NULL) + kmem_pagefree((vm_offset_t)slab_buf, cache->slab_size); + else + cache->slab_free_fn((vm_offset_t)slab_buf, cache->slab_size); + + return NULL; + } + } else { + slab = (struct kmem_slab *)(slab_buf + cache->slab_size) - 1; + } + + list_node_init(&slab->list_node); + rbtree_node_init(&slab->tree_node); + slab->nr_refs = 0; + slab->first_free = NULL; + slab->addr = slab_buf + color; + + buf_size = cache->buf_size; + bufctl = kmem_buf_to_bufctl(slab->addr, cache); + + for (buffers = cache->bufs_per_slab; buffers != 0; buffers--) { + bufctl->next = slab->first_free; + slab->first_free = bufctl; + bufctl = (union kmem_bufctl *)((void *)bufctl + buf_size); + } + + if (cache->flags & KMEM_CF_VERIFY) + kmem_slab_create_verify(slab, cache); + + return slab; +} + +static void kmem_slab_destroy_verify(struct kmem_slab *slab, + struct kmem_cache *cache) +{ + struct kmem_buftag *buftag; + size_t buf_size; + unsigned long buffers; + void *buf, *addr; + + buf_size = cache->buf_size; + buf = slab->addr; + buftag = kmem_buf_to_buftag(buf, cache); + + for (buffers = cache->bufs_per_slab; buffers != 0; buffers--) { + if (buftag->state != KMEM_BUFTAG_FREE) + kmem_cache_error(cache, buf, KMEM_ERR_BUFTAG, buftag); + + addr = kmem_buf_verify(buf, KMEM_FREE_PATTERN, cache->bufctl_dist); + + if (addr != NULL) + kmem_cache_error(cache, buf, KMEM_ERR_MODIFIED, addr); + + buf += buf_size; + buftag = kmem_buf_to_buftag(buf, cache); + } +} + +/* + * Destroy a slab. + * + * The caller must drop all locks before calling this function. 
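For reference (a reading aid, not text from the patch): with KMEM_CF_VERIFY set, kmem_cache_init() below lays each buffer out so that the conversion helpers above resolve to the following regions, with the redzone padding and redzone word written by kmem_cache_alloc_verify():

[ object (obj_size) | redzone padding | bufctl (redzone word) | buftag ]
  ^ buf               ^ buf + obj_size  ^ buf + bufctl_dist     ^ buf + buftag_dist

Without KMEM_CF_VERIFY there is no buftag or redzone, and the bufctl simply overlays the tail of the buffer itself (bufctl_dist = buf_size - sizeof(union kmem_bufctl)), since the contents of a free buffer need not be preserved.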
+ */ +static void kmem_slab_destroy(struct kmem_slab *slab, struct kmem_cache *cache) +{ + vm_offset_t slab_buf; + + assert(slab->nr_refs == 0); + assert(slab->first_free != NULL); + assert(!(cache->flags & KMEM_CF_NO_RECLAIM)); + + if (cache->flags & KMEM_CF_VERIFY) + kmem_slab_destroy_verify(slab, cache); + + slab_buf = (vm_offset_t)P2ALIGN((unsigned long)slab->addr, PAGE_SIZE); + + if (cache->slab_free_fn == NULL) + kmem_pagefree(slab_buf, cache->slab_size); + else + cache->slab_free_fn(slab_buf, cache->slab_size); + + if (cache->flags & KMEM_CF_SLAB_EXTERNAL) + kmem_cache_free(&kmem_slab_cache, (vm_offset_t)slab); +} + +static inline int kmem_slab_use_tree(int flags) +{ + return !(flags & KMEM_CF_DIRECT) || (flags & KMEM_CF_VERIFY); +} + +static inline int kmem_slab_cmp_lookup(const void *addr, + const struct rbtree_node *node) +{ + struct kmem_slab *slab; + + slab = rbtree_entry(node, struct kmem_slab, tree_node); + + if (addr == slab->addr) + return 0; + else if (addr < slab->addr) + return -1; + else + return 1; +} + +static inline int kmem_slab_cmp_insert(const struct rbtree_node *a, + const struct rbtree_node *b) +{ + struct kmem_slab *slab; + + slab = rbtree_entry(a, struct kmem_slab, tree_node); + return kmem_slab_cmp_lookup(slab->addr, b); +} + +#if SLAB_USE_CPU_POOLS +static void kmem_cpu_pool_init(struct kmem_cpu_pool *cpu_pool, + struct kmem_cache *cache) +{ + simple_lock_init(&cpu_pool->lock); + cpu_pool->flags = cache->flags; + cpu_pool->size = 0; + cpu_pool->transfer_size = 0; + cpu_pool->nr_objs = 0; + cpu_pool->array = NULL; +} + +/* + * Return a CPU pool. + * + * This function will generally return the pool matching the CPU running the + * calling thread. Because of context switches and thread migration, the + * caller might be running on another processor after this function returns. + * Although not optimal, this should rarely happen, and it doesn't affect the + * allocator operations in any other way, as CPU pools are always valid, and + * their access is serialized by a lock. 
+ */ +static inline struct kmem_cpu_pool * kmem_cpu_pool_get(struct kmem_cache *cache) +{ + return &cache->cpu_pools[cpu_number()]; +} + +static inline void kmem_cpu_pool_build(struct kmem_cpu_pool *cpu_pool, + struct kmem_cache *cache, void **array) +{ + cpu_pool->size = cache->cpu_pool_type->array_size; + cpu_pool->transfer_size = (cpu_pool->size + + KMEM_CPU_POOL_TRANSFER_RATIO - 1) + / KMEM_CPU_POOL_TRANSFER_RATIO; + cpu_pool->array = array; +} + +static inline void * kmem_cpu_pool_pop(struct kmem_cpu_pool *cpu_pool) +{ + cpu_pool->nr_objs--; + return cpu_pool->array[cpu_pool->nr_objs]; +} + +static inline void kmem_cpu_pool_push(struct kmem_cpu_pool *cpu_pool, void *obj) +{ + cpu_pool->array[cpu_pool->nr_objs] = obj; + cpu_pool->nr_objs++; +} + +static int kmem_cpu_pool_fill(struct kmem_cpu_pool *cpu_pool, + struct kmem_cache *cache) +{ + void *obj; + int i; + + simple_lock(&cache->lock); + + for (i = 0; i < cpu_pool->transfer_size; i++) { + obj = kmem_cache_alloc_from_slab(cache); + + if (obj == NULL) + break; + + kmem_cpu_pool_push(cpu_pool, obj); + } + + simple_unlock(&cache->lock); + + return i; +} + +static void kmem_cpu_pool_drain(struct kmem_cpu_pool *cpu_pool, + struct kmem_cache *cache) +{ + void *obj; + int i; + + simple_lock(&cache->lock); + + for (i = cpu_pool->transfer_size; i > 0; i--) { + obj = kmem_cpu_pool_pop(cpu_pool); + kmem_cache_free_to_slab(cache, obj); + } + + simple_unlock(&cache->lock); +} +#endif /* SLAB_USE_CPU_POOLS */ + +static void kmem_cache_error(struct kmem_cache *cache, void *buf, int error, + void *arg) +{ + struct kmem_buftag *buftag; + + kmem_error("cache: %s, buffer: %p", cache->name, (void *)buf); + + switch(error) { + case KMEM_ERR_INVALID: + kmem_error("freeing invalid address"); + break; + case KMEM_ERR_DOUBLEFREE: + kmem_error("attempting to free the same address twice"); + break; + case KMEM_ERR_BUFTAG: + buftag = arg; + kmem_error("invalid buftag content, buftag state: %p", + (void *)buftag->state); + break; + case KMEM_ERR_MODIFIED: + kmem_error("free buffer modified, fault address: %p, " + "offset in buffer: %td", arg, arg - buf); + break; + case KMEM_ERR_REDZONE: + kmem_error("write beyond end of buffer, fault address: %p, " + "offset in buffer: %td", arg, arg - buf); + break; + default: + kmem_error("unknown error"); + } + + /* + * Never reached. + */ +} + +/* + * Compute an appropriate slab size for the given cache. + * + * Once the slab size is known, this function sets the related properties + * (buffers per slab and maximum color). It can also set the KMEM_CF_DIRECT + * and/or KMEM_CF_SLAB_EXTERNAL flags depending on the resulting layout. 
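A rough worked example of the computation below, assuming a 4 KiB page and a struct kmem_slab of 32 bytes (a plausible 32-bit figure; both values depend on the configuration): a cache of 128-byte buffers falls under KMEM_BUF_SIZE_THRESHOLD (512 bytes here), so slab data is embedded. The first candidate slab size rounds up to one page; 4096 - 32 = 4064 bytes hold 31 buffers with 4064 mod 128 = 96 bytes of waste, which already satisfies KMEM_MIN_BUFS_PER_SLAB, so the loop stops with slab_size = PAGE_SIZE, bufs_per_slab = 31 and color_max = 96, and KMEM_CF_DIRECT is set since the slab is exactly one page with embedded slab data.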
+ */ +static void kmem_cache_compute_sizes(struct kmem_cache *cache, int flags) +{ + size_t i, buffers, buf_size, slab_size, free_slab_size, optimal_size; + size_t waste, waste_min; + int embed, optimal_embed = optimal_embed; + + buf_size = cache->buf_size; + + if (buf_size < KMEM_BUF_SIZE_THRESHOLD) + flags |= KMEM_CACHE_NOOFFSLAB; + + i = 0; + waste_min = (size_t)-1; + + do { + i++; + slab_size = P2ROUND(i * buf_size, PAGE_SIZE); + free_slab_size = slab_size; + + if (flags & KMEM_CACHE_NOOFFSLAB) + free_slab_size -= sizeof(struct kmem_slab); + + buffers = free_slab_size / buf_size; + waste = free_slab_size % buf_size; + + if (buffers > i) + i = buffers; + + if (flags & KMEM_CACHE_NOOFFSLAB) + embed = 1; + else if (sizeof(struct kmem_slab) <= waste) { + embed = 1; + waste -= sizeof(struct kmem_slab); + } else { + embed = 0; + } + + if (waste <= waste_min) { + waste_min = waste; + optimal_size = slab_size; + optimal_embed = embed; + } + } while ((buffers < KMEM_MIN_BUFS_PER_SLAB) + && (slab_size < KMEM_SLAB_SIZE_THRESHOLD)); + + assert(!(flags & KMEM_CACHE_NOOFFSLAB) || optimal_embed); + + cache->slab_size = optimal_size; + slab_size = cache->slab_size - (optimal_embed + ? sizeof(struct kmem_slab) + : 0); + cache->bufs_per_slab = slab_size / buf_size; + cache->color_max = slab_size % buf_size; + + if (cache->color_max >= PAGE_SIZE) + cache->color_max = PAGE_SIZE - 1; + + if (optimal_embed) { + if (cache->slab_size == PAGE_SIZE) + cache->flags |= KMEM_CF_DIRECT; + } else { + cache->flags |= KMEM_CF_SLAB_EXTERNAL; + } +} + +void kmem_cache_init(struct kmem_cache *cache, const char *name, + size_t obj_size, size_t align, kmem_cache_ctor_t ctor, + kmem_slab_alloc_fn_t slab_alloc_fn, + kmem_slab_free_fn_t slab_free_fn, int flags) +{ +#if SLAB_USE_CPU_POOLS + struct kmem_cpu_pool_type *cpu_pool_type; + size_t i; +#endif /* SLAB_USE_CPU_POOLS */ + size_t buf_size; + +#if SLAB_VERIFY + cache->flags = KMEM_CF_VERIFY; +#else /* SLAB_VERIFY */ + cache->flags = 0; +#endif /* SLAB_VERIFY */ + + if (flags & KMEM_CACHE_NOCPUPOOL) + cache->flags |= KMEM_CF_NO_CPU_POOL; + + if (flags & KMEM_CACHE_NORECLAIM) { + assert(slab_free_fn == NULL); + flags |= KMEM_CACHE_NOOFFSLAB; + cache->flags |= KMEM_CF_NO_RECLAIM; + } + + if (flags & KMEM_CACHE_VERIFY) + cache->flags |= KMEM_CF_VERIFY; + + if (align < KMEM_ALIGN_MIN) + align = KMEM_ALIGN_MIN; + + assert(obj_size > 0); + assert(ISP2(align)); + assert(align < PAGE_SIZE); + + buf_size = P2ROUND(obj_size, align); + + simple_lock_init(&cache->lock); + list_node_init(&cache->node); + list_init(&cache->partial_slabs); + list_init(&cache->free_slabs); + rbtree_init(&cache->active_slabs); + cache->obj_size = obj_size; + cache->align = align; + cache->buf_size = buf_size; + cache->bufctl_dist = buf_size - sizeof(union kmem_bufctl); + cache->color = 0; + cache->nr_objs = 0; + cache->nr_bufs = 0; + cache->nr_slabs = 0; + cache->nr_free_slabs = 0; + cache->ctor = ctor; + cache->slab_alloc_fn = slab_alloc_fn; + cache->slab_free_fn = slab_free_fn; + strncpy(cache->name, name, sizeof(cache->name)); + cache->name[sizeof(cache->name) - 1] = '\0'; + cache->buftag_dist = 0; + cache->redzone_pad = 0; + + if (cache->flags & KMEM_CF_VERIFY) { + cache->bufctl_dist = buf_size; + cache->buftag_dist = cache->bufctl_dist + sizeof(union kmem_bufctl); + cache->redzone_pad = cache->bufctl_dist - cache->obj_size; + buf_size += sizeof(union kmem_bufctl) + sizeof(struct kmem_buftag); + buf_size = P2ROUND(buf_size, align); + cache->buf_size = buf_size; + } + + kmem_cache_compute_sizes(cache, 
flags); + +#if SLAB_USE_CPU_POOLS + for (cpu_pool_type = kmem_cpu_pool_types; + buf_size <= cpu_pool_type->buf_size; + cpu_pool_type++); + + cache->cpu_pool_type = cpu_pool_type; + + for (i = 0; i < ARRAY_SIZE(cache->cpu_pools); i++) + kmem_cpu_pool_init(&cache->cpu_pools[i], cache); +#endif /* SLAB_USE_CPU_POOLS */ + + simple_lock(&kmem_cache_list_lock); + list_insert_tail(&kmem_cache_list, &cache->node); + kmem_nr_caches++; + simple_unlock(&kmem_cache_list_lock); +} + +static inline int kmem_cache_empty(struct kmem_cache *cache) +{ + return cache->nr_objs == cache->nr_bufs; +} + +static int kmem_cache_grow(struct kmem_cache *cache) +{ + struct kmem_slab *slab; + size_t color; + int empty; + + simple_lock(&cache->lock); + + if (!kmem_cache_empty(cache)) { + simple_unlock(&cache->lock); + return 1; + } + + color = cache->color; + cache->color += cache->align; + + if (cache->color > cache->color_max) + cache->color = 0; + + simple_unlock(&cache->lock); + + slab = kmem_slab_create(cache, color); + + simple_lock(&cache->lock); + + if (slab != NULL) { + list_insert_tail(&cache->free_slabs, &slab->list_node); + cache->nr_bufs += cache->bufs_per_slab; + cache->nr_slabs++; + cache->nr_free_slabs++; + } + + /* + * Even if our slab creation failed, another thread might have succeeded + * in growing the cache. + */ + empty = kmem_cache_empty(cache); + + simple_unlock(&cache->lock); + + return !empty; +} + +static void kmem_cache_reap(struct kmem_cache *cache) +{ + struct kmem_slab *slab; + struct list dead_slabs; + + if (cache->flags & KMEM_CF_NO_RECLAIM) + return; + + list_init(&dead_slabs); + + simple_lock(&cache->lock); + + while (!list_empty(&cache->free_slabs)) { + slab = list_first_entry(&cache->free_slabs, struct kmem_slab, + list_node); + list_remove(&slab->list_node); + list_insert(&dead_slabs, &slab->list_node); + cache->nr_bufs -= cache->bufs_per_slab; + cache->nr_slabs--; + cache->nr_free_slabs--; + } + + simple_unlock(&cache->lock); + + while (!list_empty(&dead_slabs)) { + slab = list_first_entry(&dead_slabs, struct kmem_slab, list_node); + list_remove(&slab->list_node); + kmem_slab_destroy(slab, cache); + } +} + +/* + * Allocate a raw (unconstructed) buffer from the slab layer of a cache. + * + * The cache must be locked before calling this function. + */ +static void * kmem_cache_alloc_from_slab(struct kmem_cache *cache) +{ + struct kmem_slab *slab; + union kmem_bufctl *bufctl; + + if (!list_empty(&cache->partial_slabs)) + slab = list_first_entry(&cache->partial_slabs, struct kmem_slab, + list_node); + else if (!list_empty(&cache->free_slabs)) + slab = list_first_entry(&cache->free_slabs, struct kmem_slab, + list_node); + else + return NULL; + + bufctl = slab->first_free; + assert(bufctl != NULL); + slab->first_free = bufctl->next; + slab->nr_refs++; + cache->nr_objs++; + + /* + * The slab has become complete. + */ + if (slab->nr_refs == cache->bufs_per_slab) { + list_remove(&slab->list_node); + + if (slab->nr_refs == 1) + cache->nr_free_slabs--; + } else if (slab->nr_refs == 1) { + /* + * The slab has become partial. + */ + list_remove(&slab->list_node); + list_insert_tail(&cache->partial_slabs, &slab->list_node); + cache->nr_free_slabs--; + } else if (!list_singular(&cache->partial_slabs)) { + struct list *node; + struct kmem_slab *tmp; + + /* + * The slab remains partial. If there are more than one partial slabs, + * maintain the list sorted. 
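A hedged sketch of how a client subsystem would use this interface (the pager_request type, its cache and constructor are hypothetical; the constructor is assumed to take the raw buffer pointer, as the cache->ctor(buf) calls in this file suggest):

#include <kern/list.h>
#include <kern/slab.h>

struct pager_request {
    struct list node;
    int state;
};

static struct kmem_cache pager_request_cache;

/* Constructor, run when a raw buffer becomes an object. */
static void pager_request_ctor(void *buf)
{
    struct pager_request *pr = buf;

    list_node_init(&pr->node);
    pr->state = 0;
}

static void pager_request_example(void)
{
    struct pager_request *pr;

    /* Typically done once at subsystem initialization. */
    kmem_cache_init(&pager_request_cache, "pager_request",
                    sizeof(struct pager_request), 0,
                    pager_request_ctor, NULL, NULL, 0);

    pr = (struct pager_request *)kmem_cache_alloc(&pager_request_cache);

    if (pr == NULL)
        return;        /* backing map exhausted */

    /* ... use pr ... */

    kmem_cache_free(&pager_request_cache, (vm_offset_t)pr);
}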
+ */ + + assert(slab->nr_refs > 1); + + for (node = list_prev(&slab->list_node); + !list_end(&cache->partial_slabs, node); + node = list_prev(node)) { + tmp = list_entry(node, struct kmem_slab, list_node); + + if (tmp->nr_refs >= slab->nr_refs) + break; + } + + /* + * If the direct neighbor was found, the list is already sorted. + * If no slab was found, the slab is inserted at the head of the list. + */ + if (node != list_prev(&slab->list_node)) { + list_remove(&slab->list_node); + list_insert_after(node, &slab->list_node); + } + } + + if ((slab->nr_refs == 1) && kmem_slab_use_tree(cache->flags)) + rbtree_insert(&cache->active_slabs, &slab->tree_node, + kmem_slab_cmp_insert); + + return kmem_bufctl_to_buf(bufctl, cache); +} + +/* + * Release a buffer to the slab layer of a cache. + * + * The cache must be locked before calling this function. + */ +static void kmem_cache_free_to_slab(struct kmem_cache *cache, void *buf) +{ + struct kmem_slab *slab; + union kmem_bufctl *bufctl; + + if (cache->flags & KMEM_CF_DIRECT) { + assert(cache->slab_size == PAGE_SIZE); + slab = (struct kmem_slab *)P2END((unsigned long)buf, cache->slab_size) + - 1; + } else { + struct rbtree_node *node; + + node = rbtree_lookup_nearest(&cache->active_slabs, buf, + kmem_slab_cmp_lookup, RBTREE_LEFT); + assert(node != NULL); + slab = rbtree_entry(node, struct kmem_slab, tree_node); + assert((unsigned long)buf < (P2ALIGN((unsigned long)slab->addr + + cache->slab_size, PAGE_SIZE))); + } + + assert(slab->nr_refs >= 1); + assert(slab->nr_refs <= cache->bufs_per_slab); + bufctl = kmem_buf_to_bufctl(buf, cache); + bufctl->next = slab->first_free; + slab->first_free = bufctl; + slab->nr_refs--; + cache->nr_objs--; + + /* + * The slab has become free. + */ + if (slab->nr_refs == 0) { + if (kmem_slab_use_tree(cache->flags)) + rbtree_remove(&cache->active_slabs, &slab->tree_node); + + /* + * The slab was partial. + */ + if (cache->bufs_per_slab > 1) + list_remove(&slab->list_node); + + list_insert_tail(&cache->free_slabs, &slab->list_node); + cache->nr_free_slabs++; + } else if (slab->nr_refs == (cache->bufs_per_slab - 1)) { + /* + * The slab has become partial. + */ + list_insert(&cache->partial_slabs, &slab->list_node); + } else if (!list_singular(&cache->partial_slabs)) { + struct list *node; + struct kmem_slab *tmp; + + /* + * The slab remains partial. If there are more than one partial slabs, + * maintain the list sorted. + */ + + assert(slab->nr_refs > 0); + + for (node = list_next(&slab->list_node); + !list_end(&cache->partial_slabs, node); + node = list_next(node)) { + tmp = list_entry(node, struct kmem_slab, list_node); + + if (tmp->nr_refs <= slab->nr_refs) + break; + } + + /* + * If the direct neighbor was found, the list is already sorted. + * If no slab was found, the slab is inserted at the tail of the list. 
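Note the combined effect of this sorting and the allocation path above: allocations are always served from the most referenced partial slab at the head of the list, so lightly used slabs tend to drain completely, become free and eventually get released by kmem_cache_reap(), which helps limit fragmentation.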
+ */ + if (node != list_next(&slab->list_node)) { + list_remove(&slab->list_node); + list_insert_before(node, &slab->list_node); + } + } +} + +static void kmem_cache_alloc_verify(struct kmem_cache *cache, void *buf, + int construct) +{ + struct kmem_buftag *buftag; + union kmem_bufctl *bufctl; + void *addr; + + buftag = kmem_buf_to_buftag(buf, cache); + + if (buftag->state != KMEM_BUFTAG_FREE) + kmem_cache_error(cache, buf, KMEM_ERR_BUFTAG, buftag); + + addr = kmem_buf_verify_fill(buf, KMEM_FREE_PATTERN, KMEM_UNINIT_PATTERN, + cache->bufctl_dist); + + if (addr != NULL) + kmem_cache_error(cache, buf, KMEM_ERR_MODIFIED, addr); + + addr = buf + cache->obj_size; + memset(addr, KMEM_REDZONE_BYTE, cache->redzone_pad); + + bufctl = kmem_buf_to_bufctl(buf, cache); + bufctl->redzone = KMEM_REDZONE_WORD; + buftag->state = KMEM_BUFTAG_ALLOC; + + if (construct && (cache->ctor != NULL)) + cache->ctor(buf); +} + +vm_offset_t kmem_cache_alloc(struct kmem_cache *cache) +{ + int filled; + void *buf; + +#if SLAB_USE_CPU_POOLS + struct kmem_cpu_pool *cpu_pool; + + cpu_pool = kmem_cpu_pool_get(cache); + + if (cpu_pool->flags & KMEM_CF_NO_CPU_POOL) + goto slab_alloc; + + simple_lock(&cpu_pool->lock); + +fast_alloc: + if (likely(cpu_pool->nr_objs > 0)) { + buf = kmem_cpu_pool_pop(cpu_pool); + simple_unlock(&cpu_pool->lock); + + if (cpu_pool->flags & KMEM_CF_VERIFY) + kmem_cache_alloc_verify(cache, buf, KMEM_AV_CONSTRUCT); + + return (vm_offset_t)buf; + } + + if (cpu_pool->array != NULL) { + filled = kmem_cpu_pool_fill(cpu_pool, cache); + + if (!filled) { + simple_unlock(&cpu_pool->lock); + + filled = kmem_cache_grow(cache); + + if (!filled) + return 0; + + simple_lock(&cpu_pool->lock); + } + + goto fast_alloc; + } + + simple_unlock(&cpu_pool->lock); +#endif /* SLAB_USE_CPU_POOLS */ + +slab_alloc: + simple_lock(&cache->lock); + buf = kmem_cache_alloc_from_slab(cache); + simple_unlock(&cache->lock); + + if (buf == NULL) { + filled = kmem_cache_grow(cache); + + if (!filled) + return 0; + + goto slab_alloc; + } + + if (cache->flags & KMEM_CF_VERIFY) + kmem_cache_alloc_verify(cache, buf, KMEM_AV_NOCONSTRUCT); + + if (cache->ctor != NULL) + cache->ctor(buf); + + return (vm_offset_t)buf; +} + +static void kmem_cache_free_verify(struct kmem_cache *cache, void *buf) +{ + struct rbtree_node *node; + struct kmem_buftag *buftag; + struct kmem_slab *slab; + union kmem_bufctl *bufctl; + unsigned char *redzone_byte; + unsigned long slabend; + + simple_lock(&cache->lock); + node = rbtree_lookup_nearest(&cache->active_slabs, buf, + kmem_slab_cmp_lookup, RBTREE_LEFT); + simple_unlock(&cache->lock); + + if (node == NULL) + kmem_cache_error(cache, buf, KMEM_ERR_INVALID, NULL); + + slab = rbtree_entry(node, struct kmem_slab, tree_node); + slabend = P2ALIGN((unsigned long)slab->addr + cache->slab_size, PAGE_SIZE); + + if ((unsigned long)buf >= slabend) + kmem_cache_error(cache, buf, KMEM_ERR_INVALID, NULL); + + if ((((unsigned long)buf - (unsigned long)slab->addr) % cache->buf_size) + != 0) + kmem_cache_error(cache, buf, KMEM_ERR_INVALID, NULL); + + /* + * As the buffer address is valid, accessing its buftag is safe. 
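
The remaining checks in kmem_cache_free_verify() walk the debugging layout used by KMEM_CF_VERIFY caches: the client object is followed by a byte redzone up to the bufctl, whose redzone word and buftag come after it. A minimal user-space sketch of the byte-redzone walk, with made-up offsets standing in for the obj_size/bufctl_dist geometry computed in kmem_cache_init():

    #include <assert.h>
    #include <stddef.h>
    #include <string.h>

    #define REDZONE_BYTE 0xcd   /* stand-in for KMEM_REDZONE_BYTE */

    /* Simplified geometry of one verified buffer. */
    struct geom {
        size_t obj_size;      /* bytes handed to the client        */
        size_t bufctl_dist;   /* offset of the bufctl redzone word */
    };

    /* Check the byte redzone between the object and its bufctl,
     * the same walk kmem_cache_free_verify() performs. */
    static int redzone_ok(const unsigned char *buf, const struct geom *g)
    {
        size_t i;

        for (i = g->obj_size; i < g->bufctl_dist; i++)
            if (buf[i] != REDZONE_BYTE)
                return 0;
        return 1;
    }

    int main(void)
    {
        unsigned char buf[64];
        struct geom g = { .obj_size = 20, .bufctl_dist = 24 };

        memset(buf + g.obj_size, REDZONE_BYTE, g.bufctl_dist - g.obj_size);
        assert(redzone_ok(buf, &g));

        buf[g.obj_size + 1] = 0;      /* simulate a buffer overrun */
        assert(!redzone_ok(buf, &g));
        return 0;
    }
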
+ */ + buftag = kmem_buf_to_buftag(buf, cache); + + if (buftag->state != KMEM_BUFTAG_ALLOC) { + if (buftag->state == KMEM_BUFTAG_FREE) + kmem_cache_error(cache, buf, KMEM_ERR_DOUBLEFREE, NULL); + else + kmem_cache_error(cache, buf, KMEM_ERR_BUFTAG, buftag); + } + + redzone_byte = buf + cache->obj_size; + bufctl = kmem_buf_to_bufctl(buf, cache); + + while (redzone_byte < (unsigned char *)bufctl) { + if (*redzone_byte != KMEM_REDZONE_BYTE) + kmem_cache_error(cache, buf, KMEM_ERR_REDZONE, redzone_byte); + + redzone_byte++; + } + + if (bufctl->redzone != KMEM_REDZONE_WORD) { + unsigned long word; + + word = KMEM_REDZONE_WORD; + redzone_byte = kmem_buf_verify_bytes(&bufctl->redzone, &word, + sizeof(bufctl->redzone)); + kmem_cache_error(cache, buf, KMEM_ERR_REDZONE, redzone_byte); + } + + kmem_buf_fill(buf, KMEM_FREE_PATTERN, cache->bufctl_dist); + buftag->state = KMEM_BUFTAG_FREE; +} + +void kmem_cache_free(struct kmem_cache *cache, vm_offset_t obj) +{ +#if SLAB_USE_CPU_POOLS + struct kmem_cpu_pool *cpu_pool; + void **array; + + cpu_pool = kmem_cpu_pool_get(cache); + + if (cpu_pool->flags & KMEM_CF_VERIFY) { +#else /* SLAB_USE_CPU_POOLS */ + if (cache->flags & KMEM_CF_VERIFY) { +#endif /* SLAB_USE_CPU_POOLS */ + kmem_cache_free_verify(cache, (void *)obj); + } + +#if SLAB_USE_CPU_POOLS + if (cpu_pool->flags & KMEM_CF_NO_CPU_POOL) + goto slab_free; + + simple_lock(&cpu_pool->lock); + +fast_free: + if (likely(cpu_pool->nr_objs < cpu_pool->size)) { + kmem_cpu_pool_push(cpu_pool, (void *)obj); + simple_unlock(&cpu_pool->lock); + return; + } + + if (cpu_pool->array != NULL) { + kmem_cpu_pool_drain(cpu_pool, cache); + goto fast_free; + } + + simple_unlock(&cpu_pool->lock); + + array = (void *)kmem_cache_alloc(cache->cpu_pool_type->array_cache); + + if (array != NULL) { + simple_lock(&cpu_pool->lock); + + /* + * Another thread may have built the CPU pool while the lock was + * dropped. 
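
Most of the branching above implements the per-CPU pool fast path: when the local pool has room, a free is just a push onto a small per-CPU array under the pool lock, and the cache lock is only taken when the pool must be filled or drained. A single-threaded toy model of that push/pop discipline, with sizes and names that are illustrative only:

    #include <assert.h>
    #include <stddef.h>

    /* Toy per-CPU pool: a bounded stack of pointers to cached objects. */
    struct mini_cpu_pool {
        void *array[8];
        int size;       /* capacity         */
        int nr_objs;    /* currently cached */
    };

    static void *pool_pop(struct mini_cpu_pool *pool)
    {
        return (pool->nr_objs > 0) ? pool->array[--pool->nr_objs] : NULL;
    }

    static int pool_push(struct mini_cpu_pool *pool, void *obj)
    {
        if (pool->nr_objs >= pool->size)
            return 0;   /* full: caller must drain to the slab layer */
        pool->array[pool->nr_objs++] = obj;
        return 1;
    }

    int main(void)
    {
        struct mini_cpu_pool pool = { .size = 8, .nr_objs = 0 };
        int x;

        assert(pool_pop(&pool) == NULL);   /* empty: fall back to the slab layer */
        assert(pool_push(&pool, &x));
        assert(pool_pop(&pool) == &x);     /* hit: no cache lock taken */
        return 0;
    }
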
+ */ + if (cpu_pool->array != NULL) { + simple_unlock(&cpu_pool->lock); + kmem_cache_free(cache->cpu_pool_type->array_cache, + (vm_offset_t)array); + goto fast_free; + } + + kmem_cpu_pool_build(cpu_pool, cache, array); + goto fast_free; + } + +slab_free: +#endif /* SLAB_USE_CPU_POOLS */ + + kmem_cache_free_to_slab(cache, (void *)obj); +} + +void slab_collect(void) +{ + struct kmem_cache *cache; + + if (sched_tick <= (kmem_gc_last_tick + KMEM_GC_INTERVAL)) + return; + + kmem_gc_last_tick = sched_tick; + + simple_lock(&mem_cache_list_lock); + + list_for_each_entry(&kmem_cache_list, cache, node) + kmem_cache_reap(cache); + + simple_unlock(&mem_cache_list_lock); +} + +void slab_bootstrap(void) +{ + /* Make sure a bufctl can always be stored in a buffer */ + assert(sizeof(union kmem_bufctl) <= KMEM_ALIGN_MIN); + + list_init(&kmem_cache_list); + simple_lock_init(&kmem_cache_list_lock); +} + +void slab_init(void) +{ + vm_offset_t min, max; + +#if SLAB_USE_CPU_POOLS + struct kmem_cpu_pool_type *cpu_pool_type; + char name[KMEM_CACHE_NAME_SIZE]; + size_t i, size; +#endif /* SLAB_USE_CPU_POOLS */ + + kmem_submap(kmem_map, kernel_map, &min, &max, KMEM_MAP_SIZE, FALSE); + +#if SLAB_USE_CPU_POOLS + for (i = 0; i < ARRAY_SIZE(kmem_cpu_pool_types); i++) { + cpu_pool_type = &kmem_cpu_pool_types[i]; + cpu_pool_type->array_cache = &kmem_cpu_array_caches[i]; + sprintf(name, "kmem_cpu_array_%d", cpu_pool_type->array_size); + size = sizeof(void *) * cpu_pool_type->array_size; + kmem_cache_init(cpu_pool_type->array_cache, name, size, + cpu_pool_type->array_align, NULL, NULL, NULL, 0); + } +#endif /* SLAB_USE_CPU_POOLS */ + + /* + * Prevent off slab data for the slab cache to avoid infinite recursion. + */ + kmem_cache_init(&kmem_slab_cache, "kmem_slab", sizeof(struct kmem_slab), + 0, NULL, NULL, NULL, KMEM_CACHE_NOOFFSLAB); +} + +static vm_offset_t kalloc_pagealloc(vm_size_t size) +{ + vm_offset_t addr; + kern_return_t kr; + + kr = kmem_alloc_wired(kalloc_map, &addr, size); + + if (kr != KERN_SUCCESS) + return 0; + + return addr; +} + +static void kalloc_pagefree(vm_offset_t ptr, vm_size_t size) +{ + kmem_free(kalloc_map, ptr, size); +} + +void kalloc_init(void) +{ + char name[KMEM_CACHE_NAME_SIZE]; + size_t i, size; + vm_offset_t min, max; + + kmem_submap(kalloc_map, kernel_map, &min, &max, KALLOC_MAP_SIZE, FALSE); + + size = 1 << KALLOC_FIRST_SHIFT; + + for (i = 0; i < ARRAY_SIZE(kalloc_caches); i++) { + sprintf(name, "kalloc_%u", size); + kmem_cache_init(&kalloc_caches[i], name, size, 0, NULL, + kalloc_pagealloc, kalloc_pagefree, 0); + size <<= 1; + } +} + +/* + * Return the kalloc cache index matching the given allocation size, which + * must be strictly greater than 0. 
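
kalloc_get_index() below maps an allocation size to the smallest power-of-two cache that can hold it. A compilable check of that mapping, assuming KALLOC_FIRST_SHIFT is 5 (a 32-byte smallest cache) purely for illustration; the real constant is defined earlier in this file:

    #include <assert.h>
    #include <stddef.h>

    #define KALLOC_FIRST_SHIFT 5   /* illustrative value only */

    /* Same computation as kalloc_get_index(). */
    static size_t index_for(unsigned long size)
    {
        size = (size - 1) >> KALLOC_FIRST_SHIFT;

        if (size == 0)
            return 0;

        return (sizeof(long) * 8) - __builtin_clzl(size);
    }

    int main(void)
    {
        /* Sizes up to 32 share cache 0, 33..64 cache 1, 65..128 cache 2, ... */
        assert(index_for(1) == 0);
        assert(index_for(32) == 0);
        assert(index_for(33) == 1);
        assert(index_for(64) == 1);
        assert(index_for(65) == 2);
        assert(index_for(4096) == 7);
        return 0;
    }
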
+ */ +static inline size_t kalloc_get_index(unsigned long size) +{ + assert(size != 0); + + size = (size - 1) >> KALLOC_FIRST_SHIFT; + + if (size == 0) + return 0; + else + return (sizeof(long) * 8) - __builtin_clzl(size); +} + +static void kalloc_verify(struct kmem_cache *cache, void *buf, size_t size) +{ + size_t redzone_size; + void *redzone; + + assert(size <= cache->obj_size); + + redzone = buf + size; + redzone_size = cache->obj_size - size; + memset(redzone, KMEM_REDZONE_BYTE, redzone_size); +} + +vm_offset_t kalloc(vm_size_t size) +{ + size_t index; + void *buf; + + if (size == 0) + return 0; + + index = kalloc_get_index(size); + + if (index < ARRAY_SIZE(kalloc_caches)) { + struct kmem_cache *cache; + + cache = &kalloc_caches[index]; + buf = (void *)kmem_cache_alloc(cache); + + if ((buf != 0) && (cache->flags & KMEM_CF_VERIFY)) + kalloc_verify(cache, buf, size); + } else + buf = (void *)kalloc_pagealloc(size); + + return (vm_offset_t)buf; +} + +static void kfree_verify(struct kmem_cache *cache, void *buf, size_t size) +{ + unsigned char *redzone_byte, *redzone_end; + + assert(size <= cache->obj_size); + + redzone_byte = buf + size; + redzone_end = buf + cache->obj_size; + + while (redzone_byte < redzone_end) { + if (*redzone_byte != KMEM_REDZONE_BYTE) + kmem_cache_error(cache, buf, KMEM_ERR_REDZONE, redzone_byte); + + redzone_byte++; + } +} + +void kfree(vm_offset_t data, vm_size_t size) +{ + size_t index; + + if ((data == 0) || (size == 0)) + return; + + index = kalloc_get_index(size); + + if (index < ARRAY_SIZE(kalloc_caches)) { + struct kmem_cache *cache; + + cache = &kalloc_caches[index]; + + if (cache->flags & KMEM_CF_VERIFY) + kfree_verify(cache, (void *)data, size); + + kmem_cache_free(cache, data); + } else { + kalloc_pagefree(data, size); + } +} + +#if MACH_DEBUG +kern_return_t host_slab_info(host_t host, cache_info_array_t *infop, + unsigned int *infoCntp) +{ + struct kmem_cache *cache; + cache_info_t *info; + unsigned int i, nr_caches; + vm_size_t info_size = info_size; + kern_return_t kr; + + if (host == HOST_NULL) + return KERN_INVALID_HOST; + + /* + * Assume the cache list is unaltered once the kernel is ready. + */ + + simple_lock(&mem_cache_list_lock); + nr_caches = kmem_nr_caches; + simple_unlock(&mem_cache_list_lock); + + if (nr_caches <= *infoCntp) + info = *infop; + else { + vm_offset_t info_addr; + + info_size = round_page(nr_caches * sizeof(*info)); + kr = kmem_alloc_pageable(ipc_kernel_map, &info_addr, info_size); + + if (kr != KERN_SUCCESS) + return kr; + + info = (cache_info_t *)info_addr; + } + + if (info == NULL) + return KERN_RESOURCE_SHORTAGE; + + i = 0; + + list_for_each_entry(&kmem_cache_list, cache, node) { + simple_lock(&cache_lock); + info[i].flags = ((cache->flags & KMEM_CF_NO_CPU_POOL) + ? CACHE_FLAGS_NO_CPU_POOL : 0) + | ((cache->flags & KMEM_CF_SLAB_EXTERNAL) + ? CACHE_FLAGS_SLAB_EXTERNAL : 0) + | ((cache->flags & KMEM_CF_NO_RECLAIM) + ? CACHE_FLAGS_NO_RECLAIM : 0) + | ((cache->flags & KMEM_CF_VERIFY) + ? CACHE_FLAGS_VERIFY : 0) + | ((cache->flags & KMEM_CF_DIRECT) + ? 
CACHE_FLAGS_DIRECT : 0); +#if SLAB_USE_CPU_POOLS + info[i].cpu_pool_size = cache->cpu_pool_type->array_size; +#else /* SLAB_USE_CPU_POOLS */ + info[i].cpu_pool_size = 0; +#endif /* SLAB_USE_CPU_POOLS */ + info[i].obj_size = cache->obj_size; + info[i].align = cache->align; + info[i].buf_size = cache->buf_size; + info[i].slab_size = cache->slab_size; + info[i].bufs_per_slab = cache->bufs_per_slab; + info[i].nr_objs = cache->nr_objs; + info[i].nr_bufs = cache->nr_bufs; + info[i].nr_slabs = cache->nr_slabs; + info[i].nr_free_slabs = cache->nr_free_slabs; + strncpy(info[i].name, cache->name, sizeof(info[i].name)); + info[i].name[sizeof(info[i].name) - 1] = '\0'; + simple_unlock(&cache->lock); + + i++; + } + + if (info != *infop) { + vm_map_copy_t copy; + vm_size_t used; + + used = nr_caches * sizeof(*info); + + if (used != info_size) + memset((char *)info + used, 0, info_size - used); + + kr = vm_map_copyin(ipc_kernel_map, (vm_offset_t)info, used, TRUE, + ©); + + assert(kr == KERN_SUCCESS); + *infop = (cache_info_t *)copy; + } + + *infoCntp = nr_caches; + + return KERN_SUCCESS; +} +#endif /* MACH_DEBUG */ diff --git a/kern/slab.h b/kern/slab.h new file mode 100644 index 0000000..14c820b --- /dev/null +++ b/kern/slab.h @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2009, 2010, 2011 Richard Braun. + * Copyright (c) 2011 Maksym Planeta. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef _KERN_SLAB_H +#define _KERN_SLAB_H + +#include <kern/lock.h> +#include <kern/list.h> +#include <kern/rbtree.h> +#include <mach/machine/vm_types.h> +#include <sys/types.h> +#include <vm/vm_types.h> + +#if SLAB_USE_CPU_POOLS +/* + * L1 cache line size. + */ +#define CPU_L1_SIZE (1 << CPU_L1_SHIFT) + +/* + * Per-processor cache of pre-constructed objects. + * + * The flags member is a read-only CPU-local copy of the parent cache flags. + */ +struct kmem_cpu_pool { + simple_lock_data_t lock; + int flags; + int size; + int transfer_size; + int nr_objs; + void **array; +} __attribute__((aligned(CPU_L1_SIZE))); + +/* + * When a cache is created, its CPU pool type is determined from the buffer + * size. For small buffer sizes, many objects can be cached in a CPU pool. + * Conversely, for large buffer sizes, this would incur much overhead, so only + * a few objects are stored in a CPU pool. + */ +struct kmem_cpu_pool_type { + size_t buf_size; + int array_size; + size_t array_align; + struct kmem_cache *array_cache; +}; +#endif /* SLAB_USE_CPU_POOLS */ + +/* + * Buffer descriptor. + * + * For normal caches (i.e. without SLAB_CF_VERIFY), bufctls are located at the + * end of (but inside) each buffer. If SLAB_CF_VERIFY is set, bufctls are + * located after each buffer. + * + * When an object is allocated to a client, its bufctl isn't used. This memory + * is instead used for redzoning if cache debugging is in effect. 
+ */ +union kmem_bufctl { + union kmem_bufctl *next; + unsigned long redzone; +}; + +/* + * Buffer tag. + * + * This structure is only used for SLAB_CF_VERIFY caches. It is located after + * the bufctl and includes information about the state of the buffer it + * describes (allocated or not). It should be thought of as a debugging + * extension of the bufctl. + */ +struct kmem_buftag { + unsigned long state; +}; + +/* + * Page-aligned collection of unconstructed buffers. + */ +struct kmem_slab { + struct list list_node; + struct rbtree_node tree_node; + unsigned long nr_refs; + union kmem_bufctl *first_free; + void *addr; +}; + +/* + * Type for constructor functions. + * + * The pre-constructed state of an object is supposed to include only + * elements such as e.g. linked lists, locks, reference counters. Therefore + * constructors are expected to 1) never fail and 2) not need any + * user-provided data. The first constraint implies that object construction + * never performs dynamic resource allocation, which also means there is no + * need for destructors. + */ +typedef void (*kmem_cache_ctor_t)(void *obj); + +/* + * Types for slab allocation/free functions. + * + * All addresses and sizes must be page-aligned. + */ +typedef vm_offset_t (*kmem_slab_alloc_fn_t)(vm_size_t); +typedef void (*kmem_slab_free_fn_t)(vm_offset_t, vm_size_t); + +/* + * Cache name buffer size. + */ +#define KMEM_CACHE_NAME_SIZE 32 + +/* + * Cache of objects. + * + * Locking order : cpu_pool -> cache. CPU pools locking is ordered by CPU ID. + * + * The partial slabs list is sorted by slab references. Slabs with a high + * number of references are placed first on the list to reduce fragmentation. + * Sorting occurs at insertion/removal of buffers in a slab. As the list + * is maintained sorted, and the number of references only changes by one, + * this is a very cheap operation in the average case and the worst (linear) + * case is very unlikely. + */ +struct kmem_cache { +#if SLAB_USE_CPU_POOLS + /* CPU pool layer */ + struct kmem_cpu_pool cpu_pools[NCPUS]; + struct kmem_cpu_pool_type *cpu_pool_type; +#endif /* SLAB_USE_CPU_POOLS */ + + /* Slab layer */ + simple_lock_data_t lock; + struct list node; /* Cache list linkage */ + struct list partial_slabs; + struct list free_slabs; + struct rbtree active_slabs; + int flags; + size_t obj_size; /* User-provided size */ + size_t align; + size_t buf_size; /* Aligned object size */ + size_t bufctl_dist; /* Distance from buffer to bufctl */ + size_t slab_size; + size_t color; + size_t color_max; + unsigned long bufs_per_slab; + unsigned long nr_objs; /* Number of allocated objects */ + unsigned long nr_bufs; /* Total number of buffers */ + unsigned long nr_slabs; + unsigned long nr_free_slabs; + kmem_cache_ctor_t ctor; + kmem_slab_alloc_fn_t slab_alloc_fn; + kmem_slab_free_fn_t slab_free_fn; + char name[KMEM_CACHE_NAME_SIZE]; + size_t buftag_dist; /* Distance from buffer to buftag */ + size_t redzone_pad; /* Bytes from end of object to redzone word */ +}; + +/* + * Mach-style declarations for struct kmem_cache. + */ +typedef struct kmem_cache *kmem_cache_t; +#define KMEM_CACHE_NULL ((kmem_cache_t) 0) + +/* + * VM submap for slab allocations. + */ +extern vm_map_t kmem_map; + +/* + * Cache initialization flags. 
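
For reference, a minimal sketch of how a subsystem is expected to use the interface declared here; struct foo and its functions are hypothetical, but the pattern matches the conversions made elsewhere in this change (the task, thread and device header caches):

    #include <kern/slab.h>

    struct foo {
        int refcount;
        /* ... */
    };

    static struct kmem_cache foo_cache;

    /* Constructors may only set up invariant state and must not fail. */
    static void foo_ctor(void *obj)
    {
        ((struct foo *)obj)->refcount = 0;
    }

    void foo_module_init(void)
    {
        kmem_cache_init(&foo_cache, "foo", sizeof(struct foo), 0,
                        foo_ctor, NULL, NULL, 0);
    }

    struct foo *foo_create(void)
    {
        return (struct foo *)kmem_cache_alloc(&foo_cache);
    }

    void foo_destroy(struct foo *f)
    {
        kmem_cache_free(&foo_cache, (vm_offset_t)f);
    }
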
+ */ +#define KMEM_CACHE_NOCPUPOOL 0x1 /* Don't use the per-cpu pools */ +#define KMEM_CACHE_NOOFFSLAB 0x2 /* Don't allocate external slab data */ +#define KMEM_CACHE_NORECLAIM 0x4 /* Never give slabs back to their source, + implies KMEM_CACHE_NOOFFSLAB */ +#define KMEM_CACHE_VERIFY 0x8 /* Use debugging facilities */ + +/* + * Initialize a cache. + */ +void kmem_cache_init(struct kmem_cache *cache, const char *name, + size_t obj_size, size_t align, kmem_cache_ctor_t ctor, + kmem_slab_alloc_fn_t slab_alloc_fn, + kmem_slab_free_fn_t slab_free_fn, int flags); + +/* + * Allocate an object from a cache. + */ +vm_offset_t kmem_cache_alloc(struct kmem_cache *cache); + +/* + * Release an object to its cache. + */ +void kmem_cache_free(struct kmem_cache *cache, vm_offset_t obj); + +/* + * Initialize the memory allocator module. + */ +void slab_bootstrap(void); +void slab_init(void); + +/* + * Release free slabs to the VM system. + */ +void slab_collect(void); + +#endif /* _KERN_SLAB_H */ diff --git a/kern/startup.c b/kern/startup.c index a4b5a6f..3bdda16 100644 --- a/kern/startup.c +++ b/kern/startup.c @@ -48,7 +48,6 @@ #include <kern/timer.h> #include <kern/xpr.h> #include <kern/time_stamp.h> -#include <kern/zalloc.h> #include <vm/vm_kern.h> #include <vm/vm_map.h> #include <vm/vm_object.h> diff --git a/kern/task.c b/kern/task.c index 88da16e..2624dd9 100644 --- a/kern/task.c +++ b/kern/task.c @@ -40,10 +40,9 @@ #include <ipc/ipc_space.h> #include <ipc/ipc_types.h> #include <kern/debug.h> -#include <kern/mach_param.h> #include <kern/task.h> #include <kern/thread.h> -#include <kern/zalloc.h> +#include <kern/slab.h> #include <kern/kalloc.h> #include <kern/processor.h> #include <kern/sched_prim.h> /* for thread_wakeup */ @@ -52,7 +51,7 @@ #include <machine/machspl.h> /* for splsched */ task_t kernel_task = TASK_NULL; -zone_t task_zone; +struct kmem_cache task_cache; extern void eml_init(void); extern void eml_task_reference(task_t, task_t); @@ -60,11 +59,8 @@ extern void eml_task_deallocate(task_t); void task_init(void) { - task_zone = zinit( - sizeof(struct task), 0, - TASK_MAX * sizeof(struct task), - TASK_CHUNK * sizeof(struct task), - 0, "tasks"); + kmem_cache_init(&task_cache, "task", sizeof(struct task), 0, + NULL, NULL, NULL, 0); eml_init(); machine_task_module_init (); @@ -77,38 +73,6 @@ void task_init(void) (void) task_create(TASK_NULL, FALSE, &kernel_task); } -/* - * Create a task running in the kernel address space. It may - * have its own map of size mem_size (if 0, it uses the kernel map), - * and may have ipc privileges. - */ -task_t kernel_task_create( - task_t parent_task, - vm_size_t map_size) -{ - task_t new_task; - vm_offset_t min, max; - - /* - * Create the task. - */ - (void) task_create(parent_task, FALSE, &new_task); - - /* - * Task_create creates the task with a user-space map. - * Remove the map and replace it with the kernel map - * or a submap of the kernel map. 
- */ - vm_map_deallocate(new_task->map); - if (map_size == 0) - new_task->map = kernel_map; - else - new_task->map = kmem_suballoc(kernel_map, &min, &max, - map_size, FALSE); - - return new_task; -} - kern_return_t task_create( task_t parent_task, boolean_t inherit_memory, @@ -120,7 +84,7 @@ kern_return_t task_create( int i; #endif - new_task = (task_t) zalloc(task_zone); + new_task = (task_t) kmem_cache_alloc(&task_cache); if (new_task == TASK_NULL) { panic("task_create: no memory for task structure"); } @@ -235,7 +199,7 @@ void task_deallocate( pset_deallocate(pset); vm_map_deallocate(task->map); is_release(task->itk_space); - zfree(task_zone, (vm_offset_t) task); + kmem_cache_free(&task_cache, (vm_offset_t) task); } void task_reference( diff --git a/kern/task.h b/kern/task.h index 9d90243..c425158 100644 --- a/kern/task.h +++ b/kern/task.h @@ -162,8 +162,6 @@ extern kern_return_t task_hold(task_t); extern kern_return_t task_dowait(task_t, boolean_t); extern kern_return_t task_release(task_t); -extern task_t kernel_task_create(task_t, vm_size_t); - extern task_t kernel_task; #endif /* _KERN_TASK_H_ */ diff --git a/kern/thread.c b/kern/thread.c index bf2df94..74e1c4b 100644 --- a/kern/thread.c +++ b/kern/thread.c @@ -45,7 +45,6 @@ #include <kern/eventcount.h> #include <kern/ipc_mig.h> #include <kern/ipc_tt.h> -#include <kern/mach_param.h> #include <kern/processor.h> #include <kern/queue.h> #include <kern/sched.h> @@ -54,7 +53,8 @@ #include <kern/thread.h> #include <kern/thread_swap.h> #include <kern/host.h> -#include <kern/zalloc.h> +#include <kern/kalloc.h> +#include <kern/slab.h> #include <kern/mach_clock.h> #include <vm/vm_kern.h> #include <ipc/ipc_kmsg.h> @@ -67,7 +67,7 @@ thread_t active_threads[NCPUS]; vm_offset_t active_stacks[NCPUS]; -struct zone *thread_zone; +struct kmem_cache thread_cache; queue_head_t reaper_queue; decl_simple_lock_data(, reaper_lock) @@ -300,11 +300,8 @@ void stack_privilege( void thread_init(void) { - thread_zone = zinit( - sizeof(struct thread), 0, - THREAD_MAX * sizeof(struct thread), - THREAD_CHUNK * sizeof(struct thread), - 0, "threads"); + kmem_cache_init(&thread_cache, "thread", sizeof(struct thread), 0, + NULL, NULL, NULL, 0); /* * Fill in a template thread for fast initialization. @@ -414,7 +411,7 @@ kern_return_t thread_create( * Allocate a thread and initialize static fields */ - new_thread = (thread_t) zalloc(thread_zone); + new_thread = (thread_t) kmem_cache_alloc(&thread_cache); if (new_thread == THREAD_NULL) return KERN_RESOURCE_SHORTAGE; @@ -710,7 +707,7 @@ void thread_deallocate( evc_notify_abort(thread); pcb_terminate(thread); - zfree(thread_zone, (vm_offset_t) thread); + kmem_cache_free(&thread_cache, (vm_offset_t) thread); } void thread_reference( diff --git a/kern/zalloc.c b/kern/zalloc.c deleted file mode 100644 index 43836a6..0000000 --- a/kern/zalloc.c +++ /dev/null @@ -1,1007 +0,0 @@ -/* - * Mach Operating System - * Copyright (c) 1993-1987 Carnegie Mellon University. - * Copyright (c) 1993,1994 The University of Utah and - * the Computer Systems Laboratory (CSL). - * All rights reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. 
- * - * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF - * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY - * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF - * THIS SOFTWARE. - * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - * File: kern/zalloc.c - * Author: Avadis Tevanian, Jr. - * - * Zone-based memory allocator. A zone is a collection of fixed size - * data blocks for which quick allocation/deallocation is possible. - */ - -#include <string.h> - -#include <kern/debug.h> -#include <kern/macro_help.h> -#include <kern/printf.h> -#include <kern/mach_clock.h> -#include <kern/sched.h> -#include <kern/zalloc.h> -#include <mach/vm_param.h> -#include <vm/vm_kern.h> -#include <machine/machspl.h> - -#if MACH_DEBUG -#include <mach/kern_return.h> -#include <mach/machine/vm_types.h> -#include <mach_debug/zone_info.h> -#include <kern/host.h> -#include <vm/vm_map.h> -#include <vm/vm_user.h> -#include <vm/vm_kern.h> -#endif - -#define ADD_TO_ZONE(zone, element) \ -MACRO_BEGIN \ - *((vm_offset_t *)(element)) = (zone)->free_elements; \ - (zone)->free_elements = (vm_offset_t) (element); \ - zone_count_down(zone); \ -MACRO_END - -#define REMOVE_FROM_ZONE(zone, ret, type) \ -MACRO_BEGIN \ - (ret) = (type) (zone)->free_elements; \ - if ((ret) != (type) 0) { \ - zone_count_up(zone); \ - (zone)->free_elements = *((vm_offset_t *)(ret)); \ - } \ -MACRO_END - -#define ALIGN_SIZE_UP(size, align) \ -((size) = (((size) + ((align) - 1)) & ~((align) - 1))) - -/* - * Support for garbage collection of unused zone pages: - */ - -struct zone_page_table_entry { - struct zone_page_table_entry *next; - short in_free_list; - short alloc_count; -}; - -extern struct zone_page_table_entry * zone_page_table; -extern vm_offset_t zone_map_min_address; - -#define lock_zone_page_table() simple_lock(&zone_page_table_lock) -#define unlock_zone_page_table() simple_unlock(&zone_page_table_lock) - -#define zone_page(addr) \ - (&(zone_page_table[(atop(((vm_offset_t)addr) - zone_map_min_address))])) - - -extern void zone_page_alloc(vm_offset_t, vm_size_t); -extern void zone_page_dealloc(vm_offset_t, vm_size_t); -extern void zone_page_in_use(vm_offset_t, vm_size_t); -extern void zone_page_free(vm_offset_t, vm_size_t); - -zone_t zone_zone; /* this is the zone containing other zones */ - -boolean_t zone_ignore_overflow = TRUE; - -vm_map_t zone_map = VM_MAP_NULL; -vm_size_t zone_map_size = 64 * 1024 * 1024; - -/* - * The VM system gives us an initial chunk of memory. - * It has to be big enough to allocate the zone_zone - * and some initial kernel data structures, like kernel maps. - * It is advantageous to make it bigger than really necessary, - * because this memory is more efficient than normal kernel - * virtual memory. (It doesn't have vm_page structures backing it - * and it may have other machine-dependent advantages.) - * So for best performance, zdata_size should approximate - * the amount of memory you expect the zone system to consume. 
- */ - -vm_offset_t zdata; -vm_size_t zdata_size = 420 * 1024; - -#define zone_lock(zone) \ -MACRO_BEGIN \ - if (zone->type & ZONE_PAGEABLE) { \ - lock_write(&zone->complex_lock); \ - } else { \ - simple_lock(&zone->lock); \ - } \ -MACRO_END - -#define zone_unlock(zone) \ -MACRO_BEGIN \ - if (zone->type & ZONE_PAGEABLE) { \ - lock_done(&zone->complex_lock); \ - } else { \ - simple_unlock(&zone->lock); \ - } \ -MACRO_END - -#define zone_lock_init(zone) \ -MACRO_BEGIN \ - if (zone->type & ZONE_PAGEABLE) { \ - lock_init(&zone->complex_lock, TRUE); \ - } else { \ - simple_lock_init(&zone->lock); \ - } \ -MACRO_END - -static vm_offset_t zget_space(vm_offset_t size, vm_size_t align); - -decl_simple_lock_data(,zget_space_lock) -vm_offset_t zalloc_next_space; -vm_offset_t zalloc_end_of_space; -vm_size_t zalloc_wasted_space; - -/* - * Garbage collection map information - */ -decl_simple_lock_data(,zone_page_table_lock) -struct zone_page_table_entry * zone_page_table; -vm_offset_t zone_map_min_address; -vm_offset_t zone_map_max_address; -int zone_pages; - -extern void zone_page_init(vm_offset_t, vm_size_t, int); - -#define ZONE_PAGE_USED 0 -#define ZONE_PAGE_UNUSED -1 - - -/* - * Protects first_zone, last_zone, num_zones, - * and the next_zone field of zones. - */ -decl_simple_lock_data(,all_zones_lock) -zone_t first_zone; -zone_t *last_zone; -int num_zones; - -/* - * zinit initializes a new zone. The zone data structures themselves - * are stored in a zone, which is initially a static structure that - * is initialized by zone_init. - */ -zone_t zinit(size, align, max, alloc, memtype, name) - vm_size_t size; /* the size of an element */ - vm_size_t align; /* alignment of elements */ - vm_size_t max; /* maximum memory to use */ - vm_size_t alloc; /* allocation size */ - unsigned int memtype; /* flags specifying type of memory */ - char *name; /* a name for the zone */ -{ - register zone_t z; - - if (zone_zone == ZONE_NULL) - z = (zone_t) zget_space(sizeof(struct zone), 0); - else - z = (zone_t) zalloc(zone_zone); - if (z == ZONE_NULL) - panic("zinit"); - if (alloc == 0) - alloc = PAGE_SIZE; - - if (size == 0) - size = sizeof(z->free_elements); - /* - * Round off all the parameters appropriately. - */ - - if ((max = round_page(max)) < (alloc = round_page(alloc))) - max = alloc; - - if (align > 0) { - if (PAGE_SIZE % align || align % sizeof(z->free_elements)) - panic("zinit"); - ALIGN_SIZE_UP(size, align); - } - - z->free_elements = 0; - z->cur_size = 0; - z->max_size = max; - z->elem_size = ((size-1) + sizeof(z->free_elements)) - - ((size-1) % sizeof(z->free_elements)); - z->align = align; - - z->alloc_size = alloc; - z->type = memtype; - z->zone_name = name; -#ifdef ZONE_COUNT - z->count = 0; -#endif - z->doing_alloc = FALSE; - zone_lock_init(z); - - /* - * Add the zone to the all-zones list. - */ - - z->next_zone = ZONE_NULL; - simple_lock(&all_zones_lock); - *last_zone = z; - last_zone = &z->next_zone; - num_zones++; - simple_unlock(&all_zones_lock); - - return(z); -} - -/* - * Cram the given memory into the specified zone. 
- */ -void zcram(zone_t zone, vm_offset_t newmem, vm_size_t size) -{ - register vm_size_t elem_size; - - if (newmem == (vm_offset_t) 0) { - panic("zcram - memory at zero"); - } - elem_size = zone->elem_size; - - zone_lock(zone); - while (size >= elem_size) { - ADD_TO_ZONE(zone, newmem); - zone_page_alloc(newmem, elem_size); - zone_count_up(zone); /* compensate for ADD_TO_ZONE */ - size -= elem_size; - newmem += elem_size; - zone->cur_size += elem_size; - } - zone_unlock(zone); -} - -/* - * Contiguous space allocator for non-paged zones. Allocates "size" amount - * of memory from zone_map. - */ - -static vm_offset_t zget_space(vm_offset_t size, vm_size_t align) -{ - vm_offset_t new_space = 0; - vm_offset_t result; - vm_size_t space_to_add = 0; /*'=0' to quiet gcc warnings */ - - simple_lock(&zget_space_lock); - if (align > 0) { - assert(align < PAGE_SIZE); - ALIGN_SIZE_UP(zalloc_next_space, align); - } - - while ((zalloc_next_space + size) > zalloc_end_of_space) { - /* - * Add at least one page to allocation area. - */ - - space_to_add = round_page(size); - - if (new_space == 0) { - /* - * Memory cannot be wired down while holding - * any locks that the pageout daemon might - * need to free up pages. [Making the zget_space - * lock a complex lock does not help in this - * regard.] - * - * Unlock and allocate memory. Because several - * threads might try to do this at once, don't - * use the memory before checking for available - * space again. - */ - - simple_unlock(&zget_space_lock); - - if (kmem_alloc_wired(zone_map, - &new_space, space_to_add) - != KERN_SUCCESS) - return(0); - zone_page_init(new_space, space_to_add, - ZONE_PAGE_USED); - simple_lock(&zget_space_lock); - if (align > 0) - ALIGN_SIZE_UP(zalloc_next_space, align); - continue; - } - - - /* - * Memory was allocated in a previous iteration. - * - * Check whether the new region is contiguous - * with the old one. - */ - - if (new_space != zalloc_end_of_space) { - /* - * Throw away the remainder of the - * old space, and start a new one. - */ - zalloc_wasted_space += - zalloc_end_of_space - zalloc_next_space; - zalloc_next_space = new_space; - } - - zalloc_end_of_space = new_space + space_to_add; - - new_space = 0; - } - result = zalloc_next_space; - zalloc_next_space += size; - simple_unlock(&zget_space_lock); - - if (new_space != 0) - kmem_free(zone_map, new_space, space_to_add); - - return(result); -} - - -/* - * Initialize the "zone of zones" which uses fixed memory allocated - * earlier in memory initialization. zone_bootstrap is called - * before zone_init. 
- */ -void zone_bootstrap(void) -{ - simple_lock_init(&all_zones_lock); - first_zone = ZONE_NULL; - last_zone = &first_zone; - num_zones = 0; - - simple_lock_init(&zget_space_lock); - zalloc_next_space = zdata; - zalloc_end_of_space = zdata + zdata_size; - zalloc_wasted_space = 0; - - zone_zone = ZONE_NULL; - zone_zone = zinit(sizeof(struct zone), 0, 128 * sizeof(struct zone), - sizeof(struct zone), 0, "zones"); -} - -void zone_init(void) -{ - vm_offset_t zone_min; - vm_offset_t zone_max; - - vm_size_t zone_table_size; - - zone_map = kmem_suballoc(kernel_map, &zone_min, &zone_max, - zone_map_size, FALSE); - - /* - * Setup garbage collection information: - */ - - zone_table_size = atop(zone_max - zone_min) * - sizeof(struct zone_page_table_entry); - if (kmem_alloc_wired(zone_map, (vm_offset_t *) &zone_page_table, - zone_table_size) != KERN_SUCCESS) - panic("zone_init"); - zone_min = (vm_offset_t)zone_page_table + round_page(zone_table_size); - zone_pages = atop(zone_max - zone_min); - zone_map_min_address = zone_min; - zone_map_max_address = zone_max; - simple_lock_init(&zone_page_table_lock); - zone_page_init(zone_min, zone_max - zone_min, ZONE_PAGE_UNUSED); -} - - -/* - * zalloc returns an element from the specified zone. - */ -vm_offset_t zalloc(zone_t zone) -{ - vm_offset_t addr; - - if (zone == ZONE_NULL) - panic ("zalloc: null zone"); - - check_simple_locks(); - - zone_lock(zone); - REMOVE_FROM_ZONE(zone, addr, vm_offset_t); - while (addr == 0) { - /* - * If nothing was there, try to get more - */ - if (zone->doing_alloc) { - /* - * Someone is allocating memory for this zone. - * Wait for it to show up, then try again. - */ - assert_wait((event_t)&zone->doing_alloc, TRUE); - /* XXX say wakeup needed */ - zone_unlock(zone); - thread_block((void (*)()) 0); - zone_lock(zone); - } - else { - if ((zone->cur_size + (zone->type & ZONE_PAGEABLE ? - zone->alloc_size : zone->elem_size)) > - zone->max_size) { - if (zone->type & ZONE_EXHAUSTIBLE) - break; - /* - * Printf calls logwakeup, which calls - * select_wakeup which will do a zfree - * (which tries to take the select_zone - * lock... Hang. Release the lock now - * so it can be taken again later. - * NOTE: this used to be specific to - * the select_zone, but for - * cleanliness, we just unlock all - * zones before this. - */ - if (!(zone->type & ZONE_FIXED)) { - /* - * We're willing to overflow certain - * zones, but not without complaining. - * - * This is best used in conjunction - * with the collecatable flag. What we - * want is an assurance we can get the - * memory back, assuming there's no - * leak. 
- */ - zone->max_size += (zone->max_size >> 1); - } else if (!zone_ignore_overflow) { - zone_unlock(zone); - printf("zone \"%s\" empty.\n", - zone->zone_name); - panic("zalloc: zone %s exhausted", - zone->zone_name); - } - } - - if (zone->type & ZONE_PAGEABLE) - zone->doing_alloc = TRUE; - zone_unlock(zone); - - if (zone->type & ZONE_PAGEABLE) { - if (kmem_alloc_pageable(zone_map, &addr, - zone->alloc_size) - != KERN_SUCCESS) - panic("zalloc: no pageable memory for zone %s", - zone->zone_name); - zcram(zone, addr, zone->alloc_size); - zone_lock(zone); - zone->doing_alloc = FALSE; - /* XXX check before doing this */ - thread_wakeup((event_t)&zone->doing_alloc); - - REMOVE_FROM_ZONE(zone, addr, vm_offset_t); - } else if (zone->type & ZONE_COLLECTABLE) { - if (kmem_alloc_wired(zone_map, - &addr, zone->alloc_size) - != KERN_SUCCESS) - panic("zalloc: no wired memory for zone %s", - zone->zone_name); - zone_page_init(addr, zone->alloc_size, - ZONE_PAGE_USED); - zcram(zone, addr, zone->alloc_size); - zone_lock(zone); - REMOVE_FROM_ZONE(zone, addr, vm_offset_t); - } else { - addr = zget_space(zone->elem_size, zone->align); - if (addr == 0) - panic("zalloc: no memory for zone %s", - zone->zone_name); - - zone_lock(zone); - zone_count_up(zone); - zone->cur_size += zone->elem_size; - zone_unlock(zone); - zone_page_alloc(addr, zone->elem_size); - return(addr); - } - } - } - - zone_unlock(zone); - return(addr); -} - - -/* - * zget returns an element from the specified zone - * and immediately returns nothing if there is nothing there. - * - * This form should be used when you can not block (like when - * processing an interrupt). - */ -vm_offset_t zget(zone_t zone) -{ - register vm_offset_t addr; - - if (zone == ZONE_NULL) - panic ("zalloc: null zone"); - - zone_lock(zone); - REMOVE_FROM_ZONE(zone, addr, vm_offset_t); - zone_unlock(zone); - - return(addr); -} - -boolean_t zone_check = FALSE; - -void zfree(zone_t zone, vm_offset_t elem) -{ - zone_lock(zone); - if (zone_check) { - vm_offset_t this; - - /* check the zone's consistency */ - - for (this = zone->free_elements; - this != 0; - this = * (vm_offset_t *) this) - if (this == elem) - panic("zfree"); - } - ADD_TO_ZONE(zone, elem); - zone_unlock(zone); -} - -/* - * Zone garbage collection subroutines - * - * These routines have in common the modification of entries in the - * zone_page_table. The latter contains one entry for every page - * in the zone_map. - * - * For each page table entry in the given range: - * - * zone_page_in_use - decrements in_free_list - * zone_page_free - increments in_free_list - * zone_page_init - initializes in_free_list and alloc_count - * zone_page_alloc - increments alloc_count - * zone_page_dealloc - decrements alloc_count - * zone_add_free_page_list - adds the page to the free list - * - * Two counts are maintained for each page, the in_free_list count and - * alloc_count. The alloc_count is how many zone elements have been - * allocated from a page. (Note that the page could contain elements - * that span page boundaries. The count includes these elements so - * one element may be counted in two pages.) In_free_list is a count - * of how many zone elements are currently free. If in_free_list is - * equal to alloc_count then the page is eligible for garbage - * collection. - * - * Alloc_count and in_free_list are initialized to the correct values - * for a particular zone when a page is zcram'ed into a zone. 
Subsequent - * gets and frees of zone elements will call zone_page_in_use and - * zone_page_free which modify the in_free_list count. When the zones - * garbage collector runs it will walk through a zones free element list, - * remove the elements that reside on collectable pages, and use - * zone_add_free_page_list to create a list of pages to be collected. - */ - -void zone_page_in_use(addr, size) -vm_offset_t addr; -vm_size_t size; -{ - int i, j; - if ((addr < zone_map_min_address) || - (addr+size > zone_map_max_address)) return; - i = atop(addr-zone_map_min_address); - j = atop((addr+size-1) - zone_map_min_address); - lock_zone_page_table(); - for (; i <= j; i++) { - zone_page_table[i].in_free_list--; - } - unlock_zone_page_table(); -} - -void zone_page_free(addr, size) -vm_offset_t addr; -vm_size_t size; -{ - int i, j; - if ((addr < zone_map_min_address) || - (addr+size > zone_map_max_address)) return; - i = atop(addr-zone_map_min_address); - j = atop((addr+size-1) - zone_map_min_address); - lock_zone_page_table(); - for (; i <= j; i++) { - /* Set in_free_list to (ZONE_PAGE_USED + 1) if - * it was previously set to ZONE_PAGE_UNUSED. - */ - if (zone_page_table[i].in_free_list == ZONE_PAGE_UNUSED) { - zone_page_table[i].in_free_list = 1; - } else { - zone_page_table[i].in_free_list++; - } - } - unlock_zone_page_table(); -} - -void zone_page_init(addr, size, value) - -vm_offset_t addr; -vm_size_t size; -int value; -{ - int i, j; - if ((addr < zone_map_min_address) || - (addr+size > zone_map_max_address)) return; - i = atop(addr-zone_map_min_address); - j = atop((addr+size-1) - zone_map_min_address); - lock_zone_page_table(); - for (; i <= j; i++) { - zone_page_table[i].alloc_count = value; - zone_page_table[i].in_free_list = 0; - } - unlock_zone_page_table(); -} - -void zone_page_alloc(addr, size) -vm_offset_t addr; -vm_size_t size; -{ - int i, j; - if ((addr < zone_map_min_address) || - (addr+size > zone_map_max_address)) return; - i = atop(addr-zone_map_min_address); - j = atop((addr+size-1) - zone_map_min_address); - lock_zone_page_table(); - for (; i <= j; i++) { - /* Set alloc_count to (ZONE_PAGE_USED + 1) if - * it was previously set to ZONE_PAGE_UNUSED. - */ - if (zone_page_table[i].alloc_count == ZONE_PAGE_UNUSED) { - zone_page_table[i].alloc_count = 1; - } else { - zone_page_table[i].alloc_count++; - } - } - unlock_zone_page_table(); -} - -void zone_page_dealloc(addr, size) -vm_offset_t addr; -vm_size_t size; -{ - int i, j; - if ((addr < zone_map_min_address) || - (addr+size > zone_map_max_address)) return; - i = atop(addr-zone_map_min_address); - j = atop((addr+size-1) - zone_map_min_address); - lock_zone_page_table(); - for (; i <= j; i++) { - zone_page_table[i].alloc_count--; - } - unlock_zone_page_table(); -} - -void -zone_add_free_page_list(free_list, addr, size) - struct zone_page_table_entry **free_list; - vm_offset_t addr; - vm_size_t size; -{ - int i, j; - if ((addr < zone_map_min_address) || - (addr+size > zone_map_max_address)) return; - i = atop(addr-zone_map_min_address); - j = atop((addr+size-1) - zone_map_min_address); - lock_zone_page_table(); - for (; i <= j; i++) { - if (zone_page_table[i].alloc_count == 0) { - zone_page_table[i].next = *free_list; - *free_list = &zone_page_table[i]; - zone_page_table[i].alloc_count = ZONE_PAGE_UNUSED; - zone_page_table[i].in_free_list = 0; - } - } - unlock_zone_page_table(); -} - - -/* This is used for walking through a zone's free element list. 
- */ -struct zone_free_entry { - struct zone_free_entry * next; -}; - - -/* Zone garbage collection - * - * zone_gc will walk through all the free elements in all the - * zones that are marked collectable looking for reclaimable - * pages. zone_gc is called by consider_zone_gc when the system - * begins to run out of memory. - */ -static void zone_gc(void) -{ - int max_zones; - zone_t z; - int i; - register spl_t s; - struct zone_page_table_entry *freep; - struct zone_page_table_entry *zone_free_page_list; - - simple_lock(&all_zones_lock); - max_zones = num_zones; - z = first_zone; - simple_unlock(&all_zones_lock); - - zone_free_page_list = (struct zone_page_table_entry *) 0; - - for (i = 0; i < max_zones; i++) { - struct zone_free_entry * last; - struct zone_free_entry * elt; - assert(z != ZONE_NULL); - /* run this at splhigh so that interupt routines that use zones - can not interupt while their zone is locked */ - s=splhigh(); - zone_lock(z); - - if ((z->type & (ZONE_PAGEABLE|ZONE_COLLECTABLE)) == ZONE_COLLECTABLE) { - - /* Count the free elements in each page. This loop - * requires that all in_free_list entries are zero. - */ - elt = (struct zone_free_entry *)(z->free_elements); - while ((elt != (struct zone_free_entry *)0)) { - zone_page_free((vm_offset_t)elt, z->elem_size); - elt = elt->next; - } - - /* Now determine which elements should be removed - * from the free list and, after all the elements - * on a page have been removed, add the element's - * page to a list of pages to be freed. - */ - elt = (struct zone_free_entry *)(z->free_elements); - last = elt; - while ((elt != (struct zone_free_entry *)0)) { - if (((vm_offset_t)elt>=zone_map_min_address)&& - ((vm_offset_t)elt<=zone_map_max_address)&& - (zone_page(elt)->in_free_list == - zone_page(elt)->alloc_count)) { - - z->cur_size -= z->elem_size; - zone_page_in_use((vm_offset_t)elt, z->elem_size); - zone_page_dealloc((vm_offset_t)elt, z->elem_size); - if (zone_page(elt)->alloc_count == 0 || - zone_page(elt+(z->elem_size-1))->alloc_count==0) { - zone_add_free_page_list( - &zone_free_page_list, - (vm_offset_t)elt, z->elem_size); - } - - - if (elt == last) { - elt = elt->next; - z->free_elements =(vm_offset_t)elt; - last = elt; - } else { - last->next = elt->next; - elt = elt->next; - } - } else { - /* This element is not eligible for collection - * so clear in_free_list in preparation for a - * subsequent garbage collection pass. 
- */ - if (((vm_offset_t)elt>=zone_map_min_address)&& - ((vm_offset_t)elt<=zone_map_max_address)) { - zone_page(elt)->in_free_list = 0; - } - last = elt; - elt = elt->next; - } - } - } - zone_unlock(z); - splx(s); - simple_lock(&all_zones_lock); - z = z->next_zone; - simple_unlock(&all_zones_lock); - } - - for (freep = zone_free_page_list; freep != 0; freep = freep->next) { - vm_offset_t free_addr; - - free_addr = zone_map_min_address + - PAGE_SIZE * (freep - zone_page_table); - - /* Hack Hack */ - /* Needed to make vm_map_delete's vm_map_clip_end always be - * able to get an element without having to call zget_space and - * hang because zone_map is already locked by vm_map_delete */ - - extern zone_t vm_map_kentry_zone; /* zone for kernel entry structures */ - vm_offset_t entry1 = zalloc(vm_map_kentry_zone), - entry2 = zalloc(vm_map_kentry_zone); - zfree(vm_map_kentry_zone, entry1); - zfree(vm_map_kentry_zone, entry2); - - kmem_free(zone_map, free_addr, PAGE_SIZE); - } -} - -boolean_t zone_gc_allowed = TRUE; -unsigned zone_gc_last_tick = 0; -unsigned zone_gc_max_rate = 0; /* in ticks */ - -/* - * consider_zone_gc: - * - * Called by the pageout daemon when the system needs more free pages. - */ - -void -consider_zone_gc(void) -{ - /* - * By default, don't attempt zone GC more frequently - * than once a second. - */ - - if (zone_gc_max_rate == 0) - zone_gc_max_rate = hz; - - if (zone_gc_allowed && - (sched_tick > (zone_gc_last_tick + zone_gc_max_rate))) { - zone_gc_last_tick = sched_tick; - zone_gc(); - } -} - -#if MACH_DEBUG -kern_return_t host_zone_info(host, namesp, namesCntp, infop, infoCntp) - host_t host; - zone_name_array_t *namesp; - unsigned int *namesCntp; - zone_info_array_t *infop; - unsigned int *infoCntp; -{ - zone_name_t *names; - vm_offset_t names_addr; - vm_size_t names_size = 0; /*'=0' to quiet gcc warnings */ - zone_info_t *info; - vm_offset_t info_addr; - vm_size_t info_size = 0; /*'=0' to quiet gcc warnings */ - unsigned int max_zones, i; - zone_t z; - kern_return_t kr; - - if (host == HOST_NULL) - return KERN_INVALID_HOST; - - /* - * We assume that zones aren't freed once allocated. - * We won't pick up any zones that are allocated later. 
- */ - - simple_lock(&all_zones_lock); - max_zones = num_zones; - z = first_zone; - simple_unlock(&all_zones_lock); - - if (max_zones <= *namesCntp) { - /* use in-line memory */ - - names = *namesp; - } else { - names_size = round_page(max_zones * sizeof *names); - kr = kmem_alloc_pageable(ipc_kernel_map, - &names_addr, names_size); - if (kr != KERN_SUCCESS) - return kr; - - names = (zone_name_t *) names_addr; - } - - if (max_zones <= *infoCntp) { - /* use in-line memory */ - - info = *infop; - } else { - info_size = round_page(max_zones * sizeof *info); - kr = kmem_alloc_pageable(ipc_kernel_map, - &info_addr, info_size); - if (kr != KERN_SUCCESS) { - if (names != *namesp) - kmem_free(ipc_kernel_map, - names_addr, names_size); - return kr; - } - - info = (zone_info_t *) info_addr; - } - - for (i = 0; i < max_zones; i++) { - zone_name_t *zn = &names[i]; - zone_info_t *zi = &info[i]; - struct zone zcopy; - - assert(z != ZONE_NULL); - - zone_lock(z); - zcopy = *z; - zone_unlock(z); - - simple_lock(&all_zones_lock); - z = z->next_zone; - simple_unlock(&all_zones_lock); - - /* assuming here the name data is static */ - (void) strncpy(zn->zn_name, zcopy.zone_name, - sizeof zn->zn_name); - -#ifdef ZONE_COUNT - zi->zi_count = zcopy.count; -#else - zi->zi_count = 0; -#endif - zi->zi_cur_size = zcopy.cur_size; - zi->zi_max_size = zcopy.max_size; - zi->zi_elem_size = zcopy.elem_size; - zi->zi_alloc_size = zcopy.alloc_size; - zi->zi_pageable = (zcopy.type & ZONE_PAGEABLE) != 0; - zi->zi_exhaustible = (zcopy.type & ZONE_EXHAUSTIBLE) != 0; - zi->zi_collectable = (zcopy.type & ZONE_COLLECTABLE) != 0; - } - - if (names != *namesp) { - vm_size_t used; - vm_map_copy_t copy; - - used = max_zones * sizeof *names; - - if (used != names_size) - memset((char *) (names_addr + used), 0, names_size - used); - - kr = vm_map_copyin(ipc_kernel_map, names_addr, names_size, - TRUE, ©); - assert(kr == KERN_SUCCESS); - - *namesp = (zone_name_t *) copy; - } - *namesCntp = max_zones; - - if (info != *infop) { - vm_size_t used; - vm_map_copy_t copy; - - used = max_zones * sizeof *info; - - if (used != info_size) - memset((char *) (info_addr + used), 0, info_size - used); - - kr = vm_map_copyin(ipc_kernel_map, info_addr, info_size, - TRUE, ©); - assert(kr == KERN_SUCCESS); - - *infop = (zone_info_t *) copy; - } - *infoCntp = max_zones; - - return KERN_SUCCESS; -} -#endif /* MACH_DEBUG */ diff --git a/kern/zalloc.h b/kern/zalloc.h deleted file mode 100644 index f1a1850..0000000 --- a/kern/zalloc.h +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Mach Operating System - * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University. - * Copyright (c) 1993,1994 The University of Utah and - * the Computer Systems Laboratory (CSL). - * All rights reserved. - * - * Permission to use, copy, modify and distribute this software and its - * documentation is hereby granted, provided that both the copyright - * notice and this permission notice appear in all copies of the - * software, derivative works or modified versions, and any portions - * thereof, and that both notices appear in supporting documentation. - * - * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF - * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY - * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF - * THIS SOFTWARE. 
- * - * Carnegie Mellon requests users of this software to return to - * - * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU - * School of Computer Science - * Carnegie Mellon University - * Pittsburgh PA 15213-3890 - * - * any improvements or extensions that they make and grant Carnegie Mellon - * the rights to redistribute these changes. - */ -/* - * File: zalloc.h - * Author: Avadis Tevanian, Jr. - * Date: 1985 - * - */ - -#ifndef _KERN_ZALLOC_H_ -#define _KERN_ZALLOC_H_ - -#include <mach/machine/vm_types.h> -#include <kern/macro_help.h> -#include <kern/lock.h> -#include <kern/queue.h> -#include <machine/zalloc.h> - -/* - * A zone is a collection of fixed size blocks for which there - * is fast allocation/deallocation access. Kernel routines can - * use zones to manage data structures dynamically, creating a zone - * for each type of data structure to be managed. - * - */ - -struct zone { - decl_simple_lock_data(,lock) /* generic lock */ -#ifdef ZONE_COUNT - int count; /* Number of elements used now */ -#endif - vm_offset_t free_elements; - vm_size_t cur_size; /* current memory utilization */ - vm_size_t max_size; /* how large can this zone grow */ - vm_size_t elem_size; /* size of an element */ - vm_size_t align; /* alignment of elements */ - vm_size_t alloc_size; /* size used for more memory */ - boolean_t doing_alloc; /* is zone expanding now? */ - char *zone_name; /* a name for the zone */ - unsigned int type; /* type of memory */ - lock_data_t complex_lock; /* Lock for pageable zones */ - struct zone *next_zone; /* Link for all-zones list */ -}; -typedef struct zone *zone_t; - -#define ZONE_NULL ((zone_t) 0) - -/* Exported to everyone */ -zone_t zinit(vm_size_t size, vm_size_t align, vm_size_t max, - vm_size_t alloc, unsigned int memtype, char *name); -vm_offset_t zalloc(zone_t zone); -vm_offset_t zget(zone_t zone); -void zfree(zone_t zone, vm_offset_t elem); -void zcram(zone_t zone, vm_offset_t newmem, vm_size_t size); - -/* Exported only to vm/vm_init.c */ -void zone_bootstrap(void); -void zone_init(void); - -/* Exported only to vm/vm_pageout.c */ -void consider_zone_gc(void); - - -/* Memory type bits for zones */ -#define ZONE_PAGEABLE 0x00000001 -#define ZONE_COLLECTABLE 0x00000002 /* Garbage-collect this zone when memory runs low */ -#define ZONE_EXHAUSTIBLE 0x00000004 /* zalloc() on this zone is allowed to fail */ -#define ZONE_FIXED 0x00000008 /* Panic if zone is exhausted (XXX) */ - -/* Machine-dependent code can provide additional memory types. */ -#define ZONE_MACHINE_TYPES 0xffff0000 - - -#ifdef ZONE_COUNT -#define zone_count_up(zone) ((zone)->count++) -#define zone_count_down(zone) ((zone)->count--) -#else -#define zone_count_up(zone) -#define zone_count_down(zone) -#endif - - - -/* These quick inline versions only work for small, nonpageable zones (currently). 
*/ - -static __inline vm_offset_t ZALLOC(zone_t zone) -{ - simple_lock(&zone->lock); - if (zone->free_elements == 0) { - simple_unlock(&zone->lock); - return zalloc(zone); - } else { - vm_offset_t element = zone->free_elements; - zone->free_elements = *((vm_offset_t *)(element)); - zone_count_up(zone); - simple_unlock(&zone->lock); - return element; - } -} - -static __inline void ZFREE(zone_t zone, vm_offset_t element) -{ - *((vm_offset_t *)(element)) = zone->free_elements; - zone->free_elements = (vm_offset_t) (element); - zone_count_down(zone); -} - - - -#endif /* _KERN_ZALLOC_H_ */ diff --git a/linux/dev/glue/block.c b/linux/dev/glue/block.c index 0c76d3d..c7b3873 100644 --- a/linux/dev/glue/block.c +++ b/linux/dev/glue/block.c @@ -49,6 +49,8 @@ #include <mach/vm_param.h> #include <mach/notify.h> +#include <kern/kalloc.h> + #include <ipc/ipc_port.h> #include <ipc/ipc_space.h> diff --git a/linux/dev/glue/net.c b/linux/dev/glue/net.c index 91ebf96..a60275f 100644 --- a/linux/dev/glue/net.c +++ b/linux/dev/glue/net.c @@ -69,6 +69,7 @@ #include <mach/vm_param.h> #include <mach/notify.h> +#include <kern/kalloc.h> #include <kern/printf.h> #include <ipc/ipc_port.h> diff --git a/vm/memory_object_proxy.c b/vm/memory_object_proxy.c index fdab6e0..4fed312 100644 --- a/vm/memory_object_proxy.c +++ b/vm/memory_object_proxy.c @@ -41,15 +41,14 @@ #include <mach/notify.h> #include <mach/vm_prot.h> #include <kern/printf.h> -#include <kern/zalloc.h> -#include <kern/mach_param.h> +#include <kern/slab.h> #include <ipc/ipc_port.h> #include <ipc/ipc_space.h> #include <vm/memory_object_proxy.h> -/* The zone which holds our proxy memory objects. */ -static zone_t memory_object_proxy_zone; +/* The cache which holds our proxy memory objects. */ +static struct kmem_cache memory_object_proxy_cache; struct memory_object_proxy { @@ -64,13 +63,8 @@ typedef struct memory_object_proxy *memory_object_proxy_t; void memory_object_proxy_init (void) { - /* For limit, see PORT_MAX. */ - memory_object_proxy_zone = zinit (sizeof (struct memory_object_proxy), 0, - (TASK_MAX * 3 + THREAD_MAX) - * sizeof (struct memory_object_proxy), - 256 * sizeof (struct memory_object_proxy), - ZONE_EXHAUSTIBLE, - "proxy memory object zone"); + kmem_cache_init (&memory_object_proxy_cache, "memory_object_proxy", + sizeof (struct memory_object_proxy), 0, NULL, NULL, NULL, 0); } /* Lookup a proxy memory object by its port. */ @@ -153,13 +147,13 @@ memory_object_create_proxy (ipc_space_t space, vm_prot_t max_protection, if (start[0] != 0 || len[0] != (vm_offset_t) ~0) return KERN_INVALID_ARGUMENT; - proxy = (memory_object_proxy_t) zalloc (memory_object_proxy_zone); + proxy = (memory_object_proxy_t) kmem_cache_alloc (&memory_object_proxy_cache); /* Allocate port, keeping a reference for it. */ proxy->port = ipc_port_alloc_kernel (); if (proxy->port == IP_NULL) { - zfree (memory_object_proxy_zone, (vm_offset_t) proxy); + kmem_cache_free (&memory_object_proxy_cache, (vm_offset_t) proxy); return KERN_RESOURCE_SHORTAGE; } /* Associate the port with the proxy memory object. 
*/ diff --git a/vm/vm_external.c b/vm/vm_external.c index ac47faa..e9643ff 100644 --- a/vm/vm_external.c +++ b/vm/vm_external.c @@ -31,7 +31,7 @@ */ #include <mach/boolean.h> -#include <kern/zalloc.h> +#include <kern/slab.h> #include <vm/vm_external.h> #include <mach/vm_param.h> #include <kern/assert.h> @@ -40,7 +40,7 @@ boolean_t vm_external_unsafe = FALSE; -zone_t vm_external_zone = ZONE_NULL; +struct kmem_cache vm_external_cache; /* * The implementation uses bit arrays to record whether @@ -52,8 +52,8 @@ zone_t vm_external_zone = ZONE_NULL; #define SMALL_SIZE (VM_EXTERNAL_SMALL_SIZE/8) #define LARGE_SIZE (VM_EXTERNAL_LARGE_SIZE/8) -zone_t vm_object_small_existence_map_zone; -zone_t vm_object_large_existence_map_zone; +struct kmem_cache vm_object_small_existence_map_cache; +struct kmem_cache vm_object_large_existence_map_cache; vm_external_t vm_external_create(size) @@ -62,20 +62,17 @@ vm_external_t vm_external_create(size) vm_external_t result; vm_size_t bytes; - if (vm_external_zone == ZONE_NULL) - return(VM_EXTERNAL_NULL); - - result = (vm_external_t) zalloc(vm_external_zone); + result = (vm_external_t) kmem_cache_alloc(&vm_external_cache); result->existence_map = (char *) 0; bytes = (atop(size) + 07) >> 3; if (bytes <= SMALL_SIZE) { result->existence_map = - (char *) zalloc(vm_object_small_existence_map_zone); + (char *) kmem_cache_alloc(&vm_object_small_existence_map_cache); result->existence_size = SMALL_SIZE; } else if (bytes <= LARGE_SIZE) { result->existence_map = - (char *) zalloc(vm_object_large_existence_map_zone); + (char *) kmem_cache_alloc(&vm_object_large_existence_map_cache); result->existence_size = LARGE_SIZE; } return(result); @@ -89,14 +86,14 @@ void vm_external_destroy(e) if (e->existence_map != (char *) 0) { if (e->existence_size <= SMALL_SIZE) { - zfree(vm_object_small_existence_map_zone, + kmem_cache_free(&vm_object_small_existence_map_cache, (vm_offset_t) e->existence_map); } else { - zfree(vm_object_large_existence_map_zone, + kmem_cache_free(&vm_object_large_existence_map_cache, (vm_offset_t) e->existence_map); } } - zfree(vm_external_zone, (vm_offset_t) e); + kmem_cache_free(&vm_external_cache, (vm_offset_t) e); } vm_external_state_t _vm_external_state_get(e, offset) @@ -142,18 +139,14 @@ void vm_external_module_initialize(void) { vm_size_t size = (vm_size_t) sizeof(struct vm_external); - vm_external_zone = zinit(size, 0, 16*1024*size, size, - 0, "external page bitmaps"); + kmem_cache_init(&vm_external_cache, "vm_external", size, 0, + NULL, NULL, NULL, 0); - vm_object_small_existence_map_zone = zinit(SMALL_SIZE, 0, - round_page(LARGE_SIZE * SMALL_SIZE), - round_page(SMALL_SIZE), - ZONE_EXHAUSTIBLE, - "object small existence maps"); + kmem_cache_init(&vm_object_small_existence_map_cache, + "small_existence_map", SMALL_SIZE, 0, + NULL, NULL, NULL, 0); - vm_object_large_existence_map_zone = zinit(LARGE_SIZE, 0, - round_page(8 * LARGE_SIZE), - round_page(LARGE_SIZE), - ZONE_EXHAUSTIBLE, - "object large existence maps"); + kmem_cache_init(&vm_object_large_existence_map_cache, + "large_existence_map", LARGE_SIZE, 0, + NULL, NULL, NULL, 0); } diff --git a/vm/vm_fault.c b/vm/vm_fault.c index cce043a..10955ed 100644 --- a/vm/vm_fault.c +++ b/vm/vm_fault.c @@ -51,9 +51,8 @@ #include <mach/memory_object.h> #include <vm/memory_object_user.user.h> /* For memory_object_data_{request,unlock} */ -#include <kern/mach_param.h> #include <kern/macro_help.h> -#include <kern/zalloc.h> +#include <kern/slab.h> #if MACH_PCSAMPLE #include <kern/pc_sample.h> @@ -85,7 +84,7 @@ typedef 
struct vm_fault_state { vm_prot_t vmfp_access; } vm_fault_state_t; -zone_t vm_fault_state_zone = 0; +struct kmem_cache vm_fault_state_cache; int vm_object_absent_max = 50; @@ -107,10 +106,8 @@ extern struct db_watchpoint *db_watchpoint_list; */ void vm_fault_init(void) { - vm_fault_state_zone = zinit(sizeof(vm_fault_state_t), 0, - THREAD_MAX * sizeof(vm_fault_state_t), - sizeof(vm_fault_state_t), - 0, "vm fault state"); + kmem_cache_init(&vm_fault_state_cache, "vm_fault_state", + sizeof(vm_fault_state_t), 0, NULL, NULL, NULL, 0); } /* @@ -1206,12 +1203,12 @@ kern_return_t vm_fault(map, vaddr, fault_type, change_wiring, /* * if this assignment stmt is written as - * 'active_threads[cpu_number()] = zalloc()', - * cpu_number may be evaluated before zalloc; - * if zalloc blocks, cpu_number will be wrong + * 'active_threads[cpu_number()] = kmem_cache_alloc()', + * cpu_number may be evaluated before kmem_cache_alloc; + * if kmem_cache_alloc blocks, cpu_number will be wrong */ - state = (char *) zalloc(vm_fault_state_zone); + state = (char *) kmem_cache_alloc(&vm_fault_state_cache); current_thread()->ith_other = state; } @@ -1490,7 +1487,7 @@ kern_return_t vm_fault(map, vaddr, fault_type, change_wiring, register vm_fault_state_t *state = (vm_fault_state_t *) current_thread()->ith_other; - zfree(vm_fault_state_zone, (vm_offset_t) state); + kmem_cache_free(&vm_fault_state_cache, (vm_offset_t) state); (*continuation)(kr); /*NOTREACHED*/ } diff --git a/vm/vm_init.c b/vm/vm_init.c index 33fca65..89eb098 100644 --- a/vm/vm_init.c +++ b/vm/vm_init.c @@ -35,7 +35,7 @@ */ #include <mach/machine/vm_types.h> -#include <kern/zalloc.h> +#include <kern/slab.h> #include <kern/kalloc.h> #include <vm/vm_fault.h> #include <vm/vm_object.h> @@ -67,12 +67,12 @@ void vm_mem_bootstrap() * Initialize other VM packages */ - zone_bootstrap(); + slab_bootstrap(); vm_object_bootstrap(); vm_map_init(); kmem_init(start, end); pmap_init(); - zone_init(); + slab_init(); kalloc_init(); vm_fault_init(); vm_page_module_init(); diff --git a/vm/vm_kern.c b/vm/vm_kern.c index cfa33ff..fd46e98 100644 --- a/vm/vm_kern.c +++ b/vm/vm_kern.c @@ -58,7 +58,8 @@ * Variables exported by this module. */ -vm_map_t kernel_map; +static struct vm_map kernel_map_store; +vm_map_t kernel_map = &kernel_map_store; vm_map_t kernel_pageable_map; extern void kmem_alloc_pages(); @@ -811,27 +812,27 @@ kmem_remap_pages(object, offset, start, end, protection) } /* - * kmem_suballoc: + * kmem_submap: * - * Allocates a map to manage a subrange + * Initializes a map to manage a subrange * of the kernel virtual address space. 
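
The rewritten comment in vm_fault() above keeps an easy-to-miss caveat: when a blocking allocation appears on the right-hand side of an assignment whose left-hand side indexes per-CPU state, C leaves the evaluation order unspecified, and the thread may resume on a different CPU after blocking. A hedged sketch of the safe idiom; some_cache and per_cpu_state are hypothetical names, while cpu_number() and kmem_cache_alloc() are the routines referred to in the comment.

    #include <kern/slab.h>
    #include <kern/cpu_number.h>

    extern struct kmem_cache some_cache;     /* hypothetical cache */
    extern vm_offset_t per_cpu_state[];      /* hypothetical per-CPU slots */

    void per_cpu_alloc_example(void)
    {
        vm_offset_t tmp;

        /* Unsafe: cpu_number() may be evaluated before the allocation,
         * and kmem_cache_alloc() may block and resume on another CPU,
         * storing the result under a stale index:
         *
         *     per_cpu_state[cpu_number()] = kmem_cache_alloc(&some_cache);
         */

        /* Safe: allocate first, then read the CPU number. */
        tmp = kmem_cache_alloc(&some_cache);
        per_cpu_state[cpu_number()] = tmp;
    }
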
* * Arguments are as follows: * + * map Map to initialize * parent Map to take range from * size Size of range to find * min, max Returned endpoints of map * pageable Can the region be paged */ -vm_map_t -kmem_suballoc(parent, min, max, size, pageable) - vm_map_t parent; +void +kmem_submap(map, parent, min, max, size, pageable) + vm_map_t map, parent; vm_offset_t *min, *max; vm_size_t size; boolean_t pageable; { - vm_map_t map; vm_offset_t addr; kern_return_t kr; @@ -850,20 +851,16 @@ kmem_suballoc(parent, min, max, size, pageable) vm_submap_object, (vm_offset_t) 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); if (kr != KERN_SUCCESS) - panic("kmem_suballoc"); + panic("kmem_submap"); pmap_reference(vm_map_pmap(parent)); - map = vm_map_create(vm_map_pmap(parent), addr, addr + size, pageable); - if (map == VM_MAP_NULL) - panic("kmem_suballoc"); - + vm_map_setup(map, vm_map_pmap(parent), addr, addr + size, pageable); kr = vm_map_submap(parent, addr, addr + size, map); if (kr != KERN_SUCCESS) - panic("kmem_suballoc"); + panic("kmem_submap"); *min = addr; *max = addr + size; - return map; } /* @@ -876,9 +873,8 @@ void kmem_init(start, end) vm_offset_t start; vm_offset_t end; { - kernel_map = vm_map_create(pmap_kernel(), - VM_MIN_KERNEL_ADDRESS, end, - FALSE); + vm_map_setup(kernel_map, pmap_kernel(), VM_MIN_KERNEL_ADDRESS, end, + FALSE); /* * Reserve virtual memory allocated up to this time. diff --git a/vm/vm_kern.h b/vm/vm_kern.h index ca93d7a..22b7c12 100644 --- a/vm/vm_kern.h +++ b/vm/vm_kern.h @@ -58,8 +58,8 @@ extern kern_return_t kmem_realloc(vm_map_t, vm_offset_t, vm_size_t, vm_offset_t *, vm_size_t); extern void kmem_free(vm_map_t, vm_offset_t, vm_size_t); -extern vm_map_t kmem_suballoc(vm_map_t, vm_offset_t *, vm_offset_t *, - vm_size_t, boolean_t); +extern void kmem_submap(vm_map_t, vm_map_t, vm_offset_t *, + vm_offset_t *, vm_size_t, boolean_t); extern kern_return_t kmem_io_map_copyout(vm_map_t, vm_offset_t *, vm_offset_t *, vm_size_t *, diff --git a/vm/vm_map.c b/vm/vm_map.c index ce83403..de10eec 100644 --- a/vm/vm_map.c +++ b/vm/vm_map.c @@ -41,7 +41,8 @@ #include <mach/vm_param.h> #include <kern/assert.h> #include <kern/debug.h> -#include <kern/zalloc.h> +#include <kern/kalloc.h> +#include <kern/slab.h> #include <vm/pmap.h> #include <vm/vm_fault.h> #include <vm/vm_map.h> @@ -70,7 +71,7 @@ void vm_map_copy_page_discard (vm_map_copy_t copy); * map entry to the same memory - the wired count in the new entry * must be set to zero. vm_map_entry_copy_full() creates a new * entry that is identical to the old entry. This preserves the - * wire count; it's used for map splitting and zone changing in + * wire count; it's used for map splitting and cache changing in * vm_map_copyout. */ #define vm_map_entry_copy(NEW,OLD) \ @@ -130,10 +131,10 @@ MACRO_END * vm_object_copy_strategically() in vm_object.c. 
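
With kmem_suballoc() turned into kmem_submap(), a caller now provides the struct vm_map storage itself instead of receiving a freshly allocated map, which removes one early allocation from the bootstrap path. A sketch of what a converted caller might look like; my_submap_store, my_submap, my_submap_init() and MY_SUBMAP_SIZE are hypothetical names, while the kmem_submap() signature is the one declared in vm/vm_kern.h above.

    #include <vm/vm_kern.h>
    #include <vm/vm_map.h>

    #define MY_SUBMAP_SIZE (8 * 1024 * 1024)   /* hypothetical size */

    /* The submap structure is now placed in static storage by its owner. */
    static struct vm_map my_submap_store;
    vm_map_t my_submap = &my_submap_store;

    void my_submap_init(void)
    {
        vm_offset_t min, max;

        /* Previously: my_submap = kmem_suballoc(kernel_map, &min, &max,
         *                                       MY_SUBMAP_SIZE, FALSE);
         * Now the caller passes in the map to initialize. */
        kmem_submap(my_submap, kernel_map, &min, &max,
                    MY_SUBMAP_SIZE, FALSE);
    }
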
*/ -zone_t vm_map_zone; /* zone for vm_map structures */ -zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */ -zone_t vm_map_kentry_zone; /* zone for kernel entry structures */ -zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */ +struct kmem_cache vm_map_cache; /* cache for vm_map structures */ +struct kmem_cache vm_map_entry_cache; /* cache for vm_map_entry structures */ +struct kmem_cache vm_map_kentry_cache; /* cache for kernel entry structures */ +struct kmem_cache vm_map_copy_cache; /* cache for vm_map_copy structures */ boolean_t vm_map_lookup_entry(); /* forward declaration */ @@ -143,7 +144,8 @@ boolean_t vm_map_lookup_entry(); /* forward declaration */ * vm_map_submap creates the submap. */ -vm_object_t vm_submap_object; +static struct vm_object vm_submap_object_store; +vm_object_t vm_submap_object = &vm_submap_object_store; /* * vm_map_init: @@ -151,51 +153,81 @@ vm_object_t vm_submap_object; * Initialize the vm_map module. Must be called before * any other vm_map routines. * - * Map and entry structures are allocated from zones -- we must - * initialize those zones. + * Map and entry structures are allocated from caches -- we must + * initialize those caches. * - * There are three zones of interest: + * There are three caches of interest: * - * vm_map_zone: used to allocate maps. - * vm_map_entry_zone: used to allocate map entries. - * vm_map_kentry_zone: used to allocate map entries for the kernel. + * vm_map_cache: used to allocate maps. + * vm_map_entry_cache: used to allocate map entries. + * vm_map_kentry_cache: used to allocate map entries for the kernel. * - * The kernel allocates map entries from a special zone that is initially - * "crammed" with memory. It would be difficult (perhaps impossible) for - * the kernel to allocate more memory to a entry zone when it became - * empty since the very act of allocating memory implies the creation - * of a new entry. + * Kernel map entries are allocated from a special cache, using a custom + * page allocation function to avoid recursion. It would be difficult + * (perhaps impossible) for the kernel to allocate more memory to an entry + * cache when it became empty since the very act of allocating memory + * implies the creation of a new entry. */ vm_offset_t kentry_data; -vm_size_t kentry_data_size; -int kentry_count = 256; /* to init kentry_data_size */ +vm_size_t kentry_data_size = 32 * PAGE_SIZE; -void vm_map_init(void) +static vm_offset_t kentry_pagealloc(vm_size_t size) { - vm_map_zone = zinit((vm_size_t) sizeof(struct vm_map), 0, 40*1024, - PAGE_SIZE, 0, "maps"); - vm_map_entry_zone = zinit((vm_size_t) sizeof(struct vm_map_entry), - 0, 1024*1024, PAGE_SIZE*5, - 0, "non-kernel map entries"); - vm_map_kentry_zone = zinit((vm_size_t) sizeof(struct vm_map_entry), 0, - kentry_data_size, kentry_data_size, - ZONE_FIXED /* XXX */, "kernel map entries"); - - vm_map_copy_zone = zinit((vm_size_t) sizeof(struct vm_map_copy), - 0, 16*1024, PAGE_SIZE, 0, - "map copies"); + vm_offset_t result; - /* - * Cram the kentry zone with initial data. 
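
Earlier in this vm_map.c hunk, vm_submap_object receives the same treatment as kernel_map in vm_kern.c: instead of a bare pointer filled in by a run-time allocation, the structure itself lives in static storage and the exported pointer refers to it, presumably so these singletons exist before any cache is usable. The pattern, reduced to its essentials (bar_store and bar are hypothetical names; later hunks add setup routines that initialize such objects in place):

    #include <vm/vm_object.h>

    /* Before: a pointer, filled in during bootstrap by an allocation.
     *     vm_object_t bar;
     * After: static storage, so only initialization happens at boot. */
    static struct vm_object bar_store;
    vm_object_t bar = &bar_store;
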
- */ - zcram(vm_map_kentry_zone, kentry_data, kentry_data_size); + if (size > kentry_data_size) + panic("vm_map: kentry memory exhausted"); + + result = kentry_data; + kentry_data += size; + kentry_data_size -= size; + return result; +} + +void vm_map_init(void) +{ + kmem_cache_init(&vm_map_cache, "vm_map", sizeof(struct vm_map), 0, + NULL, NULL, NULL, 0); + kmem_cache_init(&vm_map_entry_cache, "vm_map_entry", + sizeof(struct vm_map_entry), 0, NULL, NULL, NULL, 0); + kmem_cache_init(&vm_map_kentry_cache, "vm_map_kentry", + sizeof(struct vm_map_entry), 0, NULL, kentry_pagealloc, + NULL, KMEM_CACHE_NOCPUPOOL | KMEM_CACHE_NOOFFSLAB + | KMEM_CACHE_NORECLAIM); + kmem_cache_init(&vm_map_copy_cache, "vm_map_copy", + sizeof(struct vm_map_copy), 0, NULL, NULL, NULL, 0); /* * Submap object is initialized by vm_object_init. */ } +void vm_map_setup(map, pmap, min, max, pageable) + vm_map_t map; + pmap_t pmap; + vm_offset_t min, max; + boolean_t pageable; +{ + vm_map_first_entry(map) = vm_map_to_entry(map); + vm_map_last_entry(map) = vm_map_to_entry(map); + map->hdr.nentries = 0; + map->hdr.entries_pageable = pageable; + + map->size = 0; + map->ref_count = 1; + map->pmap = pmap; + map->min_offset = min; + map->max_offset = max; + map->wiring_required = FALSE; + map->wait_for_space = FALSE; + map->first_free = vm_map_to_entry(map); + map->hint = vm_map_to_entry(map); + vm_map_lock_init(map); + simple_lock_init(&map->ref_lock); + simple_lock_init(&map->hint_lock); +} + /* * vm_map_create: * @@ -210,27 +242,11 @@ vm_map_t vm_map_create(pmap, min, max, pageable) { register vm_map_t result; - result = (vm_map_t) zalloc(vm_map_zone); + result = (vm_map_t) kmem_cache_alloc(&vm_map_cache); if (result == VM_MAP_NULL) panic("vm_map_create"); - vm_map_first_entry(result) = vm_map_to_entry(result); - vm_map_last_entry(result) = vm_map_to_entry(result); - result->hdr.nentries = 0; - result->hdr.entries_pageable = pageable; - - result->size = 0; - result->ref_count = 1; - result->pmap = pmap; - result->min_offset = min; - result->max_offset = max; - result->wiring_required = FALSE; - result->wait_for_space = FALSE; - result->first_free = vm_map_to_entry(result); - result->hint = vm_map_to_entry(result); - vm_map_lock_init(result); - simple_lock_init(&result->ref_lock); - simple_lock_init(&result->hint_lock); + vm_map_setup(result, pmap, min, max, pageable); return(result); } @@ -250,15 +266,15 @@ vm_map_t vm_map_create(pmap, min, max, pageable) vm_map_entry_t _vm_map_entry_create(map_header) register struct vm_map_header *map_header; { - register zone_t zone; + register kmem_cache_t cache; register vm_map_entry_t entry; if (map_header->entries_pageable) - zone = vm_map_entry_zone; + cache = &vm_map_entry_cache; else - zone = vm_map_kentry_zone; + cache = &vm_map_kentry_cache; - entry = (vm_map_entry_t) zalloc(zone); + entry = (vm_map_entry_t) kmem_cache_alloc(cache); if (entry == VM_MAP_ENTRY_NULL) panic("vm_map_entry_create"); @@ -280,14 +296,14 @@ void _vm_map_entry_dispose(map_header, entry) register struct vm_map_header *map_header; register vm_map_entry_t entry; { - register zone_t zone; + register kmem_cache_t cache; if (map_header->entries_pageable) - zone = vm_map_entry_zone; + cache = &vm_map_entry_cache; else - zone = vm_map_kentry_zone; + cache = &vm_map_kentry_cache; - zfree(zone, (vm_offset_t) entry); + kmem_cache_free(cache, (vm_offset_t) entry); } /* @@ -368,7 +384,7 @@ void vm_map_deallocate(map) pmap_destroy(map->pmap); - zfree(vm_map_zone, (vm_offset_t) map); + kmem_cache_free(&vm_map_cache, 
(vm_offset_t) map); } /* @@ -1907,7 +1923,7 @@ free_next_copy: register vm_map_copy_t new_copy; new_copy = (vm_map_copy_t) copy->cpy_cont_args; - zfree(vm_map_copy_zone, (vm_offset_t) copy); + kmem_cache_free(&vm_map_copy_cache, (vm_offset_t) copy); copy = new_copy; goto free_next_copy; } @@ -1918,7 +1934,7 @@ free_next_copy: break; } - zfree(vm_map_copy_zone, (vm_offset_t) copy); + kmem_cache_free(&vm_map_copy_cache, (vm_offset_t) copy); } /* @@ -1952,7 +1968,7 @@ vm_map_copy_copy(copy) * from the old one into it. */ - new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); + new_copy = (vm_map_copy_t) kmem_cache_alloc(&vm_map_copy_cache); *new_copy = *copy; if (copy->type == VM_MAP_COPY_ENTRY_LIST) { @@ -2160,7 +2176,7 @@ start_pass_1: /* * XXXO If there are no permanent objects in the destination, - * XXXO and the source and destination map entry zones match, + * XXXO and the source and destination map entry caches match, * XXXO and the destination map entry is not shared, * XXXO then the map entries can be deleted and replaced * XXXO with those from the copy. The following code is the @@ -2403,7 +2419,7 @@ start_pass_1: ((where)->vme_next = vm_map_copy_first_entry(copy)) \ ->vme_prev = (where); \ (map)->hdr.nentries += (copy)->cpy_hdr.nentries; \ - zfree(vm_map_copy_zone, (vm_offset_t) copy); \ + kmem_cache_free(&vm_map_copy_cache, (vm_offset_t) copy); \ MACRO_END /* @@ -2459,7 +2475,7 @@ kern_return_t vm_map_copyout(dst_map, dst_addr, copy) VM_INHERIT_DEFAULT); if (kr != KERN_SUCCESS) return(kr); - zfree(vm_map_copy_zone, (vm_offset_t) copy); + kmem_cache_free(&vm_map_copy_cache, (vm_offset_t) copy); return(KERN_SUCCESS); } @@ -2516,15 +2532,15 @@ kern_return_t vm_map_copyout(dst_map, dst_addr, copy) * Mismatches occur when dealing with the default * pager. */ - zone_t old_zone; + kmem_cache_t old_cache; vm_map_entry_t next, new; /* - * Find the zone that the copies were allocated from + * Find the cache that the copies were allocated from */ - old_zone = (copy->cpy_hdr.entries_pageable) - ? vm_map_entry_zone - : vm_map_kentry_zone; + old_cache = (copy->cpy_hdr.entries_pageable) + ? &vm_map_entry_cache + : &vm_map_kentry_cache; entry = vm_map_copy_first_entry(copy); /* @@ -2547,7 +2563,7 @@ kern_return_t vm_map_copyout(dst_map, dst_addr, copy) vm_map_copy_last_entry(copy), new); next = entry->vme_next; - zfree(old_zone, (vm_offset_t) entry); + kmem_cache_free(old_cache, (vm_offset_t) entry); entry = next; } } @@ -3036,10 +3052,10 @@ error: * Consume on success logic. */ if (copy != orig_copy) { - zfree(vm_map_copy_zone, (vm_offset_t) copy); + kmem_cache_free(&vm_map_copy_cache, (vm_offset_t) copy); } if (result == KERN_SUCCESS) { - zfree(vm_map_copy_zone, (vm_offset_t) orig_copy); + kmem_cache_free(&vm_map_copy_cache, (vm_offset_t) orig_copy); } return(result); @@ -3116,7 +3132,7 @@ kern_return_t vm_map_copyin(src_map, src_addr, len, src_destroy, copy_result) * remember the endpoints prior to rounding. */ - copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); + copy = (vm_map_copy_t) kmem_cache_alloc(&vm_map_copy_cache); vm_map_copy_first_entry(copy) = vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy); copy->type = VM_MAP_COPY_ENTRY_LIST; @@ -3443,7 +3459,7 @@ kern_return_t vm_map_copyin_object(object, offset, size, copy_result) * and null links. 
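
Back in vm_map_init() above, the kernel-entry cache is the one place where the extra kmem_cache_init() parameters are exercised: a private page-allocation callback hands out pieces of the boot-time kentry_data pool, and the flags keep the cache self-contained so that allocating a kernel map entry never recurses into kernel map manipulation. The flag semantics assumed here (no per-CPU pools, no off-slab metadata, no reclaiming) are inferred from their names. A sketch of the same technique for a hypothetical early_cache backed by a hypothetical reserved pool:

    #include <kern/debug.h>
    #include <kern/slab.h>
    #include <mach/vm_param.h>

    struct early_object {                    /* hypothetical object type */
        int value;
    };

    /* Boot-time pool, reserved before the page allocator is available. */
    static vm_offset_t early_pool;
    static vm_size_t early_pool_size = 16 * PAGE_SIZE;

    /* Page source for the cache: a bump allocator over the pool, in the
     * spirit of kentry_pagealloc() above. */
    static vm_offset_t early_pagealloc(vm_size_t size)
    {
        vm_offset_t result;

        if (size > early_pool_size)
            panic("early_cache: reserved pool exhausted");

        result = early_pool;
        early_pool += size;
        early_pool_size -= size;
        return result;
    }

    static struct kmem_cache early_cache;

    void early_cache_init(void)
    {
        kmem_cache_init(&early_cache, "early_object",
                        sizeof(struct early_object), 0,
                        NULL,             /* constructor (assumed slot) */
                        early_pagealloc,  /* custom page source */
                        NULL,             /* no matching free routine */
                        KMEM_CACHE_NOCPUPOOL | KMEM_CACHE_NOOFFSLAB
                        | KMEM_CACHE_NORECLAIM);
    }
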
*/ - copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); + copy = (vm_map_copy_t) kmem_cache_alloc(&vm_map_copy_cache); vm_map_copy_first_entry(copy) = vm_map_copy_last_entry(copy) = VM_MAP_ENTRY_NULL; copy->type = VM_MAP_COPY_OBJECT; @@ -3598,7 +3614,7 @@ kern_return_t vm_map_copyin_page_list(src_map, src_addr, len, src_destroy, * be page-aligned. */ - copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); + copy = (vm_map_copy_t) kmem_cache_alloc(&vm_map_copy_cache); copy->type = VM_MAP_COPY_PAGE_LIST; copy->cpy_npages = 0; copy->offset = src_addr; diff --git a/vm/vm_map.h b/vm/vm_map.h index 567fe93..f4e9395 100644 --- a/vm/vm_map.h +++ b/vm/vm_map.h @@ -357,6 +357,9 @@ extern int kentry_count; /* Initialize the module */ extern void vm_map_init(void); +/* Initialize an empty map */ +extern void vm_map_setup(vm_map_t, pmap_t, vm_offset_t, vm_offset_t, + boolean_t); /* Create an empty map */ extern vm_map_t vm_map_create(pmap_t, vm_offset_t, vm_offset_t, boolean_t); diff --git a/vm/vm_object.c b/vm/vm_object.c index 9057973..d80124a 100644 --- a/vm/vm_object.c +++ b/vm/vm_object.c @@ -47,7 +47,7 @@ #include <kern/lock.h> #include <kern/queue.h> #include <kern/xpr.h> -#include <kern/zalloc.h> +#include <kern/slab.h> #include <vm/memory_object.h> #include <vm/vm_fault.h> #include <vm/vm_map.h> @@ -141,13 +141,14 @@ void vm_object_deactivate_pages(vm_object_t); * ZZZ Continue this comment. */ -zone_t vm_object_zone; /* vm backing store zone */ +struct kmem_cache vm_object_cache; /* vm backing store cache */ /* * All wired-down kernel memory belongs to a single virtual * memory object (kernel_object) to avoid wasting data structures. */ -vm_object_t kernel_object; +static struct vm_object kernel_object_store; +vm_object_t kernel_object = &kernel_object_store; /* * Virtual memory objects that are not referenced by @@ -198,7 +199,7 @@ decl_simple_lock_data(,vm_object_cached_lock_data) * object structure, be sure to add initialization * (see vm_object_init). */ -vm_object_t vm_object_template; +struct vm_object vm_object_template; /* * vm_object_allocate: @@ -206,17 +207,24 @@ vm_object_t vm_object_template; * Returns a new object with the given size. 
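
One subtlety that survives the conversion in the vm_map hunks above: map entries can come from either of two caches, so code that disposes of an entry on behalf of another map header must pick the cache from the header's entries_pageable flag rather than assume one, exactly as _vm_map_entry_dispose() and the old_cache logic in vm_map_copyout() do. A condensed sketch of that idiom; free_entry_for() is a hypothetical wrapper, and the extern declarations stand in for the globals defined in vm_map.c.

    #include <kern/slab.h>
    #include <vm/vm_map.h>

    extern struct kmem_cache vm_map_entry_cache;    /* pageable maps */
    extern struct kmem_cache vm_map_kentry_cache;   /* kernel maps */

    /* Hypothetical helper: return an entry to the cache it came from. */
    static void free_entry_for(struct vm_map_header *hdr, vm_map_entry_t entry)
    {
        kmem_cache_t cache;

        cache = hdr->entries_pageable
                ? &vm_map_entry_cache
                : &vm_map_kentry_cache;

        kmem_cache_free(cache, (vm_offset_t) entry);
    }
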
*/ +static void _vm_object_setup( + vm_object_t object, + vm_size_t size) +{ + *object = vm_object_template; + queue_init(&object->memq); + vm_object_lock_init(object); + object->size = size; +} + vm_object_t _vm_object_allocate( vm_size_t size) { register vm_object_t object; - object = (vm_object_t) zalloc(vm_object_zone); + object = (vm_object_t) kmem_cache_alloc(&vm_object_cache); - *object = *vm_object_template; - queue_init(&object->memq); - vm_object_lock_init(object); - object->size = size; + _vm_object_setup(object, size); return object; } @@ -244,10 +252,8 @@ vm_object_t vm_object_allocate( */ void vm_object_bootstrap(void) { - vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object), 0, - round_page(512*1024), - round_page(12*1024), - 0, "objects"); + kmem_cache_init(&vm_object_cache, "vm_object", + sizeof(struct vm_object), 0, NULL, NULL, NULL, 0); queue_init(&vm_object_cached_list); simple_lock_init(&vm_object_cached_lock_data); @@ -256,53 +262,50 @@ void vm_object_bootstrap(void) * Fill in a template object, for quick initialization */ - vm_object_template = (vm_object_t) zalloc(vm_object_zone); - memset(vm_object_template, 0, sizeof *vm_object_template); - - vm_object_template->ref_count = 1; - vm_object_template->size = 0; - vm_object_template->resident_page_count = 0; - vm_object_template->copy = VM_OBJECT_NULL; - vm_object_template->shadow = VM_OBJECT_NULL; - vm_object_template->shadow_offset = (vm_offset_t) 0; + vm_object_template.ref_count = 1; + vm_object_template.size = 0; + vm_object_template.resident_page_count = 0; + vm_object_template.copy = VM_OBJECT_NULL; + vm_object_template.shadow = VM_OBJECT_NULL; + vm_object_template.shadow_offset = (vm_offset_t) 0; - vm_object_template->pager = IP_NULL; - vm_object_template->paging_offset = 0; - vm_object_template->pager_request = PAGER_REQUEST_NULL; - vm_object_template->pager_name = IP_NULL; + vm_object_template.pager = IP_NULL; + vm_object_template.paging_offset = 0; + vm_object_template.pager_request = PAGER_REQUEST_NULL; + vm_object_template.pager_name = IP_NULL; - vm_object_template->pager_created = FALSE; - vm_object_template->pager_initialized = FALSE; - vm_object_template->pager_ready = FALSE; + vm_object_template.pager_created = FALSE; + vm_object_template.pager_initialized = FALSE; + vm_object_template.pager_ready = FALSE; - vm_object_template->copy_strategy = MEMORY_OBJECT_COPY_NONE; + vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_NONE; /* ignored if temporary, will be reset before * permanent object becomes ready */ - vm_object_template->use_shared_copy = FALSE; - vm_object_template->shadowed = FALSE; - - vm_object_template->absent_count = 0; - vm_object_template->all_wanted = 0; /* all bits FALSE */ - - vm_object_template->paging_in_progress = 0; - vm_object_template->can_persist = FALSE; - vm_object_template->internal = TRUE; - vm_object_template->temporary = TRUE; - vm_object_template->alive = TRUE; - vm_object_template->lock_in_progress = FALSE; - vm_object_template->lock_restart = FALSE; - vm_object_template->use_old_pageout = TRUE; /* XXX change later */ - vm_object_template->last_alloc = (vm_offset_t) 0; + vm_object_template.use_shared_copy = FALSE; + vm_object_template.shadowed = FALSE; + + vm_object_template.absent_count = 0; + vm_object_template.all_wanted = 0; /* all bits FALSE */ + + vm_object_template.paging_in_progress = 0; + vm_object_template.can_persist = FALSE; + vm_object_template.internal = TRUE; + vm_object_template.temporary = TRUE; + vm_object_template.alive = TRUE; + 
vm_object_template.lock_in_progress = FALSE; + vm_object_template.lock_restart = FALSE; + vm_object_template.use_old_pageout = TRUE; /* XXX change later */ + vm_object_template.last_alloc = (vm_offset_t) 0; #if MACH_PAGEMAP - vm_object_template->existence_info = VM_EXTERNAL_NULL; + vm_object_template.existence_info = VM_EXTERNAL_NULL; #endif /* MACH_PAGEMAP */ /* * Initialize the "kernel object" */ - kernel_object = _vm_object_allocate( + _vm_object_setup(kernel_object, VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS); /* @@ -310,7 +313,7 @@ void vm_object_bootstrap(void) * kernel object so that no limit is imposed on submap sizes. */ - vm_submap_object = _vm_object_allocate( + _vm_object_setup(vm_submap_object, VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS); #if MACH_PAGEMAP @@ -660,7 +663,7 @@ void vm_object_terminate( * Free the space for the object. */ - zfree(vm_object_zone, (vm_offset_t) object); + kmem_cache_free(&vm_object_cache, (vm_offset_t) object); } /* @@ -2618,7 +2621,7 @@ void vm_object_collapse( vm_object_unlock(object); if (old_name_port != IP_NULL) ipc_port_dealloc_kernel(old_name_port); - zfree(vm_object_zone, (vm_offset_t) backing_object); + kmem_cache_free(&vm_object_cache, (vm_offset_t) backing_object); vm_object_lock(object); object_collapses++; diff --git a/vm/vm_page.h b/vm/vm_page.h index f13b0af..4536d1c 100644 --- a/vm/vm_page.h +++ b/vm/vm_page.h @@ -41,7 +41,6 @@ #include <vm/vm_types.h> #include <kern/queue.h> #include <kern/lock.h> -#include <kern/zalloc.h> #include <kern/macro_help.h> #include <kern/sched_prim.h> /* definitions of wait/wakeup */ diff --git a/vm/vm_pageout.c b/vm/vm_pageout.c index 7a755bf..77c1cfe 100644 --- a/vm/vm_pageout.c +++ b/vm/vm_pageout.c @@ -43,6 +43,7 @@ #include <mach/vm_statistics.h> #include <kern/counters.h> #include <kern/debug.h> +#include <kern/slab.h> #include <kern/task.h> #include <kern/thread.h> #include <vm/pmap.h> @@ -544,8 +545,8 @@ void vm_pageout_scan() * into an internal object and then immediately double-page it, * sending it to the default pager. * - * consider_zone_gc should be last, because the other operations - * might return memory to zones. When we pause we use + * slab_collect should be last, because the other operations + * might return memory to caches. When we pause we use * vm_pageout_scan_continue as our continuation, so we will * reenter vm_pageout_scan periodically and attempt to reclaim * internal memory even if we never reach vm_page_free_target. @@ -555,7 +556,7 @@ void vm_pageout_scan() net_kmsg_collect(); consider_task_collect(); consider_thread_collect(); - consider_zone_gc(); + slab_collect(); for (burst_count = 0;;) { register vm_page_t m; diff --git a/vm/vm_resident.c b/vm/vm_resident.c index 96354a4..ae71a74 100644 --- a/vm/vm_resident.c +++ b/vm/vm_resident.c @@ -45,7 +45,7 @@ #include <mach/vm_statistics.h> #include <machine/vm_param.h> #include <kern/xpr.h> -#include <kern/zalloc.h> +#include <kern/slab.h> #include <vm/pmap.h> #include <vm/vm_map.h> #include <vm/vm_page.h> @@ -58,10 +58,6 @@ #include <vm/vm_user.h> #endif -/* in zalloc.c XXX */ -extern vm_offset_t zdata; -extern vm_size_t zdata_size; - /* * Associated with eacn page of user-allocatable memory is a * page structure. @@ -126,7 +122,7 @@ unsigned int vm_page_free_count_minimum; /* debugging */ * These page structures are allocated the way * most other kernel structures are. 
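
The vm_object changes above replace the heap-allocated template object with a statically allocated struct and factor the copy-and-fix-up step into _vm_object_setup(), so dynamically allocated objects and the statically placed kernel_object and vm_submap_object are initialized the same way. A stripped-down sketch of that pattern with a hypothetical widget type (all names here are illustrative):

    #include <kern/queue.h>
    #include <kern/slab.h>

    struct widget {
        queue_head_t links;
        unsigned int size;
        int          refcount;
    };

    static struct kmem_cache widget_cache;
    static struct widget widget_template;    /* filled once at bootstrap */

    /* Shared by dynamic allocation and statically placed singletons. */
    static void widget_setup(struct widget *w, unsigned int size)
    {
        *w = widget_template;                /* bulk-copy the defaults */
        queue_init(&w->links);               /* then fix per-object state */
        w->size = size;
    }

    struct widget *widget_allocate(unsigned int size)
    {
        struct widget *w = (struct widget *) kmem_cache_alloc(&widget_cache);

        widget_setup(w, size);
        return w;
    }

    void widget_bootstrap(void)
    {
        kmem_cache_init(&widget_cache, "widget", sizeof(struct widget), 0,
                        NULL, NULL, NULL, 0);

        widget_template.refcount = 1;        /* default field values */
        widget_template.size = 0;
    }
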
*/ -zone_t vm_page_zone; +struct kmem_cache vm_page_cache; /* * Fictitious pages don't have a physical address, @@ -239,14 +235,11 @@ void vm_page_bootstrap( vm_page_free_wanted = 0; /* - * Steal memory for the zone system. + * Steal memory for the kernel map entries. */ - kentry_data_size = kentry_count * sizeof(struct vm_map_entry); kentry_data = pmap_steal_memory(kentry_data_size); - zdata = pmap_steal_memory(zdata_size); - /* * Allocate (and initialize) the virtual-to-physical * table hash buckets. @@ -430,10 +423,8 @@ void pmap_startup( */ void vm_page_module_init(void) { - vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page), 0, - VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, - PAGE_SIZE, - 0, "vm pages"); + kmem_cache_init(&vm_page_cache, "vm_page", sizeof(struct vm_page), 0, + NULL, NULL, NULL, 0); } /* @@ -455,7 +446,7 @@ void vm_page_create( for (paddr = round_page(start); paddr < trunc_page(end); paddr += PAGE_SIZE) { - m = (vm_page_t) zalloc(vm_page_zone); + m = (vm_page_t) kmem_cache_alloc(&vm_page_cache); if (m == VM_PAGE_NULL) panic("vm_page_create"); @@ -810,7 +801,7 @@ void vm_page_more_fictitious(void) int i; for (i = 0; i < vm_page_fictitious_quantum; i++) { - m = (vm_page_t) zalloc(vm_page_zone); + m = (vm_page_t) kmem_cache_alloc(&vm_page_cache); if (m == VM_PAGE_NULL) panic("vm_page_more_fictitious"); diff --git a/xen/block.c b/xen/block.c index 02d410f..fb18b67 100644 --- a/xen/block.c +++ b/xen/block.c @@ -18,6 +18,7 @@ #include <sys/types.h> #include <mach/mig_errors.h> +#include <kern/kalloc.h> #include <ipc/ipc_port.h> #include <ipc/ipc_space.h> #include <vm/vm_kern.h> @@ -18,6 +18,7 @@ #include <sys/types.h> #include <mach/mig_errors.h> +#include <kern/kalloc.h> #include <ipc/ipc_port.h> #include <ipc/ipc_space.h> #include <vm/vm_kern.h> diff --git a/xen/store.c b/xen/store.c index 94d0ae4..8796bb5 100644 --- a/xen/store.c +++ b/xen/store.c @@ -20,6 +20,7 @@ #include <mach/mig_support.h> #include <machine/pmap.h> #include <machine/ipl.h> +#include <kern/kalloc.h> #include <stdarg.h> #include <string.h> #include <alloca.h> |
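
The vm_resident.c hunk also shows where boot-time memory now comes from: the zone system's zdata block is gone, and only the kernel map-entry pool is still stolen from the pmap before the page allocator exists, while vm_page structures themselves are served by vm_page_cache. A small sketch of that reservation step for a hypothetical pool; early_pool, early_pool_size and early_pool_bootstrap() are illustrative names, and pmap_steal_memory() is the routine already used above for kentry_data (its declaring header is assumed here).

    #include <vm/pmap.h>

    extern vm_offset_t early_pool;           /* hypothetical pool base */
    extern vm_size_t early_pool_size;        /* hypothetical pool size */

    void early_pool_bootstrap(void)
    {
        /* Grab physical memory before vm_page structures exist, the same
         * way vm_page_bootstrap() reserves kentry_data above. */
        early_pool = pmap_steal_memory(early_pool_size);
    }
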