-rw-r--r-- Makefrag.am | 12
-rw-r--r-- configfrag.ac | 6
-rw-r--r-- device/dev_lookup.c | 17
-rw-r--r-- device/dev_pager.c | 34
-rw-r--r-- device/ds_routines.c | 49
-rw-r--r-- device/io_req.h | 5
-rw-r--r-- device/net_io.c | 32
-rw-r--r-- i386/Makefrag.am | 1
-rw-r--r-- i386/configfrag.ac | 3
-rw-r--r-- i386/i386/fpu.c | 26
-rw-r--r-- i386/i386/io_perm.c | 7
-rw-r--r-- i386/i386/machine_task.c | 20
-rw-r--r-- i386/i386/pcb.c | 14
-rw-r--r-- i386/i386/task.h | 4
-rw-r--r-- i386/i386/zalloc.h | 29
-rw-r--r-- i386/intel/pmap.c | 30
-rw-r--r-- i386/intel/pmap.h | 1
-rw-r--r-- include/mach_debug/mach_debug.defs | 20
-rw-r--r-- include/mach_debug/mach_debug_types.defs | 7
-rw-r--r-- include/mach_debug/mach_debug_types.h | 2
-rw-r--r-- include/mach_debug/slab_info.h (renamed from include/mach_debug/zone_info.h) | 57
-rw-r--r-- ipc/ipc_entry.c | 4
-rw-r--r-- ipc/ipc_entry.h | 8
-rw-r--r-- ipc/ipc_hash.c | 8
-rw-r--r-- ipc/ipc_hash.h | 2
-rw-r--r-- ipc/ipc_init.c | 47
-rw-r--r-- ipc/ipc_init.h | 8
-rw-r--r-- ipc/ipc_marequest.c | 27
-rw-r--r-- ipc/ipc_marequest.h | 1
-rw-r--r-- ipc/ipc_object.c | 3
-rw-r--r-- ipc/ipc_object.h | 10
-rw-r--r-- ipc/ipc_space.c | 4
-rw-r--r-- ipc/ipc_space.h | 7
-rw-r--r-- ipc/ipc_table.c | 7
-rw-r--r-- kern/act.c | 18
-rw-r--r-- kern/bootstrap.c | 1
-rw-r--r-- kern/ipc_tt.c | 1
-rw-r--r-- kern/kalloc.c | 254
-rw-r--r-- kern/kalloc.h | 4
-rw-r--r-- kern/list.h | 349
-rw-r--r-- kern/mach_clock.c | 3
-rw-r--r-- kern/mach_param.h | 67
-rw-r--r-- kern/macro_help.h | 4
-rw-r--r-- kern/pc_sample.c | 1
-rw-r--r-- kern/priority.c | 1
-rw-r--r-- kern/processor.c | 15
-rw-r--r-- kern/rbtree.c | 478
-rw-r--r-- kern/rbtree.h | 298
-rw-r--r-- kern/rbtree_i.h | 179
-rw-r--r-- kern/server_loop.ch | 1
-rw-r--r-- kern/slab.c | 1576
-rw-r--r-- kern/slab.h | 222
-rw-r--r-- kern/startup.c | 1
-rw-r--r-- kern/task.c | 48
-rw-r--r-- kern/task.h | 2
-rw-r--r-- kern/thread.c | 17
-rw-r--r-- kern/zalloc.c | 1007
-rw-r--r-- kern/zalloc.h | 136
-rw-r--r-- linux/dev/glue/block.c | 2
-rw-r--r-- linux/dev/glue/net.c | 1
-rw-r--r-- vm/memory_object_proxy.c | 20
-rw-r--r-- vm/vm_external.c | 43
-rw-r--r-- vm/vm_fault.c | 21
-rw-r--r-- vm/vm_init.c | 6
-rw-r--r-- vm/vm_kern.c | 30
-rw-r--r-- vm/vm_kern.h | 4
-rw-r--r-- vm/vm_map.c | 178
-rw-r--r-- vm/vm_map.h | 3
-rw-r--r-- vm/vm_object.c | 103
-rw-r--r-- vm/vm_page.h | 1
-rw-r--r-- vm/vm_pageout.c | 7
-rw-r--r-- vm/vm_resident.c | 23
-rw-r--r-- xen/block.c | 1
-rw-r--r-- xen/net.c | 1
-rw-r--r-- xen/store.c | 1
75 files changed, 3571 insertions, 2069 deletions
diff --git a/Makefrag.am b/Makefrag.am
index a80cd24..7180093 100644
--- a/Makefrag.am
+++ b/Makefrag.am
@@ -151,9 +151,9 @@ libkernel_a_SOURCES += \
kern/ipc_sched.h \
kern/ipc_tt.c \
kern/ipc_tt.h \
- kern/kalloc.c \
kern/kalloc.h \
kern/kern_types.h \
+ kern/list.h \
kern/lock.c \
kern/lock.h \
kern/lock_mon.c \
@@ -161,7 +161,6 @@ libkernel_a_SOURCES += \
kern/mach_clock.h \
kern/mach_factor.c \
kern/mach_factor.h \
- kern/mach_param.h \
kern/machine.c \
kern/machine.h \
kern/macro_help.h \
@@ -175,7 +174,12 @@ libkernel_a_SOURCES += \
kern/profile.c \
kern/queue.c \
kern/queue.h \
+ kern/rbtree.c \
+ kern/rbtree.h \
+ kern/rbtree_i.h \
kern/refcount.h \
+ kern/slab.c \
+ kern/slab.h \
kern/sched.h \
kern/sched_prim.c \
kern/sched_prim.h \
@@ -200,8 +204,6 @@ libkernel_a_SOURCES += \
kern/timer.h \
kern/xpr.c \
kern/xpr.h \
- kern/zalloc.c \
- kern/zalloc.h \
kern/elf-load.c \
kern/boot_script.c
EXTRA_DIST += \
@@ -402,7 +404,7 @@ include_mach_eXec_HEADERS = \
# mach-debug-headers:= $(addprefix mach_debug/, hash_info.h ipc_info.h \
# mach_debug.defs mach_debug_types.defs mach_debug_types.h \
-# pc_info.h vm_info.h zone_info.h)
+# pc_info.h vm_info.h slab_info.h)
# Other headers for the distribution. We don't install these, because the
# GNU C library has correct versions for users to use.
diff --git a/configfrag.ac b/configfrag.ac
index 77b0024..5f13b63 100644
--- a/configfrag.ac
+++ b/configfrag.ac
@@ -102,6 +102,12 @@ AC_DEFINE([STAT_TIME], [1], [STAT_TIME])
# Kernel tracing.
AC_DEFINE([XPR_DEBUG], [1], [XPR_DEBUG])
+
+# Slab allocator debugging facilities.
+AC_DEFINE([SLAB_VERIFY], [0], [SLAB_VERIFY])
+
+# Enable the CPU pool layer in the slab allocator.
+AC_DEFINE([SLAB_USE_CPU_POOLS], [0], [SLAB_USE_CPU_POOLS])
#
# Options.
diff --git a/device/dev_lookup.c b/device/dev_lookup.c
index 2391e8d..98a2d02 100644
--- a/device/dev_lookup.c
+++ b/device/dev_lookup.c
@@ -32,7 +32,7 @@
#include <mach/vm_param.h>
#include <kern/queue.h>
-#include <kern/zalloc.h>
+#include <kern/slab.h>
#include <device/device_types.h>
#include <device/dev_hdr.h>
@@ -62,7 +62,7 @@ queue_head_t dev_number_hash_table[NDEVHASH];
decl_simple_lock_data(,
dev_number_lock)
-zone_t dev_hdr_zone;
+struct kmem_cache dev_hdr_cache;
/*
* Enter device in the number lookup table.
@@ -151,7 +151,7 @@ device_lookup(name)
simple_unlock(&dev_number_lock);
- new_device = (mach_device_t) zalloc(dev_hdr_zone);
+ new_device = (mach_device_t) kmem_cache_alloc(&dev_hdr_cache);
simple_lock_init(&new_device->ref_lock);
new_device->ref_count = 1;
simple_lock_init(&new_device->lock);
@@ -187,7 +187,7 @@ device_lookup(name)
simple_unlock(&dev_number_lock);
if (new_device != MACH_DEVICE_NULL)
- zfree(dev_hdr_zone, (vm_offset_t)new_device);
+ kmem_cache_free(&dev_hdr_cache, (vm_offset_t)new_device);
}
return (device);
@@ -233,7 +233,7 @@ mach_device_deallocate(device)
simple_unlock(&device->ref_lock);
simple_unlock(&dev_number_lock);
- zfree(dev_hdr_zone, (vm_offset_t)device);
+ kmem_cache_free(&dev_hdr_cache, (vm_offset_t)device);
}
/*
@@ -376,9 +376,6 @@ dev_lookup_init()
simple_lock_init(&dev_port_lock);
- dev_hdr_zone = zinit(sizeof(struct mach_device), 0,
- sizeof(struct mach_device) * NDEVICES,
- PAGE_SIZE,
- FALSE,
- "open device entry");
+ kmem_cache_init(&dev_hdr_cache, "mach_device",
+ sizeof(struct mach_device), 0, NULL, NULL, NULL, 0);
}
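
The same conversion recipe is applied to every zone in this patch: the zinit()/zalloc()/zfree() triple becomes a statically declared struct kmem_cache that is initialized once with kmem_cache_init() and then used through kmem_cache_alloc()/kmem_cache_free(). A minimal sketch of the pattern, with a hypothetical my_obj type and cache name (the trailing NULL/0 arguments are simply the defaults passed everywhere in this patch):

#include <kern/slab.h>

struct my_obj {
	int field;
};

static struct kmem_cache my_obj_cache;	/* replaces the old zone_t */

void my_module_init(void)
{
	/* One-time setup; the slab layer sizes and grows the cache itself. */
	kmem_cache_init(&my_obj_cache, "my_obj",
			sizeof(struct my_obj), 0, NULL, NULL, NULL, 0);
}

struct my_obj *my_obj_create(void)
{
	return (struct my_obj *) kmem_cache_alloc(&my_obj_cache);
}

void my_obj_destroy(struct my_obj *obj)
{
	kmem_cache_free(&my_obj_cache, (vm_offset_t) obj);
}
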
diff --git a/device/dev_pager.c b/device/dev_pager.c
index 447781e..bc58a15 100644
--- a/device/dev_pager.c
+++ b/device/dev_pager.c
@@ -44,8 +44,7 @@
#include <kern/debug.h>
#include <kern/printf.h>
#include <kern/queue.h>
-#include <kern/zalloc.h>
-#include <kern/kalloc.h>
+#include <kern/slab.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>
@@ -126,7 +125,7 @@ typedef struct dev_pager *dev_pager_t;
#define DEV_PAGER_NULL ((dev_pager_t)0)
-zone_t dev_pager_zone;
+struct kmem_cache dev_pager_cache;
void dev_pager_reference(register dev_pager_t ds)
{
@@ -144,7 +143,7 @@ void dev_pager_deallocate(register dev_pager_t ds)
}
simple_unlock(&ds->lock);
- zfree(dev_pager_zone, (vm_offset_t)ds);
+ kmem_cache_free(&dev_pager_cache, (vm_offset_t)ds);
}
/*
@@ -161,7 +160,7 @@ struct dev_pager_entry {
typedef struct dev_pager_entry *dev_pager_entry_t;
queue_head_t dev_pager_hashtable[DEV_PAGER_HASH_COUNT];
-zone_t dev_pager_hash_zone;
+struct kmem_cache dev_pager_hash_cache;
decl_simple_lock_data(,
dev_pager_hash_lock)
@@ -174,13 +173,8 @@ void dev_pager_hash_init(void)
register vm_size_t size;
size = sizeof(struct dev_pager_entry);
- dev_pager_hash_zone = zinit(
- size,
- 0,
- size * 1000,
- PAGE_SIZE,
- FALSE,
- "dev_pager port hash");
+ kmem_cache_init(&dev_pager_hash_cache, "dev_pager_entry", size, 0,
+ NULL, NULL, NULL, 0);
for (i = 0; i < DEV_PAGER_HASH_COUNT; i++)
queue_init(&dev_pager_hashtable[i]);
simple_lock_init(&dev_pager_hash_lock);
@@ -192,7 +186,7 @@ void dev_pager_hash_insert(
{
register dev_pager_entry_t new_entry;
- new_entry = (dev_pager_entry_t) zalloc(dev_pager_hash_zone);
+ new_entry = (dev_pager_entry_t) kmem_cache_alloc(&dev_pager_hash_cache);
new_entry->name = name_port;
new_entry->pager_rec = rec;
@@ -220,7 +214,7 @@ void dev_pager_hash_delete(ipc_port_t name_port)
}
simple_unlock(&dev_pager_hash_lock);
if (entry)
- zfree(dev_pager_hash_zone, (vm_offset_t)entry);
+ kmem_cache_free(&dev_pager_hash_cache, (vm_offset_t)entry);
}
dev_pager_t dev_pager_hash_lookup(ipc_port_t name_port)
@@ -273,7 +267,7 @@ kern_return_t device_pager_setup(
return (D_SUCCESS);
}
- d = (dev_pager_t) zalloc(dev_pager_zone);
+ d = (dev_pager_t) kmem_cache_alloc(&dev_pager_cache);
if (d == DEV_PAGER_NULL)
return (KERN_RESOURCE_SHORTAGE);
@@ -726,15 +720,11 @@ void device_pager_init(void)
register vm_size_t size;
/*
- * Initialize zone of paging structures.
+ * Initialize cache of paging structures.
*/
size = sizeof(struct dev_pager);
- dev_pager_zone = zinit(size,
- 0,
- (vm_size_t) size * 1000,
- PAGE_SIZE,
- FALSE,
- "device pager structures");
+ kmem_cache_init(&dev_pager_cache, "dev_pager", size, 0,
+ NULL, NULL, NULL, 0);
/*
* Initialize the name port hashing stuff.
diff --git a/device/ds_routines.c b/device/ds_routines.c
index d4a08fb..5a6fdd2 100644
--- a/device/ds_routines.c
+++ b/device/ds_routines.c
@@ -73,7 +73,7 @@
#include <kern/debug.h>
#include <kern/printf.h>
#include <kern/queue.h>
-#include <kern/zalloc.h>
+#include <kern/slab.h>
#include <kern/thread.h>
#include <kern/task.h>
#include <kern/sched_prim.h>
@@ -130,7 +130,8 @@ static struct device_emulation_ops *emulation_list[] =
&mach_device_emulation_ops,
};
-vm_map_t device_io_map;
+static struct vm_map device_io_map_store;
+vm_map_t device_io_map = &device_io_map_store;
#define NUM_EMULATION (sizeof (emulation_list) / sizeof (emulation_list[0]))
@@ -855,7 +856,7 @@ device_write_get(ior, wait)
*/
if (ior->io_op & IO_INBAND) {
assert(ior->io_count <= sizeof (io_buf_ptr_inband_t));
- new_addr = zalloc(io_inband_zone);
+ new_addr = kmem_cache_alloc(&io_inband_cache);
memcpy((void*)new_addr, ior->io_data, ior->io_count);
ior->io_data = (io_buf_ptr_t)new_addr;
ior->io_alloc_size = sizeof (io_buf_ptr_inband_t);
@@ -935,7 +936,7 @@ device_write_dealloc(ior)
* Inband case.
*/
if (ior->io_op & IO_INBAND) {
- zfree(io_inband_zone, (vm_offset_t)ior->io_data);
+ kmem_cache_free(&io_inband_cache, (vm_offset_t)ior->io_data);
return (TRUE);
}
@@ -1245,7 +1246,7 @@ kern_return_t device_read_alloc(ior, size)
return (KERN_SUCCESS);
if (ior->io_op & IO_INBAND) {
- ior->io_data = (io_buf_ptr_t) zalloc(io_inband_zone);
+ ior->io_data = (io_buf_ptr_t) kmem_cache_alloc(&io_inband_cache);
ior->io_alloc_size = sizeof(io_buf_ptr_inband_t);
} else {
size = round_page(size);
@@ -1338,7 +1339,7 @@ boolean_t ds_read_done(ior)
if (ior->io_count != 0) {
if (ior->io_op & IO_INBAND) {
if (ior->io_alloc_size > 0)
- zfree(io_inband_zone, (vm_offset_t)ior->io_data);
+ kmem_cache_free(&io_inband_cache, (vm_offset_t)ior->io_data);
} else {
register vm_offset_t end_alloc;
@@ -1551,11 +1552,9 @@ void mach_device_init()
queue_init(&io_done_list);
simple_lock_init(&io_done_list_lock);
- device_io_map = kmem_suballoc(kernel_map,
- &device_io_min,
- &device_io_max,
- DEVICE_IO_MAP_SIZE,
- FALSE);
+ kmem_submap(device_io_map, kernel_map, &device_io_min, &device_io_max,
+ DEVICE_IO_MAP_SIZE, FALSE);
+
/*
* If the kernel receives many device_write requests, the
* device_io_map might run out of space. To prevent
@@ -1575,11 +1574,8 @@ void mach_device_init()
*/
device_io_map->wait_for_space = TRUE;
- io_inband_zone = zinit(sizeof(io_buf_ptr_inband_t), 0,
- 1000 * sizeof(io_buf_ptr_inband_t),
- 10 * sizeof(io_buf_ptr_inband_t),
- FALSE,
- "io inband read buffers");
+ kmem_cache_init(&io_inband_cache, "io_buf_ptr_inband",
+ sizeof(io_buf_ptr_inband_t), 0, NULL, NULL, NULL, 0);
mach_device_trap_init();
}
@@ -1615,7 +1611,7 @@ void iowait(ior)
*/
#define IOTRAP_REQSIZE 2048
-zone_t io_trap_zone;
+struct kmem_cache io_trap_cache;
/*
* Initialization. Called from mach_device_init().
@@ -1623,24 +1619,21 @@ zone_t io_trap_zone;
static void
mach_device_trap_init(void)
{
- io_trap_zone = zinit(IOTRAP_REQSIZE, 0,
- 256 * IOTRAP_REQSIZE,
- 16 * IOTRAP_REQSIZE,
- FALSE,
- "wired device trap buffers");
+ kmem_cache_init(&io_trap_cache, "io_req", IOTRAP_REQSIZE, 0,
+ NULL, NULL, NULL, 0);
}
/*
* Allocate an io_req_t.
- * Currently zalloc's from io_trap_zone.
+ * Currently allocates from io_trap_cache.
*
- * Could have lists of different size zones.
+ * Could have lists of different size caches.
* Could call a device-specific routine.
*/
io_req_t
ds_trap_req_alloc(mach_device_t device, vm_size_t data_size)
{
- return (io_req_t) zalloc(io_trap_zone);
+ return (io_req_t) kmem_cache_alloc(&io_trap_cache);
}
/*
@@ -1656,7 +1649,7 @@ ds_trap_write_done(io_req_t ior)
/*
* Should look at reply port and maybe send a message.
*/
- zfree(io_trap_zone, (vm_offset_t) ior);
+ kmem_cache_free(&io_trap_cache, (vm_offset_t) ior);
/*
* Give up device reference from ds_write_trap.
@@ -1732,7 +1725,7 @@ device_write_trap (mach_device_t device, dev_mode_t mode,
*/
mach_device_deallocate(device);
- zfree(io_trap_zone, (vm_offset_t) ior);
+ kmem_cache_free(&io_trap_cache, (vm_offset_t) ior);
return (result);
}
@@ -1823,7 +1816,7 @@ device_writev_trap (mach_device_t device, dev_mode_t mode,
*/
mach_device_deallocate(device);
- zfree(io_trap_zone, (vm_offset_t) ior);
+ kmem_cache_free(&io_trap_cache, (vm_offset_t) ior);
return (result);
}
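
Note also the change at the top of this file: device_io_map is no longer obtained from kmem_suballoc() but is a statically allocated struct vm_map initialized through kmem_submap(), the same pattern used later for ipc_kernel_map in ipc/ipc_init.c. A sketch of that pattern, with hypothetical example_map and EXAMPLE_MAP_SIZE names:

static struct vm_map example_map_store;
vm_map_t example_map = &example_map_store;	/* previously just: vm_map_t example_map; */

void example_module_init(void)
{
	vm_offset_t min, max;

	/* Carve the submap out of kernel_map; its bounds come back in min/max. */
	kmem_submap(example_map, kernel_map, &min, &max,
		    EXAMPLE_MAP_SIZE, FALSE);
}
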
diff --git a/device/io_req.h b/device/io_req.h
index 162524d..65e23e6 100644
--- a/device/io_req.h
+++ b/device/io_req.h
@@ -35,6 +35,7 @@
#include <mach/port.h>
#include <mach/message.h>
#include <mach/vm_param.h>
+#include <kern/slab.h>
#include <kern/kalloc.h>
#include <kern/lock.h>
#include <vm/vm_page.h>
@@ -124,7 +125,7 @@ struct io_req {
void iodone(io_req_t);
/*
- * Macros to allocate and free IORs - will convert to zones later.
+ * Macros to allocate and free IORs - will convert to caches later.
*/
#define io_req_alloc(ior,size) \
MACRO_BEGIN \
@@ -136,6 +137,6 @@ void iodone(io_req_t);
(kfree((vm_offset_t)(ior), sizeof(struct io_req)))
-zone_t io_inband_zone; /* for inband reads */
+struct kmem_cache io_inband_cache; /* for inband reads */
#endif /* _IO_REQ_ */
diff --git a/device/net_io.c b/device/net_io.c
index 8446395..52a0716 100644
--- a/device/net_io.c
+++ b/device/net_io.c
@@ -61,6 +61,7 @@
#include <kern/printf.h>
#include <kern/queue.h>
#include <kern/sched_prim.h>
+#include <kern/slab.h>
#include <kern/thread.h>
#include <machine/machspl.h>
@@ -302,7 +303,7 @@ struct net_rcv_port {
};
typedef struct net_rcv_port *net_rcv_port_t;
-zone_t net_rcv_zone; /* zone of net_rcv_port structs */
+struct kmem_cache net_rcv_cache; /* cache of net_rcv_port structs */
#define NET_HASH_SIZE 256
@@ -324,7 +325,7 @@ struct net_hash_entry {
};
typedef struct net_hash_entry *net_hash_entry_t;
-zone_t net_hash_entry_zone;
+struct kmem_cache net_hash_entry_cache;
/*
* This structure represents a packet filter with multiple sessions.
@@ -1195,7 +1196,7 @@ net_set_filter(ifp, rcv_port, priority, filter, filter_count)
* If there is no match instruction, we allocate
* a normal packet filter structure.
*/
- my_infp = (net_rcv_port_t) zalloc(net_rcv_zone);
+ my_infp = (net_rcv_port_t) kmem_cache_alloc(&net_rcv_cache);
my_infp->rcv_port = rcv_port;
is_new_infp = TRUE;
} else {
@@ -1205,7 +1206,7 @@ net_set_filter(ifp, rcv_port, priority, filter, filter_count)
* a hash table to deal with them.
*/
my_infp = 0;
- hash_entp = (net_hash_entry_t) zalloc(net_hash_entry_zone);
+ hash_entp = (net_hash_entry_t) kmem_cache_alloc(&net_hash_entry_cache);
is_new_infp = FALSE;
}
@@ -1310,7 +1311,8 @@ net_set_filter(ifp, rcv_port, priority, filter, filter_count)
ipc_port_release_send(rcv_port);
if (match != 0)
- zfree (net_hash_entry_zone, (vm_offset_t)hash_entp);
+ kmem_cache_free(&net_hash_entry_cache,
+ (vm_offset_t)hash_entp);
rval = D_NO_MEMORY;
goto clean_and_return;
@@ -1526,20 +1528,12 @@ net_io_init()
register vm_size_t size;
size = sizeof(struct net_rcv_port);
- net_rcv_zone = zinit(size,
- 0,
- size * 1000,
- PAGE_SIZE,
- FALSE,
- "net_rcv_port");
+ kmem_cache_init(&net_rcv_cache, "net_rcv_port", size, 0,
+ NULL, NULL, NULL, 0);
size = sizeof(struct net_hash_entry);
- net_hash_entry_zone = zinit(size,
- 0,
- size * 100,
- PAGE_SIZE,
- FALSE,
- "net_hash_entry");
+ kmem_cache_init(&net_hash_entry_cache, "net_hash_entry", size, 0,
+ NULL, NULL, NULL, 0);
size = ikm_plus_overhead(sizeof(struct net_rcv_msg));
net_kmsg_size = round_page(size);
@@ -2167,7 +2161,7 @@ net_free_dead_infp (dead_infp)
nextfp = (net_rcv_port_t) queue_next(&infp->input);
ipc_port_release_send(infp->rcv_port);
net_del_q_info(infp->rcv_qlimit);
- zfree(net_rcv_zone, (vm_offset_t) infp);
+ kmem_cache_free(&net_rcv_cache, (vm_offset_t) infp);
}
}
@@ -2190,7 +2184,7 @@ net_free_dead_entp (dead_entp)
ipc_port_release_send(entp->rcv_port);
net_del_q_info(entp->rcv_qlimit);
- zfree(net_hash_entry_zone, (vm_offset_t) entp);
+ kmem_cache_free(&net_hash_entry_cache, (vm_offset_t) entp);
}
}
diff --git a/i386/Makefrag.am b/i386/Makefrag.am
index ab3502a..aca4215 100644
--- a/i386/Makefrag.am
+++ b/i386/Makefrag.am
@@ -136,7 +136,6 @@ libkernel_a_SOURCES += \
i386/i386/vm_param.h \
i386/i386/vm_tuning.h \
i386/i386/xpr.h \
- i386/i386/zalloc.h \
i386/intel/pmap.c \
i386/intel/pmap.h \
i386/intel/read_fault.c \
diff --git a/i386/configfrag.ac b/i386/configfrag.ac
index e4ce97e..77f66af 100644
--- a/i386/configfrag.ac
+++ b/i386/configfrag.ac
@@ -25,6 +25,9 @@ dnl USE OF THIS SOFTWARE.
AC_DEFINE([__ELF__], [1], [__ELF__])
AC_DEFINE([i386], [1], [i386])
+ # Determines the size of the CPU cache line.
+ AC_DEFINE([CPU_L1_SHIFT], [6], [CPU_L1_SHIFT])
+
[# Does the architecture provide machine-specific interfaces?
mach_machine_routines=1;;
*)]
diff --git a/i386/i386/fpu.c b/i386/i386/fpu.c
index 2626a38..f2c8124 100644
--- a/i386/i386/fpu.c
+++ b/i386/i386/fpu.c
@@ -44,10 +44,9 @@
#include <kern/debug.h>
#include <machine/machspl.h> /* spls */
-#include <kern/mach_param.h>
#include <kern/printf.h>
#include <kern/thread.h>
-#include <kern/zalloc.h>
+#include <kern/slab.h>
#include <i386/thread.h>
#include <i386/fpu.h>
@@ -72,7 +71,7 @@
extern void i386_exception();
int fp_kind = FP_387; /* 80387 present */
-zone_t ifps_zone; /* zone for FPU save area */
+struct kmem_cache ifps_cache; /* cache for FPU save area */
static unsigned long mxcsr_feature_mask = 0xffffffff; /* Always AND user-provided mxcsr with this security mask */
void fp_save(thread_t thread);
@@ -193,10 +192,9 @@ init_fpu()
void
fpu_module_init()
{
- ifps_zone = zinit(sizeof(struct i386_fpsave_state), 16,
- THREAD_MAX * sizeof(struct i386_fpsave_state),
- THREAD_CHUNK * sizeof(struct i386_fpsave_state),
- 0, "i386 fpsave state");
+ kmem_cache_init(&ifps_cache, "i386_fpsave_state",
+ sizeof(struct i386_fpsave_state), 16,
+ NULL, NULL, NULL, 0);
}
/*
@@ -221,7 +219,7 @@ ASSERT_IPL(SPL0);
clear_fpu();
}
#endif /* NCPUS == 1 */
- zfree(ifps_zone, (vm_offset_t) fps);
+ kmem_cache_free(&ifps_cache, (vm_offset_t) fps);
}
/* The two following functions were stolen from Linux's i387.c */
@@ -335,7 +333,7 @@ ASSERT_IPL(SPL0);
simple_unlock(&pcb->lock);
if (ifps != 0) {
- zfree(ifps_zone, (vm_offset_t) ifps);
+ kmem_cache_free(&ifps_cache, (vm_offset_t) ifps);
}
}
else {
@@ -356,7 +354,7 @@ ASSERT_IPL(SPL0);
if (ifps == 0) {
if (new_ifps == 0) {
simple_unlock(&pcb->lock);
- new_ifps = (struct i386_fpsave_state *) zalloc(ifps_zone);
+ new_ifps = (struct i386_fpsave_state *) kmem_cache_alloc(&ifps_cache);
goto Retry;
}
ifps = new_ifps;
@@ -396,7 +394,7 @@ ASSERT_IPL(SPL0);
simple_unlock(&pcb->lock);
if (new_ifps != 0)
- zfree(ifps_zone, (vm_offset_t) new_ifps);
+ kmem_cache_free(&ifps_cache, (vm_offset_t) new_ifps);
}
return KERN_SUCCESS;
@@ -609,7 +607,7 @@ fpextovrflt()
clear_fpu();
if (ifps)
- zfree(ifps_zone, (vm_offset_t) ifps);
+ kmem_cache_free(&ifps_cache, (vm_offset_t) ifps);
/*
* Raise exception.
@@ -785,7 +783,7 @@ fp_load(thread)
ASSERT_IPL(SPL0);
ifps = pcb->ims.ifps;
if (ifps == 0) {
- ifps = (struct i386_fpsave_state *) zalloc(ifps_zone);
+ ifps = (struct i386_fpsave_state *) kmem_cache_alloc(&ifps_cache);
memset(ifps, 0, sizeof *ifps);
pcb->ims.ifps = ifps;
fpinit();
@@ -836,7 +834,7 @@ fp_state_alloc()
pcb_t pcb = current_thread()->pcb;
struct i386_fpsave_state *ifps;
- ifps = (struct i386_fpsave_state *)zalloc(ifps_zone);
+ ifps = (struct i386_fpsave_state *)kmem_cache_alloc(&ifps_cache);
memset(ifps, 0, sizeof *ifps);
pcb->ims.ifps = ifps;
diff --git a/i386/i386/io_perm.c b/i386/i386/io_perm.c
index df25cc6..8bacb8d 100644
--- a/i386/i386/io_perm.c
+++ b/i386/i386/io_perm.c
@@ -54,7 +54,8 @@
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>
-#include <kern/zalloc.h>
+#include <kern/slab.h>
+#include <kern/kalloc.h>
#include <kern/lock.h>
#include <kern/queue.h>
#include <kern/thread.h>
@@ -257,12 +258,12 @@ i386_io_perm_modify (task_t target_task, io_perm_t io_perm, boolean_t enable)
if (!iopb)
{
simple_unlock (&target_task->machine.iopb_lock);
- iopb = (unsigned char *) zalloc (machine_task_iopb_zone);
+ iopb = (unsigned char *) kmem_cache_alloc (&machine_task_iopb_cache);
simple_lock (&target_task->machine.iopb_lock);
if (target_task->machine.iopb)
{
if (iopb)
- zfree (machine_task_iopb_zone, (vm_offset_t) iopb);
+ kmem_cache_free (&machine_task_iopb_cache, (vm_offset_t) iopb);
iopb = target_task->machine.iopb;
iopb_size = target_task->machine.iopb_size;
}
diff --git a/i386/i386/machine_task.c b/i386/i386/machine_task.c
index 35b89e0..62b22e3 100644
--- a/i386/i386/machine_task.c
+++ b/i386/i386/machine_task.c
@@ -22,15 +22,14 @@
#include <kern/lock.h>
#include <mach/mach_types.h>
-#include <kern/zalloc.h>
-#include <kern/mach_param.h>
+#include <kern/slab.h>
#include <machine/task.h>
#include <machine/io_perm.h>
-/* The zone which holds our IO permission bitmaps. */
-zone_t machine_task_iopb_zone;
+/* The cache which holds our IO permission bitmaps. */
+struct kmem_cache machine_task_iopb_cache;
/* Initialize the machine task module. The function is called once at
@@ -38,11 +37,8 @@ zone_t machine_task_iopb_zone;
void
machine_task_module_init (void)
{
- machine_task_iopb_zone = zinit (IOPB_BYTES, 0,
- TASK_MAX * IOPB_BYTES,
- IOPB_BYTES,
- ZONE_COLLECTABLE | ZONE_EXHAUSTIBLE,
- "i386 machine task iopb");
+ kmem_cache_init (&machine_task_iopb_cache, "i386_task_iopb", IOPB_BYTES, 0,
+ NULL, NULL, NULL, 0);
}
@@ -62,7 +58,8 @@ void
machine_task_terminate (task_t task)
{
if (task->machine.iopb)
- zfree (machine_task_iopb_zone, (vm_offset_t) task->machine.iopb);
+ kmem_cache_free (&machine_task_iopb_cache,
+ (vm_offset_t) task->machine.iopb);
}
@@ -74,7 +71,8 @@ machine_task_collect (task_t task)
simple_lock (&task->machine.iopb_lock);
if (task->machine.iopb_size == 0 && task->machine.iopb)
{
- zfree (machine_task_iopb_zone, (vm_offset_t) task->machine.iopb);
+ kmem_cache_free (&machine_task_iopb_cache,
+ (vm_offset_t) task->machine.iopb);
task->machine.iopb = 0;
}
simple_unlock (&task->machine.iopb_lock);
diff --git a/i386/i386/pcb.c b/i386/i386/pcb.c
index fffa92a..e065dbb 100644
--- a/i386/i386/pcb.c
+++ b/i386/i386/pcb.c
@@ -36,9 +36,9 @@
#include "vm_param.h"
#include <kern/counters.h>
#include <kern/debug.h>
-#include <kern/mach_param.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
+#include <kern/slab.h>
#include <vm/vm_kern.h>
#include <vm/pmap.h>
@@ -65,7 +65,7 @@ extern void Thread_continue();
extern void user_ldt_free();
-zone_t pcb_zone;
+struct kmem_cache pcb_cache;
vm_offset_t kernel_stack[NCPUS]; /* top of active_stack */
@@ -369,10 +369,8 @@ thread_t switch_context(old, continuation, new)
void pcb_module_init()
{
- pcb_zone = zinit(sizeof(struct pcb), 0,
- THREAD_MAX * sizeof(struct pcb),
- THREAD_CHUNK * sizeof(struct pcb),
- 0, "i386 pcb state");
+ kmem_cache_init(&pcb_cache, "pcb", sizeof(struct pcb), 0,
+ NULL, NULL, NULL, 0);
fpu_module_init();
}
@@ -382,7 +380,7 @@ void pcb_init(thread)
{
register pcb_t pcb;
- pcb = (pcb_t) zalloc(pcb_zone);
+ pcb = (pcb_t) kmem_cache_alloc(&pcb_cache);
if (pcb == 0)
panic("pcb_init");
@@ -422,7 +420,7 @@ void pcb_terminate(thread)
fp_free(pcb->ims.ifps);
if (pcb->ims.ldt != 0)
user_ldt_free(pcb->ims.ldt);
- zfree(pcb_zone, (vm_offset_t) pcb);
+ kmem_cache_free(&pcb_cache, (vm_offset_t) pcb);
thread->pcb = 0;
}
diff --git a/i386/i386/task.h b/i386/i386/task.h
index ca8de04..0060ad4 100644
--- a/i386/i386/task.h
+++ b/i386/i386/task.h
@@ -24,7 +24,7 @@
#define _I386_TASK_H_
#include <kern/kern_types.h>
-#include <kern/zalloc.h>
+#include <kern/slab.h>
/* The machine specific data of a task. */
struct machine_task
@@ -41,7 +41,7 @@ struct machine_task
typedef struct machine_task machine_task_t;
-extern zone_t machine_task_iopb_zone;
+extern struct kmem_cache machine_task_iopb_cache;
/* Initialize the machine task module. The function is called once at
start up by task_init in kern/task.c. */
diff --git a/i386/i386/zalloc.h b/i386/i386/zalloc.h
deleted file mode 100644
index bf7cf6b..0000000
--- a/i386/i386/zalloc.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 1996-1994 The University of Utah and
- * the Computer Systems Laboratory (CSL). All rights reserved.
- *
- * Permission to use, copy, modify and distribute this software is hereby
- * granted provided that (1) source code retains these copyright, permission,
- * and disclaimer notices, and (2) redistributions including binaries
- * reproduce the notices in supporting documentation, and (3) all advertising
- * materials mentioning features or use of this software display the following
- * acknowledgement: ``This product includes software developed by the
- * Computer Systems Laboratory at the University of Utah.''
- *
- * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
- * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
- * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * CSL requests users of this software to return to csl-dist@cs.utah.edu any
- * improvements that they make and grant CSL redistribution rights.
- *
- * Utah $Hdr: zalloc.h 1.4 94/12/16$
- * Author: Bryan Ford
- */
-
-#ifndef _I386_ZALLOC_H_
-#define _I386_ZALLOC_H_
-
-#include <kern/zalloc.h>
-
-#endif /* _I386_ZALLOC_H_ */
diff --git a/i386/intel/pmap.c b/i386/intel/pmap.c
index c448e57..d6e18e5 100644
--- a/i386/intel/pmap.c
+++ b/i386/intel/pmap.c
@@ -63,7 +63,7 @@
#include <kern/debug.h>
#include <kern/printf.h>
#include <kern/thread.h>
-#include <kern/zalloc.h>
+#include <kern/slab.h>
#include <kern/lock.h>
@@ -113,7 +113,7 @@ pv_entry_t pv_head_table; /* array of entries, one per page */
/*
* pv_list entries are kept on a list that can only be accessed
* with the pmap system locked (at SPLVM, not in the cpus_active set).
- * The list is refilled from the pv_list_zone if it becomes empty.
+ * The list is refilled from the pv_list_cache if it becomes empty.
*/
pv_entry_t pv_free_list; /* free list at SPLVM */
decl_simple_lock_data(, pv_free_list_lock)
@@ -133,7 +133,7 @@ decl_simple_lock_data(, pv_free_list_lock)
simple_unlock(&pv_free_list_lock); \
}
-zone_t pv_list_zone; /* zone of pv_entry structures */
+struct kmem_cache pv_list_cache; /* cache of pv_entry structures */
/*
* Each entry in the pv_head_table is locked by a bit in the
@@ -400,7 +400,7 @@ struct pmap_update_list cpu_update_list[NCPUS];
struct pmap kernel_pmap_store;
pmap_t kernel_pmap;
-struct zone *pmap_zone; /* zone of pmap structures */
+struct kmem_cache pmap_cache; /* cache of pmap structures */
int pmap_debug = 0; /* flag for debugging prints */
@@ -937,13 +937,13 @@ void pmap_init()
pmap_phys_attributes = (char *) addr;
/*
- * Create the zone of physical maps,
+ * Create the cache of physical maps,
* and of the physical-to-virtual entries.
*/
s = (vm_size_t) sizeof(struct pmap);
- pmap_zone = zinit(s, 0, 400*s, 4096, 0, "pmap"); /* XXX */
+ kmem_cache_init(&pmap_cache, "pmap", s, 0, NULL, NULL, NULL, 0);
s = (vm_size_t) sizeof(struct pv_entry);
- pv_list_zone = zinit(s, 0, 10000*s, 4096, 0, "pv_list"); /* XXX */
+ kmem_cache_init(&pv_list_cache, "pv_entry", s, 0, NULL, NULL, NULL, 0);
#if NCPUS > 1
/*
@@ -1009,7 +1009,7 @@ pmap_page_table_page_alloc()
/*
* We cannot allocate the pmap_object in pmap_init,
- * because it is called before the zone package is up.
+ * because it is called before the cache package is up.
* Allocate it now if it is missing.
*/
if (pmap_object == VM_OBJECT_NULL)
@@ -1113,11 +1113,11 @@ pmap_t pmap_create(size)
}
/*
- * Allocate a pmap struct from the pmap_zone. Then allocate
- * the page descriptor table from the pd_zone.
+ * Allocate a pmap struct from the pmap_cache. Then allocate
+ * the page descriptor table.
*/
- p = (pmap_t) zalloc(pmap_zone);
+ p = (pmap_t) kmem_cache_alloc(&pmap_cache);
if (p == PMAP_NULL)
panic("pmap_create");
@@ -1232,7 +1232,7 @@ void pmap_destroy(p)
#endif /* MACH_XEN */
kmem_free(kernel_map, (vm_offset_t)p->pdpbase, INTEL_PGBYTES);
#endif /* PAE */
- zfree(pmap_zone, (vm_offset_t) p);
+ kmem_cache_free(&pmap_cache, (vm_offset_t) p);
}
/*
@@ -1782,7 +1782,7 @@ if (pmap_debug) printf("pmap(%x, %x)\n", v, pa);
/*
* Must allocate a new pvlist entry while we're unlocked;
- * zalloc may cause pageout (which will lock the pmap system).
+ * Allocating may cause pageout (which will lock the pmap system).
* If we determine we need a pvlist entry, we will unlock
* and allocate one. Then we will retry, throughing away
* the allocated entry later (if we no longer need it).
@@ -1966,9 +1966,9 @@ Retry:
PMAP_READ_UNLOCK(pmap, spl);
/*
- * Refill from zone.
+ * Refill from cache.
*/
- pv_e = (pv_entry_t) zalloc(pv_list_zone);
+ pv_e = (pv_entry_t) kmem_cache_alloc(&pv_list_cache);
goto Retry;
}
}
diff --git a/i386/intel/pmap.h b/i386/intel/pmap.h
index 7ba7d2c..e02ad36 100644
--- a/i386/intel/pmap.h
+++ b/i386/intel/pmap.h
@@ -37,7 +37,6 @@
#ifndef __ASSEMBLER__
-#include <kern/zalloc.h>
#include <kern/lock.h>
#include <mach/machine/vm_param.h>
#include <mach/vm_statistics.h>
diff --git a/include/mach_debug/mach_debug.defs b/include/mach_debug/mach_debug.defs
index 2a58dc4..053c3fe 100644
--- a/include/mach_debug/mach_debug.defs
+++ b/include/mach_debug/mach_debug.defs
@@ -42,17 +42,7 @@ skip; /* host_ipc_statistics_reset */
skip; /* host_callout_info */
skip; /* host_callout_statistics */
skip; /* host_callout_statistics_reset */
-
-/*
- * Returns information about the memory allocation zones.
- */
-routine host_zone_info(
- host : host_t;
- out names : zone_name_array_t,
- CountInOut, Dealloc;
- out info : zone_info_array_t,
- CountInOut, Dealloc);
-
+skip; /* host_zone_info */
skip; /* host_ipc_bucket_info */
#if !defined(MACH_IPC_DEBUG) || MACH_IPC_DEBUG
@@ -228,6 +218,14 @@ routine mach_vm_object_pages(
out pages : vm_page_info_array_t,
CountInOut, Dealloc);
+/*
+ * Returns information about the memory allocation caches.
+ */
+routine host_slab_info(
+ host : host_t;
+ out info : cache_info_array_t,
+ CountInOut, Dealloc);
+
#else /* !defined(MACH_VM_DEBUG) || MACH_VM_DEBUG */
skip; /* mach_vm_region_info */
skip; /* mach_vm_object_info */
diff --git a/include/mach_debug/mach_debug_types.defs b/include/mach_debug/mach_debug_types.defs
index 9f1976f..f60125a 100644
--- a/include/mach_debug/mach_debug_types.defs
+++ b/include/mach_debug/mach_debug_types.defs
@@ -32,11 +32,8 @@
#include <mach/std_types.defs>
-type zone_name_t = struct[80] of char;
-type zone_name_array_t = array[] of zone_name_t;
-
-type zone_info_t = struct[9] of integer_t;
-type zone_info_array_t = array[] of zone_info_t;
+type cache_info_t = struct[19] of integer_t;
+type cache_info_array_t = array[] of cache_info_t;
type hash_info_bucket_t = struct[1] of natural_t;
type hash_info_bucket_array_t = array[] of hash_info_bucket_t;
diff --git a/include/mach_debug/mach_debug_types.h b/include/mach_debug/mach_debug_types.h
index 2ba0cb1..5d4efcd 100644
--- a/include/mach_debug/mach_debug_types.h
+++ b/include/mach_debug/mach_debug_types.h
@@ -32,7 +32,7 @@
#include <mach_debug/ipc_info.h>
#include <mach_debug/vm_info.h>
-#include <mach_debug/zone_info.h>
+#include <mach_debug/slab_info.h>
#include <mach_debug/hash_info.h>
typedef char symtab_name_t[32];
diff --git a/include/mach_debug/zone_info.h b/include/mach_debug/slab_info.h
index 1b36fe0..37dcb8c 100644
--- a/include/mach_debug/zone_info.h
+++ b/include/mach_debug/slab_info.h
@@ -24,38 +24,39 @@
* the rights to redistribute these changes.
*/
-#ifndef _MACH_DEBUG_ZONE_INFO_H_
-#define _MACH_DEBUG_ZONE_INFO_H_
+#ifndef _MACH_DEBUG_SLAB_INFO_H_
+#define _MACH_DEBUG_SLAB_INFO_H_
-#include <mach/boolean.h>
-#include <mach/machine/vm_types.h>
+#include <sys/types.h>
/*
* Remember to update the mig type definitions
* in mach_debug_types.defs when adding/removing fields.
*/
-#define ZONE_NAME_MAX_LEN 80
-
-typedef struct zone_name {
- char zn_name[ZONE_NAME_MAX_LEN];
-} zone_name_t;
-
-typedef zone_name_t *zone_name_array_t;
-
-
-typedef struct zone_info {
- integer_t zi_count; /* Number of elements used now */
- vm_size_t zi_cur_size; /* current memory utilization */
- vm_size_t zi_max_size; /* how large can this zone grow */
- vm_size_t zi_elem_size; /* size of an element */
- vm_size_t zi_alloc_size; /* size used for more memory */
-/*boolean_t*/integer_t zi_pageable; /* zone pageable? */
-/*boolean_t*/integer_t zi_sleepable; /* sleep if empty? */
-/*boolean_t*/integer_t zi_exhaustible; /* merely return if empty? */
-/*boolean_t*/integer_t zi_collectable; /* garbage collect elements? */
-} zone_info_t;
-
-typedef zone_info_t *zone_info_array_t;
-
-#endif /* _MACH_DEBUG_ZONE_INFO_H_ */
+#define CACHE_NAME_MAX_LEN 32
+
+#define CACHE_FLAGS_NO_CPU_POOL 0x01
+#define CACHE_FLAGS_SLAB_EXTERNAL 0x02
+#define CACHE_FLAGS_NO_RECLAIM 0x04
+#define CACHE_FLAGS_VERIFY 0x08
+#define CACHE_FLAGS_DIRECT 0x10
+
+typedef struct cache_info {
+ int flags;
+ size_t cpu_pool_size;
+ size_t obj_size;
+ size_t align;
+ size_t buf_size;
+ size_t slab_size;
+ unsigned long bufs_per_slab;
+ unsigned long nr_objs;
+ unsigned long nr_bufs;
+ unsigned long nr_slabs;
+ unsigned long nr_free_slabs;
+ char name[CACHE_NAME_MAX_LEN];
+} cache_info_t;
+
+typedef cache_info_t *cache_info_array_t;
+
+#endif /* _MACH_DEBUG_SLAB_INFO_H_ */
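
The cache_info record replaces the old zone_name/zone_info pair with a single per-cache entry. A user of the host_slab_info routine declared earlier receives a cache_info_array_t plus a count; a rough sketch of a consumer follows (the retrieval call itself is omitted, since the MIG-generated prototype is not part of this patch, and cache names are assumed to be NUL-terminated):

#include <stdio.h>
#include <mach_debug/slab_info.h>

/* Print a one-line summary for each cache in the array. */
static void print_cache_info(const cache_info_t *info, unsigned int count)
{
	unsigned int i;

	for (i = 0; i < count; i++)
		printf("%-32s obj_size=%lu slabs=%lu free_slabs=%lu bufs=%lu\n",
		       info[i].name,
		       (unsigned long) info[i].obj_size,
		       info[i].nr_slabs,
		       info[i].nr_free_slabs,
		       info[i].nr_bufs);
}
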
diff --git a/ipc/ipc_entry.c b/ipc/ipc_entry.c
index 42e8dd8..3a06244 100644
--- a/ipc/ipc_entry.c
+++ b/ipc/ipc_entry.c
@@ -41,7 +41,7 @@
#include <mach/port.h>
#include <kern/assert.h>
#include <kern/sched_prim.h>
-#include <kern/zalloc.h>
+#include <kern/slab.h>
#include <ipc/port.h>
#include <ipc/ipc_types.h>
#include <ipc/ipc_entry.h>
@@ -51,7 +51,7 @@
#include <ipc/ipc_table.h>
#include <ipc/ipc_object.h>
-zone_t ipc_tree_entry_zone;
+struct kmem_cache ipc_tree_entry_cache;
/*
* Routine: ipc_entry_tree_collision
diff --git a/ipc/ipc_entry.h b/ipc/ipc_entry.h
index a577cf0..6afa4f6 100644
--- a/ipc/ipc_entry.h
+++ b/ipc/ipc_entry.h
@@ -41,7 +41,7 @@
#include <mach/mach_types.h>
#include <mach/port.h>
#include <mach/kern_return.h>
-#include <kern/zalloc.h>
+#include <kern/slab.h>
#include <ipc/port.h>
#include <ipc/ipc_table.h>
#include <ipc/ipc_types.h>
@@ -129,10 +129,10 @@ typedef struct ipc_tree_entry {
#define ite_request ite_entry.ie_request
#define ite_next ite_entry.hash.tree
-extern zone_t ipc_tree_entry_zone;
+extern struct kmem_cache ipc_tree_entry_cache;
-#define ite_alloc() ((ipc_tree_entry_t) zalloc(ipc_tree_entry_zone))
-#define ite_free(ite) zfree(ipc_tree_entry_zone, (vm_offset_t) (ite))
+#define ite_alloc() ((ipc_tree_entry_t) kmem_cache_alloc(&ipc_tree_entry_cache))
+#define ite_free(ite) kmem_cache_free(&ipc_tree_entry_cache, (vm_offset_t) (ite))
extern ipc_entry_t
diff --git a/ipc/ipc_hash.c b/ipc/ipc_hash.c
index c241289..ad05016 100644
--- a/ipc/ipc_hash.c
+++ b/ipc/ipc_hash.c
@@ -535,13 +535,9 @@ ipc_hash_init(void)
{
ipc_hash_index_t i;
- /* if not configured, initialize ipc_hash_global_size */
+ /* initialize ipc_hash_global_size */
- if (ipc_hash_global_size == 0) {
- ipc_hash_global_size = ipc_tree_entry_max >> 8;
- if (ipc_hash_global_size < 32)
- ipc_hash_global_size = 32;
- }
+ ipc_hash_global_size = IPC_HASH_GLOBAL_SIZE;
/* make sure it is a power of two */
diff --git a/ipc/ipc_hash.h b/ipc/ipc_hash.h
index 58c56ca..929ba77 100644
--- a/ipc/ipc_hash.h
+++ b/ipc/ipc_hash.h
@@ -67,6 +67,8 @@ ipc_hash_delete(ipc_space_t space, ipc_object_t obj,
* and the local primitives, for table entries.
*/
+#define IPC_HASH_GLOBAL_SIZE 256
+
extern boolean_t
ipc_hash_global_lookup(ipc_space_t space, ipc_object_t obj,
mach_port_t *namep, ipc_tree_entry_t *entryp);
diff --git a/ipc/ipc_init.c b/ipc/ipc_init.c
index e9ca64d..ca7e791 100644
--- a/ipc/ipc_init.c
+++ b/ipc/ipc_init.c
@@ -35,8 +35,8 @@
*/
#include <mach/kern_return.h>
-#include <kern/mach_param.h>
#include <kern/ipc_host.h>
+#include <kern/slab.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <ipc/ipc_entry.h>
@@ -52,14 +52,10 @@
-vm_map_t ipc_kernel_map;
+static struct vm_map ipc_kernel_map_store;
+vm_map_t ipc_kernel_map = &ipc_kernel_map_store;
vm_size_t ipc_kernel_map_size = 8 * 1024 * 1024;
-int ipc_space_max = SPACE_MAX;
-int ipc_tree_entry_max = ITE_MAX;
-int ipc_port_max = PORT_MAX;
-int ipc_pset_max = SET_MAX;
-
/*
* Routine: ipc_bootstrap
* Purpose:
@@ -77,28 +73,17 @@ ipc_bootstrap(void)
ipc_port_timestamp_lock_init();
ipc_port_timestamp_data = 0;
- ipc_space_zone = zinit(sizeof(struct ipc_space), 0,
- ipc_space_max * sizeof(struct ipc_space),
- sizeof(struct ipc_space),
- 0, "ipc spaces");
-
- ipc_tree_entry_zone =
- zinit(sizeof(struct ipc_tree_entry), 0,
- ipc_tree_entry_max * sizeof(struct ipc_tree_entry),
- sizeof(struct ipc_tree_entry),
- IPC_ZONE_TYPE, "ipc tree entries");
-
- ipc_object_zones[IOT_PORT] =
- zinit(sizeof(struct ipc_port), 0,
- ipc_port_max * sizeof(struct ipc_port),
- sizeof(struct ipc_port),
- 0, "ipc ports");
-
- ipc_object_zones[IOT_PORT_SET] =
- zinit(sizeof(struct ipc_pset), 0,
- ipc_pset_max * sizeof(struct ipc_pset),
- sizeof(struct ipc_pset),
- IPC_ZONE_TYPE, "ipc port sets");
+ kmem_cache_init(&ipc_space_cache, "ipc_space",
+ sizeof(struct ipc_space), 0, NULL, NULL, NULL, 0);
+
+ kmem_cache_init(&ipc_tree_entry_cache, "ipc_tree_entry",
+ sizeof(struct ipc_tree_entry), 0, NULL, NULL, NULL, 0);
+
+ kmem_cache_init(&ipc_object_caches[IOT_PORT], "ipc_port",
+ sizeof(struct ipc_port), 0, NULL, NULL, NULL, 0);
+
+ kmem_cache_init(&ipc_object_caches[IOT_PORT_SET], "ipc_pset",
+ sizeof(struct ipc_pset), 0, NULL, NULL, NULL, 0);
/* create special spaces */
@@ -127,8 +112,8 @@ ipc_init()
{
vm_offset_t min, max;
- ipc_kernel_map = kmem_suballoc(kernel_map, &min, &max,
- ipc_kernel_map_size, TRUE);
+ kmem_submap(ipc_kernel_map, kernel_map, &min, &max,
+ ipc_kernel_map_size, TRUE);
ipc_host_init();
}
diff --git a/ipc/ipc_init.h b/ipc/ipc_init.h
index b2f1dd4..8dd64bb 100644
--- a/ipc/ipc_init.h
+++ b/ipc/ipc_init.h
@@ -37,14 +37,6 @@
#ifndef _IPC_IPC_INIT_H_
#define _IPC_IPC_INIT_H_
-/* all IPC zones should be exhaustible */
-#define IPC_ZONE_TYPE ZONE_EXHAUSTIBLE
-
-extern int ipc_space_max;
-extern int ipc_tree_entry_max;
-extern int ipc_port_max;
-extern int ipc_pset_max;
-
/*
* Exported interfaces
*/
diff --git a/ipc/ipc_marequest.c b/ipc/ipc_marequest.c
index 540382a..06c53eb 100644
--- a/ipc/ipc_marequest.c
+++ b/ipc/ipc_marequest.c
@@ -37,9 +37,8 @@
#include <mach/message.h>
#include <mach/port.h>
#include <kern/lock.h>
-#include <kern/mach_param.h>
#include <kern/kalloc.h>
-#include <kern/zalloc.h>
+#include <kern/slab.h>
#include <ipc/port.h>
#include <ipc/ipc_init.h>
#include <ipc/ipc_space.h>
@@ -58,11 +57,10 @@
#endif
-zone_t ipc_marequest_zone;
-int ipc_marequest_max = IMAR_MAX;
+struct kmem_cache ipc_marequest_cache;
-#define imar_alloc() ((ipc_marequest_t) zalloc(ipc_marequest_zone))
-#define imar_free(imar) zfree(ipc_marequest_zone, (vm_offset_t) (imar))
+#define imar_alloc() ((ipc_marequest_t) kmem_cache_alloc(&ipc_marequest_cache))
+#define imar_free(imar) kmem_cache_free(&ipc_marequest_cache, (vm_offset_t) (imar))
typedef unsigned int ipc_marequest_index_t;
@@ -100,13 +98,9 @@ ipc_marequest_init(void)
{
ipc_marequest_index_t i;
- /* if not configured, initialize ipc_marequest_size */
+ /* initialize ipc_marequest_size */
- if (ipc_marequest_size == 0) {
- ipc_marequest_size = ipc_marequest_max >> 8;
- if (ipc_marequest_size < 16)
- ipc_marequest_size = 16;
- }
+ ipc_marequest_size = IPC_MAREQUEST_SIZE;
/* make sure it is a power of two */
@@ -142,11 +136,8 @@ ipc_marequest_init(void)
bucket->imarb_head = IMAR_NULL;
}
- ipc_marequest_zone =
- zinit(sizeof(struct ipc_marequest), 0,
- ipc_marequest_max * sizeof(struct ipc_marequest),
- sizeof(struct ipc_marequest),
- IPC_ZONE_TYPE, "ipc msg-accepted requests");
+ kmem_cache_init(&ipc_marequest_cache, "ipc_marequest",
+ sizeof(struct ipc_marequest), 0, NULL, NULL, NULL, 0);
}
/*
@@ -439,7 +430,7 @@ ipc_marequest_info(maxp, info, count)
info[i].hib_count = bucket_count;
}
- *maxp = ipc_marequest_max;
+ *maxp = (unsigned int)-1;
return ipc_marequest_size;
}
diff --git a/ipc/ipc_marequest.h b/ipc/ipc_marequest.h
index eb3746d..4f6f758 100644
--- a/ipc/ipc_marequest.h
+++ b/ipc/ipc_marequest.h
@@ -70,6 +70,7 @@ typedef struct ipc_marequest {
#define IMAR_NULL ((ipc_marequest_t) 0)
+#define IPC_MAREQUEST_SIZE 16
extern void
ipc_marequest_init(void);
diff --git a/ipc/ipc_object.c b/ipc/ipc_object.c
index a7a7ddb..4850fb1 100644
--- a/ipc/ipc_object.c
+++ b/ipc/ipc_object.c
@@ -47,8 +47,9 @@
#include <ipc/ipc_pset.h>
#include <kern/debug.h>
#include <kern/printf.h>
+#include <kern/slab.h>
-zone_t ipc_object_zones[IOT_NUMBER];
+struct kmem_cache ipc_object_caches[IOT_NUMBER];
diff --git a/ipc/ipc_object.h b/ipc/ipc_object.h
index 2bbf8bd..adf5bca 100644
--- a/ipc/ipc_object.h
+++ b/ipc/ipc_object.h
@@ -39,7 +39,7 @@
#include <ipc/ipc_types.h>
#include <kern/lock.h>
#include <kern/macro_help.h>
-#include <kern/zalloc.h>
+#include <kern/slab.h>
typedef unsigned int ipc_object_refs_t;
typedef unsigned int ipc_object_bits_t;
@@ -57,7 +57,7 @@ typedef struct ipc_object {
#define IO_VALID(io) (((io) != IO_NULL) && ((io) != IO_DEAD))
#define IO_BITS_KOTYPE 0x0000ffff /* used by the object */
-#define IO_BITS_OTYPE 0x7fff0000 /* determines a zone */
+#define IO_BITS_OTYPE 0x7fff0000 /* determines a cache */
#define IO_BITS_ACTIVE 0x80000000U /* is object alive? */
#define io_active(io) ((int)(io)->io_bits < 0) /* hack */
@@ -75,13 +75,13 @@ typedef struct ipc_object {
#define IOT_PORT_SET 1
#define IOT_NUMBER 2 /* number of types used */
-extern zone_t ipc_object_zones[IOT_NUMBER];
+extern struct kmem_cache ipc_object_caches[IOT_NUMBER];
#define io_alloc(otype) \
- ((ipc_object_t) zalloc(ipc_object_zones[(otype)]))
+ ((ipc_object_t) kmem_cache_alloc(&ipc_object_caches[(otype)]))
#define io_free(otype, io) \
- zfree(ipc_object_zones[(otype)], (vm_offset_t) (io))
+ kmem_cache_free(&ipc_object_caches[(otype)], (vm_offset_t) (io))
#define io_lock_init(io) simple_lock_init(&(io)->io_lock_data)
#define io_lock(io) simple_lock(&(io)->io_lock_data)
diff --git a/ipc/ipc_space.c b/ipc/ipc_space.c
index 0f50f15..ab55e83 100644
--- a/ipc/ipc_space.c
+++ b/ipc/ipc_space.c
@@ -43,7 +43,7 @@
#include <mach/port.h>
#include <kern/assert.h>
#include <kern/sched_prim.h>
-#include <kern/zalloc.h>
+#include <kern/slab.h>
#include <ipc/port.h>
#include <ipc/ipc_entry.h>
#include <ipc/ipc_splay.h>
@@ -55,7 +55,7 @@
-zone_t ipc_space_zone;
+struct kmem_cache ipc_space_cache;
ipc_space_t ipc_space_kernel;
ipc_space_t ipc_space_reply;
diff --git a/ipc/ipc_space.h b/ipc/ipc_space.h
index d030bf7..c4683d2 100644
--- a/ipc/ipc_space.h
+++ b/ipc/ipc_space.h
@@ -44,6 +44,7 @@
#include <mach/mach_types.h>
#include <kern/macro_help.h>
#include <kern/lock.h>
+#include <kern/slab.h>
#include <ipc/ipc_splay.h>
#include <ipc/ipc_types.h>
@@ -82,10 +83,10 @@ struct ipc_space {
#define IS_NULL ((ipc_space_t) 0)
-extern zone_t ipc_space_zone;
+extern struct kmem_cache ipc_space_cache;
-#define is_alloc() ((ipc_space_t) zalloc(ipc_space_zone))
-#define is_free(is) zfree(ipc_space_zone, (vm_offset_t) (is))
+#define is_alloc() ((ipc_space_t) kmem_cache_alloc(&ipc_space_cache))
+#define is_free(is) kmem_cache_free(&ipc_space_cache, (vm_offset_t) (is))
extern struct ipc_space *ipc_space_kernel;
extern struct ipc_space *ipc_space_reply;
diff --git a/ipc/ipc_table.c b/ipc/ipc_table.c
index e572358..d5b7904 100644
--- a/ipc/ipc_table.c
+++ b/ipc/ipc_table.c
@@ -50,13 +50,6 @@ void ipc_table_fill(
unsigned int min,
vm_size_t elemsize);
-/*
- * We borrow the kalloc map, rather than creating
- * yet another submap of the kernel map.
- */
-
-extern vm_map_t kalloc_map;
-
ipc_table_size_t ipc_table_entries;
unsigned int ipc_table_entries_size = 512;
diff --git a/kern/act.c b/kern/act.c
index d0a03a3..36fa79c 100644
--- a/kern/act.c
+++ b/kern/act.c
@@ -30,8 +30,7 @@
#include <mach/kern_return.h>
#include <mach/alert.h>
-#include <kern/mach_param.h> /* XXX INCALL_... */
-#include <kern/zalloc.h>
+#include <kern/slab.h>
#include <kern/thread.h>
#include <kern/task.h>
#include <kern/debug.h>
@@ -47,7 +46,7 @@ static void special_handler(ReturnHandler *rh, struct Act *act);
#endif
#ifndef ACT_STATIC_KLUDGE
-static zone_t act_zone;
+static struct kmem_cache act_cache;
#else
static Act *act_freelist;
static Act free_acts[ACT_STATIC_KLUDGE];
@@ -68,11 +67,8 @@ void
global_act_init()
{
#ifndef ACT_STATIC_KLUDGE
- act_zone = zinit(
- sizeof(struct Act), 0,
- ACT_MAX * sizeof(struct Act), /* XXX */
- ACT_CHUNK * sizeof(struct Act),
- 0, "activations");
+ kmem_cache_init(&act_cache, "Act", sizeof(struct Act), 0,
+ NULL, NULL, NULL, 0);
#else
int i;
@@ -104,7 +100,7 @@ kern_return_t act_create(task_t task, vm_offset_t user_stack,
int rc;
#ifndef ACT_STATIC_KLUDGE
- act = (Act*)zalloc(act_zone);
+ act = (Act*)kmem_cache_alloc(&act_cache);
if (act == 0)
return(KERN_RESOURCE_SHORTAGE);
#else
@@ -170,9 +166,9 @@ static void act_free(Act *inc)
/* Drop the task reference. */
task_deallocate(inc->task);
- /* Put the act back on the act zone */
+ /* Put the act back on the act cache */
#ifndef ACT_STATIC_KLUDGE
- zfree(act_zone, (vm_offset_t)inc);
+ kmem_cache_free(&act_cache, (vm_offset_t)inc);
#else
/* XXX ipt_lock(act_freelist); */
inc->ipt_next = act_freelist;
diff --git a/kern/bootstrap.c b/kern/bootstrap.c
index 2c63df4..68f40b4 100644
--- a/kern/bootstrap.c
+++ b/kern/bootstrap.c
@@ -42,6 +42,7 @@
#include <kern/debug.h>
#include <kern/host.h>
#include <kern/printf.h>
+#include <kern/kalloc.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/lock.h>
diff --git a/kern/ipc_tt.c b/kern/ipc_tt.c
index de4edc6..6d32e5b 100644
--- a/kern/ipc_tt.c
+++ b/kern/ipc_tt.c
@@ -36,6 +36,7 @@
#include <mach/thread_special_ports.h>
#include <vm/vm_kern.h>
#include <kern/debug.h>
+#include <kern/kalloc.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/ipc_kobject.h>
diff --git a/kern/kalloc.c b/kern/kalloc.c
deleted file mode 100644
index 8256305..0000000
--- a/kern/kalloc.c
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * Mach Operating System
- * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University.
- * Copyright (c) 1993,1994 The University of Utah and
- * the Computer Systems Laboratory (CSL).
- * All rights reserved.
- *
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
- * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
- * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
- * THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- * File: kern/kalloc.c
- * Author: Avadis Tevanian, Jr.
- * Date: 1985
- *
- * General kernel memory allocator. This allocator is designed
- * to be used by the kernel to manage dynamic memory fast.
- */
-
-#include <mach/machine/vm_types.h>
-#include <mach/vm_param.h>
-
-#include <kern/debug.h>
-#include <kern/zalloc.h>
-#include <kern/kalloc.h>
-#include <vm/vm_kern.h>
-#include <vm/vm_object.h>
-#include <vm/vm_map.h>
-
-
-
-vm_map_t kalloc_map;
-vm_size_t kalloc_map_size = 64 * 1024 * 1024;
-vm_size_t kalloc_max;
-
-/*
- * All allocations of size less than kalloc_max are rounded to the
- * next highest power of 2. This allocator is built on top of
- * the zone allocator. A zone is created for each potential size
- * that we are willing to get in small blocks.
- *
- * We assume that kalloc_max is not greater than 64K;
- * thus 16 is a safe array size for k_zone and k_zone_name.
- */
-
-int first_k_zone = -1;
-struct zone *k_zone[16];
-static char *k_zone_name[16] = {
- "kalloc.1", "kalloc.2",
- "kalloc.4", "kalloc.8",
- "kalloc.16", "kalloc.32",
- "kalloc.64", "kalloc.128",
- "kalloc.256", "kalloc.512",
- "kalloc.1024", "kalloc.2048",
- "kalloc.4096", "kalloc.8192",
- "kalloc.16384", "kalloc.32768"
-};
-
-/*
- * Max number of elements per zone. zinit rounds things up correctly
- * Doing things this way permits each zone to have a different maximum size
- * based on need, rather than just guessing; it also
- * means its patchable in case you're wrong!
- */
-unsigned long k_zone_max[16] = {
- 1024, /* 1 Byte */
- 1024, /* 2 Byte */
- 1024, /* 4 Byte */
- 1024, /* 8 Byte */
- 1024, /* 16 Byte */
- 4096, /* 32 Byte */
- 4096, /* 64 Byte */
- 4096, /* 128 Byte */
- 4096, /* 256 Byte */
- 1024, /* 512 Byte */
- 1024, /* 1024 Byte */
- 1024, /* 2048 Byte */
- 1024, /* 4096 Byte */
- 4096, /* 8192 Byte */
- 64, /* 16384 Byte */
- 64, /* 32768 Byte */
-};
-
-/*
- * Initialize the memory allocator. This should be called only
- * once on a system wide basis (i.e. first processor to get here
- * does the initialization).
- *
- * This initializes all of the zones.
- */
-
-#ifndef NDEBUG
-static int kalloc_init_called;
-#endif
-
-void kalloc_init()
-{
- vm_offset_t min, max;
- vm_size_t size;
- register int i;
-
- assert (! kalloc_init_called);
-
- kalloc_map = kmem_suballoc(kernel_map, &min, &max,
- kalloc_map_size, FALSE);
-
- /*
- * Ensure that zones up to size 8192 bytes exist.
- * This is desirable because messages are allocated
- * with kalloc, and messages up through size 8192 are common.
- */
-
- if (PAGE_SIZE < 16*1024)
- kalloc_max = 16*1024;
- else
- kalloc_max = PAGE_SIZE;
-
- /*
- * Allocate a zone for each size we are going to handle.
- * We specify non-paged memory.
- */
- for (i = 0, size = 1; size < kalloc_max; i++, size <<= 1) {
- if (size < MINSIZE) {
- k_zone[i] = 0;
- continue;
- }
- if (size == MINSIZE) {
- first_k_zone = i;
- }
- k_zone[i] = zinit(size, 0, k_zone_max[i] * size, size,
- size >= PAGE_SIZE ? ZONE_COLLECTABLE : 0,
- k_zone_name[i]);
- }
-
-#ifndef NDEBUG
- kalloc_init_called = 1;
-#endif
-}
-
-vm_offset_t kalloc(size)
- vm_size_t size;
-{
- register int zindex;
- register vm_size_t allocsize;
- vm_offset_t addr;
-
- /* compute the size of the block that we will actually allocate */
-
- assert (kalloc_init_called);
-
- allocsize = size;
- if (size < kalloc_max) {
- allocsize = MINSIZE;
- zindex = first_k_zone;
- while (allocsize < size) {
- allocsize <<= 1;
- zindex++;
- }
- }
-
- /*
- * If our size is still small enough, check the queue for that size
- * and allocate.
- */
-
- if (allocsize < kalloc_max) {
- addr = zalloc(k_zone[zindex]);
- } else {
- if (kmem_alloc_wired(kalloc_map, &addr, allocsize)
- != KERN_SUCCESS)
- addr = 0;
- }
- return(addr);
-}
-
-vm_offset_t kget(size)
- vm_size_t size;
-{
- register int zindex;
- register vm_size_t allocsize;
- vm_offset_t addr;
-
- assert (kalloc_init_called);
-
- /* compute the size of the block that we will actually allocate */
-
- allocsize = size;
- if (size < kalloc_max) {
- allocsize = MINSIZE;
- zindex = first_k_zone;
- while (allocsize < size) {
- allocsize <<= 1;
- zindex++;
- }
- }
-
- /*
- * If our size is still small enough, check the queue for that size
- * and allocate.
- */
-
- if (allocsize < kalloc_max) {
- addr = zget(k_zone[zindex]);
- } else {
- /* This will never work, so we might as well panic */
- panic("kget");
- }
- return(addr);
-}
-
-void
-kfree(data, size)
- vm_offset_t data;
- vm_size_t size;
-{
- register int zindex;
- register vm_size_t freesize;
-
- assert (kalloc_init_called);
-
- freesize = size;
- if (size < kalloc_max) {
- freesize = MINSIZE;
- zindex = first_k_zone;
- while (freesize < size) {
- freesize <<= 1;
- zindex++;
- }
- }
-
- if (freesize < kalloc_max) {
- zfree(k_zone[zindex], data);
- } else {
- kmem_free(kalloc_map, data, freesize);
- }
-}
diff --git a/kern/kalloc.h b/kern/kalloc.h
index a80f6db..1330b54 100644
--- a/kern/kalloc.h
+++ b/kern/kalloc.h
@@ -28,11 +28,11 @@
#define _KERN_KALLOC_H_
#include <mach/machine/vm_types.h>
+#include <vm/vm_types.h>
-#define MINSIZE 16
+extern vm_map_t kalloc_map;
extern vm_offset_t kalloc (vm_size_t size);
-extern vm_offset_t kget (vm_size_t size);
extern void kfree (vm_offset_t data, vm_size_t size);
extern void kalloc_init (void);
diff --git a/kern/list.h b/kern/list.h
new file mode 100644
index 0000000..de7d115
--- /dev/null
+++ b/kern/list.h
@@ -0,0 +1,349 @@
+/*
+ * Copyright (c) 2009, 2010 Richard Braun.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef _KERN_LIST_H
+#define _KERN_LIST_H
+
+#include <stddef.h>
+#include <sys/types.h>
+
+#define structof(ptr, type, member) \
+ ((type *)((char *)ptr - offsetof(type, member)))
+
+/*
+ * Structure used as both head and node.
+ *
+ * This implementation relies on using the same type for both heads and nodes.
+ *
+ * It is recommended to encode the use of struct list variables in their names,
+ * e.g. struct list free_list or struct list free_objects is a good hint for a
+ * list of free objects. A declaration like struct list free_node clearly
+ * indicates it is used as part of a node in the free list.
+ */
+struct list {
+ struct list *prev;
+ struct list *next;
+};
+
+/*
+ * Static list initializer.
+ */
+#define LIST_INITIALIZER(list) { &(list), &(list) }
+
+/*
+ * Initialize a list.
+ */
+static inline void list_init(struct list *list)
+{
+ list->prev = list;
+ list->next = list;
+}
+
+/*
+ * Initialize a list node.
+ *
+ * An entry is in no list when its node members point to NULL.
+ */
+static inline void list_node_init(struct list *node)
+{
+ node->prev = NULL;
+ node->next = NULL;
+}
+
+/*
+ * Return true if node is in no list.
+ */
+static inline int list_node_unlinked(const struct list *node)
+{
+ return node->prev == NULL;
+}
+
+/*
+ * Macro that evaluates to the address of the structure containing the
+ * given node based on the given type and member.
+ */
+#define list_entry(node, type, member) structof(node, type, member)
+
+/*
+ * Return the first node of a list.
+ */
+static inline struct list * list_first(const struct list *list)
+{
+ return list->next;
+}
+
+/*
+ * Return the last node of a list.
+ */
+static inline struct list * list_last(const struct list *list)
+{
+ return list->prev;
+}
+
+/*
+ * Return the node next to the given node.
+ */
+static inline struct list * list_next(const struct list *node)
+{
+ return node->next;
+}
+
+/*
+ * Return the node previous to the given node.
+ */
+static inline struct list * list_prev(const struct list *node)
+{
+ return node->prev;
+}
+
+/*
+ * Get the first entry of a list.
+ */
+#define list_first_entry(list, type, member) \
+ list_entry(list_first(list), type, member)
+
+/*
+ * Get the last entry of a list.
+ */
+#define list_last_entry(list, type, member) \
+ list_entry(list_last(list), type, member)
+
+/*
+ * Return true if node is after the last or before the first node of the list.
+ */
+static inline int list_end(const struct list *list, const struct list *node)
+{
+ return list == node;
+}
+
+/*
+ * Return true if list is empty.
+ */
+static inline int list_empty(const struct list *list)
+{
+ return list == list->next;
+}
+
+/*
+ * Return true if list contains exactly one node.
+ */
+static inline int list_singular(const struct list *list)
+{
+ return (list != list->next) && (list->next == list->prev);
+}
+
+/*
+ * Split list2 by moving its nodes up to (but not including) the given
+ * node into list1 (which can be in a stale state).
+ *
+ * If list2 is empty, or node is list2 or list2->next, nothing is done.
+ */
+static inline void list_split(struct list *list1, struct list *list2,
+ struct list *node)
+{
+ if (list_empty(list2) || (list2->next == node) || list_end(list2, node))
+ return;
+
+ list1->next = list2->next;
+ list1->next->prev = list1;
+
+ list1->prev = node->prev;
+ node->prev->next = list1;
+
+ list2->next = node;
+ node->prev = list2;
+}
+
+/*
+ * Append the nodes of list2 at the end of list1.
+ *
+ * After completion, list2 is stale.
+ */
+static inline void list_concat(struct list *list1, const struct list *list2)
+{
+ struct list *last1, *first2, *last2;
+
+ if (list_empty(list2))
+ return;
+
+ last1 = list1->prev;
+ first2 = list2->next;
+ last2 = list2->prev;
+
+ last1->next = first2;
+ first2->prev = last1;
+
+ last2->next = list1;
+ list1->prev = last2;
+}
+
+/*
+ * Set the new head of a list.
+ *
+ * This function is an optimized version of :
+ * list_init(&new_list);
+ * list_concat(&new_list, &old_list);
+ *
+ * After completion, old_head is stale.
+ */
+static inline void list_set_head(struct list *new_head,
+ const struct list *old_head)
+{
+ if (list_empty(old_head)) {
+ list_init(new_head);
+ return;
+ }
+
+ *new_head = *old_head;
+ new_head->next->prev = new_head;
+ new_head->prev->next = new_head;
+}
+
+/*
+ * Add a node between two nodes.
+ */
+static inline void list_add(struct list *prev, struct list *next,
+ struct list *node)
+{
+ next->prev = node;
+ node->next = next;
+
+ prev->next = node;
+ node->prev = prev;
+}
+
+/*
+ * Insert a node at the head of a list.
+ */
+static inline void list_insert(struct list *list, struct list *node)
+{
+ list_add(list, list->next, node);
+}
+
+/*
+ * Insert a node at the tail of a list.
+ */
+static inline void list_insert_tail(struct list *list, struct list *node)
+{
+ list_add(list->prev, list, node);
+}
+
+/*
+ * Insert a node before another node.
+ */
+static inline void list_insert_before(struct list *next, struct list *node)
+{
+ list_add(next->prev, next, node);
+}
+
+/*
+ * Insert a node after another node.
+ */
+static inline void list_insert_after(struct list *prev, struct list *node)
+{
+ list_add(prev, prev->next, node);
+}
+
+/*
+ * Remove a node from a list.
+ *
+ * After completion, the node is stale.
+ */
+static inline void list_remove(struct list *node)
+{
+ node->prev->next = node->next;
+ node->next->prev = node->prev;
+}
+
+/*
+ * Forge a loop to process all nodes of a list.
+ *
+ * The node must not be altered during the loop.
+ */
+#define list_for_each(list, node) \
+for (node = list_first(list); \
+ !list_end(list, node); \
+ node = list_next(node))
+
+/*
+ * Forge a loop to process all nodes of a list.
+ *
+ * The node may safely be removed from the list while being processed.
+ */
+#define list_for_each_safe(list, node, tmp) \
+for (node = list_first(list), tmp = list_next(node); \
+ !list_end(list, node); \
+ node = tmp, tmp = list_next(node))
+
+/*
+ * Version of list_for_each() that processes nodes backward.
+ */
+#define list_for_each_reverse(list, node) \
+for (node = list_last(list); \
+ !list_end(list, node); \
+ node = list_prev(node))
+
+/*
+ * Version of list_for_each_safe() that processes nodes backward.
+ */
+#define list_for_each_reverse_safe(list, node, tmp) \
+for (node = list_last(list), tmp = list_prev(node); \
+ !list_end(list, node); \
+ node = tmp, tmp = list_prev(node))
+
+/*
+ * Forge a loop to process all entries of a list.
+ *
+ * The entry node must not be altered during the loop.
+ */
+#define list_for_each_entry(list, entry, member) \
+for (entry = list_entry(list_first(list), typeof(*entry), member); \
+ !list_end(list, &entry->member); \
+ entry = list_entry(list_next(&entry->member), typeof(*entry), \
+ member))
+
+/*
+ * Forge a loop to process all entries of a list.
+ *
+ * The entry may safely be removed from the list while being processed.
+ */
+#define list_for_each_entry_safe(list, entry, tmp, member) \
+for (entry = list_entry(list_first(list), typeof(*entry), member), \
+ tmp = list_entry(list_next(&entry->member), typeof(*entry), \
+ member); \
+ !list_end(list, &entry->member); \
+ entry = tmp, tmp = list_entry(list_next(&entry->member), \
+ typeof(*entry), member))
+
+/*
+ * Version of list_for_each_entry() that processes entries backward.
+ */
+#define list_for_each_entry_reverse(list, entry, member) \
+for (entry = list_entry(list_last(list), typeof(*entry), member); \
+ !list_end(list, &entry->member); \
+ entry = list_entry(list_prev(&entry->member), typeof(*entry), \
+ member))
+
+/*
+ * Version of list_for_each_entry_safe() that processes entries backward.
+ */
+#define list_for_each_entry_reverse_safe(list, entry, tmp, member) \
+for (entry = list_entry(list_last(list), typeof(*entry), member), \
+ tmp = list_entry(list_prev(&entry->member), typeof(*entry), \
+ member); \
+ !list_end(list, &entry->member); \
+ entry = tmp, tmp = list_entry(list_prev(&entry->member), \
+ typeof(*entry), member))
+
+#endif /* _KERN_LIST_H */
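As a usage sketch for the header above (illustration only, not part of this change; the item structure and function names are hypothetical), the embedding convention recommended in the comments translates to:

#include <kern/list.h>

struct item {
    struct list node;       /* linkage embedded in the object */
    int value;
};

static void item_example(void)
{
    static struct list item_list = LIST_INITIALIZER(item_list);
    static struct item a, b;
    struct item *entry;

    list_insert_tail(&item_list, &a.node);
    list_insert_tail(&item_list, &b.node);

    /* list_for_each_entry() recovers the containing struct via structof(). */
    list_for_each_entry(&item_list, entry, node)
        entry->value = 0;
}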
diff --git a/kern/mach_clock.c b/kern/mach_clock.c
index 4ba7c08..edf87f0 100644
--- a/kern/mach_clock.c
+++ b/kern/mach_clock.c
@@ -47,7 +47,6 @@
#include <kern/host.h>
#include <kern/lock.h>
#include <kern/mach_clock.h>
-#include <kern/mach_param.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
@@ -514,7 +513,7 @@ int timeclose()
/*
* Compatibility for device drivers.
* New code should use set_timeout/reset_timeout and private timers.
- * These code can't use a zone to allocate timers, because
+ * This code can't use a cache to allocate timers, because
* it can be called from interrupt handlers.
*/
diff --git a/kern/mach_param.h b/kern/mach_param.h
deleted file mode 100644
index 10376d8..0000000
--- a/kern/mach_param.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Mach Operating System
- * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University.
- * Copyright (c) 1993,1994 The University of Utah and
- * the Computer Systems Laboratory (CSL).
- * All rights reserved.
- *
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
- * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
- * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
- * THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- * File: kern/mach_param.h
- * Author: Avadis Tevanian, Jr., Michael Wayne Young
- * Date: 1986
- *
- * Mach system sizing parameters
- *
- */
-
-#ifndef _KERN_MACH_PARAM_H_
-#define _KERN_MACH_PARAM_H_
-
-#define THREAD_MAX 1024 /* Max number of threads */
-#define THREAD_CHUNK 64 /* Allocation chunk */
-
-#define TASK_MAX 1024 /* Max number of tasks */
-#define TASK_CHUNK 64 /* Allocation chunk */
-
-#define ACT_MAX 1024 /* Max number of acts */
-#define ACT_CHUNK 64 /* Allocation chunk */
-
-#define ACTPOOL_MAX 1024
-#define ACTPOOL_CHUNK 64
-
-#define PORT_MAX ((TASK_MAX * 3 + THREAD_MAX) /* kernel */ \
- + (THREAD_MAX * 2) /* user */ \
- + 40000) /* slop for objects */
- /* Number of ports, system-wide */
-
-#define SET_MAX (TASK_MAX + THREAD_MAX + 200)
- /* Max number of port sets */
-
-#define ITE_MAX (1 << 16) /* Max number of splay tree entries */
-
-#define SPACE_MAX (TASK_MAX + 5) /* Max number of IPC spaces */
-
-#define IMAR_MAX (1 << 10) /* Max number of msg-accepted reqs */
-
-#endif /* _KERN_MACH_PARAM_H_ */
diff --git a/kern/macro_help.h b/kern/macro_help.h
index e13b01d..a3d156b 100644
--- a/kern/macro_help.h
+++ b/kern/macro_help.h
@@ -45,8 +45,8 @@ boolean_t ALWAYS;
#define ALWAYS TRUE
#endif /* lint */
-#define MACRO_BEGIN do {
-#define MACRO_END } while (NEVER)
+#define MACRO_BEGIN ({
+#define MACRO_END })
#define MACRO_RETURN if (ALWAYS) return
diff --git a/kern/pc_sample.c b/kern/pc_sample.c
index c82707b..2cec907 100644
--- a/kern/pc_sample.c
+++ b/kern/pc_sample.c
@@ -31,6 +31,7 @@
#include <mach/std_types.h> /* pointer_t */
#include <mach/pc_sample.h>
#include <machine/trap.h>
+#include <kern/kalloc.h>
#include <kern/host.h>
#include <kern/thread.h>
#include <kern/pc_sample.h>
diff --git a/kern/priority.c b/kern/priority.c
index feddd8e..17541b8 100644
--- a/kern/priority.c
+++ b/kern/priority.c
@@ -39,7 +39,6 @@
#include <mach/machine.h>
#include <kern/host.h>
#include <kern/mach_clock.h>
-#include <kern/mach_param.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/thread.h>
diff --git a/kern/processor.c b/kern/processor.c
index 718ff3a..1986860 100644
--- a/kern/processor.c
+++ b/kern/processor.c
@@ -35,6 +35,7 @@
#include <mach/vm_param.h>
#include <kern/cpu_number.h>
#include <kern/debug.h>
+#include <kern/kalloc.h>
#include <kern/lock.h>
#include <kern/host.h>
#include <kern/ipc_tt.h>
@@ -46,8 +47,8 @@
#include <ipc/ipc_port.h>
#if MACH_HOST
-#include <kern/zalloc.h>
-zone_t pset_zone;
+#include <kern/slab.h>
+struct kmem_cache pset_cache;
#endif /* MACH_HOST */
@@ -112,10 +113,10 @@ void pset_sys_init(void)
register processor_t processor;
/*
- * Allocate the zone for processor sets.
+ * Allocate the cache for processor sets.
*/
- pset_zone = zinit(sizeof(struct processor_set), 0, 128*PAGE_SIZE,
- PAGE_SIZE, 0, "processor sets");
+ kmem_cache_init(&pset_cache, "processor_set",
+ sizeof(struct processor_set), 0, NULL, NULL, NULL, 0);
/*
* Give each processor a control port.
@@ -394,7 +395,7 @@ void pset_deallocate(
/*
* That's it, free data structure.
*/
- zfree(pset_zone, (vm_offset_t)pset);
+ kmem_cache_free(&pset_cache, (vm_offset_t)pset);
#endif /* MACH_HOST */
}
@@ -538,7 +539,7 @@ processor_set_create(
if (host == HOST_NULL)
return KERN_INVALID_ARGUMENT;
- pset = (processor_set_t) zalloc(pset_zone);
+ pset = (processor_set_t) kmem_cache_alloc(&pset_cache);
pset_init(pset);
pset_reference(pset); /* for new_set out argument */
pset_reference(pset); /* for new_name out argument */
diff --git a/kern/rbtree.c b/kern/rbtree.c
new file mode 100644
index 0000000..1c04c5c
--- /dev/null
+++ b/kern/rbtree.c
@@ -0,0 +1,478 @@
+/*
+ * Copyright (c) 2010 Richard Braun.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <kern/assert.h>
+#include <kern/rbtree.h>
+#include <kern/rbtree_i.h>
+#include <sys/types.h>
+
+#define unlikely(expr) __builtin_expect(!!(expr), 0)
+
+/*
+ * Return the index of a node in the children array of its parent.
+ *
+ * The parent parameter must not be null, and must be the parent of the
+ * given node.
+ */
+static inline int rbtree_index(const struct rbtree_node *node,
+ const struct rbtree_node *parent)
+{
+ assert(parent != NULL);
+ assert((node == NULL) || (rbtree_parent(node) == parent));
+
+ if (parent->children[RBTREE_LEFT] == node)
+ return RBTREE_LEFT;
+
+ assert(parent->children[RBTREE_RIGHT] == node);
+
+ return RBTREE_RIGHT;
+}
+
+/*
+ * Return the color of a node.
+ */
+static inline int rbtree_color(const struct rbtree_node *node)
+{
+ return node->parent & RBTREE_COLOR_MASK;
+}
+
+/*
+ * Return true if the node is red.
+ */
+static inline int rbtree_is_red(const struct rbtree_node *node)
+{
+ return rbtree_color(node) == RBTREE_COLOR_RED;
+}
+
+/*
+ * Return true if the node is black.
+ */
+static inline int rbtree_is_black(const struct rbtree_node *node)
+{
+ return rbtree_color(node) == RBTREE_COLOR_BLACK;
+}
+
+/*
+ * Set the parent of a node, retaining its current color.
+ */
+static inline void rbtree_set_parent(struct rbtree_node *node,
+ struct rbtree_node *parent)
+{
+ assert(rbtree_check_alignment(node));
+ assert(rbtree_check_alignment(parent));
+
+ node->parent = (unsigned long)parent | (node->parent & RBTREE_COLOR_MASK);
+}
+
+/*
+ * Set the color of a node, retaining its current parent.
+ */
+static inline void rbtree_set_color(struct rbtree_node *node, int color)
+{
+ assert((color & ~RBTREE_COLOR_MASK) == 0);
+ node->parent = (node->parent & RBTREE_PARENT_MASK) | color;
+}
+
+/*
+ * Set the color of a node to red, retaining its current parent.
+ */
+static inline void rbtree_set_red(struct rbtree_node *node)
+{
+ rbtree_set_color(node, RBTREE_COLOR_RED);
+}
+
+/*
+ * Set the color of a node to black, retaining its current parent.
+ */
+static inline void rbtree_set_black(struct rbtree_node *node)
+{
+ rbtree_set_color(node, RBTREE_COLOR_BLACK);
+}
+
+/*
+ * Perform a tree rotation, rooted at the given node.
+ *
+ * The direction parameter defines the rotation direction and is either
+ * RBTREE_LEFT or RBTREE_RIGHT.
+ */
+static void rbtree_rotate(struct rbtree *tree, struct rbtree_node *node,
+ int direction)
+{
+ struct rbtree_node *parent, *rnode;
+ int left, right;
+
+ left = direction;
+ right = 1 - left;
+ parent = rbtree_parent(node);
+ rnode = node->children[right];
+
+ node->children[right] = rnode->children[left];
+
+ if (rnode->children[left] != NULL)
+ rbtree_set_parent(rnode->children[left], node);
+
+ rnode->children[left] = node;
+ rbtree_set_parent(rnode, parent);
+
+ if (unlikely(parent == NULL))
+ tree->root = rnode;
+ else
+ parent->children[rbtree_index(node, parent)] = rnode;
+
+ rbtree_set_parent(node, rnode);
+}
+
+void rbtree_insert_rebalance(struct rbtree *tree, struct rbtree_node *parent,
+ int index, struct rbtree_node *node)
+{
+ struct rbtree_node *grand_parent, *uncle, *tmp;
+ int left, right;
+
+ assert(rbtree_check_alignment(parent));
+ assert(rbtree_check_alignment(node));
+
+ node->parent = (unsigned long)parent | RBTREE_COLOR_RED;
+ node->children[RBTREE_LEFT] = NULL;
+ node->children[RBTREE_RIGHT] = NULL;
+
+ if (unlikely(parent == NULL))
+ tree->root = node;
+ else
+ parent->children[index] = node;
+
+ for (;;) {
+ if (parent == NULL) {
+ rbtree_set_black(node);
+ break;
+ }
+
+ if (rbtree_is_black(parent))
+ break;
+
+ grand_parent = rbtree_parent(parent);
+ assert(grand_parent != NULL);
+
+ left = rbtree_index(parent, grand_parent);
+ right = 1 - left;
+
+ uncle = grand_parent->children[right];
+
+ /*
+ * Case 1: uncle is red. Flip colors and repeat at grand parent.
+ */
+ if ((uncle != NULL) && rbtree_is_red(uncle)) {
+ rbtree_set_black(uncle);
+ rbtree_set_black(parent);
+ rbtree_set_red(grand_parent);
+ node = grand_parent;
+ parent = rbtree_parent(node);
+ continue;
+ }
+
+ /*
+ * Case 2: node is the right child of its parent. Rotate left at parent
+ * to reduce to case 3.
+ */
+ if (parent->children[right] == node) {
+ rbtree_rotate(tree, parent, left);
+ tmp = node;
+ node = parent;
+ parent = tmp;
+ }
+
+ /*
+ * Case 3: node is the left child of its parent. Handle colors, rotate
+ * right at grand parent, and leave.
+ */
+ rbtree_set_black(parent);
+ rbtree_set_red(grand_parent);
+ rbtree_rotate(tree, grand_parent, right);
+ break;
+ }
+
+ assert(rbtree_is_black(tree->root));
+}
+
+void rbtree_remove(struct rbtree *tree, struct rbtree_node *node)
+{
+ struct rbtree_node *child, *parent, *brother;
+ int color, left, right;
+
+ if (node->children[RBTREE_LEFT] == NULL)
+ child = node->children[RBTREE_RIGHT];
+ else if (node->children[RBTREE_RIGHT] == NULL)
+ child = node->children[RBTREE_LEFT];
+ else {
+ struct rbtree_node *successor;
+
+ /*
+ * Two-children case: replace the node with its successor.
+ */
+
+ successor = node->children[RBTREE_RIGHT];
+
+ while (successor->children[RBTREE_LEFT] != NULL)
+ successor = successor->children[RBTREE_LEFT];
+
+ color = rbtree_color(successor);
+ child = successor->children[RBTREE_RIGHT];
+ parent = rbtree_parent(node);
+
+ if (unlikely(parent == NULL))
+ tree->root = successor;
+ else
+ parent->children[rbtree_index(node, parent)] = successor;
+
+ parent = rbtree_parent(successor);
+
+ /*
+ * Set parent directly to keep the original color.
+ */
+ successor->parent = node->parent;
+ successor->children[RBTREE_LEFT] = node->children[RBTREE_LEFT];
+ rbtree_set_parent(successor->children[RBTREE_LEFT], successor);
+
+ if (node == parent)
+ parent = successor;
+ else {
+ successor->children[RBTREE_RIGHT] = node->children[RBTREE_RIGHT];
+ rbtree_set_parent(successor->children[RBTREE_RIGHT], successor);
+ parent->children[RBTREE_LEFT] = child;
+
+ if (child != NULL)
+ rbtree_set_parent(child, parent);
+ }
+
+ goto update_color;
+ }
+
+ /*
+ * Node has at most one child.
+ */
+
+ color = rbtree_color(node);
+ parent = rbtree_parent(node);
+
+ if (child != NULL)
+ rbtree_set_parent(child, parent);
+
+ if (unlikely(parent == NULL))
+ tree->root = child;
+ else
+ parent->children[rbtree_index(node, parent)] = child;
+
+ /*
+ * The node has been removed, update the colors. The child pointer can
+ * be null, in which case it is considered a black leaf.
+ */
+update_color:
+ if (color == RBTREE_COLOR_RED)
+ return;
+
+ for (;;) {
+ if ((child != NULL) && rbtree_is_red(child)) {
+ rbtree_set_black(child);
+ break;
+ }
+
+ if (parent == NULL)
+ break;
+
+ left = rbtree_index(child, parent);
+ right = 1 - left;
+
+ brother = parent->children[right];
+
+ /*
+ * Case 1: brother is red. Recolor and rotate left at parent so that
+ * brother becomes black.
+ */
+ if (rbtree_is_red(brother)) {
+ rbtree_set_black(brother);
+ rbtree_set_red(parent);
+ rbtree_rotate(tree, parent, left);
+ brother = parent->children[right];
+ }
+
+ /*
+ * Case 2: brother has no red child. Recolor and repeat at parent.
+ */
+ if (((brother->children[RBTREE_LEFT] == NULL)
+ || rbtree_is_black(brother->children[RBTREE_LEFT]))
+ && ((brother->children[RBTREE_RIGHT] == NULL)
+ || rbtree_is_black(brother->children[RBTREE_RIGHT]))) {
+ rbtree_set_red(brother);
+ child = parent;
+ parent = rbtree_parent(child);
+ continue;
+ }
+
+ /*
+ * Case 3: brother's right child is black. Recolor and rotate right
+ * at brother to reduce to case 4.
+ */
+ if ((brother->children[right] == NULL)
+ || rbtree_is_black(brother->children[right])) {
+ rbtree_set_black(brother->children[left]);
+ rbtree_set_red(brother);
+ rbtree_rotate(tree, brother, right);
+ brother = parent->children[right];
+ }
+
+ /*
+ * Case 4: brother's left child is black. Exchange parent and brother
+ * colors (we already know brother is black), set brother's right child
+ * black, rotate left at parent and leave.
+ */
+ rbtree_set_color(brother, rbtree_color(parent));
+ rbtree_set_black(parent);
+ rbtree_set_black(brother->children[right]);
+ rbtree_rotate(tree, parent, left);
+ break;
+ }
+
+ assert((tree->root == NULL) || rbtree_is_black(tree->root));
+}
+
+struct rbtree_node * rbtree_nearest(struct rbtree_node *parent, int index,
+ int direction)
+{
+ assert(rbtree_check_index(direction));
+
+ if (parent == NULL)
+ return NULL;
+
+ assert(rbtree_check_index(index));
+
+ if (index != direction)
+ return parent;
+
+ return rbtree_walk(parent, direction);
+}
+
+struct rbtree_node * rbtree_firstlast(const struct rbtree *tree, int direction)
+{
+ struct rbtree_node *prev, *cur;
+
+ assert(rbtree_check_index(direction));
+
+ prev = NULL;
+
+ for (cur = tree->root; cur != NULL; cur = cur->children[direction])
+ prev = cur;
+
+ return prev;
+}
+
+struct rbtree_node * rbtree_walk(struct rbtree_node *node, int direction)
+{
+ int left, right;
+
+ assert(rbtree_check_index(direction));
+
+ left = direction;
+ right = 1 - left;
+
+ if (node == NULL)
+ return NULL;
+
+ if (node->children[left] != NULL) {
+ node = node->children[left];
+
+ while (node->children[right] != NULL)
+ node = node->children[right];
+ } else {
+ struct rbtree_node *parent;
+ int index;
+
+ for (;;) {
+ parent = rbtree_parent(node);
+
+ if (parent == NULL)
+ return NULL;
+
+ index = rbtree_index(node, parent);
+ node = parent;
+
+ if (index == right)
+ break;
+ }
+ }
+
+ return node;
+}
+
+/*
+ * Return the left-most deepest child node of the given node.
+ */
+static struct rbtree_node * rbtree_find_deepest(struct rbtree_node *node)
+{
+ struct rbtree_node *parent;
+
+ assert(node != NULL);
+
+ for (;;) {
+ parent = node;
+ node = node->children[RBTREE_LEFT];
+
+ if (node == NULL) {
+ node = parent->children[RBTREE_RIGHT];
+
+ if (node == NULL)
+ return parent;
+ }
+ }
+}
+
+struct rbtree_node * rbtree_postwalk_deepest(const struct rbtree *tree)
+{
+ struct rbtree_node *node;
+
+ node = tree->root;
+
+ if (node == NULL)
+ return NULL;
+
+ return rbtree_find_deepest(node);
+}
+
+struct rbtree_node * rbtree_postwalk_unlink(struct rbtree_node *node)
+{
+ struct rbtree_node *parent;
+ int index;
+
+ if (node == NULL)
+ return NULL;
+
+ assert(node->children[RBTREE_LEFT] == NULL);
+ assert(node->children[RBTREE_RIGHT] == NULL);
+
+ parent = rbtree_parent(node);
+
+ if (parent == NULL)
+ return NULL;
+
+ index = rbtree_index(node, parent);
+ parent->children[index] = NULL;
+ node = parent->children[RBTREE_RIGHT];
+
+ if (node == NULL)
+ return parent;
+
+ return rbtree_find_deepest(node);
+}
diff --git a/kern/rbtree.h b/kern/rbtree.h
new file mode 100644
index 0000000..b6d62bf
--- /dev/null
+++ b/kern/rbtree.h
@@ -0,0 +1,298 @@
+/*
+ * Copyright (c) 2010, 2011 Richard Braun.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef _KERN_RBTREE_H
+#define _KERN_RBTREE_H
+
+#include <stddef.h>
+#include <kern/assert.h>
+#include <kern/macro_help.h>
+#include <kern/rbtree.h>
+#include <sys/types.h>
+
+#define structof(ptr, type, member) \
+ ((type *)((char *)ptr - offsetof(type, member)))
+
+/*
+ * Indexes of the left and right nodes in the children array of a node.
+ */
+#define RBTREE_LEFT 0
+#define RBTREE_RIGHT 1
+
+/*
+ * Red-black node.
+ */
+struct rbtree_node;
+
+/*
+ * Red-black tree.
+ */
+struct rbtree;
+
+/*
+ * Static tree initializer.
+ */
+#define RBTREE_INITIALIZER { NULL }
+
+#include "rbtree_i.h"
+
+/*
+ * Initialize a tree.
+ */
+static inline void rbtree_init(struct rbtree *tree)
+{
+ tree->root = NULL;
+}
+
+/*
+ * Initialize a node.
+ *
+ * A node is in no tree when its parent points to itself.
+ */
+static inline void rbtree_node_init(struct rbtree_node *node)
+{
+ assert(rbtree_check_alignment(node));
+
+ node->parent = (unsigned long)node | RBTREE_COLOR_RED;
+ node->children[RBTREE_LEFT] = NULL;
+ node->children[RBTREE_RIGHT] = NULL;
+}
+
+/*
+ * Return true if node is in no tree.
+ */
+static inline int rbtree_node_unlinked(const struct rbtree_node *node)
+{
+ return rbtree_parent(node) == node;
+}
+
+/*
+ * Macro that evaluates to the address of the structure containing the
+ * given node based on the given type and member.
+ */
+#define rbtree_entry(node, type, member) structof(node, type, member)
+
+/*
+ * Return true if tree is empty.
+ */
+static inline int rbtree_empty(const struct rbtree *tree)
+{
+ return tree->root == NULL;
+}
+
+/*
+ * Look up a node in a tree.
+ *
+ * Note that implementing the lookup algorithm as a macro gives two benefits:
+ * First, it avoids the overhead of a callback function. Next, the type of the
+ * cmp_fn parameter isn't rigid. The only guarantee offered by this
+ * implementation is that the key parameter is the first parameter given to
+ * cmp_fn. This way, users can pass only the value they need for comparison
+ * instead of e.g. allocating a full structure on the stack.
+ *
+ * See rbtree_insert().
+ */
+#define rbtree_lookup(tree, key, cmp_fn) \
+MACRO_BEGIN \
+ struct rbtree_node *cur; \
+ int diff; \
+ \
+ cur = (tree)->root; \
+ \
+ while (cur != NULL) { \
+ diff = cmp_fn(key, cur); \
+ \
+ if (diff == 0) \
+ break; \
+ \
+ cur = cur->children[rbtree_d2i(diff)]; \
+ } \
+ \
+ cur; \
+MACRO_END
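For illustration (not part of this change), a comparison function suitable for rbtree_lookup() can take just the key as its first argument, in the spirit of kmem_slab_cmp_lookup() in kern/slab.c below; struct myobj and its members are hypothetical:

struct myobj {
    struct rbtree_node tree_node;   /* linkage embedded in the object */
    void *addr;                     /* key */
};

static inline int myobj_cmp_lookup(const void *addr,
                                   const struct rbtree_node *node)
{
    struct myobj *obj = rbtree_entry(node, struct myobj, tree_node);

    if (addr == obj->addr)
        return 0;

    return (addr < obj->addr) ? -1 : 1;
}

/* node = rbtree_lookup(&tree, key_addr, myobj_cmp_lookup); */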
+
+/*
+ * Look up a node or one of its nearest nodes in a tree.
+ *
+ * This macro essentially acts as rbtree_lookup() but if no entry matched
+ * the key, an additional step is performed to obtain the next or previous
+ * node, depending on the direction (left or right).
+ *
+ * The constraints that apply to the key parameter are the same as for
+ * rbtree_lookup().
+ */
+#define rbtree_lookup_nearest(tree, key, cmp_fn, dir) \
+MACRO_BEGIN \
+ struct rbtree_node *cur, *prev; \
+ int diff, index; \
+ \
+ prev = NULL; \
+ index = -1; \
+ cur = (tree)->root; \
+ \
+ while (cur != NULL) { \
+ diff = cmp_fn(key, cur); \
+ \
+ if (diff == 0) \
+ break; \
+ \
+ prev = cur; \
+ index = rbtree_d2i(diff); \
+ cur = cur->children[index]; \
+ } \
+ \
+ if (cur == NULL) \
+ cur = rbtree_nearest(prev, index, dir); \
+ \
+ cur; \
+MACRO_END
+
+/*
+ * Insert a node in a tree.
+ *
+ * This macro performs a standard lookup to obtain the insertion point of
+ * the given node in the tree (it is assumed that the inserted node never
+ * compares equal to any other entry in the tree) and links the node. It
+ * then It then checks red-black rules violations, and rebalances the tree
+ * if necessary.
+ *
+ * Unlike rbtree_lookup(), the cmp_fn parameter must compare two complete
+ * entries, so it is suggested to use two different comparison inline
+ * functions, such as myobj_cmp_lookup() and myobj_cmp_insert(). There is no
+ * guarantee about the order of the nodes given to the comparison function.
+ *
+ * See rbtree_lookup().
+ */
+#define rbtree_insert(tree, node, cmp_fn) \
+MACRO_BEGIN \
+ struct rbtree_node *cur, *prev; \
+ int diff, index; \
+ \
+ prev = NULL; \
+ index = -1; \
+ cur = (tree)->root; \
+ \
+ while (cur != NULL) { \
+ diff = cmp_fn(node, cur); \
+ assert(diff != 0); \
+ prev = cur; \
+ index = rbtree_d2i(diff); \
+ cur = cur->children[index]; \
+ } \
+ \
+ rbtree_insert_rebalance(tree, prev, index, node); \
+MACRO_END
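Continuing the hypothetical myobj sketch above (illustration only), the insertion comparison function compares two complete entries and can simply reuse the lookup one, just as kmem_slab_cmp_insert() does in kern/slab.c below:

static inline int myobj_cmp_insert(const struct rbtree_node *a,
                                   const struct rbtree_node *b)
{
    struct myobj *obj = rbtree_entry(a, struct myobj, tree_node);

    return myobj_cmp_lookup(obj->addr, b);
}

/* rbtree_insert(&tree, &new_obj->tree_node, myobj_cmp_insert); */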
+
+/*
+ * Look up a node/slot pair in a tree.
+ *
+ * This macro essentially acts as rbtree_lookup() but in addition to a node,
+ * it also returns a slot, which identifies an insertion point in the tree.
+ * If the returned node is null, the slot can be used by rbtree_insert_slot()
+ * to insert without the overhead of an additional lookup. The slot is a
+ * simple unsigned long integer.
+ *
+ * The constraints that apply to the key parameter are the same as for
+ * rbtree_lookup().
+ */
+#define rbtree_lookup_slot(tree, key, cmp_fn, slot) \
+MACRO_BEGIN \
+ struct rbtree_node *cur, *prev; \
+ int diff, index; \
+ \
+ prev = NULL; \
+ index = 0; \
+ cur = (tree)->root; \
+ \
+ while (cur != NULL) { \
+ diff = cmp_fn(key, cur); \
+ \
+ if (diff == 0) \
+ break; \
+ \
+ prev = cur; \
+ index = rbtree_d2i(diff); \
+ cur = cur->children[index]; \
+ } \
+ \
+ (slot) = rbtree_slot(prev, index); \
+ cur; \
+MACRO_END
+
+/*
+ * Insert a node at an insertion point in a tree.
+ *
+ * This macro essentially acts as rbtree_insert() except that it doesn't
+ * obtain the insertion point with a standard lookup. The insertion point
+ * is obtained by calling rbtree_lookup_slot(). In addition, the new node
+ * must not compare equal to an existing node in the tree (i.e. the slot
+ * must denote a null node).
+ */
+#define rbtree_insert_slot(tree, slot, node) \
+MACRO_BEGIN \
+ struct rbtree_node *parent; \
+ int index; \
+ \
+ parent = rbtree_slot_parent(slot); \
+ index = rbtree_slot_index(slot); \
+ rbtree_insert_rebalance(tree, parent, index, node); \
+MACRO_END
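A sketch of the combined slot pattern (illustration only, reusing the hypothetical myobj definitions above): one traversal yields both the matching node, if any, and a slot that can be handed directly to rbtree_insert_slot(), avoiding a second lookup.

static void myobj_insert_unique(struct rbtree *tree, struct myobj *obj)
{
    struct rbtree_node *node;
    unsigned long slot;

    node = rbtree_lookup_slot(tree, obj->addr, myobj_cmp_lookup, slot);

    if (node == NULL)
        rbtree_insert_slot(tree, slot, &obj->tree_node);

    /* Otherwise an entry with the same key already exists. */
}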
+
+/*
+ * Remove a node from a tree.
+ *
+ * After completion, the node is stale.
+ */
+void rbtree_remove(struct rbtree *tree, struct rbtree_node *node);
+
+/*
+ * Return the first node of a tree.
+ */
+#define rbtree_first(tree) rbtree_firstlast(tree, RBTREE_LEFT)
+
+/*
+ * Return the last node of a tree.
+ */
+#define rbtree_last(tree) rbtree_firstlast(tree, RBTREE_RIGHT)
+
+/*
+ * Return the node previous to the given node.
+ */
+#define rbtree_prev(node) rbtree_walk(node, RBTREE_LEFT)
+
+/*
+ * Return the node next to the given node.
+ */
+#define rbtree_next(node) rbtree_walk(node, RBTREE_RIGHT)
+
+/*
+ * Forge a loop to process all nodes of a tree, removing them when visited.
+ *
+ * This macro can only be used to destroy a tree, so that the resources used
+ * by the entries can be released by the user. It basically removes all nodes
+ * without doing any color checking.
+ *
+ * After completion, all nodes and the tree root member are stale.
+ */
+#define rbtree_for_each_remove(tree, node, tmp) \
+for (node = rbtree_postwalk_deepest(tree), \
+ tmp = rbtree_postwalk_unlink(node); \
+ node != NULL; \
+ node = tmp, tmp = rbtree_postwalk_unlink(node)) \
+
+#endif /* _KERN_RBTREE_H */
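To make the destruction loop above concrete (illustration only; myobj and myobj_release() are hypothetical), a tree teardown typically looks like this:

static void myobj_tree_destroy(struct rbtree *tree)
{
    struct rbtree_node *node, *tmp;
    struct myobj *obj;

    /* Postorder traversal; no rebalancing, the tree is left stale. */
    rbtree_for_each_remove(tree, node, tmp) {
        obj = rbtree_entry(node, struct myobj, tree_node);
        myobj_release(obj);             /* hypothetical resource release */
    }
}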
diff --git a/kern/rbtree_i.h b/kern/rbtree_i.h
new file mode 100644
index 0000000..9befc92
--- /dev/null
+++ b/kern/rbtree_i.h
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2010, 2011 Richard Braun.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef _KERN_RBTREE_I_H
+#define _KERN_RBTREE_I_H
+
+#include <kern/assert.h>
+
+/*
+ * Red-black node structure.
+ *
+ * To reduce the number of branches and the instruction cache footprint,
+ * the left and right child pointers are stored in an array, and the symmetry
+ * of most tree operations is exploited by using left/right variables when
+ * referring to children.
+ *
+ * In addition, this implementation assumes that all nodes are 4-byte aligned,
+ * so that the least significant bit of the parent member can be used to store
+ * the color of the node. This is true for all modern 32-bit and 64-bit
+ * architectures, as long as the nodes aren't embedded in structures with
+ * special alignment constraints such as member packing.
+ */
+struct rbtree_node {
+ unsigned long parent;
+ struct rbtree_node *children[2];
+};
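A worked illustration of the packing described above (the address is chosen for the example only):

/*
 * Example: a node allocated at address 0x1000 (4-byte aligned) and colored
 * red (RBTREE_COLOR_RED == 0) stores:
 *
 *   parent member:  0x1000 | RBTREE_COLOR_RED     -> 0x1000
 *   parent pointer: 0x1000 & RBTREE_PARENT_MASK   -> 0x1000
 *   color:          0x1000 & RBTREE_COLOR_MASK    -> 0 (red)
 *
 * Recoloring the node black only flips the least significant bit.
 */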
+
+/*
+ * Red-black tree structure.
+ */
+struct rbtree {
+ struct rbtree_node *root;
+};
+
+/*
+ * Masks applied on the parent member of a node to obtain either the
+ * color or the parent address.
+ */
+#define RBTREE_COLOR_MASK 0x1UL
+#define RBTREE_PARENT_MASK (~0x3UL)
+
+/*
+ * Node colors.
+ */
+#define RBTREE_COLOR_RED 0
+#define RBTREE_COLOR_BLACK 1
+
+/*
+ * Masks applied on slots to obtain either the child index or the parent
+ * address.
+ */
+#define RBTREE_SLOT_INDEX_MASK 0x1UL
+#define RBTREE_SLOT_PARENT_MASK (~RBTREE_SLOT_INDEX_MASK)
+
+/*
+ * Return true if the given pointer is suitably aligned.
+ */
+static inline int rbtree_check_alignment(const struct rbtree_node *node)
+{
+ return ((unsigned long)node & (~RBTREE_PARENT_MASK)) == 0;
+}
+
+/*
+ * Return true if the given index is a valid child index.
+ */
+static inline int rbtree_check_index(int index)
+{
+ return index == (index & 1);
+}
+
+/*
+ * Convert the result of a comparison into an index in the children array
+ * (0 or 1).
+ *
+ * This function is mostly used when looking up a node.
+ */
+static inline int rbtree_d2i(int diff)
+{
+ return !(diff <= 0);
+}
+
+/*
+ * Return the parent of a node.
+ */
+static inline struct rbtree_node * rbtree_parent(const struct rbtree_node *node)
+{
+ return (struct rbtree_node *)(node->parent & RBTREE_PARENT_MASK);
+}
+
+/*
+ * Translate an insertion point into a slot.
+ */
+static inline unsigned long rbtree_slot(struct rbtree_node *parent, int index)
+{
+ assert(rbtree_check_alignment(parent));
+ assert(rbtree_check_index(index));
+ return (unsigned long)parent | index;
+}
+
+/*
+ * Extract the parent address from a slot.
+ */
+static inline struct rbtree_node * rbtree_slot_parent(unsigned long slot)
+{
+ return (struct rbtree_node *)(slot & RBTREE_SLOT_PARENT_MASK);
+}
+
+/*
+ * Extract the index from a slot.
+ */
+static inline int rbtree_slot_index(unsigned long slot)
+{
+ return slot & RBTREE_SLOT_INDEX_MASK;
+}
+
+/*
+ * Insert a node in a tree, rebalancing it if necessary.
+ *
+ * The index parameter is the index in the children array of the parent where
+ * the new node is to be inserted. It is ignored if the parent is null.
+ *
+ * This function is intended to be used by the rbtree_insert() macro only.
+ */
+void rbtree_insert_rebalance(struct rbtree *tree, struct rbtree_node *parent,
+ int index, struct rbtree_node *node);
+
+/*
+ * Return the previous or next node relative to a location in a tree.
+ *
+ * The parent and index parameters define the location, which can be empty.
+ * The direction parameter is either RBTREE_LEFT (to obtain the previous
+ * node) or RBTREE_RIGHT (to obtain the next one).
+ */
+struct rbtree_node * rbtree_nearest(struct rbtree_node *parent, int index,
+ int direction);
+
+/*
+ * Return the first or last node of a tree.
+ *
+ * The direction parameter is either RBTREE_LEFT (to obtain the first node)
+ * or RBTREE_RIGHT (to obtain the last one).
+ */
+struct rbtree_node * rbtree_firstlast(const struct rbtree *tree, int direction);
+
+/*
+ * Return the node next to, or previous to the given node.
+ *
+ * The direction parameter is either RBTREE_LEFT (to obtain the previous node)
+ * or RBTREE_RIGHT (to obtain the next one).
+ */
+struct rbtree_node * rbtree_walk(struct rbtree_node *node, int direction);
+
+/*
+ * Return the left-most deepest node of a tree, which is the starting point of
+ * the postorder traversal performed by rbtree_for_each_remove().
+ */
+struct rbtree_node * rbtree_postwalk_deepest(const struct rbtree *tree);
+
+/*
+ * Unlink a node from its tree and return the next (right) node in postorder.
+ */
+struct rbtree_node * rbtree_postwalk_unlink(struct rbtree_node *node);
+
+#endif /* _KERN_RBTREE_I_H */
diff --git a/kern/server_loop.ch b/kern/server_loop.ch
index 1aa7edb..409e013 100644
--- a/kern/server_loop.ch
+++ b/kern/server_loop.ch
@@ -39,6 +39,7 @@
*/
#include <kern/debug.h>
+#include <kern/kalloc.h>
#include <mach/port.h>
#include <mach/message.h>
#include <vm/vm_kern.h> /* for kernel_map */
diff --git a/kern/slab.c b/kern/slab.c
new file mode 100644
index 0000000..38413e8
--- /dev/null
+++ b/kern/slab.c
@@ -0,0 +1,1576 @@
+/*
+ * Copyright (c) 2009, 2010, 2011 Richard Braun.
+ * Copyright (c) 2011 Maksym Planeta.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/*
+ * Object caching and general purpose memory allocator.
+ *
+ * This allocator is based on the paper "The Slab Allocator: An Object-Caching
+ * Kernel Memory Allocator" by Jeff Bonwick.
+ *
+ * It allows the allocation of objects (i.e. fixed-size typed buffers) from
+ * caches and is efficient in both space and time. This implementation follows
+ * many of the indications from the paper mentioned. The most notable
+ * differences are outlined below.
+ *
+ * The per-cache self-scaling hash table for buffer-to-bufctl conversion,
+ * described in 3.2.3 "Slab Layout for Large Objects", has been replaced by
+ * a red-black tree storing slabs, sorted by address. The use of a
+ * self-balancing tree for buffer-to-slab conversions provides a few advantages
+ * over a hash table. Unlike a hash table, a BST provides a "lookup nearest"
+ * operation, so obtaining the slab data (whether it is embedded in the slab or
+ * off slab) from a buffer address simply consists of a "lookup nearest towards
+ * 0" tree search. Storing slabs instead of buffers also considerably reduces
+ * the number of elements to retain. Finally, a self-balancing tree is a true
+ * self-scaling data structure, whereas a hash table requires periodic
+ * maintenance and complete resizing, which is expensive. The only drawback is
+ * that releasing a buffer to the slab layer takes logarithmic time instead of
+ * constant time. But as the data set size is kept reasonable (because slabs
+ * are stored instead of buffers) and because the CPU pool layer services most
+ * requests, avoiding many accesses to the slab layer, it is considered an
+ * acceptable tradeoff.
+ *
+ * This implementation uses per-cpu pools of objects, which service most
+ * allocation requests. These pools act as caches (but are named differently
+ * to avoid confusion with CPU caches) that reduce contention on multiprocessor
+ * systems. When a pool is empty and cannot provide an object, it is filled by
+ * transferring multiple objects from the slab layer. The symmetric case is
+ * handled likewise.
+ */
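For reference, the basic cache lifecycle as used by the call sites converted elsewhere in this patch (for instance the pset_cache in kern/processor.c above); the my_obj names are hypothetical and the sketch assumes <kern/slab.h>:

struct my_obj {
    int dummy;
};

static struct kmem_cache my_obj_cache;

static void my_obj_example(void)
{
    struct my_obj *obj;

    /* One-time setup: no constructor, default slab backend, no flags. */
    kmem_cache_init(&my_obj_cache, "my_obj", sizeof(struct my_obj),
                    0, NULL, NULL, NULL, 0);

    obj = (struct my_obj *) kmem_cache_alloc(&my_obj_cache);

    if (obj != NULL)
        kmem_cache_free(&my_obj_cache, (vm_offset_t) obj);
}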
+
+#include <string.h>
+#include <kern/assert.h>
+#include <kern/mach_clock.h>
+#include <kern/printf.h>
+#include <kern/slab.h>
+#include <kern/kalloc.h>
+#include <kern/cpu_number.h>
+#include <mach/vm_param.h>
+#include <mach/machine/vm_types.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_types.h>
+#include <sys/types.h>
+
+#ifdef MACH_DEBUG
+#include <mach_debug/slab_info.h>
+#endif
+
+/*
+ * Utility macros.
+ */
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#define P2ALIGNED(x, a) (((x) & ((a) - 1)) == 0)
+#define ISP2(x) P2ALIGNED(x, x)
+#define P2ALIGN(x, a) ((x) & -(a))
+#define P2ROUND(x, a) (-(-(x) & -(a)))
+#define P2END(x, a) (-(~(x) & -(a)))
+#define likely(expr) __builtin_expect(!!(expr), 1)
+#define unlikely(expr) __builtin_expect(!!(expr), 0)
+
+/*
+ * Minimum required alignment.
+ */
+#define KMEM_ALIGN_MIN 8
+
+/*
+ * Minimum number of buffers per slab.
+ *
+ * This value is ignored when the slab size exceeds a threshold.
+ */
+#define KMEM_MIN_BUFS_PER_SLAB 8
+
+/*
+ * Special slab size beyond which the minimum number of buffers per slab is
+ * ignored when computing the slab size of a cache.
+ */
+#define KMEM_SLAB_SIZE_THRESHOLD (8 * PAGE_SIZE)
+
+/*
+ * Special buffer size under which slab data is unconditionally allocated
+ * from its associated slab.
+ */
+#define KMEM_BUF_SIZE_THRESHOLD (PAGE_SIZE / 8)
+
+/*
+ * Time (in seconds) between two garbage collection operations.
+ */
+#define KMEM_GC_INTERVAL (1 * hz)
+
+/*
+ * The transfer size of a CPU pool is computed by dividing the pool size by
+ * this value.
+ */
+#define KMEM_CPU_POOL_TRANSFER_RATIO 2
+
+/*
+ * Redzone guard word.
+ */
+#ifdef __LP64__
+#if _HOST_BIG_ENDIAN
+#define KMEM_REDZONE_WORD 0xfeedfacefeedfaceUL
+#else /* _HOST_BIG_ENDIAN */
+#define KMEM_REDZONE_WORD 0xcefaedfecefaedfeUL
+#endif /* _HOST_BIG_ENDIAN */
+#else /* __LP64__ */
+#if _HOST_BIG_ENDIAN
+#define KMEM_REDZONE_WORD 0xfeedfaceUL
+#else /* _HOST_BIG_ENDIAN */
+#define KMEM_REDZONE_WORD 0xcefaedfeUL
+#endif /* _HOST_BIG_ENDIAN */
+#endif /* __LP64__ */
+
+/*
+ * Redzone byte for padding.
+ */
+#define KMEM_REDZONE_BYTE 0xbb
+
+/*
+ * Size of the VM submap from which default backend functions allocate.
+ */
+#define KMEM_MAP_SIZE (64 * 1024 * 1024)
+
+/*
+ * Shift for the first kalloc cache size.
+ */
+#define KALLOC_FIRST_SHIFT 5
+
+/*
+ * Number of caches backing general purpose allocations.
+ */
+#define KALLOC_NR_CACHES 13
+
+/*
+ * Size of the VM submap for general purpose allocations.
+ */
+#define KALLOC_MAP_SIZE (64 * 1024 * 1024)
+
+/*
+ * Values the buftag state member can take.
+ */
+#ifdef __LP64__
+#if _HOST_BIG_ENDIAN
+#define KMEM_BUFTAG_ALLOC 0xa110c8eda110c8edUL
+#define KMEM_BUFTAG_FREE 0xf4eeb10cf4eeb10cUL
+#else /* _HOST_BIG_ENDIAN */
+#define KMEM_BUFTAG_ALLOC 0xedc810a1edc810a1UL
+#define KMEM_BUFTAG_FREE 0x0cb1eef40cb1eef4UL
+#endif /* _HOST_BIG_ENDIAN */
+#else /* __LP64__ */
+#if _HOST_BIG_ENDIAN
+#define KMEM_BUFTAG_ALLOC 0xa110c8edUL
+#define KMEM_BUFTAG_FREE 0xf4eeb10cUL
+#else /* _HOST_BIG_ENDIAN */
+#define KMEM_BUFTAG_ALLOC 0xedc810a1UL
+#define KMEM_BUFTAG_FREE 0x0cb1eef4UL
+#endif /* _HOST_BIG_ENDIAN */
+#endif /* __LP64__ */
+
+/*
+ * Free and uninitialized patterns.
+ *
+ * These values are unconditionally 64-bit wide since buffers are at least
+ * 8-byte aligned.
+ */
+#if _HOST_BIG_ENDIAN
+#define KMEM_FREE_PATTERN 0xdeadbeefdeadbeefULL
+#define KMEM_UNINIT_PATTERN 0xbaddcafebaddcafeULL
+#else /* _HOST_BIG_ENDIAN */
+#define KMEM_FREE_PATTERN 0xefbeaddeefbeaddeULL
+#define KMEM_UNINIT_PATTERN 0xfecaddbafecaddbaULL
+#endif /* _HOST_BIG_ENDIAN */
+
+/*
+ * Cache flags.
+ *
+ * The flags don't change once set and can be tested without locking.
+ */
+#define KMEM_CF_NO_CPU_POOL 0x01 /* CPU pool layer disabled */
+#define KMEM_CF_SLAB_EXTERNAL 0x02 /* Slab data is off slab */
+#define KMEM_CF_NO_RECLAIM 0x04 /* Slabs are not reclaimable */
+#define KMEM_CF_VERIFY 0x08 /* Debugging facilities enabled */
+#define KMEM_CF_DIRECT 0x10 /* No buf-to-slab tree lookup */
+
+/*
+ * Options for kmem_cache_alloc_verify().
+ */
+#define KMEM_AV_NOCONSTRUCT 0
+#define KMEM_AV_CONSTRUCT 1
+
+/*
+ * Error codes for kmem_cache_error().
+ */
+#define KMEM_ERR_INVALID 0 /* Invalid address being freed */
+#define KMEM_ERR_DOUBLEFREE 1 /* Freeing already free address */
+#define KMEM_ERR_BUFTAG 2 /* Invalid buftag content */
+#define KMEM_ERR_MODIFIED 3 /* Buffer modified while free */
+#define KMEM_ERR_REDZONE 4 /* Redzone violation */
+
+#if SLAB_USE_CPU_POOLS
+/*
+ * Available CPU pool types.
+ *
+ * For each entry, the CPU pool size applies from the entry buf_size
+ * (excluded) up to (and including) the buf_size of the preceding entry.
+ *
+ * See struct kmem_cpu_pool_type for a description of the values.
+ */
+static struct kmem_cpu_pool_type kmem_cpu_pool_types[] = {
+ { 32768, 1, 0, NULL },
+ { 4096, 8, CPU_L1_SIZE, NULL },
+ { 256, 64, CPU_L1_SIZE, NULL },
+ { 0, 128, CPU_L1_SIZE, NULL }
+};
+
+/*
+ * Caches where CPU pool arrays are allocated from.
+ */
+static struct kmem_cache kmem_cpu_array_caches[ARRAY_SIZE(kmem_cpu_pool_types)];
+#endif /* SLAB_USE_CPU_POOLS */
+
+/*
+ * Cache for off slab data.
+ */
+static struct kmem_cache kmem_slab_cache;
+
+/*
+ * General purpose caches array.
+ */
+static struct kmem_cache kalloc_caches[KALLOC_NR_CACHES];
+
+/*
+ * List of all caches managed by the allocator.
+ */
+static struct list kmem_cache_list;
+static unsigned int kmem_nr_caches;
+static simple_lock_data_t __attribute__((used)) kmem_cache_list_lock;
+
+/*
+ * VM submap for slab caches (except general purpose allocations).
+ */
+static struct vm_map kmem_map_store;
+vm_map_t kmem_map = &kmem_map_store;
+
+/*
+ * VM submap for general purpose allocations.
+ */
+static struct vm_map kalloc_map_store;
+vm_map_t kalloc_map = &kalloc_map_store;
+
+/*
+ * Time of the last memory reclaim, in clock ticks.
+ */
+static unsigned int kmem_gc_last_tick;
+
+#define kmem_error(format, ...) \
+ printf("mem: error: %s(): " format "\n", __func__, \
+ ## __VA_ARGS__)
+
+#define kmem_warn(format, ...) \
+ printf("mem: warning: %s(): " format "\n", __func__, \
+ ## __VA_ARGS__)
+
+#define kmem_print(format, ...) \
+ printf(format "\n", ## __VA_ARGS__)
+
+static void kmem_cache_error(struct kmem_cache *cache, void *buf, int error,
+ void *arg);
+static void * kmem_cache_alloc_from_slab(struct kmem_cache *cache);
+static void kmem_cache_free_to_slab(struct kmem_cache *cache, void *buf);
+
+static void * kmem_buf_verify_bytes(void *buf, void *pattern, size_t size)
+{
+ char *ptr, *pattern_ptr, *end;
+
+ end = buf + size;
+
+ for (ptr = buf, pattern_ptr = pattern; ptr < end; ptr++, pattern_ptr++)
+ if (*ptr != *pattern_ptr)
+ return ptr;
+
+ return NULL;
+}
+
+static void * kmem_buf_verify(void *buf, uint64_t pattern, vm_size_t size)
+{
+ uint64_t *ptr, *end;
+
+ assert(P2ALIGNED((unsigned long)buf, sizeof(uint64_t)));
+ assert(P2ALIGNED(size, sizeof(uint64_t)));
+
+ end = buf + size;
+
+ for (ptr = buf; ptr < end; ptr++)
+ if (*ptr != pattern)
+ return kmem_buf_verify_bytes(ptr, &pattern, sizeof(pattern));
+
+ return NULL;
+}
+
+static void kmem_buf_fill(void *buf, uint64_t pattern, size_t size)
+{
+ uint64_t *ptr, *end;
+
+ assert(P2ALIGNED((unsigned long)buf, sizeof(uint64_t)));
+ assert(P2ALIGNED(size, sizeof(uint64_t)));
+
+ end = buf + size;
+
+ for (ptr = buf; ptr < end; ptr++)
+ *ptr = pattern;
+}
+
+static void * kmem_buf_verify_fill(void *buf, uint64_t old, uint64_t new,
+ size_t size)
+{
+ uint64_t *ptr, *end;
+
+ assert(P2ALIGNED((unsigned long)buf, sizeof(uint64_t)));
+ assert(P2ALIGNED(size, sizeof(uint64_t)));
+
+ end = buf + size;
+
+ for (ptr = buf; ptr < end; ptr++) {
+ if (*ptr != old)
+ return kmem_buf_verify_bytes(ptr, &old, sizeof(old));
+
+ *ptr = new;
+ }
+
+ return NULL;
+}
+
+static inline union kmem_bufctl *
+kmem_buf_to_bufctl(void *buf, struct kmem_cache *cache)
+{
+ return (union kmem_bufctl *)(buf + cache->bufctl_dist);
+}
+
+static inline struct kmem_buftag *
+kmem_buf_to_buftag(void *buf, struct kmem_cache *cache)
+{
+ return (struct kmem_buftag *)(buf + cache->buftag_dist);
+}
+
+static inline void * kmem_bufctl_to_buf(union kmem_bufctl *bufctl,
+ struct kmem_cache *cache)
+{
+ return (void *)bufctl - cache->bufctl_dist;
+}
+
+static vm_offset_t kmem_pagealloc(vm_size_t size)
+{
+ vm_offset_t addr;
+ kern_return_t kr;
+
+ kr = kmem_alloc_wired(kmem_map, &addr, size);
+
+ if (kr != KERN_SUCCESS)
+ return 0;
+
+ return addr;
+}
+
+static void kmem_pagefree(vm_offset_t ptr, vm_size_t size)
+{
+ kmem_free(kmem_map, ptr, size);
+}
+
+static void kmem_slab_create_verify(struct kmem_slab *slab,
+ struct kmem_cache *cache)
+{
+ struct kmem_buftag *buftag;
+ size_t buf_size;
+ unsigned long buffers;
+ void *buf;
+
+ buf_size = cache->buf_size;
+ buf = slab->addr;
+ buftag = kmem_buf_to_buftag(buf, cache);
+
+ for (buffers = cache->bufs_per_slab; buffers != 0; buffers--) {
+ kmem_buf_fill(buf, KMEM_FREE_PATTERN, cache->bufctl_dist);
+ buftag->state = KMEM_BUFTAG_FREE;
+ buf += buf_size;
+ buftag = kmem_buf_to_buftag(buf, cache);
+ }
+}
+
+/*
+ * Create an empty slab for a cache.
+ *
+ * The caller must drop all locks before calling this function.
+ */
+static struct kmem_slab * kmem_slab_create(struct kmem_cache *cache,
+ size_t color)
+{
+ struct kmem_slab *slab;
+ union kmem_bufctl *bufctl;
+ size_t buf_size;
+ unsigned long buffers;
+ void *slab_buf;
+
+ if (cache->slab_alloc_fn == NULL)
+ slab_buf = (void *)kmem_pagealloc(cache->slab_size);
+ else
+ slab_buf = (void *)cache->slab_alloc_fn(cache->slab_size);
+
+ if (slab_buf == NULL)
+ return NULL;
+
+ if (cache->flags & KMEM_CF_SLAB_EXTERNAL) {
+ assert(!(cache->flags & KMEM_CF_NO_RECLAIM));
+ slab = (struct kmem_slab *)kmem_cache_alloc(&kmem_slab_cache);
+
+ if (slab == NULL) {
+ if (cache->slab_free_fn == NULL)
+ kmem_pagefree((vm_offset_t)slab_buf, cache->slab_size);
+ else
+ cache->slab_free_fn((vm_offset_t)slab_buf, cache->slab_size);
+
+ return NULL;
+ }
+ } else {
+ slab = (struct kmem_slab *)(slab_buf + cache->slab_size) - 1;
+ }
+
+ list_node_init(&slab->list_node);
+ rbtree_node_init(&slab->tree_node);
+ slab->nr_refs = 0;
+ slab->first_free = NULL;
+ slab->addr = slab_buf + color;
+
+ buf_size = cache->buf_size;
+ bufctl = kmem_buf_to_bufctl(slab->addr, cache);
+
+ for (buffers = cache->bufs_per_slab; buffers != 0; buffers--) {
+ bufctl->next = slab->first_free;
+ slab->first_free = bufctl;
+ bufctl = (union kmem_bufctl *)((void *)bufctl + buf_size);
+ }
+
+ if (cache->flags & KMEM_CF_VERIFY)
+ kmem_slab_create_verify(slab, cache);
+
+ return slab;
+}
+
+static void kmem_slab_destroy_verify(struct kmem_slab *slab,
+ struct kmem_cache *cache)
+{
+ struct kmem_buftag *buftag;
+ size_t buf_size;
+ unsigned long buffers;
+ void *buf, *addr;
+
+ buf_size = cache->buf_size;
+ buf = slab->addr;
+ buftag = kmem_buf_to_buftag(buf, cache);
+
+ for (buffers = cache->bufs_per_slab; buffers != 0; buffers--) {
+ if (buftag->state != KMEM_BUFTAG_FREE)
+ kmem_cache_error(cache, buf, KMEM_ERR_BUFTAG, buftag);
+
+ addr = kmem_buf_verify(buf, KMEM_FREE_PATTERN, cache->bufctl_dist);
+
+ if (addr != NULL)
+ kmem_cache_error(cache, buf, KMEM_ERR_MODIFIED, addr);
+
+ buf += buf_size;
+ buftag = kmem_buf_to_buftag(buf, cache);
+ }
+}
+
+/*
+ * Destroy a slab.
+ *
+ * The caller must drop all locks before calling this function.
+ */
+static void kmem_slab_destroy(struct kmem_slab *slab, struct kmem_cache *cache)
+{
+ vm_offset_t slab_buf;
+
+ assert(slab->nr_refs == 0);
+ assert(slab->first_free != NULL);
+ assert(!(cache->flags & KMEM_CF_NO_RECLAIM));
+
+ if (cache->flags & KMEM_CF_VERIFY)
+ kmem_slab_destroy_verify(slab, cache);
+
+ slab_buf = (vm_offset_t)P2ALIGN((unsigned long)slab->addr, PAGE_SIZE);
+
+ if (cache->slab_free_fn == NULL)
+ kmem_pagefree(slab_buf, cache->slab_size);
+ else
+ cache->slab_free_fn(slab_buf, cache->slab_size);
+
+ if (cache->flags & KMEM_CF_SLAB_EXTERNAL)
+ kmem_cache_free(&kmem_slab_cache, (vm_offset_t)slab);
+}
+
+static inline int kmem_slab_use_tree(int flags)
+{
+ return !(flags & KMEM_CF_DIRECT) || (flags & KMEM_CF_VERIFY);
+}
+
+static inline int kmem_slab_cmp_lookup(const void *addr,
+ const struct rbtree_node *node)
+{
+ struct kmem_slab *slab;
+
+ slab = rbtree_entry(node, struct kmem_slab, tree_node);
+
+ if (addr == slab->addr)
+ return 0;
+ else if (addr < slab->addr)
+ return -1;
+ else
+ return 1;
+}
+
+static inline int kmem_slab_cmp_insert(const struct rbtree_node *a,
+ const struct rbtree_node *b)
+{
+ struct kmem_slab *slab;
+
+ slab = rbtree_entry(a, struct kmem_slab, tree_node);
+ return kmem_slab_cmp_lookup(slab->addr, b);
+}
+
+#if SLAB_USE_CPU_POOLS
+static void kmem_cpu_pool_init(struct kmem_cpu_pool *cpu_pool,
+ struct kmem_cache *cache)
+{
+ simple_lock_init(&cpu_pool->lock);
+ cpu_pool->flags = cache->flags;
+ cpu_pool->size = 0;
+ cpu_pool->transfer_size = 0;
+ cpu_pool->nr_objs = 0;
+ cpu_pool->array = NULL;
+}
+
+/*
+ * Return a CPU pool.
+ *
+ * This function will generally return the pool matching the CPU running the
+ * calling thread. Because of context switches and thread migration, the
+ * caller might be running on another processor after this function returns.
+ * Although not optimal, this should rarely happen, and it doesn't affect the
+ * allocator operations in any other way, as CPU pools are always valid, and
+ * their access is serialized by a lock.
+ */
+static inline struct kmem_cpu_pool * kmem_cpu_pool_get(struct kmem_cache *cache)
+{
+ return &cache->cpu_pools[cpu_number()];
+}
+
+static inline void kmem_cpu_pool_build(struct kmem_cpu_pool *cpu_pool,
+ struct kmem_cache *cache, void **array)
+{
+ cpu_pool->size = cache->cpu_pool_type->array_size;
+ cpu_pool->transfer_size = (cpu_pool->size
+ + KMEM_CPU_POOL_TRANSFER_RATIO - 1)
+ / KMEM_CPU_POOL_TRANSFER_RATIO;
+ cpu_pool->array = array;
+}
+
+static inline void * kmem_cpu_pool_pop(struct kmem_cpu_pool *cpu_pool)
+{
+ cpu_pool->nr_objs--;
+ return cpu_pool->array[cpu_pool->nr_objs];
+}
+
+static inline void kmem_cpu_pool_push(struct kmem_cpu_pool *cpu_pool, void *obj)
+{
+ cpu_pool->array[cpu_pool->nr_objs] = obj;
+ cpu_pool->nr_objs++;
+}
+
+static int kmem_cpu_pool_fill(struct kmem_cpu_pool *cpu_pool,
+ struct kmem_cache *cache)
+{
+ void *obj;
+ int i;
+
+ simple_lock(&cache->lock);
+
+ for (i = 0; i < cpu_pool->transfer_size; i++) {
+ obj = kmem_cache_alloc_from_slab(cache);
+
+ if (obj == NULL)
+ break;
+
+ kmem_cpu_pool_push(cpu_pool, obj);
+ }
+
+ simple_unlock(&cache->lock);
+
+ return i;
+}
+
+static void kmem_cpu_pool_drain(struct kmem_cpu_pool *cpu_pool,
+ struct kmem_cache *cache)
+{
+ void *obj;
+ int i;
+
+ simple_lock(&cache->lock);
+
+ for (i = cpu_pool->transfer_size; i > 0; i--) {
+ obj = kmem_cpu_pool_pop(cpu_pool);
+ kmem_cache_free_to_slab(cache, obj);
+ }
+
+ simple_unlock(&cache->lock);
+}
+#endif /* SLAB_USE_CPU_POOLS */
+
+static void kmem_cache_error(struct kmem_cache *cache, void *buf, int error,
+ void *arg)
+{
+ struct kmem_buftag *buftag;
+
+ kmem_error("cache: %s, buffer: %p", cache->name, (void *)buf);
+
+ switch(error) {
+ case KMEM_ERR_INVALID:
+ kmem_error("freeing invalid address");
+ break;
+ case KMEM_ERR_DOUBLEFREE:
+ kmem_error("attempting to free the same address twice");
+ break;
+ case KMEM_ERR_BUFTAG:
+ buftag = arg;
+ kmem_error("invalid buftag content, buftag state: %p",
+ (void *)buftag->state);
+ break;
+ case KMEM_ERR_MODIFIED:
+ kmem_error("free buffer modified, fault address: %p, "
+ "offset in buffer: %td", arg, arg - buf);
+ break;
+ case KMEM_ERR_REDZONE:
+ kmem_error("write beyond end of buffer, fault address: %p, "
+ "offset in buffer: %td", arg, arg - buf);
+ break;
+ default:
+ kmem_error("unknown error");
+ }
+
+ /*
+ * Never reached.
+ */
+}
+
+/*
+ * Compute an appropriate slab size for the given cache.
+ *
+ * Once the slab size is known, this function sets the related properties
+ * (buffers per slab and maximum color). It can also set the KMEM_CF_DIRECT
+ * and/or KMEM_CF_SLAB_EXTERNAL flags depending on the resulting layout.
+ */
+static void kmem_cache_compute_sizes(struct kmem_cache *cache, int flags)
+{
+ size_t i, buffers, buf_size, slab_size, free_slab_size, optimal_size;
+ size_t waste, waste_min;
+ int embed, optimal_embed = optimal_embed;
+
+ buf_size = cache->buf_size;
+
+ if (buf_size < KMEM_BUF_SIZE_THRESHOLD)
+ flags |= KMEM_CACHE_NOOFFSLAB;
+
+ i = 0;
+ waste_min = (size_t)-1;
+
+ do {
+ i++;
+ slab_size = P2ROUND(i * buf_size, PAGE_SIZE);
+ free_slab_size = slab_size;
+
+ if (flags & KMEM_CACHE_NOOFFSLAB)
+ free_slab_size -= sizeof(struct kmem_slab);
+
+ buffers = free_slab_size / buf_size;
+ waste = free_slab_size % buf_size;
+
+ if (buffers > i)
+ i = buffers;
+
+ if (flags & KMEM_CACHE_NOOFFSLAB)
+ embed = 1;
+ else if (sizeof(struct kmem_slab) <= waste) {
+ embed = 1;
+ waste -= sizeof(struct kmem_slab);
+ } else {
+ embed = 0;
+ }
+
+ if (waste <= waste_min) {
+ waste_min = waste;
+ optimal_size = slab_size;
+ optimal_embed = embed;
+ }
+ } while ((buffers < KMEM_MIN_BUFS_PER_SLAB)
+ && (slab_size < KMEM_SLAB_SIZE_THRESHOLD));
+
+ assert(!(flags & KMEM_CACHE_NOOFFSLAB) || optimal_embed);
+
+ cache->slab_size = optimal_size;
+ slab_size = cache->slab_size - (optimal_embed
+ ? sizeof(struct kmem_slab)
+ : 0);
+ cache->bufs_per_slab = slab_size / buf_size;
+ cache->color_max = slab_size % buf_size;
+
+ if (cache->color_max >= PAGE_SIZE)
+ cache->color_max = PAGE_SIZE - 1;
+
+ if (optimal_embed) {
+ if (cache->slab_size == PAGE_SIZE)
+ cache->flags |= KMEM_CF_DIRECT;
+ } else {
+ cache->flags |= KMEM_CF_SLAB_EXTERNAL;
+ }
+}
+
+void kmem_cache_init(struct kmem_cache *cache, const char *name,
+ size_t obj_size, size_t align, kmem_cache_ctor_t ctor,
+ kmem_slab_alloc_fn_t slab_alloc_fn,
+ kmem_slab_free_fn_t slab_free_fn, int flags)
+{
+#if SLAB_USE_CPU_POOLS
+ struct kmem_cpu_pool_type *cpu_pool_type;
+ size_t i;
+#endif /* SLAB_USE_CPU_POOLS */
+ size_t buf_size;
+
+#if SLAB_VERIFY
+ cache->flags = KMEM_CF_VERIFY;
+#else /* SLAB_VERIFY */
+ cache->flags = 0;
+#endif /* SLAB_VERIFY */
+
+ if (flags & KMEM_CACHE_NOCPUPOOL)
+ cache->flags |= KMEM_CF_NO_CPU_POOL;
+
+ if (flags & KMEM_CACHE_NORECLAIM) {
+ assert(slab_free_fn == NULL);
+ flags |= KMEM_CACHE_NOOFFSLAB;
+ cache->flags |= KMEM_CF_NO_RECLAIM;
+ }
+
+ if (flags & KMEM_CACHE_VERIFY)
+ cache->flags |= KMEM_CF_VERIFY;
+
+ if (align < KMEM_ALIGN_MIN)
+ align = KMEM_ALIGN_MIN;
+
+ assert(obj_size > 0);
+ assert(ISP2(align));
+ assert(align < PAGE_SIZE);
+
+ buf_size = P2ROUND(obj_size, align);
+
+ simple_lock_init(&cache->lock);
+ list_node_init(&cache->node);
+ list_init(&cache->partial_slabs);
+ list_init(&cache->free_slabs);
+ rbtree_init(&cache->active_slabs);
+ cache->obj_size = obj_size;
+ cache->align = align;
+ cache->buf_size = buf_size;
+ cache->bufctl_dist = buf_size - sizeof(union kmem_bufctl);
+ cache->color = 0;
+ cache->nr_objs = 0;
+ cache->nr_bufs = 0;
+ cache->nr_slabs = 0;
+ cache->nr_free_slabs = 0;
+ cache->ctor = ctor;
+ cache->slab_alloc_fn = slab_alloc_fn;
+ cache->slab_free_fn = slab_free_fn;
+ strncpy(cache->name, name, sizeof(cache->name));
+ cache->name[sizeof(cache->name) - 1] = '\0';
+ cache->buftag_dist = 0;
+ cache->redzone_pad = 0;
+
+ if (cache->flags & KMEM_CF_VERIFY) {
+ cache->bufctl_dist = buf_size;
+ cache->buftag_dist = cache->bufctl_dist + sizeof(union kmem_bufctl);
+ cache->redzone_pad = cache->bufctl_dist - cache->obj_size;
+ buf_size += sizeof(union kmem_bufctl) + sizeof(struct kmem_buftag);
+ buf_size = P2ROUND(buf_size, align);
+ cache->buf_size = buf_size;
+ }
+
+ kmem_cache_compute_sizes(cache, flags);
+
+#if SLAB_USE_CPU_POOLS
+ for (cpu_pool_type = kmem_cpu_pool_types;
+ buf_size <= cpu_pool_type->buf_size;
+ cpu_pool_type++);
+
+ cache->cpu_pool_type = cpu_pool_type;
+
+ for (i = 0; i < ARRAY_SIZE(cache->cpu_pools); i++)
+ kmem_cpu_pool_init(&cache->cpu_pools[i], cache);
+#endif /* SLAB_USE_CPU_POOLS */
+
+ simple_lock(&kmem_cache_list_lock);
+ list_insert_tail(&kmem_cache_list, &cache->node);
+ kmem_nr_caches++;
+ simple_unlock(&kmem_cache_list_lock);
+}
+
+/*
+ * Return true when the cache has no free buffer left, i.e. when every buffer
+ * backed by its slabs is currently allocated.
+ */
+static inline int kmem_cache_empty(struct kmem_cache *cache)
+{
+ return cache->nr_objs == cache->nr_bufs;
+}
+
+static int kmem_cache_grow(struct kmem_cache *cache)
+{
+ struct kmem_slab *slab;
+ size_t color;
+ int empty;
+
+ simple_lock(&cache->lock);
+
+ if (!kmem_cache_empty(cache)) {
+ simple_unlock(&cache->lock);
+ return 1;
+ }
+
+ color = cache->color;
+ cache->color += cache->align;
+
+ if (cache->color > cache->color_max)
+ cache->color = 0;
+
+ simple_unlock(&cache->lock);
+
+ slab = kmem_slab_create(cache, color);
+
+ simple_lock(&cache->lock);
+
+ if (slab != NULL) {
+ list_insert_tail(&cache->free_slabs, &slab->list_node);
+ cache->nr_bufs += cache->bufs_per_slab;
+ cache->nr_slabs++;
+ cache->nr_free_slabs++;
+ }
+
+ /*
+ * Even if our slab creation failed, another thread might have succeeded
+ * in growing the cache.
+ */
+ empty = kmem_cache_empty(cache);
+
+ simple_unlock(&cache->lock);
+
+ return !empty;
+}
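+
+/*
+ * Editor's note on coloring: each new slab is created with the current cache
+ * color, which then advances by the alignment and wraps once it exceeds
+ * color_max. With align = 8 and color_max = 96 (hypothetical values),
+ * successive slabs place their first buffer at offsets 0, 8, 16, ..., 96, 0,
+ * ... so that buffers with the same index in different slabs don't all map
+ * to the same cache lines.
+ */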
+
+static void kmem_cache_reap(struct kmem_cache *cache)
+{
+ struct kmem_slab *slab;
+ struct list dead_slabs;
+
+ if (cache->flags & KMEM_CF_NO_RECLAIM)
+ return;
+
+ list_init(&dead_slabs);
+
+ simple_lock(&cache->lock);
+
+ while (!list_empty(&cache->free_slabs)) {
+ slab = list_first_entry(&cache->free_slabs, struct kmem_slab,
+ list_node);
+ list_remove(&slab->list_node);
+ list_insert(&dead_slabs, &slab->list_node);
+ cache->nr_bufs -= cache->bufs_per_slab;
+ cache->nr_slabs--;
+ cache->nr_free_slabs--;
+ }
+
+ simple_unlock(&cache->lock);
+
+ while (!list_empty(&dead_slabs)) {
+ slab = list_first_entry(&dead_slabs, struct kmem_slab, list_node);
+ list_remove(&slab->list_node);
+ kmem_slab_destroy(slab, cache);
+ }
+}
+
+/*
+ * Allocate a raw (unconstructed) buffer from the slab layer of a cache.
+ *
+ * The cache must be locked before calling this function.
+ */
+static void * kmem_cache_alloc_from_slab(struct kmem_cache *cache)
+{
+ struct kmem_slab *slab;
+ union kmem_bufctl *bufctl;
+
+ if (!list_empty(&cache->partial_slabs))
+ slab = list_first_entry(&cache->partial_slabs, struct kmem_slab,
+ list_node);
+ else if (!list_empty(&cache->free_slabs))
+ slab = list_first_entry(&cache->free_slabs, struct kmem_slab,
+ list_node);
+ else
+ return NULL;
+
+ bufctl = slab->first_free;
+ assert(bufctl != NULL);
+ slab->first_free = bufctl->next;
+ slab->nr_refs++;
+ cache->nr_objs++;
+
+ /*
+ * The slab has become complete.
+ */
+ if (slab->nr_refs == cache->bufs_per_slab) {
+ list_remove(&slab->list_node);
+
+ if (slab->nr_refs == 1)
+ cache->nr_free_slabs--;
+ } else if (slab->nr_refs == 1) {
+ /*
+ * The slab has become partial.
+ */
+ list_remove(&slab->list_node);
+ list_insert_tail(&cache->partial_slabs, &slab->list_node);
+ cache->nr_free_slabs--;
+ } else if (!list_singular(&cache->partial_slabs)) {
+ struct list *node;
+ struct kmem_slab *tmp;
+
+ /*
+         * The slab remains partial. If there is more than one partial slab,
+         * keep the list sorted.
+ */
+
+ assert(slab->nr_refs > 1);
+
+ for (node = list_prev(&slab->list_node);
+ !list_end(&cache->partial_slabs, node);
+ node = list_prev(node)) {
+ tmp = list_entry(node, struct kmem_slab, list_node);
+
+ if (tmp->nr_refs >= slab->nr_refs)
+ break;
+ }
+
+ /*
+ * If the direct neighbor was found, the list is already sorted.
+ * If no slab was found, the slab is inserted at the head of the list.
+ */
+ if (node != list_prev(&slab->list_node)) {
+ list_remove(&slab->list_node);
+ list_insert_after(node, &slab->list_node);
+ }
+ }
+
+ if ((slab->nr_refs == 1) && kmem_slab_use_tree(cache->flags))
+ rbtree_insert(&cache->active_slabs, &slab->tree_node,
+ kmem_slab_cmp_insert);
+
+ return kmem_bufctl_to_buf(bufctl, cache);
+}
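+
+/*
+ * Editor's note: the partial slabs list is kept sorted by decreasing
+ * reference count, so with three partial slabs holding 7, 4 and 2 allocated
+ * buffers the list reads (7, 4, 2). Allocating from its head fills the
+ * most-used slabs first, which helps the least-used ones become free and
+ * eventually reclaimable.
+ */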
+
+/*
+ * Release a buffer to the slab layer of a cache.
+ *
+ * The cache must be locked before calling this function.
+ */
+static void kmem_cache_free_to_slab(struct kmem_cache *cache, void *buf)
+{
+ struct kmem_slab *slab;
+ union kmem_bufctl *bufctl;
+
+ if (cache->flags & KMEM_CF_DIRECT) {
+ assert(cache->slab_size == PAGE_SIZE);
+ slab = (struct kmem_slab *)P2END((unsigned long)buf, cache->slab_size)
+ - 1;
+ } else {
+ struct rbtree_node *node;
+
+ node = rbtree_lookup_nearest(&cache->active_slabs, buf,
+ kmem_slab_cmp_lookup, RBTREE_LEFT);
+ assert(node != NULL);
+ slab = rbtree_entry(node, struct kmem_slab, tree_node);
+ assert((unsigned long)buf < (P2ALIGN((unsigned long)slab->addr
+ + cache->slab_size, PAGE_SIZE)));
+ }
+
+ assert(slab->nr_refs >= 1);
+ assert(slab->nr_refs <= cache->bufs_per_slab);
+ bufctl = kmem_buf_to_bufctl(buf, cache);
+ bufctl->next = slab->first_free;
+ slab->first_free = bufctl;
+ slab->nr_refs--;
+ cache->nr_objs--;
+
+ /*
+ * The slab has become free.
+ */
+ if (slab->nr_refs == 0) {
+ if (kmem_slab_use_tree(cache->flags))
+ rbtree_remove(&cache->active_slabs, &slab->tree_node);
+
+ /*
+ * The slab was partial.
+ */
+ if (cache->bufs_per_slab > 1)
+ list_remove(&slab->list_node);
+
+ list_insert_tail(&cache->free_slabs, &slab->list_node);
+ cache->nr_free_slabs++;
+ } else if (slab->nr_refs == (cache->bufs_per_slab - 1)) {
+ /*
+ * The slab has become partial.
+ */
+ list_insert(&cache->partial_slabs, &slab->list_node);
+ } else if (!list_singular(&cache->partial_slabs)) {
+ struct list *node;
+ struct kmem_slab *tmp;
+
+ /*
+         * The slab remains partial. If there is more than one partial slab,
+         * keep the list sorted.
+ */
+
+ assert(slab->nr_refs > 0);
+
+ for (node = list_next(&slab->list_node);
+ !list_end(&cache->partial_slabs, node);
+ node = list_next(node)) {
+ tmp = list_entry(node, struct kmem_slab, list_node);
+
+ if (tmp->nr_refs <= slab->nr_refs)
+ break;
+ }
+
+ /*
+ * If the direct neighbor was found, the list is already sorted.
+ * If no slab was found, the slab is inserted at the tail of the list.
+ */
+ if (node != list_next(&slab->list_node)) {
+ list_remove(&slab->list_node);
+ list_insert_before(node, &slab->list_node);
+ }
+ }
+}
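+
+/*
+ * Editor's note on the KMEM_CF_DIRECT case above: such slabs span exactly
+ * one page and keep their struct kmem_slab in the last bytes of that page,
+ * so the descriptor is recovered by pure arithmetic. With 4 KiB pages and a
+ * buffer at the hypothetical address 0xc1234080, the page ends at 0xc1235000
+ * and the header sits at 0xc1235000 - sizeof(struct kmem_slab). All other
+ * caches fall back to a lookup in the active_slabs red-black tree.
+ */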
+
+static void kmem_cache_alloc_verify(struct kmem_cache *cache, void *buf,
+ int construct)
+{
+ struct kmem_buftag *buftag;
+ union kmem_bufctl *bufctl;
+ void *addr;
+
+ buftag = kmem_buf_to_buftag(buf, cache);
+
+ if (buftag->state != KMEM_BUFTAG_FREE)
+ kmem_cache_error(cache, buf, KMEM_ERR_BUFTAG, buftag);
+
+ addr = kmem_buf_verify_fill(buf, KMEM_FREE_PATTERN, KMEM_UNINIT_PATTERN,
+ cache->bufctl_dist);
+
+ if (addr != NULL)
+ kmem_cache_error(cache, buf, KMEM_ERR_MODIFIED, addr);
+
+ addr = buf + cache->obj_size;
+ memset(addr, KMEM_REDZONE_BYTE, cache->redzone_pad);
+
+ bufctl = kmem_buf_to_bufctl(buf, cache);
+ bufctl->redzone = KMEM_REDZONE_WORD;
+ buftag->state = KMEM_BUFTAG_ALLOC;
+
+ if (construct && (cache->ctor != NULL))
+ cache->ctor(buf);
+}
+
+vm_offset_t kmem_cache_alloc(struct kmem_cache *cache)
+{
+ int filled;
+ void *buf;
+
+#if SLAB_USE_CPU_POOLS
+ struct kmem_cpu_pool *cpu_pool;
+
+ cpu_pool = kmem_cpu_pool_get(cache);
+
+ if (cpu_pool->flags & KMEM_CF_NO_CPU_POOL)
+ goto slab_alloc;
+
+ simple_lock(&cpu_pool->lock);
+
+fast_alloc:
+ if (likely(cpu_pool->nr_objs > 0)) {
+ buf = kmem_cpu_pool_pop(cpu_pool);
+ simple_unlock(&cpu_pool->lock);
+
+ if (cpu_pool->flags & KMEM_CF_VERIFY)
+ kmem_cache_alloc_verify(cache, buf, KMEM_AV_CONSTRUCT);
+
+ return (vm_offset_t)buf;
+ }
+
+ if (cpu_pool->array != NULL) {
+ filled = kmem_cpu_pool_fill(cpu_pool, cache);
+
+ if (!filled) {
+ simple_unlock(&cpu_pool->lock);
+
+ filled = kmem_cache_grow(cache);
+
+ if (!filled)
+ return 0;
+
+ simple_lock(&cpu_pool->lock);
+ }
+
+ goto fast_alloc;
+ }
+
+ simple_unlock(&cpu_pool->lock);
+#endif /* SLAB_USE_CPU_POOLS */
+
+slab_alloc:
+ simple_lock(&cache->lock);
+ buf = kmem_cache_alloc_from_slab(cache);
+ simple_unlock(&cache->lock);
+
+ if (buf == NULL) {
+ filled = kmem_cache_grow(cache);
+
+ if (!filled)
+ return 0;
+
+ goto slab_alloc;
+ }
+
+ if (cache->flags & KMEM_CF_VERIFY)
+ kmem_cache_alloc_verify(cache, buf, KMEM_AV_NOCONSTRUCT);
+
+ if (cache->ctor != NULL)
+ cache->ctor(buf);
+
+ return (vm_offset_t)buf;
+}
+
+static void kmem_cache_free_verify(struct kmem_cache *cache, void *buf)
+{
+ struct rbtree_node *node;
+ struct kmem_buftag *buftag;
+ struct kmem_slab *slab;
+ union kmem_bufctl *bufctl;
+ unsigned char *redzone_byte;
+ unsigned long slabend;
+
+ simple_lock(&cache->lock);
+ node = rbtree_lookup_nearest(&cache->active_slabs, buf,
+ kmem_slab_cmp_lookup, RBTREE_LEFT);
+ simple_unlock(&cache->lock);
+
+ if (node == NULL)
+ kmem_cache_error(cache, buf, KMEM_ERR_INVALID, NULL);
+
+ slab = rbtree_entry(node, struct kmem_slab, tree_node);
+ slabend = P2ALIGN((unsigned long)slab->addr + cache->slab_size, PAGE_SIZE);
+
+ if ((unsigned long)buf >= slabend)
+ kmem_cache_error(cache, buf, KMEM_ERR_INVALID, NULL);
+
+ if ((((unsigned long)buf - (unsigned long)slab->addr) % cache->buf_size)
+ != 0)
+ kmem_cache_error(cache, buf, KMEM_ERR_INVALID, NULL);
+
+ /*
+ * As the buffer address is valid, accessing its buftag is safe.
+ */
+ buftag = kmem_buf_to_buftag(buf, cache);
+
+ if (buftag->state != KMEM_BUFTAG_ALLOC) {
+ if (buftag->state == KMEM_BUFTAG_FREE)
+ kmem_cache_error(cache, buf, KMEM_ERR_DOUBLEFREE, NULL);
+ else
+ kmem_cache_error(cache, buf, KMEM_ERR_BUFTAG, buftag);
+ }
+
+ redzone_byte = buf + cache->obj_size;
+ bufctl = kmem_buf_to_bufctl(buf, cache);
+
+ while (redzone_byte < (unsigned char *)bufctl) {
+ if (*redzone_byte != KMEM_REDZONE_BYTE)
+ kmem_cache_error(cache, buf, KMEM_ERR_REDZONE, redzone_byte);
+
+ redzone_byte++;
+ }
+
+ if (bufctl->redzone != KMEM_REDZONE_WORD) {
+ unsigned long word;
+
+ word = KMEM_REDZONE_WORD;
+ redzone_byte = kmem_buf_verify_bytes(&bufctl->redzone, &word,
+ sizeof(bufctl->redzone));
+ kmem_cache_error(cache, buf, KMEM_ERR_REDZONE, redzone_byte);
+ }
+
+ kmem_buf_fill(buf, KMEM_FREE_PATTERN, cache->bufctl_dist);
+ buftag->state = KMEM_BUFTAG_FREE;
+}
+
+void kmem_cache_free(struct kmem_cache *cache, vm_offset_t obj)
+{
+#if SLAB_USE_CPU_POOLS
+ struct kmem_cpu_pool *cpu_pool;
+ void **array;
+
+ cpu_pool = kmem_cpu_pool_get(cache);
+
+ if (cpu_pool->flags & KMEM_CF_VERIFY) {
+#else /* SLAB_USE_CPU_POOLS */
+ if (cache->flags & KMEM_CF_VERIFY) {
+#endif /* SLAB_USE_CPU_POOLS */
+ kmem_cache_free_verify(cache, (void *)obj);
+ }
+
+#if SLAB_USE_CPU_POOLS
+ if (cpu_pool->flags & KMEM_CF_NO_CPU_POOL)
+ goto slab_free;
+
+ simple_lock(&cpu_pool->lock);
+
+fast_free:
+ if (likely(cpu_pool->nr_objs < cpu_pool->size)) {
+ kmem_cpu_pool_push(cpu_pool, (void *)obj);
+ simple_unlock(&cpu_pool->lock);
+ return;
+ }
+
+ if (cpu_pool->array != NULL) {
+ kmem_cpu_pool_drain(cpu_pool, cache);
+ goto fast_free;
+ }
+
+ simple_unlock(&cpu_pool->lock);
+
+ array = (void *)kmem_cache_alloc(cache->cpu_pool_type->array_cache);
+
+ if (array != NULL) {
+ simple_lock(&cpu_pool->lock);
+
+ /*
+ * Another thread may have built the CPU pool while the lock was
+ * dropped.
+ */
+ if (cpu_pool->array != NULL) {
+ simple_unlock(&cpu_pool->lock);
+ kmem_cache_free(cache->cpu_pool_type->array_cache,
+ (vm_offset_t)array);
+ goto fast_free;
+ }
+
+ kmem_cpu_pool_build(cpu_pool, cache, array);
+ goto fast_free;
+ }
+
+slab_free:
+#endif /* SLAB_USE_CPU_POOLS */
+
+ kmem_cache_free_to_slab(cache, (void *)obj);
+}
+
+void slab_collect(void)
+{
+ struct kmem_cache *cache;
+
+ if (sched_tick <= (kmem_gc_last_tick + KMEM_GC_INTERVAL))
+ return;
+
+ kmem_gc_last_tick = sched_tick;
+
+    simple_lock(&kmem_cache_list_lock);
+
+ list_for_each_entry(&kmem_cache_list, cache, node)
+ kmem_cache_reap(cache);
+
+    simple_unlock(&kmem_cache_list_lock);
+}
+
+void slab_bootstrap(void)
+{
+ /* Make sure a bufctl can always be stored in a buffer */
+ assert(sizeof(union kmem_bufctl) <= KMEM_ALIGN_MIN);
+
+ list_init(&kmem_cache_list);
+ simple_lock_init(&kmem_cache_list_lock);
+}
+
+void slab_init(void)
+{
+ vm_offset_t min, max;
+
+#if SLAB_USE_CPU_POOLS
+ struct kmem_cpu_pool_type *cpu_pool_type;
+ char name[KMEM_CACHE_NAME_SIZE];
+ size_t i, size;
+#endif /* SLAB_USE_CPU_POOLS */
+
+ kmem_submap(kmem_map, kernel_map, &min, &max, KMEM_MAP_SIZE, FALSE);
+
+#if SLAB_USE_CPU_POOLS
+ for (i = 0; i < ARRAY_SIZE(kmem_cpu_pool_types); i++) {
+ cpu_pool_type = &kmem_cpu_pool_types[i];
+ cpu_pool_type->array_cache = &kmem_cpu_array_caches[i];
+ sprintf(name, "kmem_cpu_array_%d", cpu_pool_type->array_size);
+ size = sizeof(void *) * cpu_pool_type->array_size;
+ kmem_cache_init(cpu_pool_type->array_cache, name, size,
+ cpu_pool_type->array_align, NULL, NULL, NULL, 0);
+ }
+#endif /* SLAB_USE_CPU_POOLS */
+
+ /*
+     * Prevent off-slab data for the slab cache to avoid infinite recursion.
+ */
+ kmem_cache_init(&kmem_slab_cache, "kmem_slab", sizeof(struct kmem_slab),
+ 0, NULL, NULL, NULL, KMEM_CACHE_NOOFFSLAB);
+}
+
+static vm_offset_t kalloc_pagealloc(vm_size_t size)
+{
+ vm_offset_t addr;
+ kern_return_t kr;
+
+ kr = kmem_alloc_wired(kalloc_map, &addr, size);
+
+ if (kr != KERN_SUCCESS)
+ return 0;
+
+ return addr;
+}
+
+static void kalloc_pagefree(vm_offset_t ptr, vm_size_t size)
+{
+ kmem_free(kalloc_map, ptr, size);
+}
+
+void kalloc_init(void)
+{
+ char name[KMEM_CACHE_NAME_SIZE];
+ size_t i, size;
+ vm_offset_t min, max;
+
+ kmem_submap(kalloc_map, kernel_map, &min, &max, KALLOC_MAP_SIZE, FALSE);
+
+ size = 1 << KALLOC_FIRST_SHIFT;
+
+ for (i = 0; i < ARRAY_SIZE(kalloc_caches); i++) {
+        sprintf(name, "kalloc_%lu", (unsigned long)size);
+ kmem_cache_init(&kalloc_caches[i], name, size, 0, NULL,
+ kalloc_pagealloc, kalloc_pagefree, 0);
+ size <<= 1;
+ }
+}
+
+/*
+ * Return the kalloc cache index matching the given allocation size, which
+ * must be strictly greater than 0.
+ */
+static inline size_t kalloc_get_index(unsigned long size)
+{
+ assert(size != 0);
+
+ size = (size - 1) >> KALLOC_FIRST_SHIFT;
+
+ if (size == 0)
+ return 0;
+ else
+ return (sizeof(long) * 8) - __builtin_clzl(size);
+}
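+
+/*
+ * Editor's example, assuming KALLOC_FIRST_SHIFT is 5 (i.e. a smallest cache
+ * of 32 bytes, a hypothetical value): a 100 byte request yields
+ * size = (100 - 1) >> 5 = 3, so the index is
+ * (sizeof(long) * 8) - __builtin_clzl(3) = 32 - 30 = 2 on a 32-bit kernel,
+ * selecting the 1 << (5 + 2) = 128 byte cache, the smallest general cache
+ * able to hold the request. Requests of 1 to 32 bytes map to index 0.
+ */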
+
+static void kalloc_verify(struct kmem_cache *cache, void *buf, size_t size)
+{
+ size_t redzone_size;
+ void *redzone;
+
+ assert(size <= cache->obj_size);
+
+ redzone = buf + size;
+ redzone_size = cache->obj_size - size;
+ memset(redzone, KMEM_REDZONE_BYTE, redzone_size);
+}
+
+vm_offset_t kalloc(vm_size_t size)
+{
+ size_t index;
+ void *buf;
+
+ if (size == 0)
+ return 0;
+
+ index = kalloc_get_index(size);
+
+ if (index < ARRAY_SIZE(kalloc_caches)) {
+ struct kmem_cache *cache;
+
+ cache = &kalloc_caches[index];
+ buf = (void *)kmem_cache_alloc(cache);
+
+ if ((buf != 0) && (cache->flags & KMEM_CF_VERIFY))
+ kalloc_verify(cache, buf, size);
+ } else
+ buf = (void *)kalloc_pagealloc(size);
+
+ return (vm_offset_t)buf;
+}
+
+static void kfree_verify(struct kmem_cache *cache, void *buf, size_t size)
+{
+ unsigned char *redzone_byte, *redzone_end;
+
+ assert(size <= cache->obj_size);
+
+ redzone_byte = buf + size;
+ redzone_end = buf + cache->obj_size;
+
+ while (redzone_byte < redzone_end) {
+ if (*redzone_byte != KMEM_REDZONE_BYTE)
+ kmem_cache_error(cache, buf, KMEM_ERR_REDZONE, redzone_byte);
+
+ redzone_byte++;
+ }
+}
+
+void kfree(vm_offset_t data, vm_size_t size)
+{
+ size_t index;
+
+ if ((data == 0) || (size == 0))
+ return;
+
+ index = kalloc_get_index(size);
+
+ if (index < ARRAY_SIZE(kalloc_caches)) {
+ struct kmem_cache *cache;
+
+ cache = &kalloc_caches[index];
+
+ if (cache->flags & KMEM_CF_VERIFY)
+ kfree_verify(cache, (void *)data, size);
+
+ kmem_cache_free(cache, data);
+ } else {
+ kalloc_pagefree(data, size);
+ }
+}
+
+#if MACH_DEBUG
+kern_return_t host_slab_info(host_t host, cache_info_array_t *infop,
+ unsigned int *infoCntp)
+{
+ struct kmem_cache *cache;
+ cache_info_t *info;
+ unsigned int i, nr_caches;
+    vm_size_t info_size = info_size; /* self-init silences a spurious "may be used uninitialized" warning */
+ kern_return_t kr;
+
+ if (host == HOST_NULL)
+ return KERN_INVALID_HOST;
+
+ /*
+ * Assume the cache list is unaltered once the kernel is ready.
+ */
+
+    simple_lock(&kmem_cache_list_lock);
+ nr_caches = kmem_nr_caches;
+    simple_unlock(&kmem_cache_list_lock);
+
+ if (nr_caches <= *infoCntp)
+ info = *infop;
+ else {
+ vm_offset_t info_addr;
+
+ info_size = round_page(nr_caches * sizeof(*info));
+ kr = kmem_alloc_pageable(ipc_kernel_map, &info_addr, info_size);
+
+ if (kr != KERN_SUCCESS)
+ return kr;
+
+ info = (cache_info_t *)info_addr;
+ }
+
+ if (info == NULL)
+ return KERN_RESOURCE_SHORTAGE;
+
+ i = 0;
+
+ list_for_each_entry(&kmem_cache_list, cache, node) {
+        simple_lock(&cache->lock);
+ info[i].flags = ((cache->flags & KMEM_CF_NO_CPU_POOL)
+ ? CACHE_FLAGS_NO_CPU_POOL : 0)
+ | ((cache->flags & KMEM_CF_SLAB_EXTERNAL)
+ ? CACHE_FLAGS_SLAB_EXTERNAL : 0)
+ | ((cache->flags & KMEM_CF_NO_RECLAIM)
+ ? CACHE_FLAGS_NO_RECLAIM : 0)
+ | ((cache->flags & KMEM_CF_VERIFY)
+ ? CACHE_FLAGS_VERIFY : 0)
+ | ((cache->flags & KMEM_CF_DIRECT)
+ ? CACHE_FLAGS_DIRECT : 0);
+#if SLAB_USE_CPU_POOLS
+ info[i].cpu_pool_size = cache->cpu_pool_type->array_size;
+#else /* SLAB_USE_CPU_POOLS */
+ info[i].cpu_pool_size = 0;
+#endif /* SLAB_USE_CPU_POOLS */
+ info[i].obj_size = cache->obj_size;
+ info[i].align = cache->align;
+ info[i].buf_size = cache->buf_size;
+ info[i].slab_size = cache->slab_size;
+ info[i].bufs_per_slab = cache->bufs_per_slab;
+ info[i].nr_objs = cache->nr_objs;
+ info[i].nr_bufs = cache->nr_bufs;
+ info[i].nr_slabs = cache->nr_slabs;
+ info[i].nr_free_slabs = cache->nr_free_slabs;
+ strncpy(info[i].name, cache->name, sizeof(info[i].name));
+ info[i].name[sizeof(info[i].name) - 1] = '\0';
+ simple_unlock(&cache->lock);
+
+ i++;
+ }
+
+ if (info != *infop) {
+ vm_map_copy_t copy;
+ vm_size_t used;
+
+ used = nr_caches * sizeof(*info);
+
+ if (used != info_size)
+ memset((char *)info + used, 0, info_size - used);
+
+ kr = vm_map_copyin(ipc_kernel_map, (vm_offset_t)info, used, TRUE,
+ &copy);
+
+ assert(kr == KERN_SUCCESS);
+ *infop = (cache_info_t *)copy;
+ }
+
+ *infoCntp = nr_caches;
+
+ return KERN_SUCCESS;
+}
+#endif /* MACH_DEBUG */
diff --git a/kern/slab.h b/kern/slab.h
new file mode 100644
index 0000000..14c820b
--- /dev/null
+++ b/kern/slab.h
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2009, 2010, 2011 Richard Braun.
+ * Copyright (c) 2011 Maksym Planeta.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef _KERN_SLAB_H
+#define _KERN_SLAB_H
+
+#include <kern/lock.h>
+#include <kern/list.h>
+#include <kern/rbtree.h>
+#include <mach/machine/vm_types.h>
+#include <sys/types.h>
+#include <vm/vm_types.h>
+
+#if SLAB_USE_CPU_POOLS
+/*
+ * L1 cache line size.
+ */
+#define CPU_L1_SIZE (1 << CPU_L1_SHIFT)
+
+/*
+ * Per-processor cache of pre-constructed objects.
+ *
+ * The flags member is a read-only CPU-local copy of the parent cache flags.
+ */
+struct kmem_cpu_pool {
+ simple_lock_data_t lock;
+ int flags;
+ int size;
+ int transfer_size;
+ int nr_objs;
+ void **array;
+} __attribute__((aligned(CPU_L1_SIZE)));
+
+/*
+ * When a cache is created, its CPU pool type is determined from the buffer
+ * size. For small buffer sizes, many objects can be cached in a CPU pool.
+ * Conversely, for large buffer sizes, this would incur much overhead, so only
+ * a few objects are stored in a CPU pool.
+ */
+struct kmem_cpu_pool_type {
+ size_t buf_size;
+ int array_size;
+ size_t array_align;
+ struct kmem_cache *array_cache;
+};
+#endif /* SLAB_USE_CPU_POOLS */
+
+/*
+ * Buffer descriptor.
+ *
+ * For normal caches (i.e. without KMEM_CF_VERIFY), bufctls are located at the
+ * end of (but inside) each buffer. If KMEM_CF_VERIFY is set, bufctls are
+ * located after each buffer.
+ *
+ * When an object is allocated to a client, its bufctl isn't used. This memory
+ * is instead used for redzoning if cache debugging is in effect.
+ */
+union kmem_bufctl {
+ union kmem_bufctl *next;
+ unsigned long redzone;
+};
+
+/*
+ * Buffer tag.
+ *
+ * This structure is only used for KMEM_CF_VERIFY caches. It is located after
+ * the bufctl and includes information about the state of the buffer it
+ * describes (allocated or not). It should be thought of as a debugging
+ * extension of the bufctl.
+ */
+struct kmem_buftag {
+ unsigned long state;
+};
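+
+/*
+ * Editor's sketch of a buffer in a verified cache, as laid out by
+ * kmem_cache_init() (sizes not to scale):
+ *
+ *   buf        buf + obj_size   buf + bufctl_dist    buf + buftag_dist
+ *   |          |                |                    |
+ *   v          v                v                    v
+ *   [ object ][ redzone pad   ][ union kmem_bufctl ][ struct kmem_buftag ]
+ *
+ * In non-verified caches the bufctl instead overlays the last bytes of the
+ * buffer and no buftag is allocated.
+ */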
+
+/*
+ * Page-aligned collection of unconstructed buffers.
+ */
+struct kmem_slab {
+ struct list list_node;
+ struct rbtree_node tree_node;
+ unsigned long nr_refs;
+ union kmem_bufctl *first_free;
+ void *addr;
+};
+
+/*
+ * Type for constructor functions.
+ *
+ * The pre-constructed state of an object is supposed to include only
+ * elements such as linked lists, locks and reference counters. Therefore
+ * constructors are expected to 1) never fail and 2) not need any
+ * user-provided data. The first constraint implies that object construction
+ * never performs dynamic resource allocation, which also means there is no
+ * need for destructors.
+ */
+typedef void (*kmem_cache_ctor_t)(void *obj);
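+
+/*
+ * Editor's illustration of a valid constructor (struct foo and its fields
+ * are hypothetical); it only sets up invariant, allocation-free state:
+ *
+ *   static void foo_ctor(void *obj)
+ *   {
+ *       struct foo *foo = obj;
+ *
+ *       simple_lock_init(&foo->lock);
+ *       list_init(&foo->waiters);
+ *       foo->nr_refs = 0;
+ *   }
+ */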
+
+/*
+ * Types for slab allocation/free functions.
+ *
+ * All addresses and sizes must be page-aligned.
+ */
+typedef vm_offset_t (*kmem_slab_alloc_fn_t)(vm_size_t);
+typedef void (*kmem_slab_free_fn_t)(vm_offset_t, vm_size_t);
+
+/*
+ * Cache name buffer size.
+ */
+#define KMEM_CACHE_NAME_SIZE 32
+
+/*
+ * Cache of objects.
+ *
+ * Locking order: cpu_pool -> cache. CPU pool locking is ordered by CPU ID.
+ *
+ * The partial slabs list is sorted by slab references. Slabs with a high
+ * number of references are placed first on the list to reduce fragmentation.
+ * Sorting occurs at insertion/removal of buffers in a slab. As the list
+ * is maintained sorted, and the number of references only changes by one,
+ * this is a very cheap operation in the average case and the worst (linear)
+ * case is very unlikely.
+ */
+struct kmem_cache {
+#if SLAB_USE_CPU_POOLS
+ /* CPU pool layer */
+ struct kmem_cpu_pool cpu_pools[NCPUS];
+ struct kmem_cpu_pool_type *cpu_pool_type;
+#endif /* SLAB_USE_CPU_POOLS */
+
+ /* Slab layer */
+ simple_lock_data_t lock;
+ struct list node; /* Cache list linkage */
+ struct list partial_slabs;
+ struct list free_slabs;
+ struct rbtree active_slabs;
+ int flags;
+ size_t obj_size; /* User-provided size */
+ size_t align;
+ size_t buf_size; /* Aligned object size */
+ size_t bufctl_dist; /* Distance from buffer to bufctl */
+ size_t slab_size;
+ size_t color;
+ size_t color_max;
+ unsigned long bufs_per_slab;
+ unsigned long nr_objs; /* Number of allocated objects */
+ unsigned long nr_bufs; /* Total number of buffers */
+ unsigned long nr_slabs;
+ unsigned long nr_free_slabs;
+ kmem_cache_ctor_t ctor;
+ kmem_slab_alloc_fn_t slab_alloc_fn;
+ kmem_slab_free_fn_t slab_free_fn;
+ char name[KMEM_CACHE_NAME_SIZE];
+ size_t buftag_dist; /* Distance from buffer to buftag */
+ size_t redzone_pad; /* Bytes from end of object to redzone word */
+};
+
+/*
+ * Mach-style declarations for struct kmem_cache.
+ */
+typedef struct kmem_cache *kmem_cache_t;
+#define KMEM_CACHE_NULL ((kmem_cache_t) 0)
+
+/*
+ * VM submap for slab allocations.
+ */
+extern vm_map_t kmem_map;
+
+/*
+ * Cache initialization flags.
+ */
+#define KMEM_CACHE_NOCPUPOOL 0x1 /* Don't use the per-cpu pools */
+#define KMEM_CACHE_NOOFFSLAB 0x2 /* Don't allocate external slab data */
+#define KMEM_CACHE_NORECLAIM 0x4 /* Never give slabs back to their source,
+ implies KMEM_CACHE_NOOFFSLAB */
+#define KMEM_CACHE_VERIFY 0x8 /* Use debugging facilities */
+
+/*
+ * Initialize a cache.
+ */
+void kmem_cache_init(struct kmem_cache *cache, const char *name,
+ size_t obj_size, size_t align, kmem_cache_ctor_t ctor,
+ kmem_slab_alloc_fn_t slab_alloc_fn,
+ kmem_slab_free_fn_t slab_free_fn, int flags);
+
+/*
+ * Allocate an object from a cache.
+ */
+vm_offset_t kmem_cache_alloc(struct kmem_cache *cache);
+
+/*
+ * Release an object to its cache.
+ */
+void kmem_cache_free(struct kmem_cache *cache, vm_offset_t obj);
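+
+/*
+ * Editor's usage sketch (struct foo, foo_cache and foo_ctor are
+ * hypothetical). Passing NULL allocation functions lets slabs come from the
+ * allocator's default page source, as the task and thread caches converted
+ * by this patch do:
+ *
+ *   static struct kmem_cache foo_cache;
+ *
+ *   kmem_cache_init(&foo_cache, "foo", sizeof(struct foo), 0,
+ *                   foo_ctor, NULL, NULL, 0);
+ *
+ *   struct foo *foo = (struct foo *)kmem_cache_alloc(&foo_cache);
+ *   ...
+ *   kmem_cache_free(&foo_cache, (vm_offset_t)foo);
+ */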
+
+/*
+ * Initialize the memory allocator module.
+ */
+void slab_bootstrap(void);
+void slab_init(void);
+
+/*
+ * Release free slabs to the VM system.
+ */
+void slab_collect(void);
+
+#endif /* _KERN_SLAB_H */
diff --git a/kern/startup.c b/kern/startup.c
index a4b5a6f..3bdda16 100644
--- a/kern/startup.c
+++ b/kern/startup.c
@@ -48,7 +48,6 @@
#include <kern/timer.h>
#include <kern/xpr.h>
#include <kern/time_stamp.h>
-#include <kern/zalloc.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
diff --git a/kern/task.c b/kern/task.c
index 88da16e..2624dd9 100644
--- a/kern/task.c
+++ b/kern/task.c
@@ -40,10 +40,9 @@
#include <ipc/ipc_space.h>
#include <ipc/ipc_types.h>
#include <kern/debug.h>
-#include <kern/mach_param.h>
#include <kern/task.h>
#include <kern/thread.h>
-#include <kern/zalloc.h>
+#include <kern/slab.h>
#include <kern/kalloc.h>
#include <kern/processor.h>
#include <kern/sched_prim.h> /* for thread_wakeup */
@@ -52,7 +51,7 @@
#include <machine/machspl.h> /* for splsched */
task_t kernel_task = TASK_NULL;
-zone_t task_zone;
+struct kmem_cache task_cache;
extern void eml_init(void);
extern void eml_task_reference(task_t, task_t);
@@ -60,11 +59,8 @@ extern void eml_task_deallocate(task_t);
void task_init(void)
{
- task_zone = zinit(
- sizeof(struct task), 0,
- TASK_MAX * sizeof(struct task),
- TASK_CHUNK * sizeof(struct task),
- 0, "tasks");
+ kmem_cache_init(&task_cache, "task", sizeof(struct task), 0,
+ NULL, NULL, NULL, 0);
eml_init();
machine_task_module_init ();
@@ -77,38 +73,6 @@ void task_init(void)
(void) task_create(TASK_NULL, FALSE, &kernel_task);
}
-/*
- * Create a task running in the kernel address space. It may
- * have its own map of size mem_size (if 0, it uses the kernel map),
- * and may have ipc privileges.
- */
-task_t kernel_task_create(
- task_t parent_task,
- vm_size_t map_size)
-{
- task_t new_task;
- vm_offset_t min, max;
-
- /*
- * Create the task.
- */
- (void) task_create(parent_task, FALSE, &new_task);
-
- /*
- * Task_create creates the task with a user-space map.
- * Remove the map and replace it with the kernel map
- * or a submap of the kernel map.
- */
- vm_map_deallocate(new_task->map);
- if (map_size == 0)
- new_task->map = kernel_map;
- else
- new_task->map = kmem_suballoc(kernel_map, &min, &max,
- map_size, FALSE);
-
- return new_task;
-}
-
kern_return_t task_create(
task_t parent_task,
boolean_t inherit_memory,
@@ -120,7 +84,7 @@ kern_return_t task_create(
int i;
#endif
- new_task = (task_t) zalloc(task_zone);
+ new_task = (task_t) kmem_cache_alloc(&task_cache);
if (new_task == TASK_NULL) {
panic("task_create: no memory for task structure");
}
@@ -235,7 +199,7 @@ void task_deallocate(
pset_deallocate(pset);
vm_map_deallocate(task->map);
is_release(task->itk_space);
- zfree(task_zone, (vm_offset_t) task);
+ kmem_cache_free(&task_cache, (vm_offset_t) task);
}
void task_reference(
diff --git a/kern/task.h b/kern/task.h
index 9d90243..c425158 100644
--- a/kern/task.h
+++ b/kern/task.h
@@ -162,8 +162,6 @@ extern kern_return_t task_hold(task_t);
extern kern_return_t task_dowait(task_t, boolean_t);
extern kern_return_t task_release(task_t);
-extern task_t kernel_task_create(task_t, vm_size_t);
-
extern task_t kernel_task;
#endif /* _KERN_TASK_H_ */
diff --git a/kern/thread.c b/kern/thread.c
index bf2df94..74e1c4b 100644
--- a/kern/thread.c
+++ b/kern/thread.c
@@ -45,7 +45,6 @@
#include <kern/eventcount.h>
#include <kern/ipc_mig.h>
#include <kern/ipc_tt.h>
-#include <kern/mach_param.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
@@ -54,7 +53,8 @@
#include <kern/thread.h>
#include <kern/thread_swap.h>
#include <kern/host.h>
-#include <kern/zalloc.h>
+#include <kern/kalloc.h>
+#include <kern/slab.h>
#include <kern/mach_clock.h>
#include <vm/vm_kern.h>
#include <ipc/ipc_kmsg.h>
@@ -67,7 +67,7 @@
thread_t active_threads[NCPUS];
vm_offset_t active_stacks[NCPUS];
-struct zone *thread_zone;
+struct kmem_cache thread_cache;
queue_head_t reaper_queue;
decl_simple_lock_data(, reaper_lock)
@@ -300,11 +300,8 @@ void stack_privilege(
void thread_init(void)
{
- thread_zone = zinit(
- sizeof(struct thread), 0,
- THREAD_MAX * sizeof(struct thread),
- THREAD_CHUNK * sizeof(struct thread),
- 0, "threads");
+ kmem_cache_init(&thread_cache, "thread", sizeof(struct thread), 0,
+ NULL, NULL, NULL, 0);
/*
* Fill in a template thread for fast initialization.
@@ -414,7 +411,7 @@ kern_return_t thread_create(
* Allocate a thread and initialize static fields
*/
- new_thread = (thread_t) zalloc(thread_zone);
+ new_thread = (thread_t) kmem_cache_alloc(&thread_cache);
if (new_thread == THREAD_NULL)
return KERN_RESOURCE_SHORTAGE;
@@ -710,7 +707,7 @@ void thread_deallocate(
evc_notify_abort(thread);
pcb_terminate(thread);
- zfree(thread_zone, (vm_offset_t) thread);
+ kmem_cache_free(&thread_cache, (vm_offset_t) thread);
}
void thread_reference(
diff --git a/kern/zalloc.c b/kern/zalloc.c
deleted file mode 100644
index 43836a6..0000000
--- a/kern/zalloc.c
+++ /dev/null
@@ -1,1007 +0,0 @@
-/*
- * Mach Operating System
- * Copyright (c) 1993-1987 Carnegie Mellon University.
- * Copyright (c) 1993,1994 The University of Utah and
- * the Computer Systems Laboratory (CSL).
- * All rights reserved.
- *
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
- * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
- * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
- * THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- * File: kern/zalloc.c
- * Author: Avadis Tevanian, Jr.
- *
- * Zone-based memory allocator. A zone is a collection of fixed size
- * data blocks for which quick allocation/deallocation is possible.
- */
-
-#include <string.h>
-
-#include <kern/debug.h>
-#include <kern/macro_help.h>
-#include <kern/printf.h>
-#include <kern/mach_clock.h>
-#include <kern/sched.h>
-#include <kern/zalloc.h>
-#include <mach/vm_param.h>
-#include <vm/vm_kern.h>
-#include <machine/machspl.h>
-
-#if MACH_DEBUG
-#include <mach/kern_return.h>
-#include <mach/machine/vm_types.h>
-#include <mach_debug/zone_info.h>
-#include <kern/host.h>
-#include <vm/vm_map.h>
-#include <vm/vm_user.h>
-#include <vm/vm_kern.h>
-#endif
-
-#define ADD_TO_ZONE(zone, element) \
-MACRO_BEGIN \
- *((vm_offset_t *)(element)) = (zone)->free_elements; \
- (zone)->free_elements = (vm_offset_t) (element); \
- zone_count_down(zone); \
-MACRO_END
-
-#define REMOVE_FROM_ZONE(zone, ret, type) \
-MACRO_BEGIN \
- (ret) = (type) (zone)->free_elements; \
- if ((ret) != (type) 0) { \
- zone_count_up(zone); \
- (zone)->free_elements = *((vm_offset_t *)(ret)); \
- } \
-MACRO_END
-
-#define ALIGN_SIZE_UP(size, align) \
-((size) = (((size) + ((align) - 1)) & ~((align) - 1)))
-
-/*
- * Support for garbage collection of unused zone pages:
- */
-
-struct zone_page_table_entry {
- struct zone_page_table_entry *next;
- short in_free_list;
- short alloc_count;
-};
-
-extern struct zone_page_table_entry * zone_page_table;
-extern vm_offset_t zone_map_min_address;
-
-#define lock_zone_page_table() simple_lock(&zone_page_table_lock)
-#define unlock_zone_page_table() simple_unlock(&zone_page_table_lock)
-
-#define zone_page(addr) \
- (&(zone_page_table[(atop(((vm_offset_t)addr) - zone_map_min_address))]))
-
-
-extern void zone_page_alloc(vm_offset_t, vm_size_t);
-extern void zone_page_dealloc(vm_offset_t, vm_size_t);
-extern void zone_page_in_use(vm_offset_t, vm_size_t);
-extern void zone_page_free(vm_offset_t, vm_size_t);
-
-zone_t zone_zone; /* this is the zone containing other zones */
-
-boolean_t zone_ignore_overflow = TRUE;
-
-vm_map_t zone_map = VM_MAP_NULL;
-vm_size_t zone_map_size = 64 * 1024 * 1024;
-
-/*
- * The VM system gives us an initial chunk of memory.
- * It has to be big enough to allocate the zone_zone
- * and some initial kernel data structures, like kernel maps.
- * It is advantageous to make it bigger than really necessary,
- * because this memory is more efficient than normal kernel
- * virtual memory. (It doesn't have vm_page structures backing it
- * and it may have other machine-dependent advantages.)
- * So for best performance, zdata_size should approximate
- * the amount of memory you expect the zone system to consume.
- */
-
-vm_offset_t zdata;
-vm_size_t zdata_size = 420 * 1024;
-
-#define zone_lock(zone) \
-MACRO_BEGIN \
- if (zone->type & ZONE_PAGEABLE) { \
- lock_write(&zone->complex_lock); \
- } else { \
- simple_lock(&zone->lock); \
- } \
-MACRO_END
-
-#define zone_unlock(zone) \
-MACRO_BEGIN \
- if (zone->type & ZONE_PAGEABLE) { \
- lock_done(&zone->complex_lock); \
- } else { \
- simple_unlock(&zone->lock); \
- } \
-MACRO_END
-
-#define zone_lock_init(zone) \
-MACRO_BEGIN \
- if (zone->type & ZONE_PAGEABLE) { \
- lock_init(&zone->complex_lock, TRUE); \
- } else { \
- simple_lock_init(&zone->lock); \
- } \
-MACRO_END
-
-static vm_offset_t zget_space(vm_offset_t size, vm_size_t align);
-
-decl_simple_lock_data(,zget_space_lock)
-vm_offset_t zalloc_next_space;
-vm_offset_t zalloc_end_of_space;
-vm_size_t zalloc_wasted_space;
-
-/*
- * Garbage collection map information
- */
-decl_simple_lock_data(,zone_page_table_lock)
-struct zone_page_table_entry * zone_page_table;
-vm_offset_t zone_map_min_address;
-vm_offset_t zone_map_max_address;
-int zone_pages;
-
-extern void zone_page_init(vm_offset_t, vm_size_t, int);
-
-#define ZONE_PAGE_USED 0
-#define ZONE_PAGE_UNUSED -1
-
-
-/*
- * Protects first_zone, last_zone, num_zones,
- * and the next_zone field of zones.
- */
-decl_simple_lock_data(,all_zones_lock)
-zone_t first_zone;
-zone_t *last_zone;
-int num_zones;
-
-/*
- * zinit initializes a new zone. The zone data structures themselves
- * are stored in a zone, which is initially a static structure that
- * is initialized by zone_init.
- */
-zone_t zinit(size, align, max, alloc, memtype, name)
- vm_size_t size; /* the size of an element */
- vm_size_t align; /* alignment of elements */
- vm_size_t max; /* maximum memory to use */
- vm_size_t alloc; /* allocation size */
- unsigned int memtype; /* flags specifying type of memory */
- char *name; /* a name for the zone */
-{
- register zone_t z;
-
- if (zone_zone == ZONE_NULL)
- z = (zone_t) zget_space(sizeof(struct zone), 0);
- else
- z = (zone_t) zalloc(zone_zone);
- if (z == ZONE_NULL)
- panic("zinit");
- if (alloc == 0)
- alloc = PAGE_SIZE;
-
- if (size == 0)
- size = sizeof(z->free_elements);
- /*
- * Round off all the parameters appropriately.
- */
-
- if ((max = round_page(max)) < (alloc = round_page(alloc)))
- max = alloc;
-
- if (align > 0) {
- if (PAGE_SIZE % align || align % sizeof(z->free_elements))
- panic("zinit");
- ALIGN_SIZE_UP(size, align);
- }
-
- z->free_elements = 0;
- z->cur_size = 0;
- z->max_size = max;
- z->elem_size = ((size-1) + sizeof(z->free_elements)) -
- ((size-1) % sizeof(z->free_elements));
- z->align = align;
-
- z->alloc_size = alloc;
- z->type = memtype;
- z->zone_name = name;
-#ifdef ZONE_COUNT
- z->count = 0;
-#endif
- z->doing_alloc = FALSE;
- zone_lock_init(z);
-
- /*
- * Add the zone to the all-zones list.
- */
-
- z->next_zone = ZONE_NULL;
- simple_lock(&all_zones_lock);
- *last_zone = z;
- last_zone = &z->next_zone;
- num_zones++;
- simple_unlock(&all_zones_lock);
-
- return(z);
-}
-
-/*
- * Cram the given memory into the specified zone.
- */
-void zcram(zone_t zone, vm_offset_t newmem, vm_size_t size)
-{
- register vm_size_t elem_size;
-
- if (newmem == (vm_offset_t) 0) {
- panic("zcram - memory at zero");
- }
- elem_size = zone->elem_size;
-
- zone_lock(zone);
- while (size >= elem_size) {
- ADD_TO_ZONE(zone, newmem);
- zone_page_alloc(newmem, elem_size);
- zone_count_up(zone); /* compensate for ADD_TO_ZONE */
- size -= elem_size;
- newmem += elem_size;
- zone->cur_size += elem_size;
- }
- zone_unlock(zone);
-}
-
-/*
- * Contiguous space allocator for non-paged zones. Allocates "size" amount
- * of memory from zone_map.
- */
-
-static vm_offset_t zget_space(vm_offset_t size, vm_size_t align)
-{
- vm_offset_t new_space = 0;
- vm_offset_t result;
- vm_size_t space_to_add = 0; /*'=0' to quiet gcc warnings */
-
- simple_lock(&zget_space_lock);
- if (align > 0) {
- assert(align < PAGE_SIZE);
- ALIGN_SIZE_UP(zalloc_next_space, align);
- }
-
- while ((zalloc_next_space + size) > zalloc_end_of_space) {
- /*
- * Add at least one page to allocation area.
- */
-
- space_to_add = round_page(size);
-
- if (new_space == 0) {
- /*
- * Memory cannot be wired down while holding
- * any locks that the pageout daemon might
- * need to free up pages. [Making the zget_space
- * lock a complex lock does not help in this
- * regard.]
- *
- * Unlock and allocate memory. Because several
- * threads might try to do this at once, don't
- * use the memory before checking for available
- * space again.
- */
-
- simple_unlock(&zget_space_lock);
-
- if (kmem_alloc_wired(zone_map,
- &new_space, space_to_add)
- != KERN_SUCCESS)
- return(0);
- zone_page_init(new_space, space_to_add,
- ZONE_PAGE_USED);
- simple_lock(&zget_space_lock);
- if (align > 0)
- ALIGN_SIZE_UP(zalloc_next_space, align);
- continue;
- }
-
-
- /*
- * Memory was allocated in a previous iteration.
- *
- * Check whether the new region is contiguous
- * with the old one.
- */
-
- if (new_space != zalloc_end_of_space) {
- /*
- * Throw away the remainder of the
- * old space, and start a new one.
- */
- zalloc_wasted_space +=
- zalloc_end_of_space - zalloc_next_space;
- zalloc_next_space = new_space;
- }
-
- zalloc_end_of_space = new_space + space_to_add;
-
- new_space = 0;
- }
- result = zalloc_next_space;
- zalloc_next_space += size;
- simple_unlock(&zget_space_lock);
-
- if (new_space != 0)
- kmem_free(zone_map, new_space, space_to_add);
-
- return(result);
-}
-
-
-/*
- * Initialize the "zone of zones" which uses fixed memory allocated
- * earlier in memory initialization. zone_bootstrap is called
- * before zone_init.
- */
-void zone_bootstrap(void)
-{
- simple_lock_init(&all_zones_lock);
- first_zone = ZONE_NULL;
- last_zone = &first_zone;
- num_zones = 0;
-
- simple_lock_init(&zget_space_lock);
- zalloc_next_space = zdata;
- zalloc_end_of_space = zdata + zdata_size;
- zalloc_wasted_space = 0;
-
- zone_zone = ZONE_NULL;
- zone_zone = zinit(sizeof(struct zone), 0, 128 * sizeof(struct zone),
- sizeof(struct zone), 0, "zones");
-}
-
-void zone_init(void)
-{
- vm_offset_t zone_min;
- vm_offset_t zone_max;
-
- vm_size_t zone_table_size;
-
- zone_map = kmem_suballoc(kernel_map, &zone_min, &zone_max,
- zone_map_size, FALSE);
-
- /*
- * Setup garbage collection information:
- */
-
- zone_table_size = atop(zone_max - zone_min) *
- sizeof(struct zone_page_table_entry);
- if (kmem_alloc_wired(zone_map, (vm_offset_t *) &zone_page_table,
- zone_table_size) != KERN_SUCCESS)
- panic("zone_init");
- zone_min = (vm_offset_t)zone_page_table + round_page(zone_table_size);
- zone_pages = atop(zone_max - zone_min);
- zone_map_min_address = zone_min;
- zone_map_max_address = zone_max;
- simple_lock_init(&zone_page_table_lock);
- zone_page_init(zone_min, zone_max - zone_min, ZONE_PAGE_UNUSED);
-}
-
-
-/*
- * zalloc returns an element from the specified zone.
- */
-vm_offset_t zalloc(zone_t zone)
-{
- vm_offset_t addr;
-
- if (zone == ZONE_NULL)
- panic ("zalloc: null zone");
-
- check_simple_locks();
-
- zone_lock(zone);
- REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
- while (addr == 0) {
- /*
- * If nothing was there, try to get more
- */
- if (zone->doing_alloc) {
- /*
- * Someone is allocating memory for this zone.
- * Wait for it to show up, then try again.
- */
- assert_wait((event_t)&zone->doing_alloc, TRUE);
- /* XXX say wakeup needed */
- zone_unlock(zone);
- thread_block((void (*)()) 0);
- zone_lock(zone);
- }
- else {
- if ((zone->cur_size + (zone->type & ZONE_PAGEABLE ?
- zone->alloc_size : zone->elem_size)) >
- zone->max_size) {
- if (zone->type & ZONE_EXHAUSTIBLE)
- break;
- /*
- * Printf calls logwakeup, which calls
- * select_wakeup which will do a zfree
- * (which tries to take the select_zone
- * lock... Hang. Release the lock now
- * so it can be taken again later.
- * NOTE: this used to be specific to
- * the select_zone, but for
- * cleanliness, we just unlock all
- * zones before this.
- */
- if (!(zone->type & ZONE_FIXED)) {
- /*
- * We're willing to overflow certain
- * zones, but not without complaining.
- *
- * This is best used in conjunction
- * with the collecatable flag. What we
- * want is an assurance we can get the
- * memory back, assuming there's no
- * leak.
- */
- zone->max_size += (zone->max_size >> 1);
- } else if (!zone_ignore_overflow) {
- zone_unlock(zone);
- printf("zone \"%s\" empty.\n",
- zone->zone_name);
- panic("zalloc: zone %s exhausted",
- zone->zone_name);
- }
- }
-
- if (zone->type & ZONE_PAGEABLE)
- zone->doing_alloc = TRUE;
- zone_unlock(zone);
-
- if (zone->type & ZONE_PAGEABLE) {
- if (kmem_alloc_pageable(zone_map, &addr,
- zone->alloc_size)
- != KERN_SUCCESS)
- panic("zalloc: no pageable memory for zone %s",
- zone->zone_name);
- zcram(zone, addr, zone->alloc_size);
- zone_lock(zone);
- zone->doing_alloc = FALSE;
- /* XXX check before doing this */
- thread_wakeup((event_t)&zone->doing_alloc);
-
- REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
- } else if (zone->type & ZONE_COLLECTABLE) {
- if (kmem_alloc_wired(zone_map,
- &addr, zone->alloc_size)
- != KERN_SUCCESS)
- panic("zalloc: no wired memory for zone %s",
- zone->zone_name);
- zone_page_init(addr, zone->alloc_size,
- ZONE_PAGE_USED);
- zcram(zone, addr, zone->alloc_size);
- zone_lock(zone);
- REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
- } else {
- addr = zget_space(zone->elem_size, zone->align);
- if (addr == 0)
- panic("zalloc: no memory for zone %s",
- zone->zone_name);
-
- zone_lock(zone);
- zone_count_up(zone);
- zone->cur_size += zone->elem_size;
- zone_unlock(zone);
- zone_page_alloc(addr, zone->elem_size);
- return(addr);
- }
- }
- }
-
- zone_unlock(zone);
- return(addr);
-}
-
-
-/*
- * zget returns an element from the specified zone
- * and immediately returns nothing if there is nothing there.
- *
- * This form should be used when you can not block (like when
- * processing an interrupt).
- */
-vm_offset_t zget(zone_t zone)
-{
- register vm_offset_t addr;
-
- if (zone == ZONE_NULL)
- panic ("zalloc: null zone");
-
- zone_lock(zone);
- REMOVE_FROM_ZONE(zone, addr, vm_offset_t);
- zone_unlock(zone);
-
- return(addr);
-}
-
-boolean_t zone_check = FALSE;
-
-void zfree(zone_t zone, vm_offset_t elem)
-{
- zone_lock(zone);
- if (zone_check) {
- vm_offset_t this;
-
- /* check the zone's consistency */
-
- for (this = zone->free_elements;
- this != 0;
- this = * (vm_offset_t *) this)
- if (this == elem)
- panic("zfree");
- }
- ADD_TO_ZONE(zone, elem);
- zone_unlock(zone);
-}
-
-/*
- * Zone garbage collection subroutines
- *
- * These routines have in common the modification of entries in the
- * zone_page_table. The latter contains one entry for every page
- * in the zone_map.
- *
- * For each page table entry in the given range:
- *
- * zone_page_in_use - decrements in_free_list
- * zone_page_free - increments in_free_list
- * zone_page_init - initializes in_free_list and alloc_count
- * zone_page_alloc - increments alloc_count
- * zone_page_dealloc - decrements alloc_count
- * zone_add_free_page_list - adds the page to the free list
- *
- * Two counts are maintained for each page, the in_free_list count and
- * alloc_count. The alloc_count is how many zone elements have been
- * allocated from a page. (Note that the page could contain elements
- * that span page boundaries. The count includes these elements so
- * one element may be counted in two pages.) In_free_list is a count
- * of how many zone elements are currently free. If in_free_list is
- * equal to alloc_count then the page is eligible for garbage
- * collection.
- *
- * Alloc_count and in_free_list are initialized to the correct values
- * for a particular zone when a page is zcram'ed into a zone. Subsequent
- * gets and frees of zone elements will call zone_page_in_use and
- * zone_page_free which modify the in_free_list count. When the zones
- * garbage collector runs it will walk through a zones free element list,
- * remove the elements that reside on collectable pages, and use
- * zone_add_free_page_list to create a list of pages to be collected.
- */
-
-void zone_page_in_use(addr, size)
-vm_offset_t addr;
-vm_size_t size;
-{
- int i, j;
- if ((addr < zone_map_min_address) ||
- (addr+size > zone_map_max_address)) return;
- i = atop(addr-zone_map_min_address);
- j = atop((addr+size-1) - zone_map_min_address);
- lock_zone_page_table();
- for (; i <= j; i++) {
- zone_page_table[i].in_free_list--;
- }
- unlock_zone_page_table();
-}
-
-void zone_page_free(addr, size)
-vm_offset_t addr;
-vm_size_t size;
-{
- int i, j;
- if ((addr < zone_map_min_address) ||
- (addr+size > zone_map_max_address)) return;
- i = atop(addr-zone_map_min_address);
- j = atop((addr+size-1) - zone_map_min_address);
- lock_zone_page_table();
- for (; i <= j; i++) {
- /* Set in_free_list to (ZONE_PAGE_USED + 1) if
- * it was previously set to ZONE_PAGE_UNUSED.
- */
- if (zone_page_table[i].in_free_list == ZONE_PAGE_UNUSED) {
- zone_page_table[i].in_free_list = 1;
- } else {
- zone_page_table[i].in_free_list++;
- }
- }
- unlock_zone_page_table();
-}
-
-void zone_page_init(addr, size, value)
-
-vm_offset_t addr;
-vm_size_t size;
-int value;
-{
- int i, j;
- if ((addr < zone_map_min_address) ||
- (addr+size > zone_map_max_address)) return;
- i = atop(addr-zone_map_min_address);
- j = atop((addr+size-1) - zone_map_min_address);
- lock_zone_page_table();
- for (; i <= j; i++) {
- zone_page_table[i].alloc_count = value;
- zone_page_table[i].in_free_list = 0;
- }
- unlock_zone_page_table();
-}
-
-void zone_page_alloc(addr, size)
-vm_offset_t addr;
-vm_size_t size;
-{
- int i, j;
- if ((addr < zone_map_min_address) ||
- (addr+size > zone_map_max_address)) return;
- i = atop(addr-zone_map_min_address);
- j = atop((addr+size-1) - zone_map_min_address);
- lock_zone_page_table();
- for (; i <= j; i++) {
- /* Set alloc_count to (ZONE_PAGE_USED + 1) if
- * it was previously set to ZONE_PAGE_UNUSED.
- */
- if (zone_page_table[i].alloc_count == ZONE_PAGE_UNUSED) {
- zone_page_table[i].alloc_count = 1;
- } else {
- zone_page_table[i].alloc_count++;
- }
- }
- unlock_zone_page_table();
-}
-
-void zone_page_dealloc(addr, size)
-vm_offset_t addr;
-vm_size_t size;
-{
- int i, j;
- if ((addr < zone_map_min_address) ||
- (addr+size > zone_map_max_address)) return;
- i = atop(addr-zone_map_min_address);
- j = atop((addr+size-1) - zone_map_min_address);
- lock_zone_page_table();
- for (; i <= j; i++) {
- zone_page_table[i].alloc_count--;
- }
- unlock_zone_page_table();
-}
-
-void
-zone_add_free_page_list(free_list, addr, size)
- struct zone_page_table_entry **free_list;
- vm_offset_t addr;
- vm_size_t size;
-{
- int i, j;
- if ((addr < zone_map_min_address) ||
- (addr+size > zone_map_max_address)) return;
- i = atop(addr-zone_map_min_address);
- j = atop((addr+size-1) - zone_map_min_address);
- lock_zone_page_table();
- for (; i <= j; i++) {
- if (zone_page_table[i].alloc_count == 0) {
- zone_page_table[i].next = *free_list;
- *free_list = &zone_page_table[i];
- zone_page_table[i].alloc_count = ZONE_PAGE_UNUSED;
- zone_page_table[i].in_free_list = 0;
- }
- }
- unlock_zone_page_table();
-}
-
-
-/* This is used for walking through a zone's free element list.
- */
-struct zone_free_entry {
- struct zone_free_entry * next;
-};
-
-
-/* Zone garbage collection
- *
- * zone_gc will walk through all the free elements in all the
- * zones that are marked collectable looking for reclaimable
- * pages. zone_gc is called by consider_zone_gc when the system
- * begins to run out of memory.
- */
-static void zone_gc(void)
-{
- int max_zones;
- zone_t z;
- int i;
- register spl_t s;
- struct zone_page_table_entry *freep;
- struct zone_page_table_entry *zone_free_page_list;
-
- simple_lock(&all_zones_lock);
- max_zones = num_zones;
- z = first_zone;
- simple_unlock(&all_zones_lock);
-
- zone_free_page_list = (struct zone_page_table_entry *) 0;
-
- for (i = 0; i < max_zones; i++) {
- struct zone_free_entry * last;
- struct zone_free_entry * elt;
- assert(z != ZONE_NULL);
- /* run this at splhigh so that interupt routines that use zones
- can not interupt while their zone is locked */
- s=splhigh();
- zone_lock(z);
-
- if ((z->type & (ZONE_PAGEABLE|ZONE_COLLECTABLE)) == ZONE_COLLECTABLE) {
-
- /* Count the free elements in each page. This loop
- * requires that all in_free_list entries are zero.
- */
- elt = (struct zone_free_entry *)(z->free_elements);
- while ((elt != (struct zone_free_entry *)0)) {
- zone_page_free((vm_offset_t)elt, z->elem_size);
- elt = elt->next;
- }
-
- /* Now determine which elements should be removed
- * from the free list and, after all the elements
- * on a page have been removed, add the element's
- * page to a list of pages to be freed.
- */
- elt = (struct zone_free_entry *)(z->free_elements);
- last = elt;
- while ((elt != (struct zone_free_entry *)0)) {
- if (((vm_offset_t)elt>=zone_map_min_address)&&
- ((vm_offset_t)elt<=zone_map_max_address)&&
- (zone_page(elt)->in_free_list ==
- zone_page(elt)->alloc_count)) {
-
- z->cur_size -= z->elem_size;
- zone_page_in_use((vm_offset_t)elt, z->elem_size);
- zone_page_dealloc((vm_offset_t)elt, z->elem_size);
- if (zone_page(elt)->alloc_count == 0 ||
- zone_page(elt+(z->elem_size-1))->alloc_count==0) {
- zone_add_free_page_list(
- &zone_free_page_list,
- (vm_offset_t)elt, z->elem_size);
- }
-
-
- if (elt == last) {
- elt = elt->next;
- z->free_elements =(vm_offset_t)elt;
- last = elt;
- } else {
- last->next = elt->next;
- elt = elt->next;
- }
- } else {
- /* This element is not eligible for collection
- * so clear in_free_list in preparation for a
- * subsequent garbage collection pass.
- */
- if (((vm_offset_t)elt>=zone_map_min_address)&&
- ((vm_offset_t)elt<=zone_map_max_address)) {
- zone_page(elt)->in_free_list = 0;
- }
- last = elt;
- elt = elt->next;
- }
- }
- }
- zone_unlock(z);
- splx(s);
- simple_lock(&all_zones_lock);
- z = z->next_zone;
- simple_unlock(&all_zones_lock);
- }
-
- for (freep = zone_free_page_list; freep != 0; freep = freep->next) {
- vm_offset_t free_addr;
-
- free_addr = zone_map_min_address +
- PAGE_SIZE * (freep - zone_page_table);
-
- /* Hack Hack */
- /* Needed to make vm_map_delete's vm_map_clip_end always be
- * able to get an element without having to call zget_space and
- * hang because zone_map is already locked by vm_map_delete */
-
- extern zone_t vm_map_kentry_zone; /* zone for kernel entry structures */
- vm_offset_t entry1 = zalloc(vm_map_kentry_zone),
- entry2 = zalloc(vm_map_kentry_zone);
- zfree(vm_map_kentry_zone, entry1);
- zfree(vm_map_kentry_zone, entry2);
-
- kmem_free(zone_map, free_addr, PAGE_SIZE);
- }
-}
-
-boolean_t zone_gc_allowed = TRUE;
-unsigned zone_gc_last_tick = 0;
-unsigned zone_gc_max_rate = 0; /* in ticks */
-
-/*
- * consider_zone_gc:
- *
- * Called by the pageout daemon when the system needs more free pages.
- */
-
-void
-consider_zone_gc(void)
-{
- /*
- * By default, don't attempt zone GC more frequently
- * than once a second.
- */
-
- if (zone_gc_max_rate == 0)
- zone_gc_max_rate = hz;
-
- if (zone_gc_allowed &&
- (sched_tick > (zone_gc_last_tick + zone_gc_max_rate))) {
- zone_gc_last_tick = sched_tick;
- zone_gc();
- }
-}
-
-#if MACH_DEBUG
-kern_return_t host_zone_info(host, namesp, namesCntp, infop, infoCntp)
- host_t host;
- zone_name_array_t *namesp;
- unsigned int *namesCntp;
- zone_info_array_t *infop;
- unsigned int *infoCntp;
-{
- zone_name_t *names;
- vm_offset_t names_addr;
- vm_size_t names_size = 0; /*'=0' to quiet gcc warnings */
- zone_info_t *info;
- vm_offset_t info_addr;
- vm_size_t info_size = 0; /*'=0' to quiet gcc warnings */
- unsigned int max_zones, i;
- zone_t z;
- kern_return_t kr;
-
- if (host == HOST_NULL)
- return KERN_INVALID_HOST;
-
- /*
- * We assume that zones aren't freed once allocated.
- * We won't pick up any zones that are allocated later.
- */
-
- simple_lock(&all_zones_lock);
- max_zones = num_zones;
- z = first_zone;
- simple_unlock(&all_zones_lock);
-
- if (max_zones <= *namesCntp) {
- /* use in-line memory */
-
- names = *namesp;
- } else {
- names_size = round_page(max_zones * sizeof *names);
- kr = kmem_alloc_pageable(ipc_kernel_map,
- &names_addr, names_size);
- if (kr != KERN_SUCCESS)
- return kr;
-
- names = (zone_name_t *) names_addr;
- }
-
- if (max_zones <= *infoCntp) {
- /* use in-line memory */
-
- info = *infop;
- } else {
- info_size = round_page(max_zones * sizeof *info);
- kr = kmem_alloc_pageable(ipc_kernel_map,
- &info_addr, info_size);
- if (kr != KERN_SUCCESS) {
- if (names != *namesp)
- kmem_free(ipc_kernel_map,
- names_addr, names_size);
- return kr;
- }
-
- info = (zone_info_t *) info_addr;
- }
-
- for (i = 0; i < max_zones; i++) {
- zone_name_t *zn = &names[i];
- zone_info_t *zi = &info[i];
- struct zone zcopy;
-
- assert(z != ZONE_NULL);
-
- zone_lock(z);
- zcopy = *z;
- zone_unlock(z);
-
- simple_lock(&all_zones_lock);
- z = z->next_zone;
- simple_unlock(&all_zones_lock);
-
- /* assuming here the name data is static */
- (void) strncpy(zn->zn_name, zcopy.zone_name,
- sizeof zn->zn_name);
-
-#ifdef ZONE_COUNT
- zi->zi_count = zcopy.count;
-#else
- zi->zi_count = 0;
-#endif
- zi->zi_cur_size = zcopy.cur_size;
- zi->zi_max_size = zcopy.max_size;
- zi->zi_elem_size = zcopy.elem_size;
- zi->zi_alloc_size = zcopy.alloc_size;
- zi->zi_pageable = (zcopy.type & ZONE_PAGEABLE) != 0;
- zi->zi_exhaustible = (zcopy.type & ZONE_EXHAUSTIBLE) != 0;
- zi->zi_collectable = (zcopy.type & ZONE_COLLECTABLE) != 0;
- }
-
- if (names != *namesp) {
- vm_size_t used;
- vm_map_copy_t copy;
-
- used = max_zones * sizeof *names;
-
- if (used != names_size)
- memset((char *) (names_addr + used), 0, names_size - used);
-
- kr = vm_map_copyin(ipc_kernel_map, names_addr, names_size,
- TRUE, &copy);
- assert(kr == KERN_SUCCESS);
-
- *namesp = (zone_name_t *) copy;
- }
- *namesCntp = max_zones;
-
- if (info != *infop) {
- vm_size_t used;
- vm_map_copy_t copy;
-
- used = max_zones * sizeof *info;
-
- if (used != info_size)
- memset((char *) (info_addr + used), 0, info_size - used);
-
- kr = vm_map_copyin(ipc_kernel_map, info_addr, info_size,
- TRUE, &copy);
- assert(kr == KERN_SUCCESS);
-
- *infop = (zone_info_t *) copy;
- }
- *infoCntp = max_zones;
-
- return KERN_SUCCESS;
-}
-#endif /* MACH_DEBUG */
diff --git a/kern/zalloc.h b/kern/zalloc.h
deleted file mode 100644
index f1a1850..0000000
--- a/kern/zalloc.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Mach Operating System
- * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University.
- * Copyright (c) 1993,1994 The University of Utah and
- * the Computer Systems Laboratory (CSL).
- * All rights reserved.
- *
- * Permission to use, copy, modify and distribute this software and its
- * documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
- * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
- * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
- * THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie Mellon
- * the rights to redistribute these changes.
- */
-/*
- * File: zalloc.h
- * Author: Avadis Tevanian, Jr.
- * Date: 1985
- *
- */
-
-#ifndef _KERN_ZALLOC_H_
-#define _KERN_ZALLOC_H_
-
-#include <mach/machine/vm_types.h>
-#include <kern/macro_help.h>
-#include <kern/lock.h>
-#include <kern/queue.h>
-#include <machine/zalloc.h>
-
-/*
- * A zone is a collection of fixed size blocks for which there
- * is fast allocation/deallocation access. Kernel routines can
- * use zones to manage data structures dynamically, creating a zone
- * for each type of data structure to be managed.
- *
- */
-
-struct zone {
- decl_simple_lock_data(,lock) /* generic lock */
-#ifdef ZONE_COUNT
- int count; /* Number of elements used now */
-#endif
- vm_offset_t free_elements;
- vm_size_t cur_size; /* current memory utilization */
- vm_size_t max_size; /* how large can this zone grow */
- vm_size_t elem_size; /* size of an element */
- vm_size_t align; /* alignment of elements */
- vm_size_t alloc_size; /* size used for more memory */
- boolean_t doing_alloc; /* is zone expanding now? */
- char *zone_name; /* a name for the zone */
- unsigned int type; /* type of memory */
- lock_data_t complex_lock; /* Lock for pageable zones */
- struct zone *next_zone; /* Link for all-zones list */
-};
-typedef struct zone *zone_t;
-
-#define ZONE_NULL ((zone_t) 0)
-
-/* Exported to everyone */
-zone_t zinit(vm_size_t size, vm_size_t align, vm_size_t max,
- vm_size_t alloc, unsigned int memtype, char *name);
-vm_offset_t zalloc(zone_t zone);
-vm_offset_t zget(zone_t zone);
-void zfree(zone_t zone, vm_offset_t elem);
-void zcram(zone_t zone, vm_offset_t newmem, vm_size_t size);
-
-/* Exported only to vm/vm_init.c */
-void zone_bootstrap(void);
-void zone_init(void);
-
-/* Exported only to vm/vm_pageout.c */
-void consider_zone_gc(void);
-
-
-/* Memory type bits for zones */
-#define ZONE_PAGEABLE 0x00000001
-#define ZONE_COLLECTABLE 0x00000002 /* Garbage-collect this zone when memory runs low */
-#define ZONE_EXHAUSTIBLE 0x00000004 /* zalloc() on this zone is allowed to fail */
-#define ZONE_FIXED 0x00000008 /* Panic if zone is exhausted (XXX) */
-
-/* Machine-dependent code can provide additional memory types. */
-#define ZONE_MACHINE_TYPES 0xffff0000
-
-
-#ifdef ZONE_COUNT
-#define zone_count_up(zone) ((zone)->count++)
-#define zone_count_down(zone) ((zone)->count--)
-#else
-#define zone_count_up(zone)
-#define zone_count_down(zone)
-#endif
-
-
-
-/* These quick inline versions only work for small, nonpageable zones (currently). */
-
-static __inline vm_offset_t ZALLOC(zone_t zone)
-{
- simple_lock(&zone->lock);
- if (zone->free_elements == 0) {
- simple_unlock(&zone->lock);
- return zalloc(zone);
- } else {
- vm_offset_t element = zone->free_elements;
- zone->free_elements = *((vm_offset_t *)(element));
- zone_count_up(zone);
- simple_unlock(&zone->lock);
- return element;
- }
-}
-
-static __inline void ZFREE(zone_t zone, vm_offset_t element)
-{
- *((vm_offset_t *)(element)) = zone->free_elements;
- zone->free_elements = (vm_offset_t) (element);
- zone_count_down(zone);
-}
-
-
-
-#endif /* _KERN_ZALLOC_H_ */
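The ZALLOC()/ZFREE() inlines removed above use an intrusive free list: the first word of every free element stores the address of the next free element, so a zone needs no bookkeeping beyond a head pointer. A minimal, stand-alone sketch of that encoding (hypothetical demo_* names, no locking), for readers unfamiliar with the trick:

#include <stddef.h>

/* Illustration only: the same intrusive free list the old ZALLOC()/
 * ZFREE() inlines manipulate.  Each free element's first word holds
 * the address of the next free element. */
struct demo_zone {
	void *free_elements;		/* head of the singly linked free list */
};

static void demo_zfree(struct demo_zone *z, void *elem)
{
	*(void **)elem = z->free_elements;	/* link current head into element */
	z->free_elements = elem;		/* element becomes the new head */
}

static void *demo_zalloc(struct demo_zone *z)
{
	void *elem = z->free_elements;

	if (elem != NULL)
		z->free_elements = *(void **)elem;	/* unlink the head */
	return elem;					/* NULL means "zone empty" */
}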
diff --git a/linux/dev/glue/block.c b/linux/dev/glue/block.c
index 0c76d3d..c7b3873 100644
--- a/linux/dev/glue/block.c
+++ b/linux/dev/glue/block.c
@@ -49,6 +49,8 @@
#include <mach/vm_param.h>
#include <mach/notify.h>
+#include <kern/kalloc.h>
+
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>
diff --git a/linux/dev/glue/net.c b/linux/dev/glue/net.c
index 91ebf96..a60275f 100644
--- a/linux/dev/glue/net.c
+++ b/linux/dev/glue/net.c
@@ -69,6 +69,7 @@
#include <mach/vm_param.h>
#include <mach/notify.h>
+#include <kern/kalloc.h>
#include <kern/printf.h>
#include <ipc/ipc_port.h>
diff --git a/vm/memory_object_proxy.c b/vm/memory_object_proxy.c
index fdab6e0..4fed312 100644
--- a/vm/memory_object_proxy.c
+++ b/vm/memory_object_proxy.c
@@ -41,15 +41,14 @@
#include <mach/notify.h>
#include <mach/vm_prot.h>
#include <kern/printf.h>
-#include <kern/zalloc.h>
-#include <kern/mach_param.h>
+#include <kern/slab.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>
#include <vm/memory_object_proxy.h>
-/* The zone which holds our proxy memory objects. */
-static zone_t memory_object_proxy_zone;
+/* The cache which holds our proxy memory objects. */
+static struct kmem_cache memory_object_proxy_cache;
struct memory_object_proxy
{
@@ -64,13 +63,8 @@ typedef struct memory_object_proxy *memory_object_proxy_t;
void
memory_object_proxy_init (void)
{
- /* For limit, see PORT_MAX. */
- memory_object_proxy_zone = zinit (sizeof (struct memory_object_proxy), 0,
- (TASK_MAX * 3 + THREAD_MAX)
- * sizeof (struct memory_object_proxy),
- 256 * sizeof (struct memory_object_proxy),
- ZONE_EXHAUSTIBLE,
- "proxy memory object zone");
+ kmem_cache_init (&memory_object_proxy_cache, "memory_object_proxy",
+ sizeof (struct memory_object_proxy), 0, NULL, NULL, NULL, 0);
}
/* Lookup a proxy memory object by its port. */
@@ -153,13 +147,13 @@ memory_object_create_proxy (ipc_space_t space, vm_prot_t max_protection,
if (start[0] != 0 || len[0] != (vm_offset_t) ~0)
return KERN_INVALID_ARGUMENT;
- proxy = (memory_object_proxy_t) zalloc (memory_object_proxy_zone);
+ proxy = (memory_object_proxy_t) kmem_cache_alloc (&memory_object_proxy_cache);
/* Allocate port, keeping a reference for it. */
proxy->port = ipc_port_alloc_kernel ();
if (proxy->port == IP_NULL)
{
- zfree (memory_object_proxy_zone, (vm_offset_t) proxy);
+ kmem_cache_free (&memory_object_proxy_cache, (vm_offset_t) proxy);
return KERN_RESOURCE_SHORTAGE;
}
/* Associate the port with the proxy memory object. */
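The memory_object_proxy conversion above is the simplest instance of the pattern repeated throughout this patch: a zinit()-created zone_t becomes a statically allocated struct kmem_cache initialized once with kmem_cache_init(), and zalloc()/zfree() become kmem_cache_alloc()/kmem_cache_free(). The sketch below restates that pattern with a hypothetical foo object. Judging from the call sites in this diff, the three NULL arguments to kmem_cache_init() are optional callbacks (the vm_map_kentry_cache call further down passes a custom page allocator as the second of them), but that reading is an assumption, not something documented here.

#include <mach/machine/vm_types.h>
#include <kern/slab.h>

/* Hypothetical example object, not part of the patch. */
struct foo {
	int refcount;
};

/* Old style: zone_t foo_zone = zinit(size, align, max, alloc, flags, name);
 * New style: caller-owned cache storage, initialized once at boot. */
static struct kmem_cache foo_cache;

void foo_module_init(void)
{
	/* Object size and alignment are explicit; the per-zone size
	 * limits that zinit() took are gone.  The three NULLs appear
	 * to be optional callbacks (assumption, see lead-in). */
	kmem_cache_init(&foo_cache, "foo", sizeof(struct foo), 0,
			NULL, NULL, NULL, 0);
}

struct foo *foo_create(void)
{
	return (struct foo *) kmem_cache_alloc(&foo_cache);
}

void foo_destroy(struct foo *f)
{
	kmem_cache_free(&foo_cache, (vm_offset_t) f);
}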
diff --git a/vm/vm_external.c b/vm/vm_external.c
index ac47faa..e9643ff 100644
--- a/vm/vm_external.c
+++ b/vm/vm_external.c
@@ -31,7 +31,7 @@
*/
#include <mach/boolean.h>
-#include <kern/zalloc.h>
+#include <kern/slab.h>
#include <vm/vm_external.h>
#include <mach/vm_param.h>
#include <kern/assert.h>
@@ -40,7 +40,7 @@
boolean_t vm_external_unsafe = FALSE;
-zone_t vm_external_zone = ZONE_NULL;
+struct kmem_cache vm_external_cache;
/*
* The implementation uses bit arrays to record whether
@@ -52,8 +52,8 @@ zone_t vm_external_zone = ZONE_NULL;
#define SMALL_SIZE (VM_EXTERNAL_SMALL_SIZE/8)
#define LARGE_SIZE (VM_EXTERNAL_LARGE_SIZE/8)
-zone_t vm_object_small_existence_map_zone;
-zone_t vm_object_large_existence_map_zone;
+struct kmem_cache vm_object_small_existence_map_cache;
+struct kmem_cache vm_object_large_existence_map_cache;
vm_external_t vm_external_create(size)
@@ -62,20 +62,17 @@ vm_external_t vm_external_create(size)
vm_external_t result;
vm_size_t bytes;
- if (vm_external_zone == ZONE_NULL)
- return(VM_EXTERNAL_NULL);
-
- result = (vm_external_t) zalloc(vm_external_zone);
+ result = (vm_external_t) kmem_cache_alloc(&vm_external_cache);
result->existence_map = (char *) 0;
bytes = (atop(size) + 07) >> 3;
if (bytes <= SMALL_SIZE) {
result->existence_map =
- (char *) zalloc(vm_object_small_existence_map_zone);
+ (char *) kmem_cache_alloc(&vm_object_small_existence_map_cache);
result->existence_size = SMALL_SIZE;
} else if (bytes <= LARGE_SIZE) {
result->existence_map =
- (char *) zalloc(vm_object_large_existence_map_zone);
+ (char *) kmem_cache_alloc(&vm_object_large_existence_map_cache);
result->existence_size = LARGE_SIZE;
}
return(result);
@@ -89,14 +86,14 @@ void vm_external_destroy(e)
if (e->existence_map != (char *) 0) {
if (e->existence_size <= SMALL_SIZE) {
- zfree(vm_object_small_existence_map_zone,
+ kmem_cache_free(&vm_object_small_existence_map_cache,
(vm_offset_t) e->existence_map);
} else {
- zfree(vm_object_large_existence_map_zone,
+ kmem_cache_free(&vm_object_large_existence_map_cache,
(vm_offset_t) e->existence_map);
}
}
- zfree(vm_external_zone, (vm_offset_t) e);
+ kmem_cache_free(&vm_external_cache, (vm_offset_t) e);
}
vm_external_state_t _vm_external_state_get(e, offset)
@@ -142,18 +139,14 @@ void vm_external_module_initialize(void)
{
vm_size_t size = (vm_size_t) sizeof(struct vm_external);
- vm_external_zone = zinit(size, 0, 16*1024*size, size,
- 0, "external page bitmaps");
+ kmem_cache_init(&vm_external_cache, "vm_external", size, 0,
+ NULL, NULL, NULL, 0);
- vm_object_small_existence_map_zone = zinit(SMALL_SIZE, 0,
- round_page(LARGE_SIZE * SMALL_SIZE),
- round_page(SMALL_SIZE),
- ZONE_EXHAUSTIBLE,
- "object small existence maps");
+ kmem_cache_init(&vm_object_small_existence_map_cache,
+ "small_existence_map", SMALL_SIZE, 0,
+ NULL, NULL, NULL, 0);
- vm_object_large_existence_map_zone = zinit(LARGE_SIZE, 0,
- round_page(8 * LARGE_SIZE),
- round_page(LARGE_SIZE),
- ZONE_EXHAUSTIBLE,
- "object large existence maps");
+ kmem_cache_init(&vm_object_large_existence_map_cache,
+ "large_existence_map", LARGE_SIZE, 0,
+ NULL, NULL, NULL, 0);
}
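vm_external keeps one existence bit per page and sizes its bitmaps by rounding that bit count up to whole bytes, then picks the small or large bitmap cache by the result. A hedged sketch of the sizing computation used in vm_external_create() (existence_map_bytes is a hypothetical helper, not in the patch):

/* One bit per page; +7 then >>3 rounds the bit count up to bytes.
 * atop() converts a byte size to a page count. */
static vm_size_t existence_map_bytes(vm_size_t object_size)
{
	return (atop(object_size) + 07) >> 3;
}

/* vm_external_create() then buckets by that byte count:
 *   bytes <= SMALL_SIZE -> vm_object_small_existence_map_cache
 *   bytes <= LARGE_SIZE -> vm_object_large_existence_map_cache
 *   otherwise           -> no existence map is attached at all */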
diff --git a/vm/vm_fault.c b/vm/vm_fault.c
index cce043a..10955ed 100644
--- a/vm/vm_fault.c
+++ b/vm/vm_fault.c
@@ -51,9 +51,8 @@
#include <mach/memory_object.h>
#include <vm/memory_object_user.user.h>
/* For memory_object_data_{request,unlock} */
-#include <kern/mach_param.h>
#include <kern/macro_help.h>
-#include <kern/zalloc.h>
+#include <kern/slab.h>
#if MACH_PCSAMPLE
#include <kern/pc_sample.h>
@@ -85,7 +84,7 @@ typedef struct vm_fault_state {
vm_prot_t vmfp_access;
} vm_fault_state_t;
-zone_t vm_fault_state_zone = 0;
+struct kmem_cache vm_fault_state_cache;
int vm_object_absent_max = 50;
@@ -107,10 +106,8 @@ extern struct db_watchpoint *db_watchpoint_list;
*/
void vm_fault_init(void)
{
- vm_fault_state_zone = zinit(sizeof(vm_fault_state_t), 0,
- THREAD_MAX * sizeof(vm_fault_state_t),
- sizeof(vm_fault_state_t),
- 0, "vm fault state");
+ kmem_cache_init(&vm_fault_state_cache, "vm_fault_state",
+ sizeof(vm_fault_state_t), 0, NULL, NULL, NULL, 0);
}
/*
@@ -1206,12 +1203,12 @@ kern_return_t vm_fault(map, vaddr, fault_type, change_wiring,
/*
* if this assignment stmt is written as
- * 'active_threads[cpu_number()] = zalloc()',
- * cpu_number may be evaluated before zalloc;
- * if zalloc blocks, cpu_number will be wrong
+ * 'active_threads[cpu_number()] = kmem_cache_alloc()',
+ * cpu_number may be evaluated before kmem_cache_alloc;
+ * if kmem_cache_alloc blocks, cpu_number will be wrong
*/
- state = (char *) zalloc(vm_fault_state_zone);
+ state = (char *) kmem_cache_alloc(&vm_fault_state_cache);
current_thread()->ith_other = state;
}
@@ -1490,7 +1487,7 @@ kern_return_t vm_fault(map, vaddr, fault_type, change_wiring,
register vm_fault_state_t *state =
(vm_fault_state_t *) current_thread()->ith_other;
- zfree(vm_fault_state_zone, (vm_offset_t) state);
+ kmem_cache_free(&vm_fault_state_cache, (vm_offset_t) state);
(*continuation)(kr);
/*NOTREACHED*/
}
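The comment rewritten in the hunk above describes an evaluation-order hazard that survives the zalloc-to-slab rename unchanged: a blocking allocation may let the thread resume on another CPU, so any cpu_number()-based index must be computed only after the allocation returns. A hedged sketch, using a hypothetical per_cpu_state array purely for illustration:

#include <kern/slab.h>

extern struct kmem_cache vm_fault_state_cache;	/* defined in vm_fault.c */
extern vm_offset_t per_cpu_state[];		/* hypothetical, illustration only */

static void fault_state_attach(void)
{
	vm_offset_t state;

	/* Wrong: per_cpu_state[cpu_number()] = kmem_cache_alloc(...);
	 * cpu_number() may be evaluated before the call, and if the
	 * allocation blocks, the thread can wake up on a different CPU. */

	/* Right: finish the (possibly blocking) allocation first, then
	 * read cpu_number() on the CPU we actually resumed on. */
	state = kmem_cache_alloc(&vm_fault_state_cache);
	per_cpu_state[cpu_number()] = state;
}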
diff --git a/vm/vm_init.c b/vm/vm_init.c
index 33fca65..89eb098 100644
--- a/vm/vm_init.c
+++ b/vm/vm_init.c
@@ -35,7 +35,7 @@
*/
#include <mach/machine/vm_types.h>
-#include <kern/zalloc.h>
+#include <kern/slab.h>
#include <kern/kalloc.h>
#include <vm/vm_fault.h>
#include <vm/vm_object.h>
@@ -67,12 +67,12 @@ void vm_mem_bootstrap()
* Initialize other VM packages
*/
- zone_bootstrap();
+ slab_bootstrap();
vm_object_bootstrap();
vm_map_init();
kmem_init(start, end);
pmap_init();
- zone_init();
+ slab_init();
kalloc_init();
vm_fault_init();
vm_page_module_init();
diff --git a/vm/vm_kern.c b/vm/vm_kern.c
index cfa33ff..fd46e98 100644
--- a/vm/vm_kern.c
+++ b/vm/vm_kern.c
@@ -58,7 +58,8 @@
* Variables exported by this module.
*/
-vm_map_t kernel_map;
+static struct vm_map kernel_map_store;
+vm_map_t kernel_map = &kernel_map_store;
vm_map_t kernel_pageable_map;
extern void kmem_alloc_pages();
@@ -811,27 +812,27 @@ kmem_remap_pages(object, offset, start, end, protection)
}
/*
- * kmem_suballoc:
+ * kmem_submap:
*
- * Allocates a map to manage a subrange
+ * Initializes a map to manage a subrange
* of the kernel virtual address space.
*
* Arguments are as follows:
*
+ * map Map to initialize
* parent Map to take range from
* size Size of range to find
* min, max Returned endpoints of map
* pageable Can the region be paged
*/
-vm_map_t
-kmem_suballoc(parent, min, max, size, pageable)
- vm_map_t parent;
+void
+kmem_submap(map, parent, min, max, size, pageable)
+ vm_map_t map, parent;
vm_offset_t *min, *max;
vm_size_t size;
boolean_t pageable;
{
- vm_map_t map;
vm_offset_t addr;
kern_return_t kr;
@@ -850,20 +851,16 @@ kmem_suballoc(parent, min, max, size, pageable)
vm_submap_object, (vm_offset_t) 0, FALSE,
VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
if (kr != KERN_SUCCESS)
- panic("kmem_suballoc");
+ panic("kmem_submap");
pmap_reference(vm_map_pmap(parent));
- map = vm_map_create(vm_map_pmap(parent), addr, addr + size, pageable);
- if (map == VM_MAP_NULL)
- panic("kmem_suballoc");
-
+ vm_map_setup(map, vm_map_pmap(parent), addr, addr + size, pageable);
kr = vm_map_submap(parent, addr, addr + size, map);
if (kr != KERN_SUCCESS)
- panic("kmem_suballoc");
+ panic("kmem_submap");
*min = addr;
*max = addr + size;
- return map;
}
/*
@@ -876,9 +873,8 @@ void kmem_init(start, end)
vm_offset_t start;
vm_offset_t end;
{
- kernel_map = vm_map_create(pmap_kernel(),
- VM_MIN_KERNEL_ADDRESS, end,
- FALSE);
+ vm_map_setup(kernel_map, pmap_kernel(), VM_MIN_KERNEL_ADDRESS, end,
+ FALSE);
/*
* Reserve virtual memory allocated up to this time.
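kmem_suballoc() used to allocate the submap structure itself (via vm_map_create) and return it; kmem_submap() instead takes caller-provided storage and only initializes it, which is what allows kernel_map above to become a statically allocated struct vm_map. A hedged sketch of a converted caller (example_map is hypothetical):

#include <vm/vm_map.h>
#include <vm/vm_kern.h>

static struct vm_map example_map_store;
vm_map_t example_map = &example_map_store;

void example_submap_init(vm_size_t size)
{
	vm_offset_t min, max;

	/* Old: example_map = kmem_suballoc(kernel_map, &min, &max, size, FALSE); */
	kmem_submap(example_map, kernel_map, &min, &max, size, FALSE);
}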
diff --git a/vm/vm_kern.h b/vm/vm_kern.h
index ca93d7a..22b7c12 100644
--- a/vm/vm_kern.h
+++ b/vm/vm_kern.h
@@ -58,8 +58,8 @@ extern kern_return_t kmem_realloc(vm_map_t, vm_offset_t, vm_size_t,
vm_offset_t *, vm_size_t);
extern void kmem_free(vm_map_t, vm_offset_t, vm_size_t);
-extern vm_map_t kmem_suballoc(vm_map_t, vm_offset_t *, vm_offset_t *,
- vm_size_t, boolean_t);
+extern void kmem_submap(vm_map_t, vm_map_t, vm_offset_t *,
+ vm_offset_t *, vm_size_t, boolean_t);
extern kern_return_t kmem_io_map_copyout(vm_map_t, vm_offset_t *,
vm_offset_t *, vm_size_t *,
diff --git a/vm/vm_map.c b/vm/vm_map.c
index ce83403..de10eec 100644
--- a/vm/vm_map.c
+++ b/vm/vm_map.c
@@ -41,7 +41,8 @@
#include <mach/vm_param.h>
#include <kern/assert.h>
#include <kern/debug.h>
-#include <kern/zalloc.h>
+#include <kern/kalloc.h>
+#include <kern/slab.h>
#include <vm/pmap.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
@@ -70,7 +71,7 @@ void vm_map_copy_page_discard (vm_map_copy_t copy);
* map entry to the same memory - the wired count in the new entry
* must be set to zero. vm_map_entry_copy_full() creates a new
* entry that is identical to the old entry. This preserves the
- * wire count; it's used for map splitting and zone changing in
+ * wire count; it's used for map splitting and cache changing in
* vm_map_copyout.
*/
#define vm_map_entry_copy(NEW,OLD) \
@@ -130,10 +131,10 @@ MACRO_END
* vm_object_copy_strategically() in vm_object.c.
*/
-zone_t vm_map_zone; /* zone for vm_map structures */
-zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
-zone_t vm_map_kentry_zone; /* zone for kernel entry structures */
-zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
+struct kmem_cache vm_map_cache; /* cache for vm_map structures */
+struct kmem_cache vm_map_entry_cache; /* cache for vm_map_entry structures */
+struct kmem_cache vm_map_kentry_cache; /* cache for kernel entry structures */
+struct kmem_cache vm_map_copy_cache; /* cache for vm_map_copy structures */
boolean_t vm_map_lookup_entry(); /* forward declaration */
@@ -143,7 +144,8 @@ boolean_t vm_map_lookup_entry(); /* forward declaration */
* vm_map_submap creates the submap.
*/
-vm_object_t vm_submap_object;
+static struct vm_object vm_submap_object_store;
+vm_object_t vm_submap_object = &vm_submap_object_store;
/*
* vm_map_init:
@@ -151,51 +153,81 @@ vm_object_t vm_submap_object;
* Initialize the vm_map module. Must be called before
* any other vm_map routines.
*
- * Map and entry structures are allocated from zones -- we must
- * initialize those zones.
+ * Map and entry structures are allocated from caches -- we must
+ * initialize those caches.
*
- * There are three zones of interest:
+ * There are three caches of interest:
*
- * vm_map_zone: used to allocate maps.
- * vm_map_entry_zone: used to allocate map entries.
- * vm_map_kentry_zone: used to allocate map entries for the kernel.
+ * vm_map_cache: used to allocate maps.
+ * vm_map_entry_cache: used to allocate map entries.
+ * vm_map_kentry_cache: used to allocate map entries for the kernel.
*
- * The kernel allocates map entries from a special zone that is initially
- * "crammed" with memory. It would be difficult (perhaps impossible) for
- * the kernel to allocate more memory to a entry zone when it became
- * empty since the very act of allocating memory implies the creation
- * of a new entry.
+ * Kernel map entries are allocated from a special cache, using a custom
+ * page allocation function to avoid recursion. It would be difficult
+ * (perhaps impossible) for the kernel to allocate more memory to an entry
+ * cache when it became empty since the very act of allocating memory
+ * implies the creation of a new entry.
*/
vm_offset_t kentry_data;
-vm_size_t kentry_data_size;
-int kentry_count = 256; /* to init kentry_data_size */
+vm_size_t kentry_data_size = 32 * PAGE_SIZE;
-void vm_map_init(void)
+static vm_offset_t kentry_pagealloc(vm_size_t size)
{
- vm_map_zone = zinit((vm_size_t) sizeof(struct vm_map), 0, 40*1024,
- PAGE_SIZE, 0, "maps");
- vm_map_entry_zone = zinit((vm_size_t) sizeof(struct vm_map_entry),
- 0, 1024*1024, PAGE_SIZE*5,
- 0, "non-kernel map entries");
- vm_map_kentry_zone = zinit((vm_size_t) sizeof(struct vm_map_entry), 0,
- kentry_data_size, kentry_data_size,
- ZONE_FIXED /* XXX */, "kernel map entries");
-
- vm_map_copy_zone = zinit((vm_size_t) sizeof(struct vm_map_copy),
- 0, 16*1024, PAGE_SIZE, 0,
- "map copies");
+ vm_offset_t result;
- /*
- * Cram the kentry zone with initial data.
- */
- zcram(vm_map_kentry_zone, kentry_data, kentry_data_size);
+ if (size > kentry_data_size)
+ panic("vm_map: kentry memory exhausted");
+
+ result = kentry_data;
+ kentry_data += size;
+ kentry_data_size -= size;
+ return result;
+}
+
+void vm_map_init(void)
+{
+ kmem_cache_init(&vm_map_cache, "vm_map", sizeof(struct vm_map), 0,
+ NULL, NULL, NULL, 0);
+ kmem_cache_init(&vm_map_entry_cache, "vm_map_entry",
+ sizeof(struct vm_map_entry), 0, NULL, NULL, NULL, 0);
+ kmem_cache_init(&vm_map_kentry_cache, "vm_map_kentry",
+ sizeof(struct vm_map_entry), 0, NULL, kentry_pagealloc,
+ NULL, KMEM_CACHE_NOCPUPOOL | KMEM_CACHE_NOOFFSLAB
+ | KMEM_CACHE_NORECLAIM);
+ kmem_cache_init(&vm_map_copy_cache, "vm_map_copy",
+ sizeof(struct vm_map_copy), 0, NULL, NULL, NULL, 0);
/*
* Submap object is initialized by vm_object_init.
*/
}
+void vm_map_setup(map, pmap, min, max, pageable)
+ vm_map_t map;
+ pmap_t pmap;
+ vm_offset_t min, max;
+ boolean_t pageable;
+{
+ vm_map_first_entry(map) = vm_map_to_entry(map);
+ vm_map_last_entry(map) = vm_map_to_entry(map);
+ map->hdr.nentries = 0;
+ map->hdr.entries_pageable = pageable;
+
+ map->size = 0;
+ map->ref_count = 1;
+ map->pmap = pmap;
+ map->min_offset = min;
+ map->max_offset = max;
+ map->wiring_required = FALSE;
+ map->wait_for_space = FALSE;
+ map->first_free = vm_map_to_entry(map);
+ map->hint = vm_map_to_entry(map);
+ vm_map_lock_init(map);
+ simple_lock_init(&map->ref_lock);
+ simple_lock_init(&map->hint_lock);
+}
+
/*
* vm_map_create:
*
@@ -210,27 +242,11 @@ vm_map_t vm_map_create(pmap, min, max, pageable)
{
register vm_map_t result;
- result = (vm_map_t) zalloc(vm_map_zone);
+ result = (vm_map_t) kmem_cache_alloc(&vm_map_cache);
if (result == VM_MAP_NULL)
panic("vm_map_create");
- vm_map_first_entry(result) = vm_map_to_entry(result);
- vm_map_last_entry(result) = vm_map_to_entry(result);
- result->hdr.nentries = 0;
- result->hdr.entries_pageable = pageable;
-
- result->size = 0;
- result->ref_count = 1;
- result->pmap = pmap;
- result->min_offset = min;
- result->max_offset = max;
- result->wiring_required = FALSE;
- result->wait_for_space = FALSE;
- result->first_free = vm_map_to_entry(result);
- result->hint = vm_map_to_entry(result);
- vm_map_lock_init(result);
- simple_lock_init(&result->ref_lock);
- simple_lock_init(&result->hint_lock);
+ vm_map_setup(result, pmap, min, max, pageable);
return(result);
}
@@ -250,15 +266,15 @@ vm_map_t vm_map_create(pmap, min, max, pageable)
vm_map_entry_t _vm_map_entry_create(map_header)
register struct vm_map_header *map_header;
{
- register zone_t zone;
+ register kmem_cache_t cache;
register vm_map_entry_t entry;
if (map_header->entries_pageable)
- zone = vm_map_entry_zone;
+ cache = &vm_map_entry_cache;
else
- zone = vm_map_kentry_zone;
+ cache = &vm_map_kentry_cache;
- entry = (vm_map_entry_t) zalloc(zone);
+ entry = (vm_map_entry_t) kmem_cache_alloc(cache);
if (entry == VM_MAP_ENTRY_NULL)
panic("vm_map_entry_create");
@@ -280,14 +296,14 @@ void _vm_map_entry_dispose(map_header, entry)
register struct vm_map_header *map_header;
register vm_map_entry_t entry;
{
- register zone_t zone;
+ register kmem_cache_t cache;
if (map_header->entries_pageable)
- zone = vm_map_entry_zone;
+ cache = &vm_map_entry_cache;
else
- zone = vm_map_kentry_zone;
+ cache = &vm_map_kentry_cache;
- zfree(zone, (vm_offset_t) entry);
+ kmem_cache_free(cache, (vm_offset_t) entry);
}
/*
@@ -368,7 +384,7 @@ void vm_map_deallocate(map)
pmap_destroy(map->pmap);
- zfree(vm_map_zone, (vm_offset_t) map);
+ kmem_cache_free(&vm_map_cache, (vm_offset_t) map);
}
/*
@@ -1907,7 +1923,7 @@ free_next_copy:
register vm_map_copy_t new_copy;
new_copy = (vm_map_copy_t) copy->cpy_cont_args;
- zfree(vm_map_copy_zone, (vm_offset_t) copy);
+ kmem_cache_free(&vm_map_copy_cache, (vm_offset_t) copy);
copy = new_copy;
goto free_next_copy;
}
@@ -1918,7 +1934,7 @@ free_next_copy:
break;
}
- zfree(vm_map_copy_zone, (vm_offset_t) copy);
+ kmem_cache_free(&vm_map_copy_cache, (vm_offset_t) copy);
}
/*
@@ -1952,7 +1968,7 @@ vm_map_copy_copy(copy)
* from the old one into it.
*/
- new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
+ new_copy = (vm_map_copy_t) kmem_cache_alloc(&vm_map_copy_cache);
*new_copy = *copy;
if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
@@ -2160,7 +2176,7 @@ start_pass_1:
/*
* XXXO If there are no permanent objects in the destination,
- * XXXO and the source and destination map entry zones match,
+ * XXXO and the source and destination map entry caches match,
* XXXO and the destination map entry is not shared,
* XXXO then the map entries can be deleted and replaced
* XXXO with those from the copy. The following code is the
@@ -2403,7 +2419,7 @@ start_pass_1:
((where)->vme_next = vm_map_copy_first_entry(copy)) \
->vme_prev = (where); \
(map)->hdr.nentries += (copy)->cpy_hdr.nentries; \
- zfree(vm_map_copy_zone, (vm_offset_t) copy); \
+ kmem_cache_free(&vm_map_copy_cache, (vm_offset_t) copy); \
MACRO_END
/*
@@ -2459,7 +2475,7 @@ kern_return_t vm_map_copyout(dst_map, dst_addr, copy)
VM_INHERIT_DEFAULT);
if (kr != KERN_SUCCESS)
return(kr);
- zfree(vm_map_copy_zone, (vm_offset_t) copy);
+ kmem_cache_free(&vm_map_copy_cache, (vm_offset_t) copy);
return(KERN_SUCCESS);
}
@@ -2516,15 +2532,15 @@ kern_return_t vm_map_copyout(dst_map, dst_addr, copy)
* Mismatches occur when dealing with the default
* pager.
*/
- zone_t old_zone;
+ kmem_cache_t old_cache;
vm_map_entry_t next, new;
/*
- * Find the zone that the copies were allocated from
+ * Find the cache that the copies were allocated from
*/
- old_zone = (copy->cpy_hdr.entries_pageable)
- ? vm_map_entry_zone
- : vm_map_kentry_zone;
+ old_cache = (copy->cpy_hdr.entries_pageable)
+ ? &vm_map_entry_cache
+ : &vm_map_kentry_cache;
entry = vm_map_copy_first_entry(copy);
/*
@@ -2547,7 +2563,7 @@ kern_return_t vm_map_copyout(dst_map, dst_addr, copy)
vm_map_copy_last_entry(copy),
new);
next = entry->vme_next;
- zfree(old_zone, (vm_offset_t) entry);
+ kmem_cache_free(old_cache, (vm_offset_t) entry);
entry = next;
}
}
@@ -3036,10 +3052,10 @@ error:
* Consume on success logic.
*/
if (copy != orig_copy) {
- zfree(vm_map_copy_zone, (vm_offset_t) copy);
+ kmem_cache_free(&vm_map_copy_cache, (vm_offset_t) copy);
}
if (result == KERN_SUCCESS) {
- zfree(vm_map_copy_zone, (vm_offset_t) orig_copy);
+ kmem_cache_free(&vm_map_copy_cache, (vm_offset_t) orig_copy);
}
return(result);
@@ -3116,7 +3132,7 @@ kern_return_t vm_map_copyin(src_map, src_addr, len, src_destroy, copy_result)
* remember the endpoints prior to rounding.
*/
- copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
+ copy = (vm_map_copy_t) kmem_cache_alloc(&vm_map_copy_cache);
vm_map_copy_first_entry(copy) =
vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
copy->type = VM_MAP_COPY_ENTRY_LIST;
@@ -3443,7 +3459,7 @@ kern_return_t vm_map_copyin_object(object, offset, size, copy_result)
* and null links.
*/
- copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
+ copy = (vm_map_copy_t) kmem_cache_alloc(&vm_map_copy_cache);
vm_map_copy_first_entry(copy) =
vm_map_copy_last_entry(copy) = VM_MAP_ENTRY_NULL;
copy->type = VM_MAP_COPY_OBJECT;
@@ -3598,7 +3614,7 @@ kern_return_t vm_map_copyin_page_list(src_map, src_addr, len, src_destroy,
* be page-aligned.
*/
- copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
+ copy = (vm_map_copy_t) kmem_cache_alloc(&vm_map_copy_cache);
copy->type = VM_MAP_COPY_PAGE_LIST;
copy->cpy_npages = 0;
copy->offset = src_addr;
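The most delicate part of the vm_map.c conversion is bootstrap: kernel map entries must come from memory whose allocation cannot itself require creating new map entries. The patch handles this by backing vm_map_kentry_cache with kentry_pagealloc(), which hands out pieces of a fixed 32-page buffer stolen at boot (kentry_data, filled by pmap_steal_memory() in vm_resident.c), and by flagging the cache so it never grows or reclaims through the normal slab paths. A hedged restatement of that pattern with hypothetical early_* names; the flag meanings are inferred from their names and should be treated as assumptions:

#include <kern/slab.h>

struct early_obj {
	long payload;			/* hypothetical object type */
};

static vm_offset_t boot_buf;		/* filled from pmap_steal_memory() at boot */
static vm_size_t boot_buf_size = 32 * PAGE_SIZE;
static struct kmem_cache early_cache;

/* Custom slab page allocator: carve pages out of the stolen buffer
 * instead of going through the kernel map. */
static vm_offset_t early_pagealloc(vm_size_t size)
{
	vm_offset_t result;

	if (size > boot_buf_size)
		panic("early_cache: boot memory exhausted");

	result = boot_buf;
	boot_buf += size;
	boot_buf_size -= size;
	return result;
}

void early_cache_init(void)
{
	/* No per-CPU pools, no off-slab bookkeeping, never reclaimed:
	 * the cache must stay self-contained within the stolen pages. */
	kmem_cache_init(&early_cache, "early_obj", sizeof(struct early_obj), 0,
			NULL, early_pagealloc, NULL,
			KMEM_CACHE_NOCPUPOOL | KMEM_CACHE_NOOFFSLAB
			| KMEM_CACHE_NORECLAIM);
}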
diff --git a/vm/vm_map.h b/vm/vm_map.h
index 567fe93..f4e9395 100644
--- a/vm/vm_map.h
+++ b/vm/vm_map.h
@@ -357,6 +357,9 @@ extern int kentry_count;
/* Initialize the module */
extern void vm_map_init(void);
+/* Initialize an empty map */
+extern void vm_map_setup(vm_map_t, pmap_t, vm_offset_t, vm_offset_t,
+ boolean_t);
/* Create an empty map */
extern vm_map_t vm_map_create(pmap_t, vm_offset_t, vm_offset_t,
boolean_t);
diff --git a/vm/vm_object.c b/vm/vm_object.c
index 9057973..d80124a 100644
--- a/vm/vm_object.c
+++ b/vm/vm_object.c
@@ -47,7 +47,7 @@
#include <kern/lock.h>
#include <kern/queue.h>
#include <kern/xpr.h>
-#include <kern/zalloc.h>
+#include <kern/slab.h>
#include <vm/memory_object.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
@@ -141,13 +141,14 @@ void vm_object_deactivate_pages(vm_object_t);
* ZZZ Continue this comment.
*/
-zone_t vm_object_zone; /* vm backing store zone */
+struct kmem_cache vm_object_cache; /* vm backing store cache */
/*
* All wired-down kernel memory belongs to a single virtual
* memory object (kernel_object) to avoid wasting data structures.
*/
-vm_object_t kernel_object;
+static struct vm_object kernel_object_store;
+vm_object_t kernel_object = &kernel_object_store;
/*
* Virtual memory objects that are not referenced by
@@ -198,7 +199,7 @@ decl_simple_lock_data(,vm_object_cached_lock_data)
* object structure, be sure to add initialization
* (see vm_object_init).
*/
-vm_object_t vm_object_template;
+struct vm_object vm_object_template;
/*
* vm_object_allocate:
@@ -206,17 +207,24 @@ vm_object_t vm_object_template;
* Returns a new object with the given size.
*/
+static void _vm_object_setup(
+ vm_object_t object,
+ vm_size_t size)
+{
+ *object = vm_object_template;
+ queue_init(&object->memq);
+ vm_object_lock_init(object);
+ object->size = size;
+}
+
vm_object_t _vm_object_allocate(
vm_size_t size)
{
register vm_object_t object;
- object = (vm_object_t) zalloc(vm_object_zone);
+ object = (vm_object_t) kmem_cache_alloc(&vm_object_cache);
- *object = *vm_object_template;
- queue_init(&object->memq);
- vm_object_lock_init(object);
- object->size = size;
+ _vm_object_setup(object, size);
return object;
}
@@ -244,10 +252,8 @@ vm_object_t vm_object_allocate(
*/
void vm_object_bootstrap(void)
{
- vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object), 0,
- round_page(512*1024),
- round_page(12*1024),
- 0, "objects");
+ kmem_cache_init(&vm_object_cache, "vm_object",
+ sizeof(struct vm_object), 0, NULL, NULL, NULL, 0);
queue_init(&vm_object_cached_list);
simple_lock_init(&vm_object_cached_lock_data);
@@ -256,53 +262,50 @@ void vm_object_bootstrap(void)
* Fill in a template object, for quick initialization
*/
- vm_object_template = (vm_object_t) zalloc(vm_object_zone);
- memset(vm_object_template, 0, sizeof *vm_object_template);
-
- vm_object_template->ref_count = 1;
- vm_object_template->size = 0;
- vm_object_template->resident_page_count = 0;
- vm_object_template->copy = VM_OBJECT_NULL;
- vm_object_template->shadow = VM_OBJECT_NULL;
- vm_object_template->shadow_offset = (vm_offset_t) 0;
+ vm_object_template.ref_count = 1;
+ vm_object_template.size = 0;
+ vm_object_template.resident_page_count = 0;
+ vm_object_template.copy = VM_OBJECT_NULL;
+ vm_object_template.shadow = VM_OBJECT_NULL;
+ vm_object_template.shadow_offset = (vm_offset_t) 0;
- vm_object_template->pager = IP_NULL;
- vm_object_template->paging_offset = 0;
- vm_object_template->pager_request = PAGER_REQUEST_NULL;
- vm_object_template->pager_name = IP_NULL;
+ vm_object_template.pager = IP_NULL;
+ vm_object_template.paging_offset = 0;
+ vm_object_template.pager_request = PAGER_REQUEST_NULL;
+ vm_object_template.pager_name = IP_NULL;
- vm_object_template->pager_created = FALSE;
- vm_object_template->pager_initialized = FALSE;
- vm_object_template->pager_ready = FALSE;
+ vm_object_template.pager_created = FALSE;
+ vm_object_template.pager_initialized = FALSE;
+ vm_object_template.pager_ready = FALSE;
- vm_object_template->copy_strategy = MEMORY_OBJECT_COPY_NONE;
+ vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_NONE;
/* ignored if temporary, will be reset before
* permanent object becomes ready */
- vm_object_template->use_shared_copy = FALSE;
- vm_object_template->shadowed = FALSE;
-
- vm_object_template->absent_count = 0;
- vm_object_template->all_wanted = 0; /* all bits FALSE */
-
- vm_object_template->paging_in_progress = 0;
- vm_object_template->can_persist = FALSE;
- vm_object_template->internal = TRUE;
- vm_object_template->temporary = TRUE;
- vm_object_template->alive = TRUE;
- vm_object_template->lock_in_progress = FALSE;
- vm_object_template->lock_restart = FALSE;
- vm_object_template->use_old_pageout = TRUE; /* XXX change later */
- vm_object_template->last_alloc = (vm_offset_t) 0;
+ vm_object_template.use_shared_copy = FALSE;
+ vm_object_template.shadowed = FALSE;
+
+ vm_object_template.absent_count = 0;
+ vm_object_template.all_wanted = 0; /* all bits FALSE */
+
+ vm_object_template.paging_in_progress = 0;
+ vm_object_template.can_persist = FALSE;
+ vm_object_template.internal = TRUE;
+ vm_object_template.temporary = TRUE;
+ vm_object_template.alive = TRUE;
+ vm_object_template.lock_in_progress = FALSE;
+ vm_object_template.lock_restart = FALSE;
+ vm_object_template.use_old_pageout = TRUE; /* XXX change later */
+ vm_object_template.last_alloc = (vm_offset_t) 0;
#if MACH_PAGEMAP
- vm_object_template->existence_info = VM_EXTERNAL_NULL;
+ vm_object_template.existence_info = VM_EXTERNAL_NULL;
#endif /* MACH_PAGEMAP */
/*
* Initialize the "kernel object"
*/
- kernel_object = _vm_object_allocate(
+ _vm_object_setup(kernel_object,
VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS);
/*
@@ -310,7 +313,7 @@ void vm_object_bootstrap(void)
* kernel object so that no limit is imposed on submap sizes.
*/
- vm_submap_object = _vm_object_allocate(
+ _vm_object_setup(vm_submap_object,
VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS);
#if MACH_PAGEMAP
@@ -660,7 +663,7 @@ void vm_object_terminate(
* Free the space for the object.
*/
- zfree(vm_object_zone, (vm_offset_t) object);
+ kmem_cache_free(&vm_object_cache, (vm_offset_t) object);
}
/*
@@ -2618,7 +2621,7 @@ void vm_object_collapse(
vm_object_unlock(object);
if (old_name_port != IP_NULL)
ipc_port_dealloc_kernel(old_name_port);
- zfree(vm_object_zone, (vm_offset_t) backing_object);
+ kmem_cache_free(&vm_object_cache, (vm_offset_t) backing_object);
vm_object_lock(object);
object_collapses++;
diff --git a/vm/vm_page.h b/vm/vm_page.h
index f13b0af..4536d1c 100644
--- a/vm/vm_page.h
+++ b/vm/vm_page.h
@@ -41,7 +41,6 @@
#include <vm/vm_types.h>
#include <kern/queue.h>
#include <kern/lock.h>
-#include <kern/zalloc.h>
#include <kern/macro_help.h>
#include <kern/sched_prim.h> /* definitions of wait/wakeup */
diff --git a/vm/vm_pageout.c b/vm/vm_pageout.c
index 7a755bf..77c1cfe 100644
--- a/vm/vm_pageout.c
+++ b/vm/vm_pageout.c
@@ -43,6 +43,7 @@
#include <mach/vm_statistics.h>
#include <kern/counters.h>
#include <kern/debug.h>
+#include <kern/slab.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <vm/pmap.h>
@@ -544,8 +545,8 @@ void vm_pageout_scan()
* into an internal object and then immediately double-page it,
* sending it to the default pager.
*
- * consider_zone_gc should be last, because the other operations
- * might return memory to zones. When we pause we use
+ * slab_collect should be last, because the other operations
+ * might return memory to caches. When we pause we use
* vm_pageout_scan_continue as our continuation, so we will
* reenter vm_pageout_scan periodically and attempt to reclaim
* internal memory even if we never reach vm_page_free_target.
@@ -555,7 +556,7 @@ void vm_pageout_scan()
net_kmsg_collect();
consider_task_collect();
consider_thread_collect();
- consider_zone_gc();
+ slab_collect();
for (burst_count = 0;;) {
register vm_page_t m;
diff --git a/vm/vm_resident.c b/vm/vm_resident.c
index 96354a4..ae71a74 100644
--- a/vm/vm_resident.c
+++ b/vm/vm_resident.c
@@ -45,7 +45,7 @@
#include <mach/vm_statistics.h>
#include <machine/vm_param.h>
#include <kern/xpr.h>
-#include <kern/zalloc.h>
+#include <kern/slab.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
@@ -58,10 +58,6 @@
#include <vm/vm_user.h>
#endif
-/* in zalloc.c XXX */
-extern vm_offset_t zdata;
-extern vm_size_t zdata_size;
-
/*
 * Associated with each page of user-allocatable memory is a
* page structure.
@@ -126,7 +122,7 @@ unsigned int vm_page_free_count_minimum; /* debugging */
* These page structures are allocated the way
* most other kernel structures are.
*/
-zone_t vm_page_zone;
+struct kmem_cache vm_page_cache;
/*
* Fictitious pages don't have a physical address,
@@ -239,14 +235,11 @@ void vm_page_bootstrap(
vm_page_free_wanted = 0;
/*
- * Steal memory for the zone system.
+ * Steal memory for the kernel map entries.
*/
- kentry_data_size = kentry_count * sizeof(struct vm_map_entry);
kentry_data = pmap_steal_memory(kentry_data_size);
- zdata = pmap_steal_memory(zdata_size);
-
/*
* Allocate (and initialize) the virtual-to-physical
* table hash buckets.
@@ -430,10 +423,8 @@ void pmap_startup(
*/
void vm_page_module_init(void)
{
- vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page), 0,
- VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
- PAGE_SIZE,
- 0, "vm pages");
+ kmem_cache_init(&vm_page_cache, "vm_page", sizeof(struct vm_page), 0,
+ NULL, NULL, NULL, 0);
}
/*
@@ -455,7 +446,7 @@ void vm_page_create(
for (paddr = round_page(start);
paddr < trunc_page(end);
paddr += PAGE_SIZE) {
- m = (vm_page_t) zalloc(vm_page_zone);
+ m = (vm_page_t) kmem_cache_alloc(&vm_page_cache);
if (m == VM_PAGE_NULL)
panic("vm_page_create");
@@ -810,7 +801,7 @@ void vm_page_more_fictitious(void)
int i;
for (i = 0; i < vm_page_fictitious_quantum; i++) {
- m = (vm_page_t) zalloc(vm_page_zone);
+ m = (vm_page_t) kmem_cache_alloc(&vm_page_cache);
if (m == VM_PAGE_NULL)
panic("vm_page_more_fictitious");
diff --git a/xen/block.c b/xen/block.c
index 02d410f..fb18b67 100644
--- a/xen/block.c
+++ b/xen/block.c
@@ -18,6 +18,7 @@
#include <sys/types.h>
#include <mach/mig_errors.h>
+#include <kern/kalloc.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>
#include <vm/vm_kern.h>
diff --git a/xen/net.c b/xen/net.c
index 0f14773..10e4bbe 100644
--- a/xen/net.c
+++ b/xen/net.c
@@ -18,6 +18,7 @@
#include <sys/types.h>
#include <mach/mig_errors.h>
+#include <kern/kalloc.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>
#include <vm/vm_kern.h>
diff --git a/xen/store.c b/xen/store.c
index 94d0ae4..8796bb5 100644
--- a/xen/store.c
+++ b/xen/store.c
@@ -20,6 +20,7 @@
#include <mach/mig_support.h>
#include <machine/pmap.h>
#include <machine/ipl.h>
+#include <kern/kalloc.h>
#include <stdarg.h>
#include <string.h>
#include <alloca.h>