summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Braun <rbraun@sceen.net>2012-12-07 20:29:45 +0100
committerJustus Winter <4winter@informatik.uni-hamburg.de>2015-08-28 15:48:04 +0200
commit3d7a59680e7d500de23403ade0f5d07adaa9f229 (patch)
tree3927cc70503e895a08aa95d0e5f21fdbda910872
parent89ea8fc44d2e07f9611c65a1181cec2bb80d29c2 (diff)
kern/slab: rework buffer-to-slab lookup
Instead of using a red-black tree, rely on the VM system to store kmem specific private data.
-rw-r--r--kern/slab.c142
-rw-r--r--kern/slab.h7
2 files changed, 70 insertions, 79 deletions
diff --git a/kern/slab.c b/kern/slab.c
index 9ccbb43..cc0c8a8 100644
--- a/kern/slab.c
+++ b/kern/slab.c
@@ -52,21 +52,15 @@
* differences are outlined below.
*
* The per-cache self-scaling hash table for buffer-to-bufctl conversion,
- * described in 3.2.3 "Slab Layout for Large Objects", has been replaced by
- * a red-black tree storing slabs, sorted by address. The use of a
- * self-balancing tree for buffer-to-slab conversions provides a few advantages
- * over a hash table. Unlike a hash table, a BST provides a "lookup nearest"
- * operation, so obtaining the slab data (whether it is embedded in the slab or
- * off slab) from a buffer address simply consists of a "lookup nearest towards
- * 0" tree search. Storing slabs instead of buffers also considerably reduces
- * the number of elements to retain. Finally, a self-balancing tree is a true
- * self-scaling data structure, whereas a hash table requires periodic
- * maintenance and complete resizing, which is expensive. The only drawback is
- * that releasing a buffer to the slab layer takes logarithmic time instead of
- * constant time. But as the data set size is kept reasonable (because slabs
- * are stored instead of buffers) and because the CPU pool layer services most
- * requests, avoiding many accesses to the slab layer, it is considered an
- * acceptable tradeoff.
+ * described in 3.2.3 "Slab Layout for Large Objects", has been replaced with
+ * a constant time buffer-to-slab lookup that relies on the VM system. When
+ * slabs are created, their backing virtual pages are mapped to physical pages
+ * that are never aliased by other virtual addresses (unless explicitely by
+ * other kernel code). The lookup operation provides the associated physical
+ * page descriptor which can store data private to this allocator. The main
+ * drawback of this method is that it needs to walk the low level page tables,
+ * but it's expected that these have already been cached by the CPU due to
+ * prior access by the allocator user, depending on the hardware properties.
*
* This implementation uses per-cpu pools of objects, which service most
* allocation requests. These pools act as caches (but are named differently
@@ -448,8 +442,7 @@ static struct kmem_slab * kmem_slab_create(struct kmem_cache *cache,
slab = (struct kmem_slab *)(slab_buf + cache->slab_size) - 1;
}
- list_node_init(&slab->list_node);
- rbtree_node_init(&slab->tree_node);
+ list_node_init(&slab->node);
slab->nr_refs = 0;
slab->first_free = NULL;
slab->addr = slab_buf + color;
@@ -521,33 +514,34 @@ static void kmem_slab_destroy(struct kmem_slab *slab, struct kmem_cache *cache)
kmem_cache_free(&kmem_slab_cache, (vm_offset_t)slab);
}
-static inline int kmem_slab_use_tree(int flags)
+static inline unsigned long
+kmem_slab_buf(const struct kmem_slab *slab)
{
- return !(flags & KMEM_CF_DIRECT) || (flags & KMEM_CF_VERIFY);
+ return P2ALIGN((unsigned long)slab->addr, PAGE_SIZE);
}
-static inline int kmem_slab_cmp_lookup(const void *addr,
- const struct rbtree_node *node)
+static void
+kmem_slab_vmref(struct kmem_slab *slab, size_t size)
{
- struct kmem_slab *slab;
+ struct vm_page *page;
+ unsigned long va, end;
- slab = rbtree_entry(node, struct kmem_slab, tree_node);
+ va = kmem_slab_buf(slab);
+ end = va + size;
- if (addr == slab->addr)
- return 0;
- else if (addr < slab->addr)
- return -1;
- else
- return 1;
+ do {
+ page = vm_kmem_lookup_page(va);
+ assert(page != NULL);
+ assert(page->slab_priv == NULL);
+ page->slab_priv = slab;
+ va += PAGE_SIZE;
+ } while (va < end);
}
-static inline int kmem_slab_cmp_insert(const struct rbtree_node *a,
- const struct rbtree_node *b)
+static inline int
+kmem_slab_lookup_needed(int flags)
{
- struct kmem_slab *slab;
-
- slab = rbtree_entry(a, struct kmem_slab, tree_node);
- return kmem_slab_cmp_lookup(slab->addr, b);
+ return !(flags & KMEM_CF_DIRECT) || (flags & KMEM_CF_VERIFY);
}
#if SLAB_USE_CPU_POOLS
@@ -788,7 +782,6 @@ void kmem_cache_init(struct kmem_cache *cache, const char *name,
list_node_init(&cache->node);
list_init(&cache->partial_slabs);
list_init(&cache->free_slabs);
- rbtree_init(&cache->active_slabs);
cache->obj_size = obj_size;
cache->align = align;
cache->buf_size = buf_size;
@@ -865,10 +858,13 @@ static int kmem_cache_grow(struct kmem_cache *cache)
simple_lock(&cache->lock);
if (slab != NULL) {
- list_insert_head(&cache->free_slabs, &slab->list_node);
+ list_insert_head(&cache->free_slabs, &slab->node);
cache->nr_bufs += cache->bufs_per_slab;
cache->nr_slabs++;
cache->nr_free_slabs++;
+
+ if (kmem_slab_lookup_needed(cache->flags))
+ kmem_slab_vmref(slab, cache->slab_size);
}
/*
@@ -898,8 +894,8 @@ static void kmem_cache_reap(struct kmem_cache *cache)
simple_unlock(&cache->lock);
while (!list_empty(&dead_slabs)) {
- slab = list_first_entry(&dead_slabs, struct kmem_slab, list_node);
- list_remove(&slab->list_node);
+ slab = list_first_entry(&dead_slabs, struct kmem_slab, node);
+ list_remove(&slab->node);
kmem_slab_destroy(slab, cache);
nr_free_slabs--;
}
@@ -918,11 +914,9 @@ static void * kmem_cache_alloc_from_slab(struct kmem_cache *cache)
union kmem_bufctl *bufctl;
if (!list_empty(&cache->partial_slabs))
- slab = list_first_entry(&cache->partial_slabs, struct kmem_slab,
- list_node);
+ slab = list_first_entry(&cache->partial_slabs, struct kmem_slab, node);
else if (!list_empty(&cache->free_slabs))
- slab = list_first_entry(&cache->free_slabs, struct kmem_slab,
- list_node);
+ slab = list_first_entry(&cache->free_slabs, struct kmem_slab, node);
else
return NULL;
@@ -934,7 +928,7 @@ static void * kmem_cache_alloc_from_slab(struct kmem_cache *cache)
if (slab->nr_refs == cache->bufs_per_slab) {
/* The slab has become complete */
- list_remove(&slab->list_node);
+ list_remove(&slab->node);
if (slab->nr_refs == 1)
cache->nr_free_slabs--;
@@ -943,15 +937,11 @@ static void * kmem_cache_alloc_from_slab(struct kmem_cache *cache)
* The slab has become partial. Insert the new slab at the end of
* the list to reduce fragmentation.
*/
- list_remove(&slab->list_node);
- list_insert_tail(&cache->partial_slabs, &slab->list_node);
+ list_remove(&slab->node);
+ list_insert_tail(&cache->partial_slabs, &slab->node);
cache->nr_free_slabs--;
}
- if ((slab->nr_refs == 1) && kmem_slab_use_tree(cache->flags))
- rbtree_insert(&cache->active_slabs, &slab->tree_node,
- kmem_slab_cmp_insert);
-
return kmem_bufctl_to_buf(bufctl, cache);
}
@@ -970,14 +960,14 @@ static void kmem_cache_free_to_slab(struct kmem_cache *cache, void *buf)
slab = (struct kmem_slab *)P2END((unsigned long)buf, cache->slab_size)
- 1;
} else {
- struct rbtree_node *node;
-
- node = rbtree_lookup_nearest(&cache->active_slabs, buf,
- kmem_slab_cmp_lookup, RBTREE_LEFT);
- assert(node != NULL);
- slab = rbtree_entry(node, struct kmem_slab, tree_node);
- assert((unsigned long)buf < (P2ALIGN((unsigned long)slab->addr
- + cache->slab_size, PAGE_SIZE)));
+ struct vm_page *page;
+
+ page = vm_kmem_lookup_page((unsigned long)buf);
+ assert(page != NULL);
+ slab = page->slab_priv;
+ assert(slab != NULL);
+ assert((unsigned long)buf >= kmem_slab_buf(slab));
+ assert((unsigned long)buf < (kmem_slab_buf(slab) + cache->slab_size));
}
assert(slab->nr_refs >= 1);
@@ -988,20 +978,23 @@ static void kmem_cache_free_to_slab(struct kmem_cache *cache, void *buf)
slab->nr_refs--;
cache->nr_objs--;
+ /*
+ * The slab has become free.
+ */
if (slab->nr_refs == 0) {
- /* The slab has become free */
-
- if (kmem_slab_use_tree(cache->flags))
- rbtree_remove(&cache->active_slabs, &slab->tree_node);
-
+ /*
+ * The slab was partial.
+ */
if (cache->bufs_per_slab > 1)
- list_remove(&slab->list_node);
+ list_remove(&slab->node);
- list_insert_head(&cache->free_slabs, &slab->list_node);
+ list_insert_head(&cache->free_slabs, &slab->node);
cache->nr_free_slabs++;
} else if (slab->nr_refs == (cache->bufs_per_slab - 1)) {
- /* The slab has become partial */
- list_insert_head(&cache->partial_slabs, &slab->list_node);
+ /*
+ * The slab has become partial.
+ */
+ list_insert_head(&cache->partial_slabs, &slab->node);
}
}
@@ -1105,22 +1098,23 @@ slab_alloc:
static void kmem_cache_free_verify(struct kmem_cache *cache, void *buf)
{
- struct rbtree_node *node;
struct kmem_buftag *buftag;
struct kmem_slab *slab;
union kmem_bufctl *bufctl;
+ struct vm_page *page;
unsigned char *redzone_byte;
unsigned long slabend;
- simple_lock(&cache->lock);
- node = rbtree_lookup_nearest(&cache->active_slabs, buf,
- kmem_slab_cmp_lookup, RBTREE_LEFT);
- simple_unlock(&cache->lock);
+ page = vm_page_lookup_pa(vm_page_direct_pa((unsigned long)buf));
+
+ if (page == NULL)
+ kmem_cache_error(cache, buf, KMEM_ERR_INVALID, NULL);
+
+ slab = page->slab_priv;
- if (node == NULL)
+ if (slab == NULL)
kmem_cache_error(cache, buf, KMEM_ERR_INVALID, NULL);
- slab = rbtree_entry(node, struct kmem_slab, tree_node);
slabend = P2ALIGN((unsigned long)slab->addr + cache->slab_size, PAGE_SIZE);
if ((unsigned long)buf >= slabend)
diff --git a/kern/slab.h b/kern/slab.h
index bbd9d41..64daa61 100644
--- a/kern/slab.h
+++ b/kern/slab.h
@@ -50,7 +50,6 @@
#include <cache.h>
#include <kern/lock.h>
#include <kern/list.h>
-#include <kern/rbtree.h>
#include <mach/machine/vm_types.h>
#include <sys/types.h>
#include <vm/vm_types.h>
@@ -120,8 +119,7 @@ struct kmem_buftag {
* Page-aligned collection of unconstructed buffers.
*/
struct kmem_slab {
- struct list list_node;
- struct rbtree_node tree_node;
+ struct list node;
unsigned long nr_refs;
union kmem_bufctl *first_free;
void *addr;
@@ -162,7 +160,7 @@ typedef void (*kmem_slab_free_fn_t)(vm_offset_t, vm_size_t);
#define KMEM_CF_NO_CPU_POOL 0x1 /* CPU pool layer disabled */
#define KMEM_CF_SLAB_EXTERNAL 0x2 /* Slab data is off slab */
#define KMEM_CF_VERIFY 0x4 /* Debugging facilities enabled */
-#define KMEM_CF_DIRECT 0x8 /* No buf-to-slab tree lookup */
+#define KMEM_CF_DIRECT 0x8 /* Quick buf-to-slab lookup */
/*
* Cache of objects.
@@ -185,7 +183,6 @@ struct kmem_cache {
struct list node; /* Cache list linkage */
struct list partial_slabs;
struct list free_slabs;
- struct rbtree active_slabs;
int flags;
size_t bufctl_dist; /* Distance from buffer to bufctl */
size_t slab_size;