From dcbdadadcd006e6b9b8ced1dffa4d47a91557d08 Mon Sep 17 00:00:00 2001
From: Richard Braun <rbraun@sceen.net>
Date: Wed, 9 Oct 2013 11:51:54 +0200
Subject: [PATCH gnumach 1/3] VM cache policy change

This patch lets the kernel unconditionally cache non-empty unreferenced
objects instead of using a fixed arbitrary limit. As the pageout daemon
evicts pages, it collects cached objects that have become empty. The
effective result is a graceful adjustment of the number of objects
related to memory management (virtual memory objects, their associated
ports, and potentially objects maintained in the external memory
managers). Physical memory can now be almost entirely filled up with
cached pages. In addition, these cached pages are not automatically
deactivated as objects can quickly be referenced again.
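
In outline, the new reclamation path works as follows (a condensed
sketch of the vm_pageout.c and vm_object.c hunks below, not the
verbatim kernel code):

	/* In vm_pageout_scan(), once a page has been evicted: */
	vm_page_free(m);
	vm_page_unlock_queues();

	if (vm_object_collectable(object))	/* ref_count == 0 &&
						 * resident_page_count == 0 */
		vm_object_collect(object);	/* re-checks collectability
						 * under the cache lock, then
						 * dequeues and terminates
						 * the object */
	else
		vm_object_unlock(object);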

There are problems with this patch, however. The first is that, on
machines with a large amount of physical memory (above 1 GiB, though it
also depends on usage patterns), scalability issues are exposed. For
example, file systems that don't throttle their writeback requests can
create thread storms, severely reducing system responsiveness. Other
issues, such as linear scans of memory objects, also add visible CPU
overhead.

The second is that, since most memory is now kept in use, the chances
of swapping deadlocks increase. Applications that map large objects and
quickly cause lots of page faults can still easily bring the system to
its knees.
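
As an illustration, the problematic access pattern is roughly the
following (a hypothetical POSIX sketch; the file name, mapping size and
page size are made up, and the mapping is assumed to exceed physical
memory):

	#include <fcntl.h>
	#include <stdlib.h>
	#include <sys/mman.h>

	int main(void)
	{
		size_t len = (size_t)1 << 30;		/* assumed > RAM */
		int fd = open("big.dat", O_RDONLY);	/* made-up file */
		char *p;

		if (fd < 0)
			return EXIT_FAILURE;
		p = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
		if (p == MAP_FAILED)
			return EXIT_FAILURE;

		/* Touch every page, generating a rapid stream of page
		 * faults that competes with the pageout daemon. */
		for (size_t off = 0; off < len; off += 4096)
			*(volatile const char *)(p + off);

		return EXIT_SUCCESS;
	}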
---
 vm/vm_object.c   | 166 ++++++++++++++++++-------------------------------------
 vm/vm_object.h   |   7 ++-
 vm/vm_pageout.c  |   7 ++-
 vm/vm_resident.c |   4 +-
 4 files changed, 68 insertions(+), 116 deletions(-)

diff --git a/vm/vm_object.c b/vm/vm_object.c
index a026348..44c13f4 100644
--- a/vm/vm_object.c
+++ b/vm/vm_object.c
@@ -59,6 +59,11 @@
 #include <ddb/db_output.h>
 #endif	/* MACH_KDB */
 
+void memory_object_release(
+	ipc_port_t	pager,
+	pager_request_t	pager_request,
+	ipc_port_t	pager_name); /* forward */
+
 /*
  *	Virtual memory objects maintain the actual data
  *	associated with allocated virtual memory.  A given
@@ -159,8 +164,9 @@ vm_object_t		kernel_object = &kernel_object_store;
  *
  *	The kernel may choose to terminate objects from this
  *	queue in order to reclaim storage.  The current policy
- *	is to permit a fixed maximum number of unreferenced
- *	objects (vm_object_cached_max).
+ *	is to let memory pressure dynamically adjust the number
+ *	of unreferenced objects. The pageout daemon attempts to
+ *	collect objects after removing pages from them.
  *
  *	A simple lock (accessed by routines
  *	vm_object_cache_{lock,lock_try,unlock}) governs the
@@ -176,7 +182,6 @@ vm_object_t		kernel_object = &kernel_object_store;
  */
 queue_head_t	vm_object_cached_list;
 int		vm_object_cached_count;
-int		vm_object_cached_max = 4000;	/* may be patched*/
 
 decl_simple_lock_data(,vm_object_cached_lock_data)
 
@@ -343,6 +348,33 @@ void vm_object_init(void)
 			IKOT_PAGING_NAME);
 }
 
+void vm_object_collect(
+	register vm_object_t	object)
+{
+	vm_object_unlock(object);
+
+	/*
+	 *	The cache lock must be acquired in the proper order.
+	 */
+
+	vm_object_cache_lock();
+	vm_object_lock(object);
+
+	/*
+	 *	If the object was referenced while the lock was
+	 *	dropped, cancel the termination.
+	 */
+
+	if (!vm_object_collectable(object)) {
+		vm_object_unlock(object);
+		vm_object_cache_unlock();
+		return;
+	}
+
+	queue_remove(&vm_object_cached_list, object, vm_object_t, cached_list);
+	vm_object_terminate(object);
+}
+
 /*
  *	vm_object_reference:
  *
@@ -403,103 +435,35 @@ void vm_object_deallocate(
 
 		/*
 		 *	See whether this object can persist.  If so, enter
-		 *	it in the cache, then deactivate all of its
-		 *	pages.
+		 *	it in the cache.
 		 */
-		if (object->can_persist) {
-			boolean_t	overflow;
-
-			/*
-			 *	Enter the object onto the queue
-			 *	of "cached" objects.  Remember whether
-			 *	we've caused the queue to overflow,
-			 *	as a hint.
-			 */
-
+		if (object->can_persist && (object->resident_page_count > 0)) {
 			queue_enter(&vm_object_cached_list, object,
 				vm_object_t, cached_list);
-			overflow = (++vm_object_cached_count > vm_object_cached_max);
+			vm_object_cached_count++;
 			vm_object_cached_pages_update(object->resident_page_count);
 			vm_object_cache_unlock();
 
-			vm_object_deactivate_pages(object);
 			vm_object_unlock(object);
+			return;
+		}
 
-			/*
-			 *	If we didn't overflow, or if the queue has
-			 *	been reduced back to below the specified
-			 *	minimum, then quit.
-			 */
-			if (!overflow)
-				return;
-
-			while (TRUE) {
-				vm_object_cache_lock();
-				if (vm_object_cached_count <=
-				    vm_object_cached_max) {
-					vm_object_cache_unlock();
-					return;
-				}
-
-				/*
-				 *	If we must trim down the queue, take
-				 *	the first object, and proceed to
-				 *	terminate it instead of the original
-				 *	object.	 Have to wait for pager init.
-				 *  if it's in progress.
-				 */
-				object= (vm_object_t)
-				    queue_first(&vm_object_cached_list);
-				vm_object_lock(object);
-
-				if (!(object->pager_created &&
-				    !object->pager_initialized)) {
-
-					/*
-					 *  Ok to terminate, hang on to lock.
-					 */
-					break;
-				}
-
-				vm_object_assert_wait(object,
-					VM_OBJECT_EVENT_INITIALIZED, FALSE);
-				vm_object_unlock(object);
-				vm_object_cache_unlock();
-				thread_block((void (*)()) 0);
-
-				/*
-				 *  Continue loop to check if cache still
-				 *  needs to be trimmed.
-				 */
-			}
+		if (object->pager_created &&
+		    !object->pager_initialized) {
 
 			/*
-			 *	Actually remove object from cache.
+			 *	Have to wait for initialization.
+			 *	Put reference back and retry
+			 *	when it's initialized.
 			 */
 
-			queue_remove(&vm_object_cached_list, object,
-					vm_object_t, cached_list);
-			vm_object_cached_count--;
-
-			assert(object->ref_count == 0);
-		}
-		else {
-			if (object->pager_created &&
-			    !object->pager_initialized) {
-
-				/*
-				 *	Have to wait for initialization.
-				 *	Put reference back and retry
-				 *	when it's initialized.
-				 */
-				object->ref_count++;
-				vm_object_assert_wait(object,
-					VM_OBJECT_EVENT_INITIALIZED, FALSE);
-				vm_object_unlock(object);
-				vm_object_cache_unlock();
-				thread_block((void (*)()) 0);
-				continue;
-			  }
+			object->ref_count++;
+			vm_object_assert_wait(object,
+				VM_OBJECT_EVENT_INITIALIZED, FALSE);
+			vm_object_unlock(object);
+			vm_object_cache_unlock();
+			thread_block((void (*)()) 0);
+			continue;
 		}
 
 		/*
@@ -862,28 +826,6 @@ kern_return_t memory_object_destroy(
 }
 
 /*
- *	vm_object_deactivate_pages
- *
- *	Deactivate all pages in the specified object.  (Keep its pages
- *	in memory even though it is no longer referenced.)
- *
- *	The object must be locked.
- */
-void vm_object_deactivate_pages(
-	vm_object_t	object)
-{
-	vm_page_t	p;
-
-	queue_iterate(&object->memq, p, vm_page_t, listq) {
-		vm_page_lock_queues();
-		if (!p->busy)
-			vm_page_deactivate(p);
-		vm_page_unlock_queues();
-	}
-}
-
-
-/*
  *	Routine:	vm_object_pmap_protect
  *
  *	Purpose:
@@ -2701,7 +2643,7 @@ void vm_object_page_remove(
 	 *	It balances vm_object_lookup vs iteration.
 	 */
 
-	if (atop(end - start) < (unsigned)object->resident_page_count/16) {
+	if (atop(end - start) < object->resident_page_count/16) {
 		vm_object_page_remove_lookup++;
 
 		for (; start < end; start += PAGE_SIZE) {
@@ -2926,7 +2868,7 @@ void vm_object_print(
 	iprintf("Object 0x%X: size=0x%X, %d references\n",
 		(vm_offset_t) object, (vm_offset_t) object->size,
 		object->ref_count);
-	iprintf("%d resident pages,", object->resident_page_count);
+	iprintf("%lu resident pages,", object->resident_page_count);
 	 printf(" %d absent pages,", object->absent_count);
 	 printf(" %d paging ops\n", object->paging_in_progress);
 	indent += 1;
diff --git a/vm/vm_object.h b/vm/vm_object.h
index 3bfc67a..fa208aa 100644
--- a/vm/vm_object.h
+++ b/vm/vm_object.h
@@ -72,7 +72,7 @@ struct vm_object {
 						 */
 
 	int			ref_count;	/* Number of references */
-	int			resident_page_count;
+	unsigned long		resident_page_count;
 						/* number of resident pages */
 
 	struct vm_object	*copy;		/* Object that should receive
@@ -169,6 +169,7 @@ vm_object_t	kernel_object;		/* the single kernel object */
 
 extern void		vm_object_bootstrap(void);
 extern void		vm_object_init(void);
+extern void		vm_object_collect(vm_object_t);
 extern void		vm_object_terminate(vm_object_t);
 extern vm_object_t	vm_object_allocate(vm_size_t);
 extern void		vm_object_reference(vm_object_t);
@@ -290,6 +291,10 @@ vm_object_t vm_object_copy_delayed(
  *	Routines implemented as macros
  */
 
+#define vm_object_collectable(object)					\
+	(((object)->ref_count == 0)					\
+	&& ((object)->resident_page_count == 0))
+
 #define	vm_object_paging_begin(object) 					\
 	((object)->paging_in_progress++)
 
diff --git a/vm/vm_pageout.c b/vm/vm_pageout.c
index 51a6a0d..5e1ad1d 100644
--- a/vm/vm_pageout.c
+++ b/vm/vm_pageout.c
@@ -748,7 +748,12 @@ void vm_pageout_scan(void)
 		    reclaim_page:
 			vm_page_free(m);
 			vm_page_unlock_queues();
-			vm_object_unlock(object);
+
+			if (vm_object_collectable(object))
+				vm_object_collect(object);
+			else
+				vm_object_unlock(object);
+
 			continue;
 		}
 
diff --git a/vm/vm_resident.c b/vm/vm_resident.c
index c70fa73..b65b756 100644
--- a/vm/vm_resident.c
+++ b/vm/vm_resident.c
@@ -523,7 +523,7 @@ void vm_page_insert(
 	 */
 
 	object->resident_page_count++;
-	assert(object->resident_page_count >= 0);
+	assert(object->resident_page_count != 0);
 
 	if (object->can_persist && (object->ref_count == 0))
 		vm_object_cached_pages_update(1);
@@ -630,7 +630,7 @@ void vm_page_replace(
 	 */
 
 	object->resident_page_count++;
-	assert(object->resident_page_count >= 0);
+	assert(object->resident_page_count != 0);
 
 	if (object->can_persist && (object->ref_count == 0))
 		vm_object_cached_pages_update(1);
-- 
2.1.4