path: root/kern/sched_prim.c
author	Thomas Bushnell <thomas@gnu.org>	1997-02-25 21:28:37 +0000
committer	Thomas Bushnell <thomas@gnu.org>	1997-02-25 21:28:37 +0000
commit	f07a4c844da9f0ecae5bbee1ab94be56505f26f7 (patch)
tree	12b07c7e578fc1a5f53dbfde2632408491ff2a70	/kern/sched_prim.c
Initial source
Diffstat (limited to 'kern/sched_prim.c')
-rw-r--r--	kern/sched_prim.c	2062
1 files changed, 2062 insertions, 0 deletions
diff --git a/kern/sched_prim.c b/kern/sched_prim.c
new file mode 100644
index 0000000..b17e612
--- /dev/null
+++ b/kern/sched_prim.c
@@ -0,0 +1,2062 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1993-1987 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: sched_prim.c
+ * Author: Avadis Tevanian, Jr.
+ * Date: 1986
+ *
+ * Scheduling primitives
+ *
+ */
+
+#include <cpus.h>
+#include <simple_clock.h>
+#include <mach_fixpri.h>
+#include <mach_host.h>
+#include <hw_footprint.h>
+#include <fast_tas.h>
+#include <power_save.h>
+
+#include <mach/machine.h>
+#include <kern/ast.h>
+#include <kern/counters.h>
+#include <kern/cpu_number.h>
+#include <kern/lock.h>
+#include <kern/macro_help.h>
+#include <kern/processor.h>
+#include <kern/queue.h>
+#include <kern/sched.h>
+#include <kern/sched_prim.h>
+#include <kern/syscall_subr.h>
+#include <kern/thread.h>
+#include <kern/thread_swap.h>
+#include <kern/time_out.h>
+#include <vm/pmap.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
+#include <machine/machspl.h> /* For def'n of splsched() */
+
+#if MACH_FIXPRI
+#include <mach/policy.h>
+#endif /* MACH_FIXPRI */
+
+
+extern int hz;
+
+int min_quantum; /* defines max context switch rate */
+
+unsigned sched_tick;
+
+#if SIMPLE_CLOCK
+int sched_usec;
+#endif /* SIMPLE_CLOCK */
+
+thread_t sched_thread_id;
+
+void recompute_priorities(void); /* forward */
+void update_priority(thread_t);
+void set_pri(thread_t, int, boolean_t);
+void do_thread_scan(void);
+
+thread_t choose_pset_thread();
+
+timer_elt_data_t recompute_priorities_timer;
+
+#if DEBUG
+void checkrq(run_queue_t, char *);
+void thread_check(thread_t, run_queue_t);
+#endif
+
+/*
+ * State machine
+ *
+ * states are combinations of:
+ * R running
+ * W waiting (or on wait queue)
+ * S suspended (or will suspend)
+ * N non-interruptible
+ *
+ * init action
+ * assert_wait thread_block clear_wait suspend resume
+ *
+ * R RW, RWN R; setrun - RS -
+ * RS RWS, RWNS S; wake_active - - R
+ * RN RWN RN; setrun - RNS -
+ * RNS RWNS RNS; setrun - - RN
+ *
+ * RW W R RWS -
+ * RWN WN RN RWNS -
+ * RWS WS; wake_active RS - RW
+ * RWNS WNS RNS - RWN
+ *
+ * W R; setrun WS -
+ * WN RN; setrun WNS -
+ * WNS RNS; setrun - WN
+ *
+ * S - - R
+ * WS S - W
+ *
+ */
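+
+/*
+ * Reading the table above, for example: a running thread (R) that calls
+ * assert_wait moves to RW (or RWN for an uninterruptible wait); a
+ * subsequent thread_block leaves it waiting (W), and a later clear_wait
+ * makes it runnable again (R) via thread_setrun.
+ */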
+
+/*
+ * Waiting protocols and implementation:
+ *
+ * Each thread may be waiting for exactly one event; this event
+ * is set using assert_wait(). That thread may be awakened either
+ * by performing a thread_wakeup_prim() on its event,
+ * or by directly waking that thread up with clear_wait().
+ *
+ * The implementation of wait events uses a hash table. Each
+ * bucket is a queue of threads having the same hash function
+ * value; the chain for the queue (linked list) is the run queue
+ * field. [It is not possible to be waiting and runnable at the
+ * same time.]
+ *
+ * Locks on both the thread and on the hash buckets govern the
+ * wait event field and the queue chain field. Because wakeup
+ * operations only have the event as an argument, the event hash
+ * bucket must be locked before any thread.
+ *
+ * Scheduling operations may also occur at interrupt level; therefore,
+ * interrupts below splsched() must be prevented when holding
+ * thread or hash bucket locks.
+ *
+ * The wait event hash table declarations are as follows:
+ */
+
+#define NUMQUEUES 59
+
+queue_head_t wait_queue[NUMQUEUES];
+decl_simple_lock_data(, wait_lock[NUMQUEUES])
+
+/* NOTE: we want a small positive integer out of this */
+#define wait_hash(event) \
+ ((((int)(event) < 0) ? ~(int)(event) : (int)(event)) % NUMQUEUES)
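+
+/*
+ * For illustration: an event address of 0x1003 (4099) hashes to bucket
+ * 4099 % 59 = 28; a negative address is first bit-complemented so that
+ * the result is always a small positive index into wait_queue[] and
+ * wait_lock[].
+ */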
+
+void wait_queue_init(void)
+{
+ register int i;
+
+ for (i = 0; i < NUMQUEUES; i++) {
+ queue_init(&wait_queue[i]);
+ simple_lock_init(&wait_lock[i]);
+ }
+}
+
+void sched_init(void)
+{
+ recompute_priorities_timer.fcn = (int (*)())recompute_priorities;
+ recompute_priorities_timer.param = (char *)0;
+
+ min_quantum = hz / 10; /* context switch 10 times/second */
+ wait_queue_init();
+	pset_sys_bootstrap();		/* initialize processor mgmt. */
+ queue_init(&action_queue);
+ simple_lock_init(&action_lock);
+ sched_tick = 0;
+#if SIMPLE_CLOCK
+ sched_usec = 0;
+#endif /* SIMPLE_CLOCK */
+ ast_init();
+}
+
+/*
+ * Thread timeout routine, called when timer expires.
+ * Called at splsoftclock.
+ */
+void thread_timeout(
+ thread_t thread)
+{
+ assert(thread->timer.set == TELT_UNSET);
+
+ clear_wait(thread, THREAD_TIMED_OUT, FALSE);
+}
+
+/*
+ * thread_set_timeout:
+ *
+ * Set a timer for the current thread, if the thread
+ * is ready to wait. Must be called between assert_wait()
+ * and thread_block().
+ */
+
+void thread_set_timeout(
+ int t) /* timeout interval in ticks */
+{
+ register thread_t thread = current_thread();
+ register spl_t s;
+
+ s = splsched();
+ thread_lock(thread);
+ if ((thread->state & TH_WAIT) != 0) {
+ set_timeout(&thread->timer, t);
+ }
+ thread_unlock(thread);
+ splx(s);
+}
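+
+/*
+ * A minimal usage sketch (illustrative only; `my_event' is a hypothetical
+ * event address, and hz is one second's worth of ticks).  The timeout
+ * must be set after the wait is asserted and before the thread blocks:
+ *
+ *	assert_wait((event_t) &my_event, TRUE);
+ *	thread_set_timeout(hz);
+ *	thread_block((void (*)()) 0);
+ *
+ * The thread later resumes with wait_result THREAD_AWAKENED or, if the
+ * timer fired first, THREAD_TIMED_OUT (see thread_timeout above).
+ */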
+
+/*
+ * Set up thread timeout element when thread is created.
+ */
+void thread_timeout_setup(
+ register thread_t thread)
+{
+ thread->timer.fcn = (int (*)())thread_timeout;
+ thread->timer.param = (char *)thread;
+ thread->depress_timer.fcn = (int (*)())thread_depress_timeout;
+ thread->depress_timer.param = (char *)thread;
+}
+
+/*
+ * assert_wait:
+ *
+ * Assert that the current thread is about to go to
+ * sleep until the specified event occurs.
+ */
+void assert_wait(
+ event_t event,
+ boolean_t interruptible)
+{
+ register queue_t q;
+ register int index;
+ register thread_t thread;
+#if MACH_SLOCKS
+ register simple_lock_t lock;
+#endif /* MACH_SLOCKS */
+ spl_t s;
+
+ thread = current_thread();
+ if (thread->wait_event != 0) {
+ panic("assert_wait: already asserted event %#x\n",
+ thread->wait_event);
+ }
+ s = splsched();
+ if (event != 0) {
+ index = wait_hash(event);
+ q = &wait_queue[index];
+#if MACH_SLOCKS
+ lock = &wait_lock[index];
+#endif /* MACH_SLOCKS */
+ simple_lock(lock);
+ thread_lock(thread);
+ enqueue_tail(q, (queue_entry_t) thread);
+ thread->wait_event = event;
+ if (interruptible)
+ thread->state |= TH_WAIT;
+ else
+ thread->state |= TH_WAIT | TH_UNINT;
+ thread_unlock(thread);
+ simple_unlock(lock);
+ }
+ else {
+ thread_lock(thread);
+ if (interruptible)
+ thread->state |= TH_WAIT;
+ else
+ thread->state |= TH_WAIT | TH_UNINT;
+ thread_unlock(thread);
+ }
+ splx(s);
+}
+
+/*
+ * clear_wait:
+ *
+ * Clear the wait condition for the specified thread. Start the thread
+ * executing if that is appropriate.
+ *
+ * parameters:
+ * thread thread to awaken
+ * result Wakeup result the thread should see
+ * interrupt_only Don't wake up the thread if it isn't
+ * interruptible.
+ */
+void clear_wait(
+ register thread_t thread,
+ int result,
+ boolean_t interrupt_only)
+{
+ register int index;
+ register queue_t q;
+#if MACH_SLOCKS
+ register simple_lock_t lock;
+#endif /* MACH_SLOCKS */
+ register event_t event;
+ spl_t s;
+
+ s = splsched();
+ thread_lock(thread);
+ if (interrupt_only && (thread->state & TH_UNINT)) {
+ /*
+ * can`t interrupt thread
+ */
+ thread_unlock(thread);
+ splx(s);
+ return;
+ }
+
+ event = thread->wait_event;
+ if (event != 0) {
+ thread_unlock(thread);
+ index = wait_hash(event);
+ q = &wait_queue[index];
+#if MACH_SLOCKS
+ lock = &wait_lock[index];
+#endif /* MACH_SLOCKS */
+ simple_lock(lock);
+ /*
+ * If the thread is still waiting on that event,
+ * then remove it from the list. If it is waiting
+ * on a different event, or no event at all, then
+ * someone else did our job for us.
+ */
+ thread_lock(thread);
+ if (thread->wait_event == event) {
+ remqueue(q, (queue_entry_t)thread);
+ thread->wait_event = 0;
+ event = 0; /* cause to run below */
+ }
+ simple_unlock(lock);
+ }
+ if (event == 0) {
+ register int state = thread->state;
+
+ reset_timeout_check(&thread->timer);
+
+ switch (state & TH_SCHED_STATE) {
+ case TH_WAIT | TH_SUSP | TH_UNINT:
+ case TH_WAIT | TH_UNINT:
+ case TH_WAIT:
+ /*
+ * Sleeping and not suspendable - put
+ * on run queue.
+ */
+ thread->state = (state &~ TH_WAIT) | TH_RUN;
+ thread->wait_result = result;
+ thread_setrun(thread, TRUE);
+ break;
+
+ case TH_WAIT | TH_SUSP:
+ case TH_RUN | TH_WAIT:
+ case TH_RUN | TH_WAIT | TH_SUSP:
+ case TH_RUN | TH_WAIT | TH_UNINT:
+ case TH_RUN | TH_WAIT | TH_SUSP | TH_UNINT:
+ /*
+ * Either already running, or suspended.
+ */
+ thread->state = state &~ TH_WAIT;
+ thread->wait_result = result;
+ break;
+
+ default:
+ /*
+ * Not waiting.
+ */
+ break;
+ }
+ }
+ thread_unlock(thread);
+ splx(s);
+}
+
+/*
+ * thread_wakeup_prim:
+ *
+ * Common routine for thread_wakeup, thread_wakeup_with_result,
+ * and thread_wakeup_one.
+ *
+ */
+void thread_wakeup_prim(
+ event_t event,
+ boolean_t one_thread,
+ int result)
+{
+ register queue_t q;
+ register int index;
+ register thread_t thread, next_th;
+#if MACH_SLOCKS
+ register simple_lock_t lock;
+#endif /* MACH_SLOCKS */
+ spl_t s;
+ register int state;
+
+ index = wait_hash(event);
+ q = &wait_queue[index];
+ s = splsched();
+#if MACH_SLOCKS
+ lock = &wait_lock[index];
+#endif /* MACH_SLOCKS */
+ simple_lock(lock);
+ thread = (thread_t) queue_first(q);
+ while (!queue_end(q, (queue_entry_t)thread)) {
+ next_th = (thread_t) queue_next((queue_t) thread);
+
+ if (thread->wait_event == event) {
+ thread_lock(thread);
+ remqueue(q, (queue_entry_t) thread);
+ thread->wait_event = 0;
+ reset_timeout_check(&thread->timer);
+
+ state = thread->state;
+ switch (state & TH_SCHED_STATE) {
+
+ case TH_WAIT | TH_SUSP | TH_UNINT:
+ case TH_WAIT | TH_UNINT:
+ case TH_WAIT:
+ /*
+ * Sleeping and not suspendable - put
+ * on run queue.
+ */
+ thread->state = (state &~ TH_WAIT) | TH_RUN;
+ thread->wait_result = result;
+ thread_setrun(thread, TRUE);
+ break;
+
+ case TH_WAIT | TH_SUSP:
+ case TH_RUN | TH_WAIT:
+ case TH_RUN | TH_WAIT | TH_SUSP:
+ case TH_RUN | TH_WAIT | TH_UNINT:
+ case TH_RUN | TH_WAIT | TH_SUSP | TH_UNINT:
+ /*
+ * Either already running, or suspended.
+ */
+ thread->state = state &~ TH_WAIT;
+ thread->wait_result = result;
+ break;
+
+ default:
+ panic("thread_wakeup");
+ break;
+ }
+ thread_unlock(thread);
+ if (one_thread)
+ break;
+ }
+ thread = next_th;
+ }
+ simple_unlock(lock);
+ splx(s);
+}
+
+/*
+ * thread_sleep:
+ *
+ * Cause the current thread to wait until the specified event
+ * occurs. The specified lock is unlocked before releasing
+ * the cpu. (This is a convenient way to sleep without manually
+ * calling assert_wait).
+ */
+void thread_sleep(
+ event_t event,
+ simple_lock_t lock,
+ boolean_t interruptible)
+{
+ assert_wait(event, interruptible); /* assert event */
+ simple_unlock(lock); /* release the lock */
+ thread_block((void (*)()) 0); /* block ourselves */
+}
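+
+/*
+ * A hedged usage sketch (hypothetical `lock' and `ready' names): waiting
+ * for a flag guarded by a simple lock.  thread_sleep drops the lock but
+ * does not reacquire it, so the caller must re-lock before re-checking:
+ *
+ *	simple_lock(&lock);
+ *	while (!ready) {
+ *		thread_sleep((event_t) &ready, &lock, TRUE);
+ *		simple_lock(&lock);
+ *	}
+ *	simple_unlock(&lock);
+ *
+ * The waker sets `ready' under the lock and then calls
+ * thread_wakeup((event_t) &ready).
+ */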
+
+/*
+ * thread_bind:
+ *
+ * Force a thread to execute on the specified processor.
+ * If the thread is currently executing, it may wait until its
+ * time slice is up before switching onto the specified processor.
+ *
+ * A processor of PROCESSOR_NULL causes the thread to be unbound.
+ * xxx - DO NOT export this to users.
+ */
+void thread_bind(
+ register thread_t thread,
+ processor_t processor)
+{
+ spl_t s;
+
+ s = splsched();
+ thread_lock(thread);
+ thread->bound_processor = processor;
+ thread_unlock(thread);
+ (void) splx(s);
+}
+
+/*
+ * Select a thread for this processor (the current processor) to run.
+ * May select the current thread.
+ * Assumes splsched.
+ */
+
+thread_t thread_select(
+ register processor_t myprocessor)
+{
+ register thread_t thread;
+
+ myprocessor->first_quantum = TRUE;
+ /*
+ * Check for obvious simple case; local runq is
+ * empty and global runq has entry at hint.
+ */
+ if (myprocessor->runq.count > 0) {
+ thread = choose_thread(myprocessor);
+ myprocessor->quantum = min_quantum;
+ }
+ else {
+ register processor_set_t pset;
+
+#if MACH_HOST
+ pset = myprocessor->processor_set;
+#else /* MACH_HOST */
+ pset = &default_pset;
+#endif /* MACH_HOST */
+ simple_lock(&pset->runq.lock);
+#if DEBUG
+ checkrq(&pset->runq, "thread_select");
+#endif /* DEBUG */
+ if (pset->runq.count == 0) {
+ /*
+ * Nothing else runnable. Return if this
+ * thread is still runnable on this processor.
+ * Check for priority update if required.
+ */
+ thread = current_thread();
+ if ((thread->state == TH_RUN) &&
+#if MACH_HOST
+ (thread->processor_set == pset) &&
+#endif /* MACH_HOST */
+ ((thread->bound_processor == PROCESSOR_NULL) ||
+ (thread->bound_processor == myprocessor))) {
+
+ simple_unlock(&pset->runq.lock);
+ thread_lock(thread);
+ if (thread->sched_stamp != sched_tick)
+ update_priority(thread);
+ thread_unlock(thread);
+ }
+ else {
+ thread = choose_pset_thread(myprocessor, pset);
+ }
+ }
+ else {
+ register queue_t q;
+
+ /*
+ * If there is a thread at hint, grab it,
+ * else call choose_pset_thread.
+ */
+ q = pset->runq.runq + pset->runq.low;
+
+ if (queue_empty(q)) {
+ pset->runq.low++;
+ thread = choose_pset_thread(myprocessor, pset);
+ }
+ else {
+ thread = (thread_t) dequeue_head(q);
+ thread->runq = RUN_QUEUE_NULL;
+ pset->runq.count--;
+#if MACH_FIXPRI
+ /*
+ * Cannot lazy evaluate pset->runq.low for
+ * fixed priority policy
+ */
+ if ((pset->runq.count > 0) &&
+ (pset->policies & POLICY_FIXEDPRI)) {
+ while (queue_empty(q)) {
+ pset->runq.low++;
+ q++;
+ }
+ }
+#endif /* MACH_FIXPRI */
+#if DEBUG
+ checkrq(&pset->runq, "thread_select: after");
+#endif /* DEBUG */
+ simple_unlock(&pset->runq.lock);
+ }
+ }
+
+#if MACH_FIXPRI
+ if (thread->policy == POLICY_TIMESHARE) {
+#endif /* MACH_FIXPRI */
+ myprocessor->quantum = pset->set_quantum;
+#if MACH_FIXPRI
+ }
+ else {
+ /*
+ * POLICY_FIXEDPRI
+ */
+ myprocessor->quantum = thread->sched_data;
+ }
+#endif /* MACH_FIXPRI */
+ }
+
+ return thread;
+}
+
+/*
+ * Stop running the current thread and start running the new thread.
+ * If continuation is non-zero, and the current thread is blocked,
+ * then it will resume by executing continuation on a new stack.
+ * Returns TRUE if the hand-off succeeds.
+ * Assumes splsched.
+ */
+
+boolean_t thread_invoke(
+ register thread_t old_thread,
+ continuation_t continuation,
+ register thread_t new_thread)
+{
+ /*
+ * Check for invoking the same thread.
+ */
+ if (old_thread == new_thread) {
+ /*
+ * Mark thread interruptible.
+ * Run continuation if there is one.
+ */
+ thread_lock(new_thread);
+ new_thread->state &= ~TH_UNINT;
+ thread_unlock(new_thread);
+
+ if (continuation != (void (*)()) 0) {
+ (void) spl0();
+ call_continuation(continuation);
+ /*NOTREACHED*/
+ }
+ return TRUE;
+ }
+
+ /*
+ * Check for stack-handoff.
+ */
+ thread_lock(new_thread);
+ if ((old_thread->stack_privilege != current_stack()) &&
+ (continuation != (void (*)()) 0))
+ {
+ switch (new_thread->state & TH_SWAP_STATE) {
+ case TH_SWAPPED:
+
+ new_thread->state &= ~(TH_SWAPPED | TH_UNINT);
+ thread_unlock(new_thread);
+
+#if NCPUS > 1
+ new_thread->last_processor = current_processor();
+#endif /* NCPUS > 1 */
+
+ /*
+ * Set up ast context of new thread and
+ * switch to its timer.
+ */
+ ast_context(new_thread, cpu_number());
+ timer_switch(&new_thread->system_timer);
+
+ stack_handoff(old_thread, new_thread);
+
+ /*
+ * We can dispatch the old thread now.
+ * This is like thread_dispatch, except
+ * that the old thread is left swapped
+ * *without* freeing its stack.
+ * This path is also much more frequent
+ * than actual calls to thread_dispatch.
+ */
+
+ thread_lock(old_thread);
+ old_thread->swap_func = continuation;
+
+ switch (old_thread->state) {
+ case TH_RUN | TH_SUSP:
+ case TH_RUN | TH_SUSP | TH_HALTED:
+ case TH_RUN | TH_WAIT | TH_SUSP:
+ /*
+ * Suspend the thread
+ */
+ old_thread->state = (old_thread->state & ~TH_RUN)
+ | TH_SWAPPED;
+ if (old_thread->wake_active) {
+ old_thread->wake_active = FALSE;
+ thread_unlock(old_thread);
+ thread_wakeup((event_t)&old_thread->wake_active);
+
+ goto after_old_thread;
+ }
+ break;
+
+ case TH_RUN | TH_SUSP | TH_UNINT:
+ case TH_RUN | TH_UNINT:
+ case TH_RUN:
+ /*
+ * We can`t suspend the thread yet,
+ * or it`s still running.
+ * Put back on a run queue.
+ */
+ old_thread->state |= TH_SWAPPED;
+ thread_setrun(old_thread, FALSE);
+ break;
+
+ case TH_RUN | TH_WAIT | TH_SUSP | TH_UNINT:
+ case TH_RUN | TH_WAIT | TH_UNINT:
+ case TH_RUN | TH_WAIT:
+ /*
+ * Waiting, and not suspendable.
+ */
+ old_thread->state = (old_thread->state & ~TH_RUN)
+ | TH_SWAPPED;
+ break;
+
+ case TH_RUN | TH_IDLE:
+ /*
+ * Drop idle thread -- it is already in
+ * idle_thread_array.
+ */
+ old_thread->state = TH_RUN | TH_IDLE | TH_SWAPPED;
+ break;
+
+ default:
+ panic("thread_invoke");
+ }
+ thread_unlock(old_thread);
+ after_old_thread:
+
+ /*
+ * call_continuation calls the continuation
+ * after resetting the current stack pointer
+ * to recover stack space. If we called
+ * the continuation directly, we would risk
+ * running out of stack.
+ */
+
+ counter_always(c_thread_invoke_hits++);
+ (void) spl0();
+ call_continuation(new_thread->swap_func);
+ /*NOTREACHED*/
+ return TRUE; /* help for the compiler */
+
+ case TH_SW_COMING_IN:
+ /*
+ * Waiting for a stack
+ */
+ thread_swapin(new_thread);
+ thread_unlock(new_thread);
+ counter_always(c_thread_invoke_misses++);
+ return FALSE;
+
+ case 0:
+ /*
+ * Already has a stack - can`t handoff.
+ */
+ break;
+ }
+ }
+
+ else {
+ /*
+ * Check that the thread is swapped-in.
+ */
+ if (new_thread->state & TH_SWAPPED) {
+ if ((new_thread->state & TH_SW_COMING_IN) ||
+ !stack_alloc_try(new_thread, thread_continue))
+ {
+ thread_swapin(new_thread);
+ thread_unlock(new_thread);
+ counter_always(c_thread_invoke_misses++);
+ return FALSE;
+ }
+ }
+ }
+
+ new_thread->state &= ~(TH_SWAPPED | TH_UNINT);
+ thread_unlock(new_thread);
+
+ /*
+ * Thread is now interruptible.
+ */
+#if NCPUS > 1
+ new_thread->last_processor = current_processor();
+#endif /* NCPUS > 1 */
+
+ /*
+ * Set up ast context of new thread and switch to its timer.
+ */
+ ast_context(new_thread, cpu_number());
+ timer_switch(&new_thread->system_timer);
+
+ /*
+ * switch_context is machine-dependent. It does the
+ * machine-dependent components of a context-switch, like
+ * changing address spaces. It updates active_threads.
+ * It returns only if a continuation is not supplied.
+ */
+ counter_always(c_thread_invoke_csw++);
+ old_thread = switch_context(old_thread, continuation, new_thread);
+
+ /*
+ * We're back. Now old_thread is the thread that resumed
+ * us, and we have to dispatch it.
+ */
+ thread_dispatch(old_thread);
+
+ return TRUE;
+}
+
+/*
+ * thread_continue:
+ *
+ * Called when the current thread is given a new stack.
+ * Called at splsched.
+ */
+void thread_continue(
+ register thread_t old_thread)
+{
+ register continuation_t continuation = current_thread()->swap_func;
+
+ /*
+ * We must dispatch the old thread and then
+ * call the current thread's continuation.
+ * There might not be an old thread, if we are
+ * the first thread to run on this processor.
+ */
+
+ if (old_thread != THREAD_NULL)
+ thread_dispatch(old_thread);
+ (void) spl0();
+ (*continuation)();
+ /*NOTREACHED*/
+}
+
+
+/*
+ * thread_block:
+ *
+ * Block the current thread. If the thread is runnable
+ * then someone must have woken it up between its request
+ * to sleep and now. In this case, it goes back on a
+ * run queue.
+ *
+ * If a continuation is specified, then thread_block will
+ * attempt to discard the thread's kernel stack. When the
+ * thread resumes, it will execute the continuation function
+ * on a new kernel stack.
+ */
+
+void thread_block(
+ continuation_t continuation)
+{
+ register thread_t thread = current_thread();
+ register processor_t myprocessor = cpu_to_processor(cpu_number());
+ register thread_t new_thread;
+ spl_t s;
+
+ check_simple_locks();
+
+ s = splsched();
+
+#if FAST_TAS
+ {
+ extern void recover_ras();
+
+ if (csw_needed(thread, myprocessor))
+ recover_ras(thread);
+ }
+#endif /* FAST_TAS */
+
+ ast_off(cpu_number(), AST_BLOCK);
+
+ do
+ new_thread = thread_select(myprocessor);
+ while (!thread_invoke(thread, continuation, new_thread));
+
+ splx(s);
+}
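+
+/*
+ * A hedged sketch of a continuation-style block (hypothetical names).
+ * When a continuation is supplied, control does not return to the call
+ * site: the thread resumes by running the continuation on a fresh kernel
+ * stack, so nothing may be kept in locals across the call:
+ *
+ *	void wait_done(void)
+ *	{
+ *		... re-fetch state and carry on; never returns ...
+ *	}
+ *
+ *	assert_wait((event_t) &my_event, FALSE);
+ *	thread_block(wait_done);
+ *	(not reached)
+ */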
+
+/*
+ * thread_run:
+ *
+ * Switch directly from the current thread to a specified
+ * thread. Both the current and new threads must be
+ * runnable.
+ *
+ *	If a continuation is specified, then thread_run will
+ * attempt to discard the current thread's kernel stack. When the
+ * thread resumes, it will execute the continuation function
+ * on a new kernel stack.
+ */
+void thread_run(
+ continuation_t continuation,
+ register thread_t new_thread)
+{
+ register thread_t thread = current_thread();
+ register processor_t myprocessor = cpu_to_processor(cpu_number());
+ spl_t s;
+
+ check_simple_locks();
+
+ s = splsched();
+
+ while (!thread_invoke(thread, continuation, new_thread))
+ new_thread = thread_select(myprocessor);
+
+ splx(s);
+}
+
+/*
+ * Dispatches a running thread that is not on a runq.
+ * Called at splsched.
+ */
+
+void thread_dispatch(
+ register thread_t thread)
+{
+ /*
+ * If we are discarding the thread's stack, we must do it
+ * before the thread has a chance to run.
+ */
+
+ thread_lock(thread);
+
+ if (thread->swap_func != (void (*)()) 0) {
+ assert((thread->state & TH_SWAP_STATE) == 0);
+ thread->state |= TH_SWAPPED;
+ stack_free(thread);
+ }
+
+ switch (thread->state &~ TH_SWAP_STATE) {
+ case TH_RUN | TH_SUSP:
+ case TH_RUN | TH_SUSP | TH_HALTED:
+ case TH_RUN | TH_WAIT | TH_SUSP:
+ /*
+ * Suspend the thread
+ */
+ thread->state &= ~TH_RUN;
+ if (thread->wake_active) {
+ thread->wake_active = FALSE;
+ thread_unlock(thread);
+ thread_wakeup((event_t)&thread->wake_active);
+ return;
+ }
+ break;
+
+ case TH_RUN | TH_SUSP | TH_UNINT:
+ case TH_RUN | TH_UNINT:
+ case TH_RUN:
+ /*
+ * No reason to stop. Put back on a run queue.
+ */
+ thread_setrun(thread, FALSE);
+ break;
+
+ case TH_RUN | TH_WAIT | TH_SUSP | TH_UNINT:
+ case TH_RUN | TH_WAIT | TH_UNINT:
+ case TH_RUN | TH_WAIT:
+ /*
+ * Waiting, and not suspended.
+ */
+ thread->state &= ~TH_RUN;
+ break;
+
+ case TH_RUN | TH_IDLE:
+ /*
+ * Drop idle thread -- it is already in
+ * idle_thread_array.
+ */
+ break;
+
+ default:
+ panic("thread_dispatch");
+ }
+ thread_unlock(thread);
+}
+
+
+/*
+ * Define shifts for simulating (5/8)**n
+ */
+
+shift_data_t wait_shift[32] = {
+ {1,1},{1,3},{1,-3},{2,-7},{3,5},{3,-5},{4,-8},{5,7},
+ {5,-7},{6,-10},{7,10},{7,-9},{8,-11},{9,12},{9,-11},{10,-13},
+ {11,14},{11,-13},{12,-15},{13,17},{13,-15},{14,-17},{15,19},{16,18},
+ {16,-19},{17,22},{18,20},{18,-20},{19,26},{20,22},{20,-22},{21,-27}};
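+
+/*
+ * Each entry wait_shift[n] approximates (5/8)**n as a sum or difference
+ * of two right shifts (a negative shift2 means subtract; see
+ * update_priority below).  For example, wait_shift[1] = {1,3} gives
+ * usage/2 + usage/8 = 5*usage/8 exactly, and wait_shift[2] = {1,-3}
+ * gives usage/2 - usage/8 = 3*usage/8, approximating (5/8)**2 = 25/64.
+ */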
+
+/*
+ * do_priority_computation:
+ *
+ * Calculate new priority for thread based on its base priority plus
+ * accumulated usage. PRI_SHIFT and PRI_SHIFT_2 convert from
+ * usage to priorities. SCHED_SHIFT converts for the scaling
+ * of the sched_usage field by SCHED_SCALE. This scaling comes
+ * from the multiplication by sched_load (thread_timer_delta)
+ * in sched.h. sched_load is calculated as a scaled overload
+ * factor in compute_mach_factor (mach_factor.c).
+ */
+
+#ifdef PRI_SHIFT_2
+#if PRI_SHIFT_2 > 0
+#define do_priority_computation(th, pri) \
+ MACRO_BEGIN \
+ (pri) = (th)->priority /* start with base priority */ \
+ + ((th)->sched_usage >> (PRI_SHIFT + SCHED_SHIFT)) \
+ + ((th)->sched_usage >> (PRI_SHIFT_2 + SCHED_SHIFT)); \
+ if ((pri) > 31) (pri) = 31; \
+ MACRO_END
+#else /* PRI_SHIFT_2 */
+#define do_priority_computation(th, pri) \
+ MACRO_BEGIN \
+ (pri) = (th)->priority /* start with base priority */ \
+ + ((th)->sched_usage >> (PRI_SHIFT + SCHED_SHIFT)) \
+ - ((th)->sched_usage >> (SCHED_SHIFT - PRI_SHIFT_2)); \
+ if ((pri) > 31) (pri) = 31; \
+ MACRO_END
+#endif /* PRI_SHIFT_2 */
+#else /* defined(PRI_SHIFT_2) */
+#define do_priority_computation(th, pri) \
+ MACRO_BEGIN \
+ (pri) = (th)->priority /* start with base priority */ \
+ + ((th)->sched_usage >> (PRI_SHIFT + SCHED_SHIFT)); \
+ if ((pri) > 31) (pri) = 31; \
+ MACRO_END
+#endif /* defined(PRI_SHIFT_2) */
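+
+/*
+ * Illustrative note: Mach priorities are numerically inverted (0 is
+ * strongest, 31 weakest), so the usage term weakens a thread as it
+ * accumulates cpu time.  In the simplest configuration (no PRI_SHIFT_2),
+ * a thread with base priority 12 ends up with
+ * sched_pri = 12 + (sched_usage >> (PRI_SHIFT + SCHED_SHIFT)),
+ * clamped at 31.
+ */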
+
+/*
+ * compute_priority:
+ *
+ * Compute the effective priority of the specified thread.
+ * The effective priority computation is as follows:
+ *
+ * Take the base priority for this thread and add
+ * to it an increment derived from its cpu_usage.
+ *
+ * The thread *must* be locked by the caller.
+ */
+
+void compute_priority(
+ register thread_t thread,
+ boolean_t resched)
+{
+ register int pri;
+
+#if MACH_FIXPRI
+ if (thread->policy == POLICY_TIMESHARE) {
+#endif /* MACH_FIXPRI */
+ do_priority_computation(thread, pri);
+ if (thread->depress_priority < 0)
+ set_pri(thread, pri, resched);
+ else
+ thread->depress_priority = pri;
+#if MACH_FIXPRI
+ }
+ else {
+ set_pri(thread, thread->priority, resched);
+ }
+#endif /* MACH_FIXPRI */
+}
+
+/*
+ * compute_my_priority:
+ *
+ * Version of compute priority for current thread or thread
+ * being manipulated by scheduler (going on or off a runq).
+ * Only used for priority updates. Policy or priority changes
+ * must call compute_priority above. Caller must have thread
+ * locked and know it is timesharing and not depressed.
+ */
+
+void compute_my_priority(
+ register thread_t thread)
+{
+ register int temp_pri;
+
+ do_priority_computation(thread,temp_pri);
+ thread->sched_pri = temp_pri;
+}
+
+/*
+ * recompute_priorities:
+ *
+ * Update the priorities of all threads periodically.
+ */
+void recompute_priorities(void)
+{
+#if SIMPLE_CLOCK
+ int new_usec;
+#endif /* SIMPLE_CLOCK */
+
+ sched_tick++; /* age usage one more time */
+ set_timeout(&recompute_priorities_timer, hz);
+#if SIMPLE_CLOCK
+ /*
+ * Compensate for clock drift. sched_usec is an
+ * exponential average of the number of microseconds in
+ * a second. It decays in the same fashion as cpu_usage.
+ */
+ new_usec = sched_usec_elapsed();
+ sched_usec = (5*sched_usec + 3*new_usec)/8;
+#endif /* SIMPLE_CLOCK */
+ /*
+ * Wakeup scheduler thread.
+ */
+ if (sched_thread_id != THREAD_NULL) {
+ clear_wait(sched_thread_id, THREAD_AWAKENED, FALSE);
+ }
+}
+
+/*
+ * update_priority
+ *
+ * Cause the priority computation of a thread that has been
+ * sleeping or suspended to "catch up" with the system. Thread
+ * *MUST* be locked by caller. If thread is running, then this
+ * can only be called by the thread on itself.
+ */
+void update_priority(
+ register thread_t thread)
+{
+ register unsigned int ticks;
+ register shift_t shiftp;
+ register int temp_pri;
+
+ ticks = sched_tick - thread->sched_stamp;
+
+ assert(ticks != 0);
+
+ /*
+ * If asleep for more than 30 seconds forget all
+ * cpu_usage, else catch up on missed aging.
+ * 5/8 ** n is approximated by the two shifts
+ * in the wait_shift array.
+ */
+ thread->sched_stamp += ticks;
+ thread_timer_delta(thread);
+ if (ticks > 30) {
+ thread->cpu_usage = 0;
+ thread->sched_usage = 0;
+ }
+ else {
+ thread->cpu_usage += thread->cpu_delta;
+ thread->sched_usage += thread->sched_delta;
+ shiftp = &wait_shift[ticks];
+ if (shiftp->shift2 > 0) {
+ thread->cpu_usage =
+ (thread->cpu_usage >> shiftp->shift1) +
+ (thread->cpu_usage >> shiftp->shift2);
+ thread->sched_usage =
+ (thread->sched_usage >> shiftp->shift1) +
+ (thread->sched_usage >> shiftp->shift2);
+ }
+ else {
+ thread->cpu_usage =
+ (thread->cpu_usage >> shiftp->shift1) -
+ (thread->cpu_usage >> -(shiftp->shift2));
+ thread->sched_usage =
+ (thread->sched_usage >> shiftp->shift1) -
+ (thread->sched_usage >> -(shiftp->shift2));
+ }
+ }
+ thread->cpu_delta = 0;
+ thread->sched_delta = 0;
+ /*
+ * Recompute priority if appropriate.
+ */
+ if (
+#if MACH_FIXPRI
+ (thread->policy == POLICY_TIMESHARE) &&
+#endif /* MACH_FIXPRI */
+ (thread->depress_priority < 0)) {
+ do_priority_computation(thread, temp_pri);
+ thread->sched_pri = temp_pri;
+ }
+}
+
+/*
+ * run_queue_enqueue macro for thread_setrun().
+ */
+#if DEBUG
+#define run_queue_enqueue(rq, th) \
+ MACRO_BEGIN \
+ register unsigned int whichq; \
+ \
+ whichq = (th)->sched_pri; \
+ if (whichq >= NRQS) { \
+ printf("thread_setrun: pri too high (%d)\n", (th)->sched_pri); \
+ whichq = NRQS - 1; \
+ } \
+ \
+ simple_lock(&(rq)->lock); /* lock the run queue */ \
+ checkrq((rq), "thread_setrun: before adding thread"); \
+ enqueue_tail(&(rq)->runq[whichq], (queue_entry_t) (th)); \
+ \
+ if (whichq < (rq)->low || (rq)->count == 0) \
+ (rq)->low = whichq; /* minimize */ \
+ \
+ (rq)->count++; \
+ (th)->runq = (rq); \
+ thread_check((th), (rq)); \
+ checkrq((rq), "thread_setrun: after adding thread"); \
+ simple_unlock(&(rq)->lock); \
+ MACRO_END
+#else /* DEBUG */
+#define run_queue_enqueue(rq, th) \
+ MACRO_BEGIN \
+ register unsigned int whichq; \
+ \
+ whichq = (th)->sched_pri; \
+ if (whichq >= NRQS) { \
+ printf("thread_setrun: pri too high (%d)\n", (th)->sched_pri); \
+ whichq = NRQS - 1; \
+ } \
+ \
+ simple_lock(&(rq)->lock); /* lock the run queue */ \
+ enqueue_tail(&(rq)->runq[whichq], (queue_entry_t) (th)); \
+ \
+ if (whichq < (rq)->low || (rq)->count == 0) \
+ (rq)->low = whichq; /* minimize */ \
+ \
+ (rq)->count++; \
+ (th)->runq = (rq); \
+ simple_unlock(&(rq)->lock); \
+ MACRO_END
+#endif /* DEBUG */
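+
+/*
+ * Note on (rq)->low: it is a floor for the index of the strongest
+ * non-empty queue.  It may lag behind (point at an already-empty queue)
+ * under timesharing; POLICY_FIXEDPRI requires it to be exact, so the
+ * dequeue paths in thread_select and choose_pset_thread re-scan in that
+ * case.
+ */
+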
+/*
+ * thread_setrun:
+ *
+ * Make thread runnable; dispatch directly onto an idle processor
+ * if possible. Else put on appropriate run queue (processor
+ *	if bound, else processor set).  Caller must have lock on thread.
+ * This is always called at splsched.
+ */
+
+void thread_setrun(
+ register thread_t th,
+ boolean_t may_preempt)
+{
+ register processor_t processor;
+ register run_queue_t rq;
+#if NCPUS > 1
+ register processor_set_t pset;
+#endif /* NCPUS > 1 */
+
+ /*
+ * Update priority if needed.
+ */
+ if (th->sched_stamp != sched_tick) {
+ update_priority(th);
+ }
+
+ assert(th->runq == RUN_QUEUE_NULL);
+
+#if NCPUS > 1
+ /*
+ * Try to dispatch the thread directly onto an idle processor.
+ */
+ if ((processor = th->bound_processor) == PROCESSOR_NULL) {
+ /*
+ * Not bound, any processor in the processor set is ok.
+ */
+ pset = th->processor_set;
+#if HW_FOOTPRINT
+ /*
+ * But first check the last processor it ran on.
+ */
+ processor = th->last_processor;
+ if (processor->state == PROCESSOR_IDLE) {
+ simple_lock(&processor->lock);
+ simple_lock(&pset->idle_lock);
+ if ((processor->state == PROCESSOR_IDLE)
+#if MACH_HOST
+ && (processor->processor_set == pset)
+#endif /* MACH_HOST */
+ ) {
+ queue_remove(&pset->idle_queue, processor,
+ processor_t, processor_queue);
+ pset->idle_count--;
+ processor->next_thread = th;
+ processor->state = PROCESSOR_DISPATCHING;
+ simple_unlock(&pset->idle_lock);
+ simple_unlock(&processor->lock);
+ return;
+ }
+ simple_unlock(&pset->idle_lock);
+ simple_unlock(&processor->lock);
+ }
+#endif /* HW_FOOTPRINT */
+
+ if (pset->idle_count > 0) {
+ simple_lock(&pset->idle_lock);
+ if (pset->idle_count > 0) {
+ processor = (processor_t) queue_first(&pset->idle_queue);
+ queue_remove(&(pset->idle_queue), processor, processor_t,
+ processor_queue);
+ pset->idle_count--;
+ processor->next_thread = th;
+ processor->state = PROCESSOR_DISPATCHING;
+ simple_unlock(&pset->idle_lock);
+ return;
+ }
+ simple_unlock(&pset->idle_lock);
+ }
+ rq = &(pset->runq);
+ run_queue_enqueue(rq,th);
+ /*
+ * Preempt check
+ */
+ if (may_preempt &&
+#if MACH_HOST
+ (pset == current_processor()->processor_set) &&
+#endif /* MACH_HOST */
+ (current_thread()->sched_pri > th->sched_pri)) {
+ /*
+ * Turn off first_quantum to allow csw.
+ */
+ current_processor()->first_quantum = FALSE;
+ ast_on(cpu_number(), AST_BLOCK);
+ }
+ }
+ else {
+ /*
+ * Bound, can only run on bound processor. Have to lock
+ * processor here because it may not be the current one.
+ */
+ if (processor->state == PROCESSOR_IDLE) {
+ simple_lock(&processor->lock);
+ pset = processor->processor_set;
+ simple_lock(&pset->idle_lock);
+ if (processor->state == PROCESSOR_IDLE) {
+ queue_remove(&pset->idle_queue, processor,
+ processor_t, processor_queue);
+ pset->idle_count--;
+ processor->next_thread = th;
+ processor->state = PROCESSOR_DISPATCHING;
+ simple_unlock(&pset->idle_lock);
+ simple_unlock(&processor->lock);
+ return;
+ }
+ simple_unlock(&pset->idle_lock);
+ simple_unlock(&processor->lock);
+ }
+ rq = &(processor->runq);
+ run_queue_enqueue(rq,th);
+
+ /*
+ * Cause ast on processor if processor is on line.
+ *
+ * XXX Don't do this remotely to master because this will
+ * XXX send an interprocessor interrupt, and that's too
+ * XXX expensive for all the unparallelized U*x code.
+ */
+ if (processor == current_processor()) {
+ ast_on(cpu_number(), AST_BLOCK);
+ }
+ else if ((processor != master_processor) &&
+ (processor->state != PROCESSOR_OFF_LINE)) {
+ cause_ast_check(processor);
+ }
+ }
+#else /* NCPUS > 1 */
+ /*
+ * XXX should replace queue with a boolean in this case.
+ */
+ if (default_pset.idle_count > 0) {
+ processor = (processor_t) queue_first(&default_pset.idle_queue);
+ queue_remove(&default_pset.idle_queue, processor,
+ processor_t, processor_queue);
+ default_pset.idle_count--;
+ processor->next_thread = th;
+ processor->state = PROCESSOR_DISPATCHING;
+ return;
+ }
+ if (th->bound_processor == PROCESSOR_NULL) {
+ rq = &(default_pset.runq);
+ }
+ else {
+ rq = &(master_processor->runq);
+ ast_on(cpu_number(), AST_BLOCK);
+ }
+ run_queue_enqueue(rq,th);
+
+ /*
+ * Preempt check
+ */
+ if (may_preempt && (current_thread()->sched_pri > th->sched_pri)) {
+ /*
+ * Turn off first_quantum to allow context switch.
+ */
+ current_processor()->first_quantum = FALSE;
+ ast_on(cpu_number(), AST_BLOCK);
+ }
+#endif /* NCPUS > 1 */
+}
+
+/*
+ * set_pri:
+ *
+ * Set the priority of the specified thread to the specified
+ * priority. This may cause the thread to change queues.
+ *
+ * The thread *must* be locked by the caller.
+ */
+
+void set_pri(
+ thread_t th,
+ int pri,
+ boolean_t resched)
+{
+ register struct run_queue *rq;
+
+ rq = rem_runq(th);
+ th->sched_pri = pri;
+ if (rq != RUN_QUEUE_NULL) {
+ if (resched)
+ thread_setrun(th, TRUE);
+ else
+ run_queue_enqueue(rq, th);
+ }
+}
+
+/*
+ * rem_runq:
+ *
+ * Remove a thread from its run queue.
+ * The run queue that the process was on is returned
+ * (or RUN_QUEUE_NULL if not on a run queue). Thread *must* be locked
+ * before calling this routine. Unusual locking protocol on runq
+ * field in thread structure makes this code interesting; see thread.h.
+ */
+
+struct run_queue *rem_runq(
+ thread_t th)
+{
+ register struct run_queue *rq;
+
+ rq = th->runq;
+ /*
+ * If rq is RUN_QUEUE_NULL, the thread will stay out of the
+ * run_queues because the caller locked the thread. Otherwise
+ * the thread is on a runq, but could leave.
+ */
+ if (rq != RUN_QUEUE_NULL) {
+ simple_lock(&rq->lock);
+#if DEBUG
+ checkrq(rq, "rem_runq: at entry");
+#endif /* DEBUG */
+ if (rq == th->runq) {
+ /*
+ * Thread is in a runq and we have a lock on
+ * that runq.
+ */
+#if DEBUG
+ checkrq(rq, "rem_runq: before removing thread");
+ thread_check(th, rq);
+#endif /* DEBUG */
+ remqueue(&rq->runq[0], (queue_entry_t) th);
+ rq->count--;
+#if DEBUG
+ checkrq(rq, "rem_runq: after removing thread");
+#endif /* DEBUG */
+ th->runq = RUN_QUEUE_NULL;
+ simple_unlock(&rq->lock);
+ }
+ else {
+ /*
+ * The thread left the runq before we could
+ * lock the runq. It is not on a runq now, and
+ * can't move again because this routine's
+ * caller locked the thread.
+ */
+ simple_unlock(&rq->lock);
+ rq = RUN_QUEUE_NULL;
+ }
+ }
+
+ return rq;
+}
+
+
+/*
+ * choose_thread:
+ *
+ * Choose a thread to execute. The thread chosen is removed
+ * from its run queue. Note that this requires only that the runq
+ * lock be held.
+ *
+ * Strategy:
+ * Check processor runq first; if anything found, run it.
+ * Else check pset runq; if nothing found, return idle thread.
+ *
+ * Second line of strategy is implemented by choose_pset_thread.
+ * This is only called on processor startup and when thread_block
+ * thinks there's something in the processor runq.
+ */
+
+thread_t choose_thread(
+ processor_t myprocessor)
+{
+ thread_t th;
+ register queue_t q;
+ register run_queue_t runq;
+ register int i;
+ register processor_set_t pset;
+
+ runq = &myprocessor->runq;
+
+ simple_lock(&runq->lock);
+ if (runq->count > 0) {
+ q = runq->runq + runq->low;
+ for (i = runq->low; i < NRQS ; i++, q++) {
+ if (!queue_empty(q)) {
+ th = (thread_t) dequeue_head(q);
+ th->runq = RUN_QUEUE_NULL;
+ runq->count--;
+ runq->low = i;
+ simple_unlock(&runq->lock);
+ return th;
+ }
+ }
+ panic("choose_thread");
+ /*NOTREACHED*/
+ }
+ simple_unlock(&runq->lock);
+
+ pset = myprocessor->processor_set;
+
+ simple_lock(&pset->runq.lock);
+ return choose_pset_thread(myprocessor,pset);
+}
+
+/*
+ * choose_pset_thread: choose a thread from processor_set runq or
+ * set processor idle and choose its idle thread.
+ *
+ * Caller must be at splsched and have a lock on the runq. This
+ * lock is released by this routine. myprocessor is always the current
+ * processor, and pset must be its processor set.
+ * This routine chooses and removes a thread from the runq if there
+ * is one (and returns it), else it sets the processor idle and
+ * returns its idle thread.
+ */
+
+thread_t choose_pset_thread(
+ register processor_t myprocessor,
+ processor_set_t pset)
+{
+ register run_queue_t runq;
+ register thread_t th;
+ register queue_t q;
+ register int i;
+
+ runq = &pset->runq;
+
+ if (runq->count > 0) {
+ q = runq->runq + runq->low;
+ for (i = runq->low; i < NRQS ; i++, q++) {
+ if (!queue_empty(q)) {
+ th = (thread_t) dequeue_head(q);
+ th->runq = RUN_QUEUE_NULL;
+ runq->count--;
+ /*
+ * For POLICY_FIXEDPRI, runq->low must be
+ * accurate!
+ */
+#if MACH_FIXPRI
+ if ((runq->count > 0) &&
+ (pset->policies & POLICY_FIXEDPRI)) {
+ while (queue_empty(q)) {
+ q++;
+ i++;
+ }
+ }
+#endif /* MACH_FIXPRI */
+ runq->low = i;
+#if DEBUG
+ checkrq(runq, "choose_pset_thread");
+#endif /* DEBUG */
+ simple_unlock(&runq->lock);
+ return th;
+ }
+ }
+ panic("choose_pset_thread");
+ /*NOTREACHED*/
+ }
+ simple_unlock(&runq->lock);
+
+ /*
+ * Nothing is runnable, so set this processor idle if it
+ * was running. If it was in an assignment or shutdown,
+ * leave it alone. Return its idle thread.
+ */
+ simple_lock(&pset->idle_lock);
+ if (myprocessor->state == PROCESSOR_RUNNING) {
+ myprocessor->state = PROCESSOR_IDLE;
+ /*
+ * XXX Until it goes away, put master on end of queue, others
+ * XXX on front so master gets used last.
+ */
+ if (myprocessor == master_processor) {
+ queue_enter(&(pset->idle_queue), myprocessor,
+ processor_t, processor_queue);
+ }
+ else {
+ queue_enter_first(&(pset->idle_queue), myprocessor,
+ processor_t, processor_queue);
+ }
+
+ pset->idle_count++;
+ }
+ simple_unlock(&pset->idle_lock);
+
+ return myprocessor->idle_thread;
+}
+
+/*
+ * no_dispatch_count counts number of times processors go non-idle
+ * without being dispatched. This should be very rare.
+ */
+int no_dispatch_count = 0;
+
+/*
+ * This is the idle thread, which just looks for other threads
+ * to execute.
+ */
+
+void idle_thread_continue(void)
+{
+ register processor_t myprocessor;
+ register volatile thread_t *threadp;
+ register volatile int *gcount;
+ register volatile int *lcount;
+ register thread_t new_thread;
+ register int state;
+ int mycpu;
+ spl_t s;
+
+ mycpu = cpu_number();
+ myprocessor = current_processor();
+ threadp = (volatile thread_t *) &myprocessor->next_thread;
+ lcount = (volatile int *) &myprocessor->runq.count;
+
+ while (TRUE) {
+#ifdef MARK_CPU_IDLE
+ MARK_CPU_IDLE(mycpu);
+#endif /* MARK_CPU_IDLE */
+
+#if MACH_HOST
+ gcount = (volatile int *)
+ &myprocessor->processor_set->runq.count;
+#else /* MACH_HOST */
+ gcount = (volatile int *) &default_pset.runq.count;
+#endif /* MACH_HOST */
+
+/*
+ * This cpu will be dispatched (by thread_setrun) by setting next_thread
+ * to the value of the thread to run next. Also check runq counts.
+ */
+ while ((*threadp == (volatile thread_t)THREAD_NULL) &&
+ (*gcount == 0) && (*lcount == 0)) {
+
+ /* check for ASTs while we wait */
+
+ if (need_ast[mycpu] &~ AST_SCHEDULING) {
+ (void) splsched();
+ /* don't allow scheduling ASTs */
+ need_ast[mycpu] &= ~AST_SCHEDULING;
+ ast_taken();
+ /* back at spl0 */
+ }
+
+ /*
+ * machine_idle is a machine dependent function,
+ * to conserve power.
+ */
+#if POWER_SAVE
+ machine_idle(mycpu);
+#endif /* POWER_SAVE */
+ }
+
+#ifdef MARK_CPU_ACTIVE
+ MARK_CPU_ACTIVE(mycpu);
+#endif /* MARK_CPU_ACTIVE */
+
+ s = splsched();
+
+ /*
+ * This is not a switch statement to avoid the
+ * bounds checking code in the common case.
+ */
+retry:
+ state = myprocessor->state;
+ if (state == PROCESSOR_DISPATCHING) {
+ /*
+			 *	Common case -- cpu dispatched.
+ */
+ new_thread = (thread_t) *threadp;
+ *threadp = (volatile thread_t) THREAD_NULL;
+ myprocessor->state = PROCESSOR_RUNNING;
+ /*
+ * set up quantum for new thread.
+ */
+#if MACH_FIXPRI
+ if (new_thread->policy == POLICY_TIMESHARE) {
+#endif /* MACH_FIXPRI */
+ /*
+ * Just use set quantum. No point in
+ * checking for shorter local runq quantum;
+ * csw_needed will handle correctly.
+ */
+#if MACH_HOST
+ myprocessor->quantum = new_thread->
+ processor_set->set_quantum;
+#else /* MACH_HOST */
+ myprocessor->quantum =
+ default_pset.set_quantum;
+#endif /* MACH_HOST */
+
+#if MACH_FIXPRI
+ }
+ else {
+ /*
+ * POLICY_FIXEDPRI
+ */
+ myprocessor->quantum = new_thread->sched_data;
+ }
+#endif /* MACH_FIXPRI */
+ myprocessor->first_quantum = TRUE;
+ counter(c_idle_thread_handoff++);
+ thread_run(idle_thread_continue, new_thread);
+ }
+ else if (state == PROCESSOR_IDLE) {
+ register processor_set_t pset;
+
+ pset = myprocessor->processor_set;
+ simple_lock(&pset->idle_lock);
+ if (myprocessor->state != PROCESSOR_IDLE) {
+ /*
+ * Something happened, try again.
+ */
+ simple_unlock(&pset->idle_lock);
+ goto retry;
+ }
+ /*
+ * Processor was not dispatched (Rare).
+ * Set it running again.
+ */
+ no_dispatch_count++;
+ pset->idle_count--;
+ queue_remove(&pset->idle_queue, myprocessor,
+ processor_t, processor_queue);
+ myprocessor->state = PROCESSOR_RUNNING;
+ simple_unlock(&pset->idle_lock);
+ counter(c_idle_thread_block++);
+ thread_block(idle_thread_continue);
+ }
+ else if ((state == PROCESSOR_ASSIGN) ||
+ (state == PROCESSOR_SHUTDOWN)) {
+ /*
+ * Changing processor sets, or going off-line.
+ * Release next_thread if there is one. Actual
+ * thread to run is on a runq.
+ */
+ if ((new_thread = (thread_t)*threadp)!= THREAD_NULL) {
+ *threadp = (volatile thread_t) THREAD_NULL;
+ thread_setrun(new_thread, FALSE);
+ }
+
+ counter(c_idle_thread_block++);
+ thread_block(idle_thread_continue);
+ }
+ else {
+ printf(" Bad processor state %d (Cpu %d)\n",
+ cpu_state(mycpu), mycpu);
+ panic("idle_thread");
+ }
+
+ (void) splx(s);
+ }
+}
+
+void idle_thread(void)
+{
+ register thread_t self = current_thread();
+ spl_t s;
+
+ stack_privilege(self);
+
+ s = splsched();
+ self->priority = 31;
+ self->sched_pri = 31;
+
+ /*
+ * Set the idle flag to indicate that this is an idle thread,
+ * enter ourselves in the idle array, and thread_block() to get
+ * out of the run queues (and set the processor idle when we
+ * run next time).
+ */
+ thread_lock(self);
+ self->state |= TH_IDLE;
+ thread_unlock(self);
+ current_processor()->idle_thread = self;
+ (void) splx(s);
+
+ counter(c_idle_thread_block++);
+ thread_block(idle_thread_continue);
+ idle_thread_continue();
+ /*NOTREACHED*/
+}
+
+/*
+ * sched_thread: scheduler thread.
+ *
+ * This thread handles periodic calculations in the scheduler that
+ * we don't want to do at interrupt level. This allows us to
+ * avoid blocking.
+ */
+void sched_thread_continue(void)
+{
+ while (TRUE) {
+ (void) compute_mach_factor();
+
+ /*
+ * Check for stuck threads. This can't be done off of
+ * the callout queue because it requires operations that
+ * can't be used from interrupt level.
+ */
+ if (sched_tick & 1)
+ do_thread_scan();
+
+ assert_wait((event_t) 0, FALSE);
+ counter(c_sched_thread_block++);
+ thread_block(sched_thread_continue);
+ }
+}
+
+void sched_thread(void)
+{
+ sched_thread_id = current_thread();
+
+ /*
+ * Sleep on event 0, recompute_priorities() will awaken
+ * us by calling clear_wait().
+ */
+ assert_wait((event_t) 0, FALSE);
+ counter(c_sched_thread_block++);
+ thread_block(sched_thread_continue);
+ sched_thread_continue();
+ /*NOTREACHED*/
+}
+
+#define MAX_STUCK_THREADS 16
+
+/*
+ * do_thread_scan: scan for stuck threads. A thread is stuck if
+ * it is runnable but its priority is so low that it has not
+ * run for several seconds. Its priority should be higher, but
+ * won't be until it runs and calls update_priority. The scanner
+ * finds these threads and does the updates.
+ *
+ * Scanner runs in two passes. Pass one squirrels likely
+ * thread ids away in an array, and removes them from the run queue.
+ * Pass two does the priority updates. This is necessary because
+ * the run queue lock is required for the candidate scan, but
+ * cannot be held during updates [set_pri will deadlock].
+ *
+ * Array length should be enough so that restart isn't necessary,
+ * but restart logic is included. Does not scan processor runqs.
+ *
+ */
+
+boolean_t do_thread_scan_debug = FALSE;
+
+thread_t stuck_threads[MAX_STUCK_THREADS];
+int stuck_count = 0;
+
+/*
+ * do_runq_scan is the guts of pass 1. It scans a runq for
+ * stuck threads. A boolean is returned indicating whether
+ * it ran out of space.
+ */
+
+boolean_t
+do_runq_scan(
+ run_queue_t runq)
+{
+ register spl_t s;
+ register queue_t q;
+ register thread_t thread;
+ register int count;
+
+ s = splsched();
+ simple_lock(&runq->lock);
+ if((count = runq->count) > 0) {
+ q = runq->runq + runq->low;
+ while (count > 0) {
+ thread = (thread_t) queue_first(q);
+ while (!queue_end(q, (queue_entry_t) thread)) {
+ /*
+ * Get the next thread now, since we may
+ * remove this thread from the run queue.
+ */
+ thread_t next = (thread_t) queue_next(&thread->links);
+
+ if ((thread->state & TH_SCHED_STATE) == TH_RUN &&
+ sched_tick - thread->sched_stamp > 1) {
+ /*
+ * Stuck, save its id for later.
+ */
+ if (stuck_count == MAX_STUCK_THREADS) {
+ /*
+ * !@#$% No more room.
+ */
+ simple_unlock(&runq->lock);
+ splx(s);
+ return TRUE;
+ }
+ /*
+ * We can`t take the thread_lock here,
+ * since we already have the runq lock.
+ * So we can`t grab a reference to the
+ * thread. However, a thread that is
+ * in RUN state cannot be deallocated
+ * until it stops running. If it isn`t
+ * on the runq, then thread_halt cannot
+ * see it. So we remove the thread
+ * from the runq to make it safe.
+ */
+ remqueue(q, (queue_entry_t) thread);
+ runq->count--;
+ thread->runq = RUN_QUEUE_NULL;
+
+ stuck_threads[stuck_count++] = thread;
+if (do_thread_scan_debug)
+ printf("do_runq_scan: adding thread %#x\n", thread);
+ }
+ count--;
+ thread = next;
+ }
+ q++;
+ }
+ }
+ simple_unlock(&runq->lock);
+ splx(s);
+
+ return FALSE;
+}
+
+void do_thread_scan(void)
+{
+ register spl_t s;
+ register boolean_t restart_needed = 0;
+ register thread_t thread;
+#if MACH_HOST
+ register processor_set_t pset;
+#endif /* MACH_HOST */
+
+ do {
+#if MACH_HOST
+ simple_lock(&all_psets_lock);
+ queue_iterate(&all_psets, pset, processor_set_t, all_psets) {
+ if (restart_needed = do_runq_scan(&pset->runq))
+ break;
+ }
+ simple_unlock(&all_psets_lock);
+#else /* MACH_HOST */
+ restart_needed = do_runq_scan(&default_pset.runq);
+#endif /* MACH_HOST */
+ if (!restart_needed)
+ restart_needed = do_runq_scan(&master_processor->runq);
+
+ /*
+ * Ok, we now have a collection of candidates -- fix them.
+ */
+
+ while (stuck_count > 0) {
+ thread = stuck_threads[--stuck_count];
+ stuck_threads[stuck_count] = THREAD_NULL;
+ s = splsched();
+ thread_lock(thread);
+ if ((thread->state & TH_SCHED_STATE) == TH_RUN) {
+ /*
+ * Do the priority update. Call
+ * thread_setrun because thread is
+ * off the run queues.
+ */
+ update_priority(thread);
+ thread_setrun(thread, TRUE);
+ }
+ thread_unlock(thread);
+ splx(s);
+ }
+ } while (restart_needed);
+}
+
+#if DEBUG
+void checkrq(
+ run_queue_t rq,
+ char *msg)
+{
+ register queue_t q1;
+ register int i, j;
+ register queue_entry_t e;
+ register int low;
+
+ low = -1;
+ j = 0;
+ q1 = rq->runq;
+ for (i = 0; i < NRQS; i++) {
+ if (q1->next == q1) {
+ if (q1->prev != q1)
+ panic("checkrq: empty at %s", msg);
+ }
+ else {
+ if (low == -1)
+ low = i;
+
+ for (e = q1->next; e != q1; e = e->next) {
+ j++;
+ if (e->next->prev != e)
+ panic("checkrq-2 at %s", msg);
+ if (e->prev->next != e)
+ panic("checkrq-3 at %s", msg);
+ }
+ }
+ q1++;
+ }
+ if (j != rq->count)
+ panic("checkrq: count wrong at %s", msg);
+ if (rq->count != 0 && low < rq->low)
+ panic("checkrq: low wrong at %s", msg);
+}
+
+void thread_check(
+ register thread_t th,
+ register run_queue_t rq)
+{
+ register unsigned int whichq;
+
+ whichq = th->sched_pri;
+ if (whichq >= NRQS) {
+ printf("thread_check: priority too high\n");
+ whichq = NRQS-1;
+ }
+ if ((th->links.next == &rq->runq[whichq]) &&
+ (rq->runq[whichq].prev != (queue_entry_t)th))
+ panic("thread_check");
+}
+#endif /* DEBUG */