/*
 * Mach Operating System
 * Copyright (c) 1987-1993 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	sched_prim.c
 *	Author:	Avadis Tevanian, Jr.
 *	Date:	1986
 *
 *	Scheduling primitives
 *
 */

#include <kern/printf.h>
#include <mach/machine.h>
#include <machine/locore.h>
#include <machine/machspl.h>	/* For def'n of splsched() */
#include <machine/model_dep.h>
#include <kern/ast.h>
#include <kern/counters.h>
#include <kern/cpu_number.h>
#include <kern/debug.h>
#include <kern/lock.h>
#include <kern/macro_help.h>
#include <kern/mach_clock.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/syscall_subr.h>
#include <kern/thread.h>
#include <kern/thread_swap.h>
#include <kern/timer.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>

#if	MACH_FIXPRI
#include <mach/policy.h>
#endif	/* MACH_FIXPRI */

int		min_quantum;	/* defines max context switch rate */

unsigned	sched_tick;

#if	SIMPLE_CLOCK
int		sched_usec;
#endif	/* SIMPLE_CLOCK */

thread_t	sched_thread_id;

timer_elt_data_t recompute_priorities_timer;

/*
 *	State machine
 *
 * states are combinations of:
 *  R	running
 *  W	waiting (or on wait queue)
 *  S	suspended (or will suspend)
 *  N	non-interruptible
 *
 * init	action
 *	assert_wait thread_block    clear_wait		suspend	resume
 *
 * R	RW,  RWN    R;   setrun	    -			RS	-
 * RS	RWS, RWNS   S;  wake_active -			-	R
 * RN	RWN	    RN;  setrun	    -			RNS	-
 * RNS	RWNS	    RNS; setrun	    -			-	RN
 *
 * RW		    W		    R			RWS	-
 * RWN		    WN		    RN			RWNS	-
 * RWS		    WS; wake_active RS			-	RW
 * RWNS		    WNS		    RNS			-	RWN
 *
 * W				    R;   setrun		WS	-
 * WN				    RN;  setrun		WNS	-
 * WNS				    RNS; setrun		-	WN
 *
 * S				    -			-	R
 * WS				    S			-	W
 *
 */

/*
 *	Waiting protocols and implementation:
 *
 *	Each thread may be waiting for exactly one event; this event
 *	is set using assert_wait().  That thread may be awakened either
 *	by performing a thread_wakeup_prim() on its event,
 *	or by directly waking that thread up with clear_wait().
 *
 *	The implementation of wait events uses a hash table.  Each
 *	bucket is a queue of threads having the same hash function
 *	value; the chain for the queue (linked list) is the run queue
 *	field.  [It is not possible to be waiting and runnable at the
 *	same time.]
 *
 *	Locks on both the thread and on the hash buckets govern the
 *	wait event field and the queue chain field.  Because wakeup
 *	operations only have the event as an argument, the event hash
 *	bucket must be locked before any thread.
 *
 *	Scheduling operations may also occur at interrupt level; therefore,
 *	interrupts below splsched() must be prevented when holding
 *	thread or hash bucket locks.
 *
 *	The wait event hash table declarations are as follows:
 */

#define NUMQUEUES	1031

queue_head_t		wait_queue[NUMQUEUES];
decl_simple_lock_data(,	wait_lock[NUMQUEUES])

/* NOTE: we want a small positive integer out of this */
#define wait_hash(event) \
	((((long)(event) < 0) ? ~(long)(event) : (long)(event)) % NUMQUEUES)
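/*
 * Usage sketch (illustrative only, not part of the build): the
 * canonical sleep/wakeup pattern built on these primitives.  Any
 * kernel address may serve as an event; waiter and waker must hash
 * the same address, so both use the same field.  The `struct widget'
 * type and its lock are hypothetical.  Asserting the wait *before*
 * dropping the lock closes the race with a wakeup that arrives
 * between the unlock and the block.
 */
#if 0
struct widget {
    decl_simple_lock_data(, lock)
    int     count;
};

void consumer_wait(struct widget *w)
{
    simple_lock(&w->lock);
    while (w->count == 0) {
        assert_wait((event_t) &w->count, TRUE);
        simple_unlock(&w->lock);
        thread_block((void (*)()) 0);
        simple_lock(&w->lock);
    }
    w->count--;
    simple_unlock(&w->lock);
}

void producer_post(struct widget *w)
{
    simple_lock(&w->lock);
    w->count++;
    simple_unlock(&w->lock);
    thread_wakeup((event_t) &w->count);
}
#endif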
void wait_queue_init(void)
{
    int i;

    for (i = 0; i < NUMQUEUES; i++) {
        queue_init(&wait_queue[i]);
        simple_lock_init(&wait_lock[i]);
    }
}

void sched_init(void)
{
    recompute_priorities_timer.fcn = recompute_priorities;
    recompute_priorities_timer.param = NULL;

    min_quantum = hz / 10;      /* context switch 10 times/second */
    wait_queue_init();
    pset_sys_bootstrap();       /* initialize processor mgmt. */
    queue_init(&action_queue);
    simple_lock_init(&action_lock);
    sched_tick = 0;
#if SIMPLE_CLOCK
    sched_usec = 0;
#endif  /* SIMPLE_CLOCK */
    ast_init();
}

/*
 *  Thread timeout routine, called when timer expires.
 *  Called at splsoftclock.
 */
void thread_timeout(
    void *_thread)
{
    thread_t thread = _thread;

    assert(thread->timer.set == TELT_UNSET);
    clear_wait(thread, THREAD_TIMED_OUT, FALSE);
}

/*
 *  thread_set_timeout:
 *
 *  Set a timer for the current thread, if the thread
 *  is ready to wait.  Must be called between assert_wait()
 *  and thread_block().
 */
void thread_set_timeout(
    int t)  /* timeout interval in ticks */
{
    thread_t    thread = current_thread();
    spl_t       s;

    s = splsched();
    thread_lock(thread);
    if ((thread->state & TH_WAIT) != 0) {
        set_timeout(&thread->timer, t);
    }
    thread_unlock(thread);
    splx(s);
}

/*
 *  Set up thread timeout element when thread is created.
 */
void thread_timeout_setup(
    thread_t    thread)
{
    thread->timer.fcn = thread_timeout;
    thread->timer.param = thread;
    thread->depress_timer.fcn = (void (*)(void*))thread_depress_timeout;
    thread->depress_timer.param = thread;
}

/*
 *  assert_wait:
 *
 *  Assert that the current thread is about to go to
 *  sleep until the specified event occurs.
 */
void assert_wait(
    event_t     event,
    boolean_t   interruptible)
{
    queue_t     q;
    int         index;
    thread_t    thread;
#if MACH_SLOCKS
    simple_lock_t   lock;
#endif  /* MACH_SLOCKS */
    spl_t       s;

    thread = current_thread();
    if (thread->wait_event != 0) {
        panic("assert_wait: already asserted event %#x\n",
              thread->wait_event);
    }

    s = splsched();
    if (event != 0) {
        index = wait_hash(event);
        q = &wait_queue[index];
#if MACH_SLOCKS
        lock = &wait_lock[index];
#endif  /* MACH_SLOCKS */
        simple_lock(lock);
        thread_lock(thread);
        enqueue_tail(q, &(thread->links));
        thread->wait_event = event;
        if (interruptible)
            thread->state |= TH_WAIT;
        else
            thread->state |= TH_WAIT | TH_UNINT;
        thread_unlock(thread);
        simple_unlock(lock);
    } else {
        thread_lock(thread);
        if (interruptible)
            thread->state |= TH_WAIT;
        else
            thread->state |= TH_WAIT | TH_UNINT;
        thread_unlock(thread);
    }
    splx(s);
}
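/*
 * Usage sketch (illustrative only, not part of the build): a timed
 * wait.  thread_set_timeout must be called between assert_wait and
 * thread_block, as its comment above requires; the wrapper name is
 * hypothetical.
 */
#if 0
boolean_t wait_with_timeout(event_t event, int ticks)
{
    assert_wait(event, TRUE);
    thread_set_timeout(ticks);
    thread_block((void (*)()) 0);

    /* THREAD_TIMED_OUT is posted by thread_timeout via clear_wait */
    return current_thread()->wait_result != THREAD_TIMED_OUT;
}
#endif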
/*
 *  clear_wait:
 *
 *  Clear the wait condition for the specified thread.  Start the thread
 *  executing if that is appropriate.
 *
 *  parameters:
 *    thread            thread to awaken
 *    result            Wakeup result the thread should see
 *    interrupt_only    Don't wake up the thread if it isn't
 *                      interruptible.
 */
void clear_wait(
    thread_t    thread,
    int         result,
    boolean_t   interrupt_only)
{
    int         index;
    queue_t     q;
#if MACH_SLOCKS
    simple_lock_t   lock;
#endif  /* MACH_SLOCKS */
    event_t     event;
    spl_t       s;

    s = splsched();
    thread_lock(thread);
    if (interrupt_only && (thread->state & TH_UNINT)) {
        /*
         *  can`t interrupt thread
         */
        thread_unlock(thread);
        splx(s);
        return;
    }

    event = thread->wait_event;
    if (event != 0) {
        thread_unlock(thread);
        index = wait_hash(event);
        q = &wait_queue[index];
#if MACH_SLOCKS
        lock = &wait_lock[index];
#endif  /* MACH_SLOCKS */
        simple_lock(lock);
        /*
         *  If the thread is still waiting on that event,
         *  then remove it from the list.  If it is waiting
         *  on a different event, or no event at all, then
         *  someone else did our job for us.
         */
        thread_lock(thread);
        if (thread->wait_event == event) {
            remqueue(q, (queue_entry_t)thread);
            thread->wait_event = 0;
            event = 0;      /* cause to run below */
        }
        simple_unlock(lock);
    }
    if (event == 0) {
        int state = thread->state;

        reset_timeout_check(&thread->timer);

        switch (state & TH_SCHED_STATE) {
          case TH_WAIT | TH_SUSP | TH_UNINT:
          case TH_WAIT | TH_UNINT:
          case TH_WAIT:
            /*
             *  Sleeping and not suspendable - put
             *  on run queue.
             */
            thread->state = (state &~ TH_WAIT) | TH_RUN;
            thread->wait_result = result;
            thread_setrun(thread, TRUE);
            break;

          case TH_WAIT | TH_SUSP:
          case TH_RUN | TH_WAIT:
          case TH_RUN | TH_WAIT | TH_SUSP:
          case TH_RUN | TH_WAIT | TH_UNINT:
          case TH_RUN | TH_WAIT | TH_SUSP | TH_UNINT:
            /*
             *  Either already running, or suspended.
             */
            thread->state = state &~ TH_WAIT;
            thread->wait_result = result;
            break;

          default:
            /*
             *  Not waiting.
             */
            break;
        }
    }
    thread_unlock(thread);
    splx(s);
}

static inline void __attribute__((noreturn))
state_panic(thread_t thread, const char *caller)
{
    panic("%s: thread %x has unexpected state %x",
          caller, thread, thread->state);
}

/*
 *  thread_wakeup_prim:
 *
 *  Common routine for thread_wakeup, thread_wakeup_with_result,
 *  and thread_wakeup_one.
 *
 */
void thread_wakeup_prim(
    event_t     event,
    boolean_t   one_thread,
    int         result)
{
    queue_t     q;
    int         index;
    thread_t    thread, next_th;
#if MACH_SLOCKS
    simple_lock_t   lock;
#endif  /* MACH_SLOCKS */
    spl_t       s;
    int         state;

    index = wait_hash(event);
    q = &wait_queue[index];
    s = splsched();
#if MACH_SLOCKS
    lock = &wait_lock[index];
#endif  /* MACH_SLOCKS */
    simple_lock(lock);
    thread = (thread_t) queue_first(q);
    while (!queue_end(q, (queue_entry_t)thread)) {
        next_th = (thread_t) queue_next((queue_t) thread);

        if (thread->wait_event == event) {
            thread_lock(thread);
            remqueue(q, (queue_entry_t) thread);
            thread->wait_event = 0;
            reset_timeout_check(&thread->timer);

            state = thread->state;
            switch (state & TH_SCHED_STATE) {
              case TH_WAIT | TH_SUSP | TH_UNINT:
              case TH_WAIT | TH_UNINT:
              case TH_WAIT:
                /*
                 *  Sleeping and not suspendable - put
                 *  on run queue.
                 */
                thread->state = (state &~ TH_WAIT) | TH_RUN;
                thread->wait_result = result;
                thread_setrun(thread, TRUE);
                break;

              case TH_WAIT | TH_SUSP:
              case TH_RUN | TH_WAIT:
              case TH_RUN | TH_WAIT | TH_SUSP:
              case TH_RUN | TH_WAIT | TH_UNINT:
              case TH_RUN | TH_WAIT | TH_SUSP | TH_UNINT:
                /*
                 *  Either already running, or suspended.
                 */
                thread->state = state &~ TH_WAIT;
                thread->wait_result = result;
                break;

              default:
                state_panic(thread, "thread_wakeup");
                break;
            }
            thread_unlock(thread);
            if (one_thread)
                break;
        }
        thread = next_th;
    }
    simple_unlock(lock);
    splx(s);
}

/*
 *  thread_sleep:
 *
 *  Cause the current thread to wait until the specified event
 *  occurs.  The specified lock is unlocked before releasing
 *  the cpu.  (This is a convenient way to sleep without manually
 *  calling assert_wait).
 */
void thread_sleep(
    event_t         event,
    simple_lock_t   lock,
    boolean_t       interruptible)
{
    assert_wait(event, interruptible);  /* assert event */
    simple_unlock(lock);                /* release the lock */
    thread_block((void (*)()) 0);       /* block ourselves */
}
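/*
 * Usage sketch (illustrative only, not part of the build):
 * thread_sleep folds the assert_wait/unlock/block sequence of the
 * earlier sketch into one call.  Note that it returns with the lock
 * *released*, so a loop must re-take it.  `struct widget' is the
 * same hypothetical type as before.
 */
#if 0
void consumer_wait_sleep(struct widget *w)
{
    simple_lock(&w->lock);
    while (w->count == 0) {
        thread_sleep((event_t) &w->count, &w->lock, TRUE);
        simple_lock(&w->lock);  /* thread_sleep dropped it */
    }
    w->count--;
    simple_unlock(&w->lock);
}
#endif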
/*
 *  thread_bind:
 *
 *  Force a thread to execute on the specified processor.
 *  If the thread is currently executing, it may wait until its
 *  time slice is up before switching onto the specified processor.
 *
 *  A processor of PROCESSOR_NULL causes the thread to be unbound.
 *  xxx - DO NOT export this to users.
 */
void thread_bind(
    thread_t    thread,
    processor_t processor)
{
    spl_t   s;

    s = splsched();
    thread_lock(thread);
    thread->bound_processor = processor;
    thread_unlock(thread);
    (void) splx(s);
}

/*
 *  Select a thread for this processor (the current processor) to run.
 *  May select the current thread.
 *  Assumes splsched.
 */
thread_t thread_select(
    processor_t myprocessor)
{
    thread_t    thread;

    myprocessor->first_quantum = TRUE;
    /*
     *  Check for obvious simple case; local runq is
     *  empty and global runq has entry at hint.
     */
    if (myprocessor->runq.count > 0) {
        thread = choose_thread(myprocessor);
        myprocessor->quantum = min_quantum;
    } else {
        processor_set_t pset;

#if MACH_HOST
        pset = myprocessor->processor_set;
#else   /* MACH_HOST */
        pset = &default_pset;
#endif  /* MACH_HOST */
        simple_lock(&pset->runq.lock);
#if DEBUG
        checkrq(&pset->runq, "thread_select");
#endif  /* DEBUG */
        if (pset->runq.count == 0) {
            /*
             *  Nothing else runnable.  Return if this
             *  thread is still runnable on this processor.
             *  Check for priority update if required.
             */
            thread = current_thread();
            if ((thread->state == TH_RUN) &&
#if MACH_HOST
                (thread->processor_set == pset) &&
#endif  /* MACH_HOST */
                ((thread->bound_processor == PROCESSOR_NULL) ||
                 (thread->bound_processor == myprocessor))) {

                simple_unlock(&pset->runq.lock);
                thread_lock(thread);
                if (thread->sched_stamp != sched_tick)
                    update_priority(thread);
                thread_unlock(thread);
            } else {
                thread = choose_pset_thread(myprocessor, pset);
            }
        } else {
            queue_t q;

            /*
             *  If there is a thread at hint, grab it,
             *  else call choose_pset_thread.
             */
            q = pset->runq.runq + pset->runq.low;

            if (queue_empty(q)) {
                pset->runq.low++;
                thread = choose_pset_thread(myprocessor, pset);
            } else {
                thread = (thread_t) dequeue_head(q);
                thread->runq = RUN_QUEUE_NULL;
                pset->runq.count--;
#if MACH_FIXPRI
                /*
                 *  Cannot lazy evaluate pset->runq.low for
                 *  fixed priority policy
                 */
                if ((pset->runq.count > 0) &&
                    (pset->policies & POLICY_FIXEDPRI)) {
                    while (queue_empty(q)) {
                        pset->runq.low++;
                        q++;
                    }
                }
#endif  /* MACH_FIXPRI */
#if DEBUG
                checkrq(&pset->runq, "thread_select: after");
#endif  /* DEBUG */
                simple_unlock(&pset->runq.lock);
            }
        }

#if MACH_FIXPRI
        if (thread->policy == POLICY_TIMESHARE) {
#endif  /* MACH_FIXPRI */
            myprocessor->quantum = pset->set_quantum;
#if MACH_FIXPRI
        } else {
            /*
             *  POLICY_FIXEDPRI
             */
            myprocessor->quantum = thread->sched_data;
        }
#endif  /* MACH_FIXPRI */
    }

    return thread;
}
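/*
 * Sketch (illustrative only, not part of the build) of the run-queue
 * discipline thread_select and choose_thread rely on: NRQS queues
 * indexed by sched_pri, with 0 most favored, a count of queued
 * threads, and a `low' hint that may lag the true lowest occupied
 * index for timesharing threads.  The caller is assumed to hold
 * rq->lock at splsched.
 */
#if 0
thread_t runq_dequeue_sketch(run_queue_t rq)
{
    int i;

    for (i = rq->low; i < NRQS; i++) {
        if (!queue_empty(&rq->runq[i])) {
            thread_t th = (thread_t) dequeue_head(&rq->runq[i]);
            th->runq = RUN_QUEUE_NULL;
            rq->count--;
            rq->low = i;        /* re-tighten the hint */
            return th;
        }
    }
    return THREAD_NULL;
}
#endif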
/*
 *  Stop running the current thread and start running the new thread.
 *  If continuation is non-zero, and the current thread is blocked,
 *  then it will resume by executing continuation on a new stack.
 *  Returns TRUE if the hand-off succeeds.
 *  Assumes splsched.
 */
boolean_t thread_invoke(
    thread_t        old_thread,
    continuation_t  continuation,
    thread_t        new_thread)
{
    /*
     *  Check for invoking the same thread.
     */
    if (old_thread == new_thread) {
        /*
         *  Mark thread interruptible.
         *  Run continuation if there is one.
         */
        thread_lock(new_thread);
        new_thread->state &= ~TH_UNINT;
        thread_unlock(new_thread);
        thread_wakeup(&new_thread->state);

        if (continuation != (void (*)()) 0) {
            (void) spl0();
            call_continuation(continuation);
            /*NOTREACHED*/
        }
        return TRUE;
    }

    /*
     *  Check for stack-handoff.
     */
    thread_lock(new_thread);
    if ((old_thread->stack_privilege != current_stack()) &&
        (continuation != (void (*)()) 0)) {
        switch (new_thread->state & TH_SWAP_STATE) {
          case TH_SWAPPED:
            new_thread->state &= ~(TH_SWAPPED | TH_UNINT);
            thread_unlock(new_thread);
            thread_wakeup(&new_thread->state);

#if NCPUS > 1
            new_thread->last_processor = current_processor();
#endif  /* NCPUS > 1 */

            /*
             *  Set up ast context of new thread and
             *  switch to its timer.
             */
            ast_context(new_thread, cpu_number());
            timer_switch(&new_thread->system_timer);

            stack_handoff(old_thread, new_thread);

            /*
             *  We can dispatch the old thread now.
             *  This is like thread_dispatch, except
             *  that the old thread is left swapped
             *  *without* freeing its stack.
             *  This path is also much more frequent
             *  than actual calls to thread_dispatch.
             */
            thread_lock(old_thread);
            old_thread->swap_func = continuation;

            switch (old_thread->state) {
              case TH_RUN | TH_SUSP:
              case TH_RUN | TH_SUSP | TH_HALTED:
              case TH_RUN | TH_WAIT | TH_SUSP:
                /*
                 *  Suspend the thread
                 */
                old_thread->state =
                    (old_thread->state & ~TH_RUN) | TH_SWAPPED;
                if (old_thread->wake_active) {
                    old_thread->wake_active = FALSE;
                    thread_unlock(old_thread);
                    thread_wakeup((event_t)&old_thread->wake_active);

                    goto after_old_thread;
                }
                break;

              case TH_RUN | TH_SUSP | TH_UNINT:
              case TH_RUN | TH_UNINT:
              case TH_RUN:
                /*
                 *  We can`t suspend the thread yet,
                 *  or it`s still running.
                 *  Put back on a run queue.
                 */
                old_thread->state |= TH_SWAPPED;
                thread_setrun(old_thread, FALSE);
                break;

              case TH_RUN | TH_WAIT | TH_SUSP | TH_UNINT:
              case TH_RUN | TH_WAIT | TH_UNINT:
              case TH_RUN | TH_WAIT:
                /*
                 *  Waiting, and not suspendable.
                 */
                old_thread->state =
                    (old_thread->state & ~TH_RUN) | TH_SWAPPED;
                break;

              case TH_RUN | TH_IDLE:
                /*
                 *  Drop idle thread -- it is already in
                 *  idle_thread_array.
                 */
                old_thread->state = TH_RUN | TH_IDLE | TH_SWAPPED;
                break;

              default:
                state_panic(old_thread, "thread_invoke");
            }
            thread_unlock(old_thread);
          after_old_thread:

            /*
             *  call_continuation calls the continuation
             *  after resetting the current stack pointer
             *  to recover stack space.  If we called
             *  the continuation directly, we would risk
             *  running out of stack.
             */
            counter_always(c_thread_invoke_hits++);
            (void) spl0();
            call_continuation(new_thread->swap_func);
            /*NOTREACHED*/
            return TRUE; /* help for the compiler */

          case TH_SW_COMING_IN:
            /*
             *  Waiting for a stack
             */
            thread_swapin(new_thread);
            thread_unlock(new_thread);
            counter_always(c_thread_invoke_misses++);
            return FALSE;

          case 0:
            /*
             *  Already has a stack - can`t handoff.
             */
            break;
        }
    } else {
        /*
         *  Check that the thread is swapped-in.
         */
        if (new_thread->state & TH_SWAPPED) {
            if ((new_thread->state & TH_SW_COMING_IN) ||
                !stack_alloc_try(new_thread, thread_continue)) {
                thread_swapin(new_thread);
                thread_unlock(new_thread);
                counter_always(c_thread_invoke_misses++);
                return FALSE;
            }
        }
    }

    new_thread->state &= ~(TH_SWAPPED | TH_UNINT);
    thread_unlock(new_thread);
    thread_wakeup(&new_thread->state);

    /*
     *  Thread is now interruptible.
     */
#if NCPUS > 1
    new_thread->last_processor = current_processor();
#endif  /* NCPUS > 1 */

    /*
     *  Set up ast context of new thread and switch to its timer.
     */
    ast_context(new_thread, cpu_number());
    timer_switch(&new_thread->system_timer);

    /*
     *  switch_context is machine-dependent.  It does the
     *  machine-dependent components of a context-switch, like
     *  changing address spaces.  It updates active_threads.
     *  It returns only if a continuation is not supplied.
     */
    counter_always(c_thread_invoke_csw++);
    old_thread = switch_context(old_thread, continuation, new_thread);

    /*
     *  We're back.  Now old_thread is the thread that resumed
     *  us, and we have to dispatch it.
     */
    thread_dispatch(old_thread);

    return TRUE;
}
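/*
 * Sketch (illustrative only, not part of the build) of the
 * continuation discipline that makes the stack handoff in
 * thread_invoke possible: a routine that blocks with a continuation
 * must not keep state on its stack across the block, because the
 * stack may be handed to new_thread.  The example names are
 * hypothetical.
 */
#if 0
void example_continue(void);    /* resumed on a fresh stack */

void example_wait(event_t event)
{
    assert_wait(event, TRUE);
    thread_block(example_continue);
    /*NOTREACHED*/      /* control resumes in example_continue */
}

void example_continue(void)
{
    /*
     * Re-derive any needed state from current_thread(), not from
     * locals of example_wait -- that stack frame is gone.
     */
}
#endif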
/*
 *  thread_continue:
 *
 *  Called when the current thread is given a new stack.
 *  Called at splsched.
 */
void thread_continue(
    thread_t    old_thread)
{
    continuation_t  continuation = current_thread()->swap_func;

    /*
     *  We must dispatch the old thread and then
     *  call the current thread's continuation.
     *  There might not be an old thread, if we are
     *  the first thread to run on this processor.
     */
    if (old_thread != THREAD_NULL)
        thread_dispatch(old_thread);
    (void) spl0();
    (*continuation)();
    /*NOTREACHED*/
}

/*
 *  thread_block:
 *
 *  Block the current thread.  If the thread is runnable
 *  then someone must have woken it up between its request
 *  to sleep and now.  In this case, it goes back on a
 *  run queue.
 *
 *  If a continuation is specified, then thread_block will
 *  attempt to discard the thread's kernel stack.  When the
 *  thread resumes, it will execute the continuation function
 *  on a new kernel stack.
 */
void thread_block(
    continuation_t  continuation)
{
    thread_t    thread = current_thread();
    processor_t myprocessor = cpu_to_processor(cpu_number());
    thread_t    new_thread;
    spl_t       s;

    check_simple_locks();

    s = splsched();

#if FAST_TAS
    {
        extern void recover_ras();

        if (csw_needed(thread, myprocessor))
            recover_ras(thread);
    }
#endif  /* FAST_TAS */

    ast_off(cpu_number(), AST_BLOCK);

    do
        new_thread = thread_select(myprocessor);
    while (!thread_invoke(thread, continuation, new_thread));

    splx(s);
}

/*
 *  thread_run:
 *
 *  Switch directly from the current thread to a specified
 *  thread.  Both the current and new threads must be
 *  runnable.
 *
 *  If a continuation is specified, then thread_block will
 *  attempt to discard the current thread's kernel stack.  When the
 *  thread resumes, it will execute the continuation function
 *  on a new kernel stack.
 */
void thread_run(
    continuation_t  continuation,
    thread_t        new_thread)
{
    thread_t    thread = current_thread();
    processor_t myprocessor = cpu_to_processor(cpu_number());
    spl_t       s;

    check_simple_locks();

    s = splsched();

    while (!thread_invoke(thread, continuation, new_thread))
        new_thread = thread_select(myprocessor);

    splx(s);
}
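/*
 * Usage note (illustrative only): thread_run gives the processor
 * directly to a chosen thread, bypassing the run queues, as the idle
 * thread does below once a thread has been dispatched to this
 * processor:
 *
 *	counter(c_idle_thread_handoff++);
 *	thread_run(idle_thread_continue, new_thread);
 */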
/*
 *  Dispatches a running thread that is not on a runq.
 *  Called at splsched.
 */
void thread_dispatch(
    thread_t    thread)
{
    /*
     *  If we are discarding the thread's stack, we must do it
     *  before the thread has a chance to run.
     */
    thread_lock(thread);

    if (thread->swap_func != (void (*)()) 0) {
        assert((thread->state & TH_SWAP_STATE) == 0);
        thread->state |= TH_SWAPPED;
        stack_free(thread);
    }

    switch (thread->state &~ TH_SWAP_STATE) {
      case TH_RUN | TH_SUSP:
      case TH_RUN | TH_SUSP | TH_HALTED:
      case TH_RUN | TH_WAIT | TH_SUSP:
        /*
         *  Suspend the thread
         */
        thread->state &= ~TH_RUN;
        if (thread->wake_active) {
            thread->wake_active = FALSE;
            thread_unlock(thread);
            thread_wakeup((event_t)&thread->wake_active);
            return;
        }
        break;

      case TH_RUN | TH_SUSP | TH_UNINT:
      case TH_RUN | TH_UNINT:
      case TH_RUN:
        /*
         *  No reason to stop.  Put back on a run queue.
         */
        thread_setrun(thread, FALSE);
        break;

      case TH_RUN | TH_WAIT | TH_SUSP | TH_UNINT:
      case TH_RUN | TH_WAIT | TH_UNINT:
      case TH_RUN | TH_WAIT:
        /*
         *  Waiting, and not suspended.
         */
        thread->state &= ~TH_RUN;
        break;

      case TH_RUN | TH_IDLE:
        /*
         *  Drop idle thread -- it is already in
         *  idle_thread_array.
         */
        break;

      default:
        state_panic(thread, "thread_dispatch");
    }
    thread_unlock(thread);
}

/*
 *  Define shifts for simulating (5/8)**n
 */
shift_data_t    wait_shift[32] = {
    {1,1},{1,3},{1,-3},{2,-7},{3,5},{3,-5},{4,-8},{5,7},
    {5,-7},{6,-10},{7,10},{7,-9},{8,-11},{9,12},{9,-11},{10,-13},
    {11,14},{11,-13},{12,-15},{13,17},{13,-15},{14,-17},{15,19},{16,18},
    {16,-19},{17,22},{18,20},{18,-20},{19,26},{20,22},{20,-22},{21,-27}};

/*
 *  do_priority_computation:
 *
 *  Calculate new priority for thread based on its base priority plus
 *  accumulated usage.  PRI_SHIFT and PRI_SHIFT_2 convert from
 *  usage to priorities.  SCHED_SHIFT converts for the scaling
 *  of the sched_usage field by SCHED_SCALE.  This scaling comes
 *  from the multiplication by sched_load (thread_timer_delta)
 *  in sched.h.  sched_load is calculated as a scaled overload
 *  factor in compute_mach_factor (mach_factor.c).
 */
#ifdef PRI_SHIFT_2
#if PRI_SHIFT_2 > 0
#define do_priority_computation(th, pri) \
    MACRO_BEGIN \
    (pri) = (th)->priority  /* start with base priority */ \
        + ((th)->sched_usage >> (PRI_SHIFT + SCHED_SHIFT)) \
        + ((th)->sched_usage >> (PRI_SHIFT_2 + SCHED_SHIFT)); \
    if ((pri) > NRQS - 1) (pri) = NRQS - 1; \
    MACRO_END
#else   /* PRI_SHIFT_2 */
#define do_priority_computation(th, pri) \
    MACRO_BEGIN \
    (pri) = (th)->priority  /* start with base priority */ \
        + ((th)->sched_usage >> (PRI_SHIFT + SCHED_SHIFT)) \
        - ((th)->sched_usage >> (SCHED_SHIFT - PRI_SHIFT_2)); \
    if ((pri) > NRQS - 1) (pri) = NRQS - 1; \
    MACRO_END
#endif  /* PRI_SHIFT_2 */
#else   /* defined(PRI_SHIFT_2) */
#define do_priority_computation(th, pri) \
    MACRO_BEGIN \
    (pri) = (th)->priority  /* start with base priority */ \
        + ((th)->sched_usage >> (PRI_SHIFT + SCHED_SHIFT)); \
    if ((pri) > NRQS - 1) (pri) = NRQS - 1; \
    MACRO_END
#endif  /* defined(PRI_SHIFT_2) */

/*
 *  compute_priority:
 *
 *  Compute the effective priority of the specified thread.
 *  The effective priority computation is as follows:
 *
 *  Take the base priority for this thread and add
 *  to it an increment derived from its cpu_usage.
 *
 *  The thread *must* be locked by the caller.
 */
void compute_priority(
    thread_t    thread,
    boolean_t   resched)
{
    int pri;

#if MACH_FIXPRI
    if (thread->policy == POLICY_TIMESHARE) {
#endif  /* MACH_FIXPRI */
        do_priority_computation(thread, pri);
        if (thread->depress_priority < 0)
            set_pri(thread, pri, resched);
        else
            thread->depress_priority = pri;
#if MACH_FIXPRI
    } else {
        set_pri(thread, thread->priority, resched);
    }
#endif  /* MACH_FIXPRI */
}

/*
 *  compute_my_priority:
 *
 *  Version of compute priority for current thread or thread
 *  being manipulated by scheduler (going on or off a runq).
 *  Only used for priority updates.  Policy or priority changes
 *  must call compute_priority above.  Caller must have thread
 *  locked and know it is timesharing and not depressed.
 */
void compute_my_priority(
    thread_t    thread)
{
    int temp_pri;

    do_priority_computation(thread, temp_pri);
    thread->sched_pri = temp_pri;
}
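/*
 * Worked example (illustrative numbers): with the PRI_SHIFT-only
 * form of do_priority_computation and PRI_SHIFT + SCHED_SHIFT = 13,
 * a thread with base priority 12 and sched_usage 40960 gets
 *
 *	pri = 12 + (40960 >> 13) = 12 + 5 = 17
 *
 * so accumulated usage moves the effective priority toward the
 * less-favored (numerically larger) end, clipped at NRQS - 1.
 */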
/*
 *  recompute_priorities:
 *
 *  Update the priorities of all threads periodically.
 */
void recompute_priorities(void *param)
{
#if SIMPLE_CLOCK
    int new_usec;
#endif  /* SIMPLE_CLOCK */

    sched_tick++;       /* age usage one more time */
    set_timeout(&recompute_priorities_timer, hz);
#if SIMPLE_CLOCK
    /*
     *  Compensate for clock drift.  sched_usec is an
     *  exponential average of the number of microseconds in
     *  a second.  It decays in the same fashion as cpu_usage.
     */
    new_usec = sched_usec_elapsed();
    sched_usec = (5*sched_usec + 3*new_usec)/8;
#endif  /* SIMPLE_CLOCK */
    /*
     *  Wakeup scheduler thread.
     */
    if (sched_thread_id != THREAD_NULL) {
        clear_wait(sched_thread_id, THREAD_AWAKENED, FALSE);
    }
}

/*
 *  update_priority
 *
 *  Cause the priority computation of a thread that has been
 *  sleeping or suspended to "catch up" with the system.  Thread
 *  *MUST* be locked by caller.  If thread is running, then this
 *  can only be called by the thread on itself.
 */
void update_priority(
    thread_t    thread)
{
    unsigned int    ticks;
    shift_t         shiftp;
    int             temp_pri;

    ticks = sched_tick - thread->sched_stamp;
    assert(ticks != 0);

    /*
     *  If asleep for more than 30 seconds forget all
     *  cpu_usage, else catch up on missed aging.
     *  5/8 ** n is approximated by the two shifts
     *  in the wait_shift array.
     */
    thread->sched_stamp += ticks;
    thread_timer_delta(thread);
    if (ticks > 30) {
        thread->cpu_usage = 0;
        thread->sched_usage = 0;
    } else {
        thread->cpu_usage += thread->cpu_delta;
        thread->sched_usage += thread->sched_delta;

        shiftp = &wait_shift[ticks];
        if (shiftp->shift2 > 0) {
            thread->cpu_usage =
                (thread->cpu_usage >> shiftp->shift1) +
                (thread->cpu_usage >> shiftp->shift2);
            thread->sched_usage =
                (thread->sched_usage >> shiftp->shift1) +
                (thread->sched_usage >> shiftp->shift2);
        } else {
            thread->cpu_usage =
                (thread->cpu_usage >> shiftp->shift1) -
                (thread->cpu_usage >> -(shiftp->shift2));
            thread->sched_usage =
                (thread->sched_usage >> shiftp->shift1) -
                (thread->sched_usage >> -(shiftp->shift2));
        }
    }
    thread->cpu_delta = 0;
    thread->sched_delta = 0;

    /*
     *  Recompute priority if appropriate.
     */
    if (
#if MACH_FIXPRI
        (thread->policy == POLICY_TIMESHARE) &&
#endif  /* MACH_FIXPRI */
        (thread->depress_priority < 0)) {
        do_priority_computation(thread, temp_pri);
        thread->sched_pri = temp_pri;
    }
}
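/*
 * Worked example (illustrative) of the wait_shift entries used by
 * update_priority: each pair approximates multiplication by (5/8)**n
 * with one or two shifts.  wait_shift[1] = {1,3}:
 *
 *	(usage >> 1) + (usage >> 3) = usage * 0.625 = usage * 5/8
 *
 * A negative shift2 subtracts instead: wait_shift[2] = {1,-3} gives
 *
 *	(usage >> 1) - (usage >> 3) = usage * 0.375 ~= (5/8)**2 = 0.390625
 */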
/*
 *  run_queue_enqueue macro for thread_setrun().
 */
#if DEBUG
#define run_queue_enqueue(rq, th) \
    MACRO_BEGIN \
        unsigned int whichq; \
 \
        whichq = (th)->sched_pri; \
        if (whichq >= NRQS) { \
            printf("thread_setrun: pri too high (%d)\n", (th)->sched_pri); \
            whichq = NRQS - 1; \
        } \
 \
        simple_lock(&(rq)->lock);   /* lock the run queue */ \
        checkrq((rq), "thread_setrun: before adding thread"); \
        enqueue_tail(&(rq)->runq[whichq], &((th)->links)); \
 \
        if (whichq < (rq)->low || (rq)->count == 0) \
            (rq)->low = whichq;     /* minimize */ \
 \
        (rq)->count++; \
        (th)->runq = (rq); \
        thread_check((th), (rq)); \
        checkrq((rq), "thread_setrun: after adding thread"); \
        simple_unlock(&(rq)->lock); \
    MACRO_END
#else   /* DEBUG */
#define run_queue_enqueue(rq, th) \
    MACRO_BEGIN \
        unsigned int whichq; \
 \
        whichq = (th)->sched_pri; \
        if (whichq >= NRQS) { \
            printf("thread_setrun: pri too high (%d)\n", (th)->sched_pri); \
            whichq = NRQS - 1; \
        } \
 \
        simple_lock(&(rq)->lock);   /* lock the run queue */ \
        enqueue_tail(&(rq)->runq[whichq], &((th)->links)); \
 \
        if (whichq < (rq)->low || (rq)->count == 0) \
            (rq)->low = whichq;     /* minimize */ \
 \
        (rq)->count++; \
        (th)->runq = (rq); \
        simple_unlock(&(rq)->lock); \
    MACRO_END
#endif  /* DEBUG */

/*
 *  thread_setrun:
 *
 *  Make thread runnable; dispatch directly onto an idle processor
 *  if possible.  Else put on appropriate run queue (processor
 *  if bound, else processor set).  Caller must have lock on thread.
 *  This is always called at splsched.
 */
void thread_setrun(
    thread_t    th,
    boolean_t   may_preempt)
{
    processor_t     processor;
    run_queue_t     rq;
#if NCPUS > 1
    processor_set_t pset;
#endif  /* NCPUS > 1 */

    /*
     *  Update priority if needed.
     */
    if (th->sched_stamp != sched_tick) {
        update_priority(th);
    }

    assert(th->runq == RUN_QUEUE_NULL);

#if NCPUS > 1
    /*
     *  Try to dispatch the thread directly onto an idle processor.
     */
    if ((processor = th->bound_processor) == PROCESSOR_NULL) {
        /*
         *  Not bound, any processor in the processor set is ok.
         */
        pset = th->processor_set;
#if HW_FOOTPRINT
        /*
         *  But first check the last processor it ran on.
         */
        processor = th->last_processor;
        if (processor->state == PROCESSOR_IDLE) {
            simple_lock(&processor->lock);
            simple_lock(&pset->idle_lock);
            if ((processor->state == PROCESSOR_IDLE)
#if MACH_HOST
                && (processor->processor_set == pset)
#endif  /* MACH_HOST */
                ) {
                queue_remove(&pset->idle_queue, processor,
                             processor_t, processor_queue);
                pset->idle_count--;
                processor->next_thread = th;
                processor->state = PROCESSOR_DISPATCHING;
                simple_unlock(&pset->idle_lock);
                simple_unlock(&processor->lock);
                return;
            }
            simple_unlock(&pset->idle_lock);
            simple_unlock(&processor->lock);
        }
#endif  /* HW_FOOTPRINT */

        if (pset->idle_count > 0) {
            simple_lock(&pset->idle_lock);
            if (pset->idle_count > 0) {
                processor = (processor_t) queue_first(&pset->idle_queue);
                queue_remove(&(pset->idle_queue), processor,
                             processor_t, processor_queue);
                pset->idle_count--;
                processor->next_thread = th;
                processor->state = PROCESSOR_DISPATCHING;
                simple_unlock(&pset->idle_lock);
                return;
            }
            simple_unlock(&pset->idle_lock);
        }
        rq = &(pset->runq);
        run_queue_enqueue(rq, th);
        /*
         *  Preempt check
         */
        if (may_preempt &&
#if MACH_HOST
            (pset == current_processor()->processor_set) &&
#endif  /* MACH_HOST */
            (current_thread()->sched_pri > th->sched_pri)) {
            /*
             *  Turn off first_quantum to allow csw.
             */
            current_processor()->first_quantum = FALSE;
            ast_on(cpu_number(), AST_BLOCK);
        }
    } else {
        /*
         *  Bound, can only run on bound processor.  Have to lock
         *  processor here because it may not be the current one.
         */
        if (processor->state == PROCESSOR_IDLE) {
            simple_lock(&processor->lock);
            pset = processor->processor_set;
            simple_lock(&pset->idle_lock);
            if (processor->state == PROCESSOR_IDLE) {
                queue_remove(&pset->idle_queue, processor,
                             processor_t, processor_queue);
                pset->idle_count--;
                processor->next_thread = th;
                processor->state = PROCESSOR_DISPATCHING;
                simple_unlock(&pset->idle_lock);
                simple_unlock(&processor->lock);
                return;
            }
            simple_unlock(&pset->idle_lock);
            simple_unlock(&processor->lock);
        }
        rq = &(processor->runq);
        run_queue_enqueue(rq, th);

        /*
         *  Cause ast on processor if processor is on line.
         *
         *  XXX Don't do this remotely to master because this will
         *  XXX send an interprocessor interrupt, and that's too
         *  XXX expensive for all the unparallelized U*x code.
         */
        if (processor == current_processor()) {
            ast_on(cpu_number(), AST_BLOCK);
        } else if ((processor != master_processor) &&
                   (processor->state != PROCESSOR_OFF_LINE)) {
            cause_ast_check(processor);
        }
    }
#else   /* NCPUS > 1 */
    /*
     *  XXX should replace queue with a boolean in this case.
     */
    if (default_pset.idle_count > 0) {
        processor = (processor_t) queue_first(&default_pset.idle_queue);
        queue_remove(&default_pset.idle_queue, processor,
                     processor_t, processor_queue);
        default_pset.idle_count--;
        processor->next_thread = th;
        processor->state = PROCESSOR_DISPATCHING;
        return;
    }
    if (th->bound_processor == PROCESSOR_NULL) {
        rq = &(default_pset.runq);
    } else {
        rq = &(master_processor->runq);
        ast_on(cpu_number(), AST_BLOCK);
    }
    run_queue_enqueue(rq, th);

    /*
     *  Preempt check
     */
    if (may_preempt &&
        (current_thread()->sched_pri > th->sched_pri)) {
        /*
         *  Turn off first_quantum to allow context switch.
         */
        current_processor()->first_quantum = FALSE;
        ast_on(cpu_number(), AST_BLOCK);
    }
#endif  /* NCPUS > 1 */
}
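/*
 * Sketch (illustrative only) of the dispatch handshake above: to
 * dispatch onto an idle processor, thread_setrun -- holding the pset
 * idle lock -- removes the processor from the idle queue and posts
 * the thread:
 *
 *	processor->next_thread = th;
 *	processor->state = PROCESSOR_DISPATCHING;
 *
 * The idle thread (idle_thread_continue, below) spins on next_thread
 * and completes the handoff with thread_run, so no run queue is
 * touched on this path.
 */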
/*
 *  set_pri:
 *
 *  Set the priority of the specified thread to the specified
 *  priority.  This may cause the thread to change queues.
 *
 *  The thread *must* be locked by the caller.
 */
void set_pri(
    thread_t    th,
    int         pri,
    boolean_t   resched)
{
    struct run_queue    *rq;

    rq = rem_runq(th);
    th->sched_pri = pri;
    if (rq != RUN_QUEUE_NULL) {
        if (resched)
            thread_setrun(th, TRUE);
        else
            run_queue_enqueue(rq, th);
    }
}

/*
 *  rem_runq:
 *
 *  Remove a thread from its run queue.
 *  The run queue that the process was on is returned
 *  (or RUN_QUEUE_NULL if not on a run queue).  Thread *must* be locked
 *  before calling this routine.  Unusual locking protocol on runq
 *  field in thread structure makes this code interesting; see thread.h.
 */
struct run_queue *rem_runq(
    thread_t    th)
{
    struct run_queue    *rq;

    rq = th->runq;
    /*
     *  If rq is RUN_QUEUE_NULL, the thread will stay out of the
     *  run_queues because the caller locked the thread.  Otherwise
     *  the thread is on a runq, but could leave.
     */
    if (rq != RUN_QUEUE_NULL) {
        simple_lock(&rq->lock);
#if DEBUG
        checkrq(rq, "rem_runq: at entry");
#endif  /* DEBUG */
        if (rq == th->runq) {
            /*
             *  Thread is in a runq and we have a lock on
             *  that runq.
             */
#if DEBUG
            checkrq(rq, "rem_runq: before removing thread");
            thread_check(th, rq);
#endif  /* DEBUG */
            remqueue(&rq->runq[0], (queue_entry_t) th);
            rq->count--;
#if DEBUG
            checkrq(rq, "rem_runq: after removing thread");
#endif  /* DEBUG */
            th->runq = RUN_QUEUE_NULL;
            simple_unlock(&rq->lock);
        } else {
            /*
             *  The thread left the runq before we could
             *  lock the runq.  It is not on a runq now, and
             *  can't move again because this routine's
             *  caller locked the thread.
             */
            simple_unlock(&rq->lock);
            rq = RUN_QUEUE_NULL;
        }
    }

    return rq;
}

/*
 *  choose_thread:
 *
 *  Choose a thread to execute.  The thread chosen is removed
 *  from its run queue.  Note that this requires only that the runq
 *  lock be held.
 *
 *  Strategy:
 *      Check processor runq first; if anything found, run it.
 *      Else check pset runq; if nothing found, return idle thread.
 *
 *  Second line of strategy is implemented by choose_pset_thread.
 *  This is only called on processor startup and when thread_block
 *  thinks there's something in the processor runq.
 */
thread_t choose_thread(
    processor_t myprocessor)
{
    thread_t        th;
    queue_t         q;
    run_queue_t     runq;
    int             i;
    processor_set_t pset;

    runq = &myprocessor->runq;

    simple_lock(&runq->lock);
    if (runq->count > 0) {
        q = runq->runq + runq->low;
        for (i = runq->low; i < NRQS; i++, q++) {
            if (!queue_empty(q)) {
                th = (thread_t) dequeue_head(q);
                th->runq = RUN_QUEUE_NULL;
                runq->count--;
                runq->low = i;
                simple_unlock(&runq->lock);
                return th;
            }
        }
        panic("choose_thread");
        /*NOTREACHED*/
    }
    simple_unlock(&runq->lock);

    pset = myprocessor->processor_set;

    simple_lock(&pset->runq.lock);
    return choose_pset_thread(myprocessor, pset);
}

/*
 *  choose_pset_thread:  choose a thread from processor_set runq or
 *  set processor idle and choose its idle thread.
 *
 *  Caller must be at splsched and have a lock on the runq.  This
 *  lock is released by this routine.  myprocessor is always the current
 *  processor, and pset must be its processor set.
 *  This routine chooses and removes a thread from the runq if there
 *  is one (and returns it), else it sets the processor idle and
 *  returns its idle thread.
 */
thread_t choose_pset_thread(
    processor_t     myprocessor,
    processor_set_t pset)
{
    run_queue_t runq;
    thread_t    th;
    queue_t     q;
    int         i;

    runq = &pset->runq;

    if (runq->count > 0) {
        q = runq->runq + runq->low;
        for (i = runq->low; i < NRQS; i++, q++) {
            if (!queue_empty(q)) {
                th = (thread_t) dequeue_head(q);
                th->runq = RUN_QUEUE_NULL;
                runq->count--;
                /*
                 *  For POLICY_FIXEDPRI, runq->low must be
                 *  accurate!
                 */
#if MACH_FIXPRI
                if ((runq->count > 0) &&
                    (pset->policies & POLICY_FIXEDPRI)) {
                    while (queue_empty(q)) {
                        q++;
                        i++;
                    }
                }
#endif  /* MACH_FIXPRI */
                runq->low = i;
#if DEBUG
                checkrq(runq, "choose_pset_thread");
#endif  /* DEBUG */
                simple_unlock(&runq->lock);
                return th;
            }
        }
        panic("choose_pset_thread");
        /*NOTREACHED*/
    }
    simple_unlock(&runq->lock);

    /*
     *  Nothing is runnable, so set this processor idle if it
     *  was running.  If it was in an assignment or shutdown,
     *  leave it alone.  Return its idle thread.
     */
    simple_lock(&pset->idle_lock);
    if (myprocessor->state == PROCESSOR_RUNNING) {
        myprocessor->state = PROCESSOR_IDLE;
        /*
         *  XXX Until it goes away, put master on end of queue, others
         *  XXX on front so master gets used last.
         */
        if (myprocessor == master_processor) {
            queue_enter(&(pset->idle_queue), myprocessor,
                        processor_t, processor_queue);
        } else {
            queue_enter_first(&(pset->idle_queue), myprocessor,
                              processor_t, processor_queue);
        }
        pset->idle_count++;
    }
    simple_unlock(&pset->idle_lock);

    return myprocessor->idle_thread;
}

/*
 *  no_dispatch_count counts number of times processors go non-idle
 *  without being dispatched.  This should be very rare.
 */
int no_dispatch_count = 0;

/*
 *  This is the idle thread, which just looks for other threads
 *  to execute.
 */
void idle_thread_continue(void)
{
    processor_t myprocessor;
    volatile thread_t *threadp;
    volatile int *gcount;
    volatile int *lcount;
    thread_t    new_thread;
    int         state;
    int         mycpu;
    spl_t       s;

    mycpu = cpu_number();
    myprocessor = current_processor();
    threadp = (volatile thread_t *) &myprocessor->next_thread;
    lcount = (volatile int *) &myprocessor->runq.count;

    while (TRUE) {
#ifdef MARK_CPU_IDLE
        MARK_CPU_IDLE(mycpu);
#endif  /* MARK_CPU_IDLE */

#if MACH_HOST
        gcount = (volatile int *) &myprocessor->processor_set->runq.count;
#else   /* MACH_HOST */
        gcount = (volatile int *) &default_pset.runq.count;
#endif  /* MACH_HOST */

        /*
         *  This cpu will be dispatched (by thread_setrun) by setting
         *  next_thread to the value of the thread to run next.  Also
         *  check runq counts.
         */
        while ((*threadp == (volatile thread_t)THREAD_NULL) &&
               (*gcount == 0) && (*lcount == 0)) {
            /* check for ASTs while we wait */
            if (need_ast[mycpu] &~ AST_SCHEDULING) {
                (void) splsched();      /* don't allow scheduling ASTs */
                need_ast[mycpu] &= ~AST_SCHEDULING;
                ast_taken();
                /* back at spl0 */
            }

            /*
             *  machine_idle is a machine dependent function,
             *  to conserve power.
             */
#if POWER_SAVE
            machine_idle(mycpu);
#endif  /* POWER_SAVE */
        }

#ifdef MARK_CPU_ACTIVE
        MARK_CPU_ACTIVE(mycpu);
#endif  /* MARK_CPU_ACTIVE */

        s = splsched();

        /*
         *  This is not a switch statement to avoid the
         *  bounds checking code in the common case.
         */
    retry:
        state = myprocessor->state;
        if (state == PROCESSOR_DISPATCHING) {
            /*
             *  Common case -- cpu dispatched.
             */
            new_thread = (thread_t) *threadp;
            *threadp = (volatile thread_t) THREAD_NULL;
            myprocessor->state = PROCESSOR_RUNNING;

            /*
             *  set up quantum for new thread.
             */
#if MACH_FIXPRI
            if (new_thread->policy == POLICY_TIMESHARE) {
#endif  /* MACH_FIXPRI */
                /*
                 *  Just use set quantum.  No point in
                 *  checking for shorter local runq quantum;
                 *  csw_needed will handle correctly.
                 */
#if MACH_HOST
                myprocessor->quantum =
                    new_thread->processor_set->set_quantum;
#else   /* MACH_HOST */
                myprocessor->quantum = default_pset.set_quantum;
#endif  /* MACH_HOST */
#if MACH_FIXPRI
            } else {
                /*
                 *  POLICY_FIXEDPRI
                 */
                myprocessor->quantum = new_thread->sched_data;
            }
#endif  /* MACH_FIXPRI */
            myprocessor->first_quantum = TRUE;
            counter(c_idle_thread_handoff++);
            thread_run(idle_thread_continue, new_thread);
        } else if (state == PROCESSOR_IDLE) {
            processor_set_t pset;

            pset = myprocessor->processor_set;
            simple_lock(&pset->idle_lock);
            if (myprocessor->state != PROCESSOR_IDLE) {
                /*
                 *  Something happened, try again.
                 */
                simple_unlock(&pset->idle_lock);
                goto retry;
            }
            /*
             *  Processor was not dispatched (Rare).
             *  Set it running again.
             */
            no_dispatch_count++;
            pset->idle_count--;
            queue_remove(&pset->idle_queue, myprocessor,
                         processor_t, processor_queue);
            myprocessor->state = PROCESSOR_RUNNING;
            simple_unlock(&pset->idle_lock);
            counter(c_idle_thread_block++);
            thread_block(idle_thread_continue);
        } else if ((state == PROCESSOR_ASSIGN) ||
                   (state == PROCESSOR_SHUTDOWN)) {
            /*
             *  Changing processor sets, or going off-line.
             *  Release next_thread if there is one.  Actual
             *  thread to run is on a runq.
             */
            if ((new_thread = (thread_t)*threadp) != THREAD_NULL) {
                *threadp = (volatile thread_t) THREAD_NULL;
                thread_setrun(new_thread, FALSE);
            }

            counter(c_idle_thread_block++);
            thread_block(idle_thread_continue);
        } else {
            printf(" Bad processor state %d (Cpu %d)\n",
                   cpu_state(mycpu), mycpu);
            panic("idle_thread");
        }

        (void) splx(s);
    }
}

void idle_thread(void)
{
    thread_t self = current_thread();
    spl_t s;

    stack_privilege(self);

    s = splsched();
    self->priority = NRQS-1;
    self->sched_pri = NRQS-1;

    /*
     *  Set the idle flag to indicate that this is an idle thread,
     *  enter ourselves in the idle array, and thread_block() to get
     *  out of the run queues (and set the processor idle when we
     *  run next time).
     */
    thread_lock(self);
    self->state |= TH_IDLE;
    thread_unlock(self);

    current_processor()->idle_thread = self;
    (void) splx(s);

    counter(c_idle_thread_block++);
    thread_block(idle_thread_continue);
    idle_thread_continue();
    /*NOTREACHED*/
}

/*
 *  sched_thread: scheduler thread.
 *
 *  This thread handles periodic calculations in the scheduler that
 *  we don't want to do at interrupt level.  This allows us to
 *  avoid blocking.
 */
void sched_thread_continue(void)
{
    while (TRUE) {
        (void) compute_mach_factor();

        /*
         *  Check for stuck threads.  This can't be done off of
         *  the callout queue because it requires operations that
         *  can't be used from interrupt level.
         */
        if (sched_tick & 1)
            do_thread_scan();

        assert_wait((event_t) 0, FALSE);
        counter(c_sched_thread_block++);
        thread_block(sched_thread_continue);
    }
}

void sched_thread(void)
{
    sched_thread_id = current_thread();

    /*
     *  Sleep on event 0, recompute_priorities() will awaken
     *  us by calling clear_wait().
     */
    assert_wait((event_t) 0, FALSE);
    counter(c_sched_thread_block++);
    thread_block(sched_thread_continue);
    sched_thread_continue();
    /*NOTREACHED*/
}
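/*
 * Usage sketch (illustrative only, not part of the build): waiting
 * on a null event, as sched_thread does above.  A null event is not
 * entered in the wait event hash (see the else branch of
 * assert_wait), so no thread_wakeup() can find the thread; only
 * clear_wait() applied to the thread itself -- here the call in
 * recompute_priorities() -- can resume it.
 */
#if 0
    assert_wait((event_t) 0, FALSE);    /* uninterruptible, not hashed */
    thread_block(sched_thread_continue);
    /* elsewhere: clear_wait(sched_thread_id, THREAD_AWAKENED, FALSE); */
#endif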
#define MAX_STUCK_THREADS   16

/*
 *  do_thread_scan: scan for stuck threads.  A thread is stuck if
 *  it is runnable but its priority is so low that it has not
 *  run for several seconds.  Its priority should be higher, but
 *  won't be until it runs and calls update_priority.  The scanner
 *  finds these threads and does the updates.
 *
 *  Scanner runs in two passes.  Pass one squirrels likely
 *  thread ids away in an array, and removes them from the run queue.
 *  Pass two does the priority updates.  This is necessary because
 *  the run queue lock is required for the candidate scan, but
 *  cannot be held during updates [set_pri will deadlock].
 *
 *  Array length should be enough so that restart isn't necessary,
 *  but restart logic is included.  Does not scan processor runqs.
 *
 */
boolean_t   do_thread_scan_debug = FALSE;

thread_t    stuck_threads[MAX_STUCK_THREADS];
int         stuck_count = 0;

/*
 *  do_runq_scan is the guts of pass 1.  It scans a runq for
 *  stuck threads.  A boolean is returned indicating whether
 *  it ran out of space.
 */
boolean_t do_runq_scan(
    run_queue_t runq)
{
    spl_t       s;
    queue_t     q;
    thread_t    thread;
    int         count;

    s = splsched();
    simple_lock(&runq->lock);
    if ((count = runq->count) > 0) {
        q = runq->runq + runq->low;
        while (count > 0) {
            thread = (thread_t) queue_first(q);
            while (!queue_end(q, (queue_entry_t) thread)) {
                /*
                 *  Get the next thread now, since we may
                 *  remove this thread from the run queue.
                 */
                thread_t next = (thread_t) queue_next(&thread->links);

                if ((thread->state & TH_SCHED_STATE) == TH_RUN &&
                    sched_tick - thread->sched_stamp > 1) {
                    /*
                     *  Stuck, save its id for later.
                     */
                    if (stuck_count == MAX_STUCK_THREADS) {
                        /*
                         *  !@#$% No more room.
                         */
                        simple_unlock(&runq->lock);
                        splx(s);
                        return TRUE;
                    }

                    /*
                     *  We can`t take the thread_lock here,
                     *  since we already have the runq lock.
                     *  So we can`t grab a reference to the
                     *  thread.  However, a thread that is
                     *  in RUN state cannot be deallocated
                     *  until it stops running.  If it isn`t
                     *  on the runq, then thread_halt cannot
                     *  see it.  So we remove the thread
                     *  from the runq to make it safe.
                     */
                    remqueue(q, (queue_entry_t) thread);
                    runq->count--;
                    thread->runq = RUN_QUEUE_NULL;
                    stuck_threads[stuck_count++] = thread;
                    if (do_thread_scan_debug)
                        printf("do_runq_scan: adding thread %p\n", thread);
                }
                count--;
                thread = next;
            }
            q++;
        }
    }
    simple_unlock(&runq->lock);
    splx(s);

    return FALSE;
}

void do_thread_scan(void)
{
    spl_t       s;
    boolean_t   restart_needed = FALSE;
    thread_t    thread;
#if MACH_HOST
    processor_set_t pset;
#endif  /* MACH_HOST */

    do {
#if MACH_HOST
        simple_lock(&all_psets_lock);
        queue_iterate(&all_psets, pset, processor_set_t, all_psets) {
            if ((restart_needed = do_runq_scan(&pset->runq)))
                break;
        }
        simple_unlock(&all_psets_lock);
#else   /* MACH_HOST */
        restart_needed = do_runq_scan(&default_pset.runq);
#endif  /* MACH_HOST */
        if (!restart_needed)
            restart_needed = do_runq_scan(&master_processor->runq);

        /*
         *  Ok, we now have a collection of candidates -- fix them.
         */
        while (stuck_count > 0) {
            thread = stuck_threads[--stuck_count];
            stuck_threads[stuck_count] = THREAD_NULL;
            s = splsched();
            thread_lock(thread);
            if ((thread->state & TH_SCHED_STATE) == TH_RUN) {
                /*
                 *  Do the priority update.  Call
                 *  thread_setrun because thread is
                 *  off the run queues.
                 */
                update_priority(thread);
                thread_setrun(thread, TRUE);
            }
            thread_unlock(thread);
            splx(s);
        }
    } while (restart_needed);
}

#if DEBUG
void checkrq(
    run_queue_t rq,
    char        *msg)
{
    queue_t         q1;
    int             i, j;
    queue_entry_t   e;
    int             low;

    low = -1;
    j = 0;
    q1 = rq->runq;
    for (i = 0; i < NRQS; i++) {
        if (q1->next == q1) {
            if (q1->prev != q1)
                panic("checkrq: empty at %s", msg);
        } else {
            if (low == -1)
                low = i;

            for (e = q1->next; e != q1; e = e->next) {
                j++;
                if (e->next->prev != e)
                    panic("checkrq-2 at %s", msg);
                if (e->prev->next != e)
                    panic("checkrq-3 at %s", msg);
            }
        }
        q1++;
    }
    if (j != rq->count)
        panic("checkrq: count wrong at %s", msg);
    if (rq->count != 0 && low < rq->low)
        panic("checkrq: low wrong at %s", msg);
}

void thread_check(
    thread_t    th,
    run_queue_t rq)
{
    unsigned int whichq;

    whichq = th->sched_pri;
    if (whichq >= NRQS) {
        printf("thread_check: priority too high\n");
        whichq = NRQS-1;
    }
    if ((th->links.next == &rq->runq[whichq]) &&
        (rq->runq[whichq].prev != (queue_entry_t)th))
        panic("thread_check");
}
#endif  /* DEBUG */