author | Thomas Bushnell <thomas@gnu.org> | 1997-02-25 21:28:37 +0000
committer | Thomas Bushnell <thomas@gnu.org> | 1997-02-25 21:28:37 +0000
commit | f07a4c844da9f0ecae5bbee1ab94be56505f26f7 (patch)
tree | 12b07c7e578fc1a5f53dbfde2632408491ff2a70 /kern
Initial source
Diffstat (limited to 'kern')
81 files changed, 27011 insertions, 0 deletions
diff --git a/kern/act.c b/kern/act.c new file mode 100644 index 0000000..697804f --- /dev/null +++ b/kern/act.c @@ -0,0 +1,1134 @@ +/* + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS + * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF + * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * CSL requests users of this software to return to csl-dist@cs.utah.edu any + * improvements that they make and grant CSL redistribution rights. + * + * Author: Bryan Ford, University of Utah CSL + */ +/* + * File: act.c + * + * Activation management routines + * + */ + +#ifdef MIGRATING_THREADS + +#include <mach_ipc_compat.h> /* XXX */ +#include <mach/kern_return.h> +#include <mach/alert.h> +#include <kern/mach_param.h> /* XXX INCALL_... */ +#include <kern/zalloc.h> +#include <kern/thread.h> +#include <kern/task.h> +#include <kern/act.h> +#include <kern/current.h> +#include "ipc_target.h" + +static void special_handler(ReturnHandler *rh, struct Act *act); + +#ifdef ACT_STATIC_KLUDGE +#undef ACT_STATIC_KLUDGE +#define ACT_STATIC_KLUDGE 300 +#endif + +#ifndef ACT_STATIC_KLUDGE +static zone_t act_zone; +#else +static Act *act_freelist; +static Act free_acts[ACT_STATIC_KLUDGE]; +#endif + +/* This is a rather special activation + which resides at the top and bottom of every thread. + When the last "real" activation on a thread is destroyed, + the null_act on the bottom gets invoked, destroying the thread. + At the top, the null_act acts as an "invalid" cached activation, + which will always fail the cached-activation test on RPC paths. + + As you might expect, most of its members have no particular value. + alerts is zero. */ +Act null_act; + +void +global_act_init() +{ +#ifndef ACT_STATIC_KLUDGE + act_zone = zinit( + sizeof(struct Act), + ACT_MAX * sizeof(struct Act), /* XXX */ + ACT_CHUNK * sizeof(struct Act), + 0, "activations"); +#else + int i; + +printf("activations: [%x-%x]\n", &free_acts[0], &free_acts[ACT_STATIC_KLUDGE]); + act_freelist = &free_acts[0]; + free_acts[0].ipt_next = 0; + for (i = 1; i < ACT_STATIC_KLUDGE; i++) { + free_acts[i].ipt_next = act_freelist; + act_freelist = &free_acts[i]; + } + /* XXX simple_lock_init(&act_freelist->lock); */ +#endif + +#if 0 + simple_lock_init(&null_act.lock); + refcount_init(&null_act.ref_count, 1); +#endif + + act_machine_init(); +} + +/* Create a new activation in a specific task. 
+ Locking: Task */ +kern_return_t act_create(task_t task, vm_offset_t user_stack, + vm_offset_t user_rbuf, vm_size_t user_rbuf_size, + struct Act **new_act) +{ + Act *act; + int rc; + +#ifndef ACT_STATIC_KLUDGE + act = (Act*)zalloc(act_zone); + if (act == 0) + return(KERN_RESOURCE_SHORTAGE); +#else + /* XXX ipt_lock(act_freelist); */ + act = act_freelist; + if (act == 0) panic("out of activations"); + act_freelist = act->ipt_next; + /* XXX ipt_unlock(act_freelist); */ + act->ipt_next = 0; +#endif + bzero(act, sizeof(*act)); /*XXX shouldn't be needed */ + +#ifdef DEBUG + act->lower = act->higher = 0; +#endif + + /* Start with one reference for being active, another for the caller */ + simple_lock_init(&act->lock); + refcount_init(&act->ref_count, 2); + + /* Latch onto the task. */ + act->task = task; + task_reference(task); + + /* Other simple setup */ + act->ipt = 0; + act->thread = 0; + act->suspend_count = 0; + act->active = 1; + act->handlers = 0; + + /* The special_handler will always be last on the returnhandlers list. */ + act->special_handler.next = 0; + act->special_handler.handler = special_handler; + + ipc_act_init(task, act); + act_machine_create(task, act, user_stack, user_rbuf, user_rbuf_size); + + task_lock(task); + + /* Chain the act onto the task's list */ + act->task_links.next = task->acts.next; + act->task_links.prev = &task->acts; + task->acts.next->prev = &act->task_links; + task->acts.next = &act->task_links; + task->act_count++; + + task_unlock(task); + + *new_act = act; + return KERN_SUCCESS; +} + +/* This is called when an act's ref_count drops to zero. + This can only happen when thread is zero (not in use), + ipt is zero (not attached to any ipt), + and active is false (terminated). */ +static void act_free(Act *inc) +{ + act_machine_destroy(inc); + ipc_act_destroy(inc); + + /* Drop the task reference. */ + task_deallocate(inc->task); + + /* Put the act back on the act zone */ +#ifndef ACT_STATIC_KLUDGE + zfree(act_zone, (vm_offset_t)inc); +#else + /* XXX ipt_lock(act_freelist); */ + inc->ipt_next = act_freelist; + act_freelist = inc; + /* XXX ipt_unlock(act_freelist); */ +#endif +} + +void act_deallocate(Act *inc) +{ + refcount_drop(&inc->ref_count, act_free(inc)); +} + +/* Attach an act to the top of a thread ("push the stack"). + The thread must be either the current one or a brand-new one. + Assumes the act is active but not in use. + Assumes that if it is attached to an ipt (i.e. the ipt pointer is nonzero), + the act has already been taken off the ipt's list. + + Already locked: cur_thread, act */ +void act_attach(Act *act, thread_t thread, unsigned init_alert_mask) +{ + Act *lower; + + act->thread = thread; + + /* The thread holds a reference to the activation while using it. */ + refcount_take(&act->ref_count); + + /* XXX detach any cached activations from above the target */ + + /* Chain the act onto the thread's act stack. */ + lower = thread->top_act; + act->lower = lower; + lower->higher = act; + thread->top_act = act; + + act->alert_mask = init_alert_mask; + act->alerts = lower->alerts & init_alert_mask; +} + +/* Remove the current act from the top of the current thread ("pop the stack"). + Return it to the ipt it lives on, if any. 
+ Locking: Thread > Act(not on ipt) > ipc_target */ +void act_detach(Act *cur_act) +{ + thread_t cur_thread = cur_act->thread; + + thread_lock(cur_thread); + act_lock(cur_act); + + /* Unlink the act from the thread's act stack */ + cur_thread->top_act = cur_act->lower; + cur_act->thread = 0; +#ifdef DEBUG + cur_act->lower = cur_act->higher = 0; +#endif + + thread_unlock(cur_thread); + + /* Return it to the ipt's list */ + if (cur_act->ipt) + { + ipt_lock(cur_act->ipt); + cur_act->ipt_next = cur_act->ipt->ipt_acts; + cur_act->ipt->ipt_acts = cur_act; + ipt_unlock(cur_act->ipt); +#if 0 + printf(" return to ipt %x\n", cur_act->ipt); +#endif + } + + act_unlock(cur_act); + + /* Drop the act reference taken for being in use. */ + refcount_drop(&cur_act->ref_count, act_free(cur_act)); +} + + + +/*** Activation control support routines ***/ + +/* This is called by system-dependent code + when it detects that act->handlers is non-null + while returning into user mode. + Activations linked onto an ipt always have null act->handlers, + so RPC entry paths need not check it. + + Locking: Act */ +void act_execute_returnhandlers() +{ + Act *act = current_act(); + +#if 0 + printf("execute_returnhandlers\n"); +#endif + while (1) { + ReturnHandler *rh; + + /* Grab the next returnhandler */ + act_lock(act); + rh = act->handlers; + if (!rh) { + act_unlock(act); + return; + } + act->handlers = rh->next; + act_unlock(act); + + /* Execute it */ + (*rh->handler)(rh, act); + } +} + +/* Try to nudge an act into executing its returnhandler chain. + Ensures that the activation will execute its returnhandlers + before it next executes any of its user-level code. + Also ensures that it is safe to break the thread's activation chain + immediately above this activation, + by rolling out of any outstanding two-way-optimized RPC. + + The target activation is not necessarily active + or even in use by a thread. + If it isn't, this routine does nothing. + + Already locked: Act */ +static void act_nudge(struct Act *act) +{ + /* If it's suspended, wake it up. */ + thread_wakeup(&act->suspend_count); + + /* Do a machine-dependent low-level nudge. + If we're on a multiprocessor, + this may mean sending an interprocessor interrupt. + In any case, it means rolling out of two-way-optimized RPC paths. */ + act_machine_nudge(act); +} + +/* Install the special returnhandler that handles suspension and termination, + if it hasn't been installed already. + + Already locked: Act */ +static void install_special_handler(struct Act *act) +{ + ReturnHandler **rh; + + /* The work handler must always be the last ReturnHandler on the list, + because it can do tricky things like detach the act. */ + for (rh = &act->handlers; *rh; rh = &(*rh)->next); + if (rh != &act->special_handler.next) { + *rh = &act->special_handler; + } + + /* Nudge the target activation, + to ensure that it will see the returnhandler we're adding. */ + act_nudge(act); +} + +/* Locking: Act */ +static void special_handler(ReturnHandler *rh, struct Act *cur_act) +{ + retry: + + act_lock(cur_act); + + /* If someone has killed this invocation, + invoke the return path with a terminated exception. */ + if (!cur_act->active) { + act_unlock(cur_act); + act_machine_return(KERN_TERMINATED); + /* XXX should just set the activation's reentry_routine + and then return from special_handler(). + The magic reentry_routine should just pop its own activation + and chain to the reentry_routine of the _lower_ activation. 
+ If that lower activation is the null_act, + the thread will then be terminated. */ + } + + /* If we're suspended, go to sleep and wait for someone to wake us up. */ + if (cur_act->suspend_count) { + act_unlock(cur_act); + /* XXX mp unsafe */ + thread_wait((int)&cur_act->suspend_count, FALSE); + + act_lock(cur_act); + + /* If we're still (or again) suspended, + go to sleep again after executing any new returnhandlers that may have appeared. */ + if (cur_act->suspend_count) + install_special_handler(cur_act); + } + + act_unlock(cur_act); +} + +#if 0 /************************ OLD SEMI-OBSOLETE CODE *********************/ +static __dead void act_throughcall_return(Act *act) +{ + /* Done - destroy the act and return */ + act_detach(act); + act_terminate(act); + act_deallocate(act); + + /* XXX */ + thread_terminate_self(); +} + +__dead void act_throughcall(task_t task, void (*infunc)()) +{ + thread_t thread = current_thread(); + Act *act; + ReturnHandler rh; + int rc; + + rc = act_create(task, 0, 0, 0, &act); + if (rc) return rc; + + act->return_routine = act_throughcall_return; + + thread_lock(thread); + act_lock(act); + + act_attach(thread, act, 0); + + rh.handler = infunc; + rh.next = act->handlers; + act->handlers = &rh; + + act_unlock(act); + thread_unlock(thread); + + /* Call through the act into the returnhandler list */ + act_machine_throughcall(act); +} + + +/* Grab an act from the specified pool, to pass to act_upcall. + Returns with the act locked, since it's in an inconsistent state + (not on its ipt but not on a thread either). + Returns null if no acts are available on the ipt. + + Locking: ipc_target > Act(on ipt) */ +Act *act_grab(struct ipc_target *ipt) +{ + Act *act; + + ipt_lock(ipt); + + retry: + + /* Pull an act off the ipt's list. */ + act = ipt->acts; + if (!act) + goto none_avail; + ipt->acts = act->ipt_next; + + act_lock(act); + + /* If it's been terminated, drop it and get another one. */ + if (!act->active) { +#if 0 + printf("dropping terminated act %08x\n", act); +#endif + /* XXX ipt_deallocate(ipt); */ + act->ipt = 0; + act_unlock(act); + act_deallocate(act); + goto retry; + } + +none_avail: + ipt_unlock(ipt); + + return act; +} + +/* Try to make an upcall with an act on the specified ipt. + If the ipt is empty, returns KERN_RESOURCE_SHORTAGE. XXX??? + + Locking: ipc_target > Act > Thread */ +kern_return_t act_upcall(struct Act *act, unsigned init_alert_mask, + vm_offset_t user_entrypoint, vm_offset_t user_data) +{ + thread_t cur_thread = current_thread(); + int rc; + + /* XXX locking */ + + act_attach(cur_thread, act, init_alert_mask); + + /* Make the upcall into the destination task */ + rc = act_machine_upcall(act, user_entrypoint, user_data); + + /* Done - detach the act and return */ + act_detach(act); + + return rc; +} +#endif /************************ END OF OLD SEMI-OBSOLETE CODE *********************/ + + + + +/*** Act service routines ***/ + +/* Lock this act and its current thread. + We can only find the thread from the act + and the thread must be locked before the act, + requiring a little icky juggling. + + If the thread is not currently on any thread, + returns with only the act locked. + + Note that this routine is not called on any performance-critical path. + It is only for explicit act operations + which don't happen often. 
+ + Locking: Thread > Act */ +static thread_t act_lock_thread(Act *act) +{ + thread_t thread; + + retry: + + /* Find the thread */ + act_lock(act); + thread = act->thread; + if (thread == 0) + { + act_unlock(act); + return 0; + } + thread_reference(thread); + act_unlock(act); + + /* Lock the thread and re-lock the act, + and make sure the thread didn't change. */ + thread_lock(thread); + act_lock(act); + if (act->thread != thread) + { + act_unlock(act); + thread_unlock(thread); + thread_deallocate(thread); + goto retry; + } + + thread_deallocate(thread); + + return thread; +} + +/* Already locked: act->task + Locking: Task > Act */ +kern_return_t act_terminate_task_locked(struct Act *act) +{ + act_lock(act); + + if (act->active) + { + /* Unlink the act from the task's act list, + so it doesn't appear in calls to task_acts and such. + The act still keeps its ref on the task, however, + until it loses all its own references and is freed. */ + act->task_links.next->prev = act->task_links.prev; + act->task_links.prev->next = act->task_links.next; + act->task->act_count--; + + /* Remove it from any ipc_target. XXX is this right? */ + act_set_target(act, 0); + + /* This will allow no more control operations on this act. */ + act->active = 0; + + /* When the special_handler gets executed, + it will see the terminated condition and exit immediately. */ + install_special_handler(act); + + /* Drop the act reference taken for being active. + (There is still at least one reference left: the one we were passed.) */ + act_deallocate(act); + } + + act_unlock(act); + + return KERN_SUCCESS; +} + +/* Locking: Task > Act */ +kern_return_t act_terminate(struct Act *act) +{ + task_t task = act->task; + kern_return_t rc; + + /* act->task never changes, + so we can read it before locking the act. */ + task_lock(act->task); + + rc = act_terminate_task_locked(act); + + task_unlock(act->task); + + return rc; +} + +/* If this Act is on a Thread and is not the topmost, + yank it and everything below it off of the thread's stack + and put it all on a new thread forked from the original one. + May fail due to resource shortage, but can always be retried. + + Locking: Thread > Act */ +kern_return_t act_yank(Act *act) +{ + thread_t thread = act_lock_thread(act); + +#if 0 + printf("act_yank inc %08x thread %08x\n", act, thread); +#endif + if (thread) + { + if (thread->top_act != act) + { + printf("detaching act %08x from thread %08x\n", act, thread); + + /* Nudge the activation into a clean point for detachment. */ + act_nudge(act); + + /* Now detach the activation + and give the orphan its own flow of control. */ + /*XXX*/ + } + + thread_unlock(thread); + } + act_unlock(act); + + /* Ask the thread to return as quickly as possible, + because its results are now useless. */ + act_abort(act); + + return KERN_SUCCESS; +} + +/* Assign an activation to a specific ipc_target. + Fails if the activation is already assigned to another pool. + If ipt == 0, we remove the from its ipt. + + Locking: Act(not on ipt) > ipc_target > Act(on ipt) */ +kern_return_t act_set_target(Act *act, struct ipc_target *ipt) +{ + act_lock(act); + + if (ipt == 0) + { + Act **lact; + + ipt = act->ipt; + if (ipt == 0) + return; + + /* XXX This is a violation of the locking order. 
*/ + ipt_lock(ipt); + for (lact = &ipt->ipt_acts; *lact; lact = &((*lact)->ipt_next)) + if (act == *lact) + { + *lact = act->ipt_next; + break; + } + ipt_unlock(ipt); + + act->ipt = 0; + /* XXX ipt_deallocate(ipt); */ + act_deallocate(act); + return; + } + if (act->ipt != ipt) + { + if (act->ipt != 0) + { + act_unlock(act); + return KERN_FAILURE; /*XXX*/ + } + act->ipt = ipt; + ipt->ipt_type |= IPT_TYPE_MIGRATE_RPC; + + /* They get references to each other. */ + act_reference(act); + ipt_reference(ipt); + + /* If it is available, + add it to the ipt's available-activation list. */ + if ((act->thread == 0) && (act->suspend_count == 0)) + { + ipt_lock(ipt); + act->ipt_next = ipt->ipt_acts; + act->ipt->ipt_acts = act; + ipt_unlock(ipt); + } + } + act_unlock(act); + + return KERN_SUCCESS; +} + +/* Register an alert from this activation. + Each set bit is propagated upward from (but not including) this activation, + until the top of the chain is reached or the bit is masked. + + Locking: Thread > Act */ +kern_return_t act_alert(struct Act *act, unsigned alerts) +{ + thread_t thread = act_lock_thread(act); + +#if 0 + printf("act_alert %08x: %08x\n", act, alerts); +#endif + if (thread) + { + struct Act *act_up = act; + while ((alerts) && (act_up != thread->top_act)) + { + act_up = act_up->higher; + alerts &= act_up->alert_mask; + act_up->alerts |= alerts; + } + + /* XXX If we reach the top, and it is blocked in glue code, do something. */ + + thread_unlock(thread); + } + act_unlock(act); + + return KERN_SUCCESS; +} + +/* Locking: Thread > Act */ +kern_return_t act_abort(struct Act *act) +{ + return act_alert(act, ALERT_ABORT_STRONG); +} + +/* Locking: Thread > Act */ +kern_return_t act_abort_safely(struct Act *act) +{ + return act_alert(act, ALERT_ABORT_SAFE); +} + +/* Locking: Thread > Act */ +kern_return_t act_alert_mask(struct Act *act, unsigned alert_mask) +{ + panic("act_alert_mask\n"); + return KERN_SUCCESS; +} + +/* Locking: Thread > Act */ +kern_return_t act_suspend(struct Act *act) +{ + thread_t thread = act_lock_thread(act); + kern_return_t rc = KERN_SUCCESS; + +#if 0 + printf("act_suspend %08x\n", act); +#endif + if (act->active) + { + if (act->suspend_count++ == 0) + { + /* XXX remove from ipt */ + install_special_handler(act); + act_nudge(act); + } + } + else + rc = KERN_TERMINATED; + + if (thread) + thread_unlock(thread); + act_unlock(act); + + return rc; +} + +/* Locking: Act */ +kern_return_t act_resume(struct Act *act) +{ +#if 0 + printf("act_resume %08x from %d\n", act, act->suspend_count); +#endif + + act_lock(act); + if (!act->active) + { + act_unlock(act); + return KERN_TERMINATED; + } + + if (act->suspend_count > 0) { + if (--act->suspend_count == 0) { + thread_wakeup(&act->suspend_count); + /* XXX return to ipt */ + } + } + + act_unlock(act); + + return KERN_SUCCESS; +} + +typedef struct GetSetState { + struct ReturnHandler rh; + int flavor; + void *state; + int *pcount; + int result; +} GetSetState; + +/* Locking: Thread */ +kern_return_t get_set_state(struct Act *act, int flavor, void *state, int *pcount, + void (*handler)(ReturnHandler *rh, struct Act *act)) +{ + GetSetState gss; + + /* Initialize a small parameter structure */ + gss.rh.handler = handler; + gss.flavor = flavor; + gss.state = state; + gss.pcount = pcount; + + /* Add it to the act's return handler list */ + act_lock(act); + gss.rh.next = act->handlers; + act->handlers = &gss.rh; + + act_nudge(act); + + act_unlock(act); + /* XXX mp unsafe */ + thread_wait((int)&gss, 0); /* XXX could be interruptible */ + + 
return gss.result; +} + +static void get_state_handler(ReturnHandler *rh, struct Act *act) +{ + GetSetState *gss = (GetSetState*)rh; + + gss->result = act_machine_get_state(act, gss->flavor, gss->state, gss->pcount); + thread_wakeup((int)gss); +} + +/* Locking: Thread */ +kern_return_t act_get_state(struct Act *act, int flavor, natural_t *state, natural_t *pcount) +{ + return get_set_state(act, flavor, state, pcount, get_state_handler); +} + +static void set_state_handler(ReturnHandler *rh, struct Act *act) +{ + GetSetState *gss = (GetSetState*)rh; + + gss->result = act_machine_set_state(act, gss->flavor, gss->state, *gss->pcount); + thread_wakeup((int)gss); +} + +/* Locking: Thread */ +kern_return_t act_set_state(struct Act *act, int flavor, natural_t *state, natural_t count) +{ + return get_set_state(act, flavor, state, &count, set_state_handler); +} + + + +/*** backward compatibility hacks ***/ + +#include <mach/thread_info.h> +#include <mach/thread_special_ports.h> +#include <ipc/ipc_port.h> + +kern_return_t act_thread_info(Act *act, int flavor, + thread_info_t thread_info_out, unsigned *thread_info_count) +{ + return thread_info(act->thread, flavor, thread_info_out, thread_info_count); +} + +kern_return_t +act_thread_assign(Act *act, processor_set_t new_pset) +{ + return thread_assign(act->thread, new_pset); +} + +kern_return_t +act_thread_assign_default(Act *act) +{ + return thread_assign_default(act->thread); +} + +kern_return_t +act_thread_get_assignment(Act *act, processor_set_t *pset) +{ + return thread_get_assignment(act->thread, pset); +} + +kern_return_t +act_thread_priority(Act *act, int priority, boolean_t set_max) +{ + return thread_priority(act->thread, priority, set_max); +} + +kern_return_t +act_thread_max_priority(Act *act, processor_set_t *pset, int max_priority) +{ + return thread_max_priority(act->thread, pset, max_priority); +} + +kern_return_t +act_thread_policy(Act *act, int policy, int data) +{ + return thread_policy(act->thread, policy, data); +} + +kern_return_t +act_thread_wire(struct host *host, Act *act, boolean_t wired) +{ + return thread_wire(host, act->thread, wired); +} + +kern_return_t +act_thread_depress_abort(Act *act) +{ + return thread_depress_abort(act->thread); +} + +/* + * Routine: act_get_special_port [kernel call] + * Purpose: + * Clones a send right for one of the thread's + * special ports. + * Conditions: + * Nothing locked. + * Returns: + * KERN_SUCCESS Extracted a send right. + * KERN_INVALID_ARGUMENT The thread is null. + * KERN_FAILURE The thread is dead. + * KERN_INVALID_ARGUMENT Invalid special port. + */ + +kern_return_t +act_get_special_port(Act *act, int which, ipc_port_t *portp) +{ + ipc_port_t *whichp; + ipc_port_t port; + +#if 0 + printf("act_get_special_port\n"); +#endif + if (act == 0) + return KERN_INVALID_ARGUMENT; + + switch (which) { +#if MACH_IPC_COMPAT + case THREAD_REPLY_PORT: + whichp = &act->reply_port; + break; +#endif MACH_IPC_COMPAT + + case THREAD_KERNEL_PORT: + whichp = &act->self_port; + break; + + case THREAD_EXCEPTION_PORT: + whichp = &act->exception_port; + break; + + default: + return KERN_INVALID_ARGUMENT; + } + + thread_lock(act->thread); + + if (act->self_port == IP_NULL) { + thread_unlock(act->thread); + return KERN_FAILURE; + } + + port = ipc_port_copy_send(*whichp); + thread_unlock(act->thread); + + *portp = port; + return KERN_SUCCESS; +} + +/* + * Routine: act_set_special_port [kernel call] + * Purpose: + * Changes one of the thread's special ports, + * setting it to the supplied send right. 
+ * Conditions: + * Nothing locked. If successful, consumes + * the supplied send right. + * Returns: + * KERN_SUCCESS Changed the special port. + * KERN_INVALID_ARGUMENT The thread is null. + * KERN_FAILURE The thread is dead. + * KERN_INVALID_ARGUMENT Invalid special port. + */ + +kern_return_t +act_set_special_port(Act *act, int which, ipc_port_t port) +{ + ipc_port_t *whichp; + ipc_port_t old; + +#if 0 + printf("act_set_special_port\n"); +#endif + if (act == 0) + return KERN_INVALID_ARGUMENT; + + switch (which) { +#if MACH_IPC_COMPAT + case THREAD_REPLY_PORT: + whichp = &act->reply_port; + break; +#endif MACH_IPC_COMPAT + + case THREAD_KERNEL_PORT: + whichp = &act->self_port; + break; + + case THREAD_EXCEPTION_PORT: + whichp = &act->exception_port; + break; + + default: + return KERN_INVALID_ARGUMENT; + } + + thread_lock(act->thread); + if (act->self_port == IP_NULL) { + thread_unlock(act->thread); + return KERN_FAILURE; + } + + old = *whichp; + *whichp = port; + thread_unlock(act->thread); + + if (IP_VALID(old)) + ipc_port_release_send(old); + return KERN_SUCCESS; +} + +/* + * XXX lame, non-blocking ways to get/set state. + * Return thread's machine-dependent state. + */ +kern_return_t +act_get_state_immediate(act, flavor, old_state, old_state_count) + register Act *act; + int flavor; + void *old_state; /* pointer to OUT array */ + unsigned int *old_state_count; /*IN/OUT*/ +{ + kern_return_t ret; + + act_lock(act); + /* not the top activation, return current state */ + if (act->thread && act->thread->top_act != act) { + ret = act_machine_get_state(act, flavor, + old_state, old_state_count); + act_unlock(act); + return ret; + } + act_unlock(act); + + /* not sure this makes sense */ + return act_get_state(act, flavor, old_state, old_state_count); +} + +/* + * Change thread's machine-dependent state. 
+ */ +kern_return_t +act_set_state_immediate(act, flavor, new_state, new_state_count) + register Act *act; + int flavor; + void *new_state; + unsigned int new_state_count; +{ + kern_return_t ret; + + act_lock(act); + /* not the top activation, set it now */ + if (act->thread && act->thread->top_act != act) { + ret = act_machine_set_state(act, flavor, + new_state, new_state_count); + act_unlock(act); + return ret; + } + act_unlock(act); + + /* not sure this makes sense */ + return act_set_state(act, flavor, new_state, new_state_count); +} + +void act_count() +{ + int i; + Act *act; + static int amin = ACT_STATIC_KLUDGE; + + i = 0; + for (act = act_freelist; act; act = act->ipt_next) + i++; + if (i < amin) + amin = i; + printf("%d of %d activations in use, %d max\n", + ACT_STATIC_KLUDGE-i, ACT_STATIC_KLUDGE, ACT_STATIC_KLUDGE-amin); +} + +dump_act(act) + Act *act; +{ + act_count(); + kact_count(); + while (act) { + printf("%08.8x: thread=%x, task=%x, hi=%x, lo=%x, ref=%x\n", + act, act->thread, act->task, + act->higher, act->lower, act->ref_count); + printf("\talerts=%x, mask=%x, susp=%x, active=%x\n", + act->alerts, act->alert_mask, + act->suspend_count, act->active); + machine_dump_act(&act->mact); + if (act == act->lower) + break; + act = act->lower; + } +} + +#ifdef ACTWATCH +Act * +get_next_act(sp) + int sp; +{ + static int i; + Act *act; + + while (1) { + if (i == ACT_STATIC_KLUDGE) { + i = 0; + return 0; + } + act = &free_acts[i]; + i++; + if (act->mact.space == sp) + return act; + } +} +#endif + +#endif /* MIGRATING_THREADS */ diff --git a/kern/act.h b/kern/act.h new file mode 100644 index 0000000..236e6b3 --- /dev/null +++ b/kern/act.h @@ -0,0 +1,200 @@ +/* + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS + * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF + * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * CSL requests users of this software to return to csl-dist@cs.utah.edu any + * improvements that they make and grant CSL redistribution rights. + * + * Author: Bryan Ford, University of Utah CSL + */ +/* + * File: act.h + * + * This defines the Act structure, + * which is the kernel representation of a user-space activation. + * + */ + +#ifndef _KERN_ACT_H_ +#define _KERN_ACT_H_ + +#ifdef MIGRATING_THREADS + +#ifndef __dead /* XXX */ +#define __dead +#endif + +#include <mach_ipc_compat.h> +#include <mach/vm_param.h> +#include <mach/port.h> +#include <kern/lock.h> +#include <kern/refcount.h> +#include <kern/queue.h> + +#include "act.h"/*XXX*/ + +struct task; +struct thread; +struct Act; + + +struct ReturnHandler { + struct ReturnHandler *next; + void (*handler)(struct ReturnHandler *rh, struct Act *act); +}; +typedef struct ReturnHandler ReturnHandler; + + + +struct Act { + + /*** Task linkage ***/ + + /* Links for task's circular list of activations. + The activation is only on the task's activation list while active. + Must be first. */ + queue_chain_t task_links; + + /* Reference to the task this activation is in. 
+ This is constant as long as the activation is allocated. */ + struct task *task; + + + + /*** Machine-dependent state ***/ + /* XXX should be first to allow maximum flexibility to MD code */ + MachineAct mact; + + + + /*** Consistency ***/ + RefCount ref_count; + decl_simple_lock_data(,lock) + + + + /*** ipc_target-related stuff ***/ + + /* ActPool this activation normally lives on, zero if none. + The activation and actpool hold references to each other as long as this is nonzero + (even when the activation isn't actually on the actpool's list). */ + struct ipc_target *ipt; + + /* Link on the ipt's list of activations. + The activation is only actually on the ipt's list (and hence this is valid) + when we're not in use (thread == 0) and not suspended (suspend_count == 0). */ + struct Act *ipt_next; + + + + /*** Thread linkage ***/ + + /* Thread this activation is in, zero if not in use. + The thread holds a reference on the activation while this is nonzero. */ + struct thread *thread; + + /* The rest in this section is only valid when thread is nonzero. */ + + /* Next higher and next lower activation on the thread's activation stack. + For a topmost activation or the null_act, higher is undefined. + The bottommost activation is always the null_act. */ + struct Act *higher, *lower; + + /* Alert bits pending at this activation; + some of them may have propagated from lower activations. */ + unsigned alerts; + + /* Mask of alert bits to be allowed to pass through from lower levels. */ + unsigned alert_mask; + + + + /*** Control information ***/ + + /* Number of outstanding suspensions on this activation. */ + int suspend_count; + + /* This is normally true, but is set to false when the activation is terminated. */ + int active; + + /* Chain of return handlers to be called + before the thread is allowed to return to this invocation */ + ReturnHandler *handlers; + + /* A special ReturnHandler attached to the above chain to handle suspension and such */ + ReturnHandler special_handler; + + + + /* Special ports attached to this activation */ + struct ipc_port *self; /* not a right, doesn't hold ref */ + struct ipc_port *self_port; /* a send right */ + struct ipc_port *exception_port; /* a send right */ + struct ipc_port *syscall_port; /* a send right */ +#if MACH_IPC_COMPAT + struct ipc_port *reply_port; /* a send right */ + struct task *reply_task; +#endif MACH_IPC_COMPAT +}; +typedef struct Act Act; +typedef struct Act *act_t; +typedef mach_port_t *act_array_t; + +#define ACT_NULL ((Act*)0) + + +/* Exported to world */ +kern_return_t act_create(struct task *task, vm_offset_t user_stack, vm_offset_t user_rbuf, vm_size_t user_rbuf_size, struct Act **new_act); +kern_return_t act_alert_mask(struct Act *act, unsigned alert_mask); +kern_return_t act_alert(struct Act *act, unsigned alerts); +kern_return_t act_abort(struct Act *act); +kern_return_t act_abort_safely(struct Act *act); +kern_return_t act_terminate(struct Act *act); +kern_return_t act_suspend(struct Act *act); +kern_return_t act_resume(struct Act *act); +kern_return_t act_get_state(struct Act *act, int flavor, + natural_t *state, natural_t *pcount); +kern_return_t act_set_state(struct Act *act, int flavor, + natural_t *state, natural_t count); + +#define act_lock(act) simple_lock(&(act)->lock) +#define act_unlock(act) simple_unlock(&(act)->lock) + +#define act_reference(act) refcount_take(&(act)->ref_count) +void act_deallocate(struct Act *act); + +/* Exported to startup.c */ +void act_init(void); + +/* Exported to task.c */ 
+kern_return_t act_terminate_task_locked(struct Act *act); + +/* Exported to thread.c */ +extern Act null_act; +kern_return_t act_create_kernel(Act **out_act); + +/* Exported to machine-dependent activation code */ +void act_execute_returnhandlers(void); + + + +/* System-dependent functions */ +kern_return_t act_machine_create(struct task *task, Act *inc, vm_offset_t user_stack, vm_offset_t user_rbuf, vm_size_t user_rbuf_size); +void act_machine_destroy(Act *inc); +kern_return_t act_machine_set_state(Act *inc, int flavor, int *tstate, unsigned count); +kern_return_t act_machine_get_state(Act *inc, int flavor, int *tstate, unsigned *count); + + + +#endif /* MIGRATING_THREADS */ +#endif _KERN_ACT_H_ diff --git a/kern/assert.h b/kern/assert.h new file mode 100644 index 0000000..f98662b --- /dev/null +++ b/kern/assert.h @@ -0,0 +1,58 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#ifndef _KERN_ASSERT_H_ +#define _KERN_ASSERT_H_ + +/* assert.h 4.2 85/01/21 */ + +#include <kern/macro_help.h> + +#ifdef DEBUG +#define MACH_ASSERT 1 +#endif + +#if MACH_ASSERT +extern void Assert(char *exp, char *filename, int line); + +#define assert(ex) \ +MACRO_BEGIN \ + if (!(ex)) \ + Assert(#ex, __FILE__, __LINE__); \ +MACRO_END + +#ifdef lint +#define assert_static(x) +#else lint +#define assert_static(x) assert(x) +#endif lint + +#else MACH_ASSERT +#define assert(ex) +#define assert_static(ex) +#endif MACH_ASSERT + +#endif _KERN_ASSERT_H_ diff --git a/kern/ast.c b/kern/ast.c new file mode 100644 index 0000000..fc26f94 --- /dev/null +++ b/kern/ast.c @@ -0,0 +1,242 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * + * This file contains routines to check whether an ast is needed. + * + * ast_check() - check whether ast is needed for interrupt or context + * switch. Usually called by clock interrupt handler. + * + */ + +#include <cpus.h> +#include <mach_fixpri.h> +#include <norma_ipc.h> + +#include <kern/ast.h> +#include <kern/counters.h> +#include "cpu_number.h" +#include <kern/queue.h> +#include <kern/sched.h> +#include <kern/sched_prim.h> +#include <kern/thread.h> +#include <kern/processor.h> + +#include <machine/machspl.h> /* for splsched */ + +#if MACH_FIXPRI +#include <mach/policy.h> +#endif MACH_FIXPRI + + +volatile ast_t need_ast[NCPUS]; + +void +ast_init() +{ +#ifndef MACHINE_AST + register int i; + + for (i=0; i<NCPUS; i++) + need_ast[i] = 0; +#endif MACHINE_AST +} + +void +ast_taken() +{ + register thread_t self = current_thread(); + register ast_t reasons; + + /* + * Interrupts are still disabled. + * We must clear need_ast and then enable interrupts. + */ + + reasons = need_ast[cpu_number()]; + need_ast[cpu_number()] = AST_ZILCH; + (void) spl0(); + + /* + * These actions must not block. + */ + + if (reasons & AST_NETWORK) + net_ast(); + +#if NORMA_IPC + if (reasons & AST_NETIPC) + netipc_ast(); +#endif NORMA_IPC + + /* + * Make darn sure that we don't call thread_halt_self + * or thread_block from the idle thread. + */ + + if (self != current_processor()->idle_thread) { +#ifndef MIGRATING_THREADS + while (thread_should_halt(self)) + thread_halt_self(); +#endif + + /* + * One of the previous actions might well have + * woken a high-priority thread, so we use + * csw_needed in addition to AST_BLOCK. + */ + + if ((reasons & AST_BLOCK) || + csw_needed(self, current_processor())) { + counter(c_ast_taken_block++); + thread_block(thread_exception_return); + } + } +} + +void +ast_check() +{ + register int mycpu = cpu_number(); + register processor_t myprocessor; + register thread_t thread = current_thread(); + register run_queue_t rq; + spl_t s = splsched(); + + /* + * Check processor state for ast conditions. + */ + myprocessor = cpu_to_processor(mycpu); + switch(myprocessor->state) { + case PROCESSOR_OFF_LINE: + case PROCESSOR_IDLE: + case PROCESSOR_DISPATCHING: + /* + * No ast. + */ + break; + +#if NCPUS > 1 + case PROCESSOR_ASSIGN: + case PROCESSOR_SHUTDOWN: + /* + * Need ast to force action thread onto processor. + * + * XXX Should check if action thread is already there. + */ + ast_on(mycpu, AST_BLOCK); + break; +#endif NCPUS > 1 + + case PROCESSOR_RUNNING: + + /* + * Propagate thread ast to processor. If we already + * need an ast, don't look for more reasons. + */ + ast_propagate(thread, mycpu); + if (ast_needed(mycpu)) + break; + + /* + * Context switch check. The csw_needed macro isn't + * used here because the rq->low hint may be wrong, + * and fixing it here avoids an extra ast. + * First check the easy cases. + */ + if (thread->state & TH_SUSP || myprocessor->runq.count > 0) { + ast_on(mycpu, AST_BLOCK); + break; + } + + /* + * Update lazy evaluated runq->low if only timesharing. 
+ */ +#if MACH_FIXPRI + if (myprocessor->processor_set->policies & POLICY_FIXEDPRI) { + if (csw_needed(thread,myprocessor)) { + ast_on(mycpu, AST_BLOCK); + break; + } + else { + /* + * For fixed priority threads, set first_quantum + * so entire new quantum is used. + */ + if (thread->policy == POLICY_FIXEDPRI) + myprocessor->first_quantum = TRUE; + } + } + else { +#endif MACH_FIXPRI + rq = &(myprocessor->processor_set->runq); + if (!(myprocessor->first_quantum) && (rq->count > 0)) { + register queue_t q; + /* + * This is not the first quantum, and there may + * be something in the processor_set runq. + * Check whether low hint is accurate. + */ + q = rq->runq + *(volatile int *)&rq->low; + if (queue_empty(q)) { + register int i; + + /* + * Need to recheck and possibly update hint. + */ + simple_lock(&rq->lock); + q = rq->runq + rq->low; + if (rq->count > 0) { + for (i = rq->low; i < NRQS; i++) { + if(!(queue_empty(q))) + break; + q++; + } + rq->low = i; + } + simple_unlock(&rq->lock); + } + + if (rq->low <= thread->sched_pri) { + ast_on(mycpu, AST_BLOCK); + break; + } + } +#if MACH_FIXPRI + } +#endif MACH_FIXPRI + break; + + default: + panic("ast_check: Bad processor state (cpu %d processor %08x) state: %d", + mycpu, myprocessor, myprocessor->state); + } + + (void) splx(s); +} diff --git a/kern/ast.h b/kern/ast.h new file mode 100644 index 0000000..a7b8586 --- /dev/null +++ b/kern/ast.h @@ -0,0 +1,132 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * kern/ast.h: Definitions for Asynchronous System Traps. + */ + +#ifndef _KERN_AST_H_ +#define _KERN_AST_H_ + +/* + * A CPU takes an AST when it is about to return to user code. + * Instead of going back to user code, it calls ast_taken. + * Machine-dependent code is responsible for maintaining + * a set of reasons for an AST, and passing this set to ast_taken. + */ + +#include <cpus.h> + +#include "cpu_number.h" +#include <kern/macro_help.h> +#include <machine/ast.h> + +/* + * Bits for reasons + */ + +#define AST_ZILCH 0x0 +#define AST_HALT 0x1 +#define AST_TERMINATE 0x2 +#define AST_BLOCK 0x4 +#define AST_NETWORK 0x8 +#define AST_NETIPC 0x10 + +#define AST_SCHEDULING (AST_HALT|AST_TERMINATE|AST_BLOCK) + +/* + * Per-thread ASTs are reset at context-switch time. + * machine/ast.h can define MACHINE_AST_PER_THREAD. 
+ */ + +#ifndef MACHINE_AST_PER_THREAD +#define MACHINE_AST_PER_THREAD 0 +#endif + +#define AST_PER_THREAD (AST_HALT | AST_TERMINATE | MACHINE_AST_PER_THREAD) + +typedef unsigned int ast_t; + +extern volatile ast_t need_ast[NCPUS]; + +#ifdef MACHINE_AST +/* + * machine/ast.h is responsible for defining aston and astoff. + */ +#else MACHINE_AST + +#define aston(mycpu) +#define astoff(mycpu) + +#endif MACHINE_AST + +extern void ast_taken(); + +/* + * ast_needed, ast_on, ast_off, ast_context, and ast_propagate + * assume splsched. mycpu is always cpu_number(). It is an + * argument in case cpu_number() is expensive. + */ + +#define ast_needed(mycpu) need_ast[mycpu] + +#define ast_on(mycpu, reasons) \ +MACRO_BEGIN \ + if ((need_ast[mycpu] |= (reasons)) != AST_ZILCH) \ + { aston(mycpu); } \ +MACRO_END + +#define ast_off(mycpu, reasons) \ +MACRO_BEGIN \ + if ((need_ast[mycpu] &= ~(reasons)) == AST_ZILCH) \ + { astoff(mycpu); } \ +MACRO_END + +#define ast_propagate(thread, mycpu) ast_on((mycpu), (thread)->ast) + +#define ast_context(thread, mycpu) \ +MACRO_BEGIN \ + if ((need_ast[mycpu] = \ + (need_ast[mycpu] &~ AST_PER_THREAD) | (thread)->ast) \ + != AST_ZILCH) \ + { aston(mycpu); } \ + else \ + { astoff(mycpu); } \ +MACRO_END + + +#define thread_ast_set(thread, reason) (thread)->ast |= (reason) +#define thread_ast_clear(thread, reason) (thread)->ast &= ~(reason) +#define thread_ast_clear_all(thread) (thread)->ast = AST_ZILCH + +/* + * NOTE: if thread is the current thread, thread_ast_set should + * be followed by ast_propagate(). + */ + +#endif _KERN_AST_H_ diff --git a/kern/bootstrap.c b/kern/bootstrap.c new file mode 100644 index 0000000..f1e3c43 --- /dev/null +++ b/kern/bootstrap.c @@ -0,0 +1,489 @@ +/* + * Mach Operating System + * Copyright (c) 1992-1989 Carnegie Mellon University. + * Copyright (c) 1995-1993 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * Bootstrap the various built-in servers. 
+ */ +#include <mach_kdb.h> +#include <bootstrap_symbols.h> + +#include <mach/port.h> +#include <mach/message.h> +#include "vm_param.h" +#include <ipc/ipc_port.h> +#include <kern/host.h> +#include <kern/strings.h> +#include <kern/task.h> +#include <kern/thread.h> +#include <vm/vm_kern.h> +#include <device/device_port.h> + +#include <sys/varargs.h> + +#include <mach/machine/multiboot.h> +#include <mach/exec/exec.h> + +#if MACH_KDB +#include <machine/db_machdep.h> +#include <ddb/db_sym.h> +#endif + + +static mach_port_t boot_device_port; /* local name */ +static mach_port_t boot_host_port; /* local name */ + +extern struct multiboot_info *boot_info; +extern char *kernel_cmdline; + +static void user_bootstrap(); /* forward */ +static void bootstrap_exec(void *exec_data); + +static mach_port_t +task_insert_send_right( + task_t task, + ipc_port_t port) +{ + mach_port_t name; + + for (name = 1;; name++) { + kern_return_t kr; + + kr = mach_port_insert_right(task->itk_space, name, + (ipc_object_t)port, MACH_MSG_TYPE_PORT_SEND); + if (kr == KERN_SUCCESS) + break; + assert(kr == KERN_NAME_EXISTS); + } + + return name; +} + +void bootstrap_create() +{ + struct multiboot_module *bmod; + + if (!(boot_info->flags & MULTIBOOT_MODS) + || (boot_info->mods_count == 0)) + panic("No bootstrap code loaded with the kernel!"); + if (boot_info->mods_count > 1) + printf("Warning: only one boot module currently used by Mach\n"); + bmod = (struct multiboot_module *)phystokv(boot_info->mods_addr); + bootstrap_exec((void*)phystokv(bmod->mod_start)); + + /* XXX at this point, we could free all the memory used + by the boot modules and the boot loader's descriptors and such. */ +} + +/* XXX won't work with more than one bootstrap service */ +static void *boot_exec; + +static void +bootstrap_exec(void *e) +{ + task_t bootstrap_task; + thread_t bootstrap_thread; + + /* + * Create the bootstrap task. + */ + + (void) task_create(TASK_NULL, FALSE, &bootstrap_task); + (void) thread_create(bootstrap_task, &bootstrap_thread); + + /* + * Insert send rights to the master host and device ports. + */ + + boot_host_port = + task_insert_send_right(bootstrap_task, + ipc_port_make_send(realhost.host_priv_self)); + + boot_device_port = + task_insert_send_right(bootstrap_task, + ipc_port_make_send(master_device_port)); + + /* + * Start the bootstrap thread. + */ + boot_exec = e; + thread_start(bootstrap_thread, user_bootstrap); + (void) thread_resume(bootstrap_thread); +} + +/* + * The following code runs as the kernel mode portion of the + * first user thread. + */ + +/* + * Convert an unsigned integer to its decimal representation. + */ +static void +itoa( + char *str, + vm_size_t num) +{ + char buf[sizeof(vm_size_t)*2+3]; + register char *np; + + np = buf + sizeof(buf); + *--np = 0; + + do { + *--np = '0' + num % 10; + num /= 10; + } while (num != 0); + + strcpy(str, np); +} + +/* + * Collect the boot flags into a single argument string, + * for compatibility with existing bootstrap and startup code. + * Format as a standard flag argument: '-qsdn...' 
+ */ +static void get_compat_strings(char *flags_str, char *root_str) +{ + register char *ip, *cp; + + cp = flags_str; + *cp++ = '-'; + + for (ip = kernel_cmdline; *ip; ) + { + if (*ip == ' ') + { + ip++; + } + else if (*ip == '-') + { + ip++; + while (*ip > ' ') + *cp++ = *ip++; + } + else if (strncmp(ip, "root=", 5) == 0) + { + char *rp = root_str; + + ip += 5; + if (strncmp(ip, "/dev/", 5) == 0) + ip += 5; + while (*ip > ' ') + *rp++ = *ip++; + *rp = '\0'; + } + else + { + while (*ip > ' ') + ip++; + } + } + + if (cp == &flags_str[1]) /* no flags */ + *cp++ = 'x'; + *cp = '\0'; +} + +/* + * Copy boot_data (executable) to the user portion of this task. + */ +static boolean_t load_protect_text = TRUE; +#if MACH_KDB + /* if set, fault in the text segment */ +static boolean_t load_fault_in_text = TRUE; +#endif + +static vm_offset_t +boot_map( + void * data, /* private data */ + vm_offset_t offset) /* offset to map */ +{ + vm_offset_t start_offset = (vm_offset_t) data; + + return pmap_extract(kernel_pmap, start_offset + offset); +} + + +#if BOOTSTRAP_SYMBOLS +static boolean_t load_bootstrap_symbols = TRUE; +#else +static boolean_t load_bootstrap_symbols = FALSE; +#endif + + + +static int boot_read(void *handle, vm_offset_t file_ofs, void *buf, vm_size_t size, + vm_size_t *out_actual) +{ + memcpy(buf, handle + file_ofs, size); + *out_actual = size; + return 0; +} + +static int read_exec(void *handle, vm_offset_t file_ofs, vm_size_t file_size, + vm_offset_t mem_addr, vm_size_t mem_size, + exec_sectype_t sec_type) +{ + vm_map_t user_map = current_task()->map; + vm_offset_t start_page, end_page; + vm_prot_t mem_prot = sec_type & EXEC_SECTYPE_PROT_MASK; + int err; + + if (!(sec_type & EXEC_SECTYPE_ALLOC)) + return 0; + + assert(mem_size > 0); + assert(mem_size >= file_size); + + start_page = trunc_page(mem_addr); + end_page = round_page(mem_addr + mem_size); + + /* + printf("reading bootstrap section %08x-%08x-%08x prot %d pages %08x-%08x\n", + mem_addr, mem_addr+file_size, mem_addr+mem_size, mem_prot, start_page, end_page); + */ + + err = vm_allocate(user_map, &start_page, end_page - start_page, FALSE); + assert(err == 0); + assert(start_page == trunc_page(mem_addr)); + + if (file_size > 0) + { + err = copyout(handle + file_ofs, mem_addr, file_size); + assert(err == 0); + } + + if (mem_prot != VM_PROT_ALL) + { + err = vm_protect(user_map, start_page, end_page - start_page, FALSE, mem_prot); + assert(err == 0); + } +} + +static void copy_bootstrap(void *e, struct exec_info *boot_exec_info) +{ + register vm_map_t user_map = current_task()->map; + int err; + +printf("loading...\n"); + if (err = exec_load(boot_read, read_exec, e, boot_exec_info)) + panic("Cannot load user-bootstrap image: error code %d", err); + +#if MACH_KDB + /* + * Enter the bootstrap symbol table. + */ + +#if 0 /*XXX*/ + if (load_bootstrap_symbols) + (void) X_db_sym_init( + (char*) boot_start+lp->sym_offset, + (char*) boot_start+lp->sym_offset+lp->sym_size, + "bootstrap", + (char *) user_map); +#endif + +#if 0 /*XXX*/ + if (load_fault_in_text) + { + vm_offset_t lenp = round_page(lp->text_start+lp->text_size) - + trunc_page(lp->text_start); + vm_offset_t i = 0; + + while (i < lenp) + { + vm_fault(user_map, text_page_start +i, + load_protect_text ? + VM_PROT_READ|VM_PROT_EXECUTE : + VM_PROT_READ|VM_PROT_EXECUTE | VM_PROT_WRITE, + 0,0,0); + i = round_page (i+1); + } + } +#endif +#endif MACH_KDB +} + +/* + * Allocate the stack, and build the argument list. 
+ */ +extern vm_offset_t user_stack_low(); +extern vm_offset_t set_user_regs(); + +void +static build_args_and_stack(boot_exec_info, va_alist) + struct exec_info *boot_exec_info; + va_dcl +{ + vm_offset_t stack_base; + vm_size_t stack_size; + va_list argv_ptr; + register + char * arg_ptr; + int arg_len; + int arg_count; + register + char * arg_pos; + int arg_item_len; + char * string_pos; + char * zero = (char *)0; + +#define STACK_SIZE (64*1024) + + /* + * Calculate the size of the argument list. + */ + va_start(argv_ptr); + arg_len = 0; + arg_count = 0; + for (;;) { + arg_ptr = va_arg(argv_ptr, char *); + if (arg_ptr == 0) + break; + arg_count++; + arg_len += strlen(arg_ptr) + 1; + } + va_end(argv_ptr); + + /* + * Add space for: + * arg count + * pointers to arguments + * trailing 0 pointer + * dummy 0 pointer to environment variables + * and align to integer boundary + */ + arg_len += sizeof(integer_t) + + (2 + arg_count) * sizeof(char *); + arg_len = (arg_len + sizeof(integer_t) - 1) & ~(sizeof(integer_t)-1); + + /* + * Allocate the stack. + */ + stack_size = round_page(STACK_SIZE); + stack_base = user_stack_low(stack_size); + (void) vm_allocate(current_task()->map, + &stack_base, + stack_size, + FALSE); + + arg_pos = (char *) + set_user_regs(stack_base, stack_size, boot_exec_info, arg_len); + + /* + * Start the strings after the arg-count and pointers + */ + string_pos = arg_pos + + sizeof(integer_t) + + arg_count * sizeof(char *) + + 2 * sizeof(char *); + + /* + * first the argument count + */ + (void) copyout((char *)&arg_count, + arg_pos, + sizeof(integer_t)); + arg_pos += sizeof(integer_t); + + /* + * Then the strings and string pointers for each argument + */ + va_start(argv_ptr); + while (--arg_count >= 0) { + arg_ptr = va_arg(argv_ptr, char *); + arg_item_len = strlen(arg_ptr) + 1; /* include trailing 0 */ + + /* set string pointer */ + (void) copyout((char *)&string_pos, + arg_pos, + sizeof (char *)); + arg_pos += sizeof(char *); + + /* copy string */ + (void) copyout(arg_ptr, string_pos, arg_item_len); + string_pos += arg_item_len; + } + va_end(argv_ptr); + + /* + * last, the trailing 0 argument and a null environment pointer. + */ + (void) copyout((char *)&zero, arg_pos, sizeof(char *)); + arg_pos += sizeof(char *); + (void) copyout((char *)&zero, arg_pos, sizeof(char *)); +} + +static void user_bootstrap() +{ + struct exec_info boot_exec_info; + + char host_string[12]; + char device_string[12]; + char flag_string[12]; + char root_string[12]; + + /* + * Copy the bootstrap code from boot_exec into the user task. + */ + copy_bootstrap(boot_exec, &boot_exec_info); + + /* + * Convert the host and device ports to strings, + * to put in the argument list. + */ + itoa(host_string, boot_host_port); + itoa(device_string, boot_device_port); + + /* + * Get the (compatibility) boot flags and root name strings. + */ + get_compat_strings(flag_string, root_string); + + /* + * Build the argument list and insert in the user task. + * Argument list is + * "bootstrap -<boothowto> <host_port> <device_port> <root_name>" + */ + build_args_and_stack(&boot_exec_info, + "bootstrap", + flag_string, + host_string, + device_string, + root_string, + (char *)0); + +printf("Starting bootstrap at %x\n", boot_exec_info.entry); + + /* + * Exit to user thread. 
+ */ + thread_bootstrap_return(); + /*NOTREACHED*/ +} + diff --git a/kern/compat_xxx_defs.h b/kern/compat_xxx_defs.h new file mode 100644 index 0000000..1878bb2 --- /dev/null +++ b/kern/compat_xxx_defs.h @@ -0,0 +1,64 @@ +/* + * Mach Operating System + * Copyright (c) 1991 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * Compatibility definitions for the MiG-related changes + * to various routines. + * + * When all user code has been relinked, this file and the xxx_ + * and yyy_ routines MUST be removed! + */ + +/* from mach.defs */ + +#define xxx_task_info task_info +#ifdef MIGRATING_THREADS +#define xxx_thread_get_state act_get_state +#define xxx_thread_set_state act_set_state +#define xxx_thread_info act_info +#else +#define xxx_thread_get_state thread_get_state +#define xxx_thread_set_state thread_set_state +#define xxx_thread_info thread_info +#endif /* MIGRATING_THREADS */ + +/* from mach_host.defs */ + +#define yyy_host_info host_info +#define yyy_processor_info processor_info +#define yyy_processor_set_info processor_set_info +#define yyy_processor_control processor_control + +/* from device.defs */ + +#define ds_xxx_device_set_status ds_device_set_status +#define ds_xxx_device_get_status ds_device_get_status +#define ds_xxx_device_set_filter ds_device_set_filter + + + diff --git a/kern/counters.c b/kern/counters.c new file mode 100644 index 0000000..5b606f5 --- /dev/null +++ b/kern/counters.c @@ -0,0 +1,82 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#include <mach_counters.h> + +#include <kern/counters.h> + +/* + * We explicitly initialize the counters to make + * them contiguous in the kernel's data space. + * This makes them easier to examine with ddb. + */ + +mach_counter_t c_thread_invoke_hits = 0; +mach_counter_t c_thread_invoke_misses = 0; +mach_counter_t c_thread_invoke_csw = 0; +mach_counter_t c_thread_handoff_hits = 0; +mach_counter_t c_thread_handoff_misses = 0; + +#if MACH_COUNTERS +mach_counter_t c_threads_current = 0; +mach_counter_t c_threads_max = 0; +mach_counter_t c_threads_min = 0; +mach_counter_t c_threads_total = 0; +mach_counter_t c_stacks_current = 0; +mach_counter_t c_stacks_max = 0; +mach_counter_t c_stacks_min = 0; +mach_counter_t c_stacks_total = 0; +mach_counter_t c_clock_ticks = 0; +mach_counter_t c_ipc_mqueue_send_block = 0; +mach_counter_t c_ipc_mqueue_receive_block_user = 0; +mach_counter_t c_ipc_mqueue_receive_block_kernel = 0; +mach_counter_t c_mach_msg_trap_block_fast = 0; +mach_counter_t c_mach_msg_trap_block_slow = 0; +mach_counter_t c_mach_msg_trap_block_exc = 0; +mach_counter_t c_exception_raise_block = 0; +mach_counter_t c_swtch_block = 0; +mach_counter_t c_swtch_pri_block = 0; +mach_counter_t c_thread_switch_block = 0; +mach_counter_t c_thread_switch_handoff = 0; +mach_counter_t c_ast_taken_block = 0; +mach_counter_t c_thread_halt_self_block = 0; +mach_counter_t c_vm_fault_page_block_busy_user = 0; +mach_counter_t c_vm_fault_page_block_busy_kernel = 0; +mach_counter_t c_vm_fault_page_block_backoff_user = 0; +mach_counter_t c_vm_fault_page_block_backoff_kernel = 0; +mach_counter_t c_vm_page_wait_block_user = 0; +mach_counter_t c_vm_page_wait_block_kernel = 0; +mach_counter_t c_vm_pageout_block = 0; +mach_counter_t c_vm_pageout_scan_block = 0; +mach_counter_t c_idle_thread_block = 0; +mach_counter_t c_idle_thread_handoff = 0; +mach_counter_t c_sched_thread_block = 0; +mach_counter_t c_io_done_thread_block = 0; +mach_counter_t c_net_thread_block = 0; +mach_counter_t c_reaper_thread_block = 0; +mach_counter_t c_swapin_thread_block = 0; +mach_counter_t c_action_thread_block = 0; +#endif MACH_COUNTERS diff --git a/kern/counters.h b/kern/counters.h new file mode 100644 index 0000000..1f13ac5 --- /dev/null +++ b/kern/counters.h @@ -0,0 +1,107 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#ifndef _KERN_COUNTERS_ +#define _KERN_COUNTERS_ + +#include <mach_counters.h> + +/* + * We can count various interesting events and paths. + * + * Use counter() to change the counters, eg: + * counter(c_idle_thread_block++); + * Use counter_always() for non-conditional counters. + */ + +#define counter_always(code) code + +#if MACH_COUNTERS + +#define counter(code) counter_always(code) + +#else MACH_COUNTERS + +#define counter(code) + +#endif MACH_COUNTERS + +/* + * We define the counters with individual integers, + * instead of a big structure, so that ddb + * will know the addresses of the counters. + */ + +typedef unsigned int mach_counter_t; + +extern mach_counter_t c_thread_invoke_hits; +extern mach_counter_t c_thread_invoke_misses; +extern mach_counter_t c_thread_invoke_csw; +extern mach_counter_t c_thread_handoff_hits; +extern mach_counter_t c_thread_handoff_misses; + +#if MACH_COUNTERS +extern mach_counter_t c_threads_current; +extern mach_counter_t c_threads_max; +extern mach_counter_t c_threads_min; +extern mach_counter_t c_threads_total; +extern mach_counter_t c_stacks_current; +extern mach_counter_t c_stacks_max; +extern mach_counter_t c_stacks_min; +extern mach_counter_t c_stacks_total; +extern mach_counter_t c_clock_ticks; +extern mach_counter_t c_ipc_mqueue_send_block; +extern mach_counter_t c_ipc_mqueue_receive_block_user; +extern mach_counter_t c_ipc_mqueue_receive_block_kernel; +extern mach_counter_t c_mach_msg_trap_block_fast; +extern mach_counter_t c_mach_msg_trap_block_slow; +extern mach_counter_t c_mach_msg_trap_block_exc; +extern mach_counter_t c_exception_raise_block; +extern mach_counter_t c_swtch_block; +extern mach_counter_t c_swtch_pri_block; +extern mach_counter_t c_thread_switch_block; +extern mach_counter_t c_thread_switch_handoff; +extern mach_counter_t c_ast_taken_block; +extern mach_counter_t c_thread_halt_self_block; +extern mach_counter_t c_vm_fault_page_block_busy_user; +extern mach_counter_t c_vm_fault_page_block_busy_kernel; +extern mach_counter_t c_vm_fault_page_block_backoff_user; +extern mach_counter_t c_vm_fault_page_block_backoff_kernel; +extern mach_counter_t c_vm_page_wait_block_user; +extern mach_counter_t c_vm_page_wait_block_kernel; +extern mach_counter_t c_vm_pageout_block; +extern mach_counter_t c_vm_pageout_scan_block; +extern mach_counter_t c_idle_thread_block; +extern mach_counter_t c_idle_thread_handoff; +extern mach_counter_t c_sched_thread_block; +extern mach_counter_t c_io_done_thread_block; +extern mach_counter_t c_net_thread_block; +extern mach_counter_t c_reaper_thread_block; +extern mach_counter_t c_swapin_thread_block; +extern mach_counter_t c_action_thread_block; +#endif MACH_COUNTERS + +#endif _KERN_COUNTERS_ diff --git a/kern/cpu_number.h b/kern/cpu_number.h new file mode 100644 index 0000000..32d8323 --- /dev/null +++ b/kern/cpu_number.h @@ -0,0 +1,43 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University + * All Rights Reserved. 
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#ifndef _KERN_CPU_NUMBER_H_ +#define _KERN_CPU_NUMBER_H_ + +#include <cpus.h> + +/* + * Definitions for cpu identification in multi-processors. + */ + +int master_cpu; /* 'master' processor - keeps time */ + +#if (NCPUS == 1) + /* cpu number is always 0 on a single processor system */ +#define cpu_number() (0) + +#endif /* NCPUS == 1 */ +#endif /* _KERN_CPU_NUMBER_H_ */ diff --git a/kern/debug.c b/kern/debug.c new file mode 100644 index 0000000..eda5b2a --- /dev/null +++ b/kern/debug.c @@ -0,0 +1,192 @@ +/* + * Mach Operating System + * Copyright (c) 1993 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. 
+ */ + +#include <mach_kdb.h> +#include <norma_ipc.h> +#include <cpus.h> + +#include "cpu_number.h" +#include <kern/lock.h> +#include <sys/varargs.h> +#include <kern/thread.h> + + + +extern void cnputc(); +void Debugger(); + +#if MACH_KDB +extern int db_breakpoints_inserted; +#endif + +#if NCPUS>1 +simple_lock_data_t Assert_print_lock; /* uninited, we take our chances */ +#endif + +void +Assert(char *exp, char *file, int line) +{ +#if NCPUS > 1 + simple_lock(&Assert_print_lock); + printf("{%d} Assertion failed: file \"%s\", line %d\n", + cpu_number(), file, line); + simple_unlock(&Assert_print_lock); +#else + printf("Assertion `%s' failed in file \"%s\", line %d\n", + exp, file, line); +#endif + +#if MACH_KDB + if (db_breakpoints_inserted) +#endif + Debugger("assertion failure"); +} + +void Debugger(message) + char * message; +{ +#if !MACH_KDB + panic("Debugger invoked, but there isn't one!"); +#endif + +#ifdef lint + message++; +#endif /* lint */ + +#if defined(vax) || defined(PC532) + asm("bpt"); +#endif /* vax */ + +#ifdef sun3 + current_thread()->pcb->flag |= TRACE_KDB; + asm("orw #0x00008000,sr"); +#endif /* sun3 */ +#ifdef sun4 + current_thread()->pcb->pcb_flag |= TRACE_KDB; + asm("ta 0x81"); +#endif /* sun4 */ + +#if defined(mips ) || defined(luna88k) || defined(i860) || defined(alpha) + gimmeabreak(); +#endif + +#ifdef i386 + asm("int3"); +#endif +} + +/* Be prepared to panic anytime, + even before panic_init() gets called from the "normal" place in kern/startup.c. + (panic_init() still needs to be called from there + to make sure we get initialized before starting multiple processors.) */ +boolean_t panic_lock_initialized = FALSE; +decl_simple_lock_data(, panic_lock) + +char *panicstr; +int paniccpu; + +void +panic_init() +{ + if (!panic_lock_initialized) + { + panic_lock_initialized = TRUE; + simple_lock_init(&panic_lock); + } +} + +/*VARARGS1*/ +void +panic(s, va_alist) + char * s; + va_dcl +{ + va_list listp; +#if NORMA_IPC + extern int _node_self; /* node_self() may not be callable yet */ +#endif /* NORMA_IPC */ + + panic_init(); + + simple_lock(&panic_lock); + if (panicstr) { + if (cpu_number() != paniccpu) { + simple_unlock(&panic_lock); + halt_cpu(); + /* NOTREACHED */ + } + } + else { + panicstr = s; + paniccpu = cpu_number(); + } + simple_unlock(&panic_lock); + printf("panic"); +#if NORMA_IPC + printf("(node %U)", _node_self); +#endif +#if NCPUS > 1 + printf("(cpu %U)", paniccpu); +#endif + printf(": "); + va_start(listp); + _doprnt(s, &listp, cnputc, 0); + va_end(listp); + printf("\n"); + + /* Give the user time to see the message */ + { + int i = 60; /* seconds */ + while (i--) + delay (1000000); /* microseconds */ + } + +#if MACH_KDB + Debugger("panic"); +#else + halt_all_cpus (1); +#endif +} + +/* + * We'd like to use BSD's log routines here... + */ +/*VARARGS2*/ +void +log(level, fmt, va_alist) + int level; + char * fmt; + va_dcl +{ + va_list listp; + +#ifdef lint + level++; +#endif + va_start(listp); + _doprnt(fmt, &listp, cnputc, 0); + va_end(listp); +} diff --git a/kern/debug.h b/kern/debug.h new file mode 100644 index 0000000..3520140 --- /dev/null +++ b/kern/debug.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). All rights reserved. 
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS + * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF + * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * CSL requests users of this software to return to csl-dist@cs.utah.edu any + * improvements that they make and grant CSL redistribution rights. + * + * Author: Bryan Ford, University of Utah CSL + */ +/* + * File: debug.h + * Author: Bryan Ford + * + * This file contains definitions for kernel debugging, + * which are compiled in on the DEBUG symbol. + * + */ +#ifndef _mach_debug__debug_ +#define _mach_debug__debug_ + +#include <kern/assert.h> /*XXX*/ + +#ifdef DEBUG + +#define here() printf("@ %s:%d\n", __FILE__, __LINE__) +#define message(args) ({ printf("@ %s:%d: ", __FILE__, __LINE__); printf args; printf("\n"); }) + +#define otsan() panic("%s:%d: off the straight and narrow!", __FILE__, __LINE__) + +#define struct_id_decl unsigned struct_id; +#define struct_id_init(p,id) ((p)->struct_id = (id)) +#define struct_id_denit(p) ((p)->struct_id = 0) +#define struct_id_verify(p,id) \ + ({ if ((p)->struct_id != (id)) \ + panic("%s:%d: "#p" (%08x) struct_id should be "#id" (%08x), is %08x\n", \ + __FILE__, __LINE__, (p), (id), (p->struct_id)); \ + }) + +#else !DEBUG + +#define otsan() + +#define struct_id_decl +#define struct_id_init(p,id) +#define struct_id_denit(p) +#define struct_id_verify(p,id) + +#endif !DEBUG + +#endif _mach_debug__debug_ diff --git a/kern/elf-load.c b/kern/elf-load.c new file mode 100644 index 0000000..1d103d3 --- /dev/null +++ b/kern/elf-load.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 1995, 1994, 1993, 1992, 1991, 1990 + * Open Software Foundation, Inc. + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby granted, + * provided that the above copyright notice appears in all copies and + * that both the copyright notice and this permission notice appear in + * supporting documentation, and that the name of ("OSF") or Open Software + * Foundation not be used in advertising or publicity pertaining to + * distribution of the software without specific, written prior permission. + * + * OSF DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL OSF BE LIABLE FOR ANY + * SPECIAL, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN + * ACTION OF CONTRACT, NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE + */ +/* + * OSF Research Institute MK6.1 (unencumbered) 1/31/1995 + */ + +#include <alloca.h> +#include <mach/machine/vm_types.h> +#include <mach/exec/elf.h> +#include <mach/exec/exec.h> + +int exec_load(exec_read_func_t *read, exec_read_exec_func_t *read_exec, + void *handle, exec_info_t *out_info) +{ + vm_size_t actual; + Elf32_Ehdr x; + Elf32_Phdr *phdr, *ph; + vm_size_t phsize; + int i; + int result; + + /* Read the ELF header. 
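+	   (Overall flow of the code below: validate the ELF magic, class,
+	   data encoding and machine type; then walk the program header
+	   table and hand each PT_LOAD segment to read_exec() with
+	   EXEC_SECTYPE_* flags derived from p_flags.)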
*/ + if ((result = (*read)(handle, 0, &x, sizeof(x), &actual)) != 0) + return result; + if (actual < sizeof(x)) + return EX_NOT_EXECUTABLE; + + if ((x.e_ident[EI_MAG0] != ELFMAG0) || + (x.e_ident[EI_MAG1] != ELFMAG1) || + (x.e_ident[EI_MAG2] != ELFMAG2) || + (x.e_ident[EI_MAG3] != ELFMAG3)) + return EX_NOT_EXECUTABLE; + + /* Make sure the file is of the right architecture. */ + if ((x.e_ident[EI_CLASS] != ELFCLASS32) || + (x.e_ident[EI_DATA] != MY_EI_DATA) || + (x.e_machine != MY_E_MACHINE)) + return EX_WRONG_ARCH; + + /* XXX others */ + out_info->entry = (vm_offset_t) x.e_entry; + + phsize = x.e_phnum * x.e_phentsize; + phdr = (Elf32_Phdr *)alloca(phsize); + + result = (*read)(handle, x.e_phoff, phdr, phsize, &actual); + if (result) + return result; + if (actual < phsize) + return EX_CORRUPT; + + for (i = 0; i < x.e_phnum; i++) + { + ph = (Elf32_Phdr *)((vm_offset_t)phdr + i * x.e_phentsize); + if (ph->p_type == PT_LOAD) + { + exec_sectype_t type = EXEC_SECTYPE_ALLOC | + EXEC_SECTYPE_LOAD; + if (ph->p_flags & PF_R) type |= EXEC_SECTYPE_READ; + if (ph->p_flags & PF_W) type |= EXEC_SECTYPE_WRITE; + if (ph->p_flags & PF_X) type |= EXEC_SECTYPE_EXECUTE; + result = (*read_exec)(handle, + ph->p_offset, ph->p_filesz, + ph->p_vaddr, ph->p_memsz, type); + } + } + + return 0; +} + diff --git a/kern/eventcount.c b/kern/eventcount.c new file mode 100644 index 0000000..9121386 --- /dev/null +++ b/kern/eventcount.c @@ -0,0 +1,372 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. 
+ */ +/* + * File: eventcount.c + * Author: Alessandro Forin + * Date: 10/91 + * + * Eventcounters, for user-level drivers synchronization + * + */ + + +#include <cpus.h> + +#include <mach/machine.h> +#include <kern/ast.h> +#include "cpu_number.h" +#include <kern/lock.h> +#include <kern/processor.h> +#include <kern/queue.h> +#include <kern/sched.h> +#include <kern/sched_prim.h> +#include <kern/thread.h> + +#include <machine/machspl.h> /* For def'n of splsched() */ + +#include <kern/eventcount.h> + + +#if NCPUS <= 1 +void simpler_thread_setrun( + thread_t th, + boolean_t may_preempt); /* forward */ +#endif + +#define MAX_EVCS 10 /* xxx for now */ +evc_t all_eventcounters[MAX_EVCS]; + +/* + * Initialization + */ +void +evc_init(evc_t ev) +{ + int i; + + bzero((char*)ev, sizeof(*ev)); + + /* keep track of who is who */ + for (i = 0; i < MAX_EVCS; i++) + if (all_eventcounters[i] == 0) break; + if (i == MAX_EVCS) { + printf("Too many eventcounters\n"); + return; + } + + all_eventcounters[i] = ev; + ev->ev_id = i; + ev->sanity = ev; + ev->waiting_thread = THREAD_NULL; + simple_lock_init(&ev->lock); +} + +/* + * Finalization + */ +void +evc_destroy(evc_t ev) +{ + evc_signal(ev); + ev->sanity = 0; + if (all_eventcounters[ev->ev_id] == ev) + all_eventcounters[ev->ev_id] = 0; + ev->ev_id = -1; +} + +/* + * Thread termination. + * HORRIBLE. This stuff needs to be fixed. + */ +void evc_notify_abort(thread_t thread) +{ + int i; + evc_t ev; + int s = splsched(); + for (i = 0; i < MAX_EVCS; i++) { + ev = all_eventcounters[i]; + if (ev) { + simple_lock(&ev->lock); + if (ev->waiting_thread == thread) + { + ev->waiting_thread = 0; + /* Removal of a waiting thread has to bump the count by one */ + ev->count++; + } + simple_unlock(&ev->lock); + } + } + splx(s); +} + +#ifdef CONTINUATIONS +/* + * Just so that we return success, and give + * up the stack while blocked + */ +static void +evc_continue(void) +{ + thread_syscall_return(KERN_SUCCESS); + /* NOTREACHED */ +} +#else /* not CONTINUATIONS */ +#define evc_continue 0 +#endif /* not CONTINUATIONS */ + +/* + * User-trappable + */ +kern_return_t evc_wait(natural_t ev_id) +{ + spl_t s; + kern_return_t ret; + evc_t ev; + + if ((ev_id >= MAX_EVCS) || + ((ev = all_eventcounters[ev_id]) == 0) || + (ev->ev_id != ev_id) || (ev->sanity != ev)) + return KERN_INVALID_ARGUMENT; + + s = splsched(); + simple_lock(&ev->lock); + /* + * The values assumed by the "count" field are + * as follows: + * 0 At initialization time, and with no + * waiting thread means no events pending; + * with waiting thread means the event + * was signalled and the thread not yet resumed + * -1 no events, there must be a waiting thread + * N>0 no waiting thread means N pending, + * with waiting thread N-1 pending. 
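+	 *	(Concrete illustration: starting from 0, two evc_signal()s
+	 *	with no waiter leave count == 2; a following evc_wait()
+	 *	returns immediately and leaves count == 1; an evc_wait()
+	 *	on count == 0 drops it to -1 and blocks until the next
+	 *	signal.)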
+ * + */ + if (ev->count > 0) { + ev->count--; + ret = KERN_SUCCESS; + } else { + if (ev->waiting_thread == THREAD_NULL) { + ev->count--; + ev->waiting_thread = current_thread(); + assert_wait((event_t) 0, TRUE); /* ifnot race */ + simple_unlock(&ev->lock); + thread_block(evc_continue); + return KERN_SUCCESS; + } + ret = KERN_NO_SPACE; /* XX */ + } + simple_unlock(&ev->lock); + splx(s); + return ret; +} + +/* + * User-trappable + */ +kern_return_t evc_wait_clear(natural_t ev_id) +{ + spl_t s; + kern_return_t ret; + evc_t ev; + + if ((ev_id >= MAX_EVCS) || + ((ev = all_eventcounters[ev_id]) == 0) || + (ev->ev_id != ev_id) || (ev->sanity != ev)) + return KERN_INVALID_ARGUMENT; + + s = splsched(); + simple_lock(&ev->lock); + + /* + * The values assumed by the "count" field are + * as follows: + * 0 At initialization time, and with no + * waiting thread means no events pending; + * with waiting thread means the event + * was signalled and the thread not yet resumed + * -1 no events, there must be a waiting thread + * N>0 no waiting thread means N pending, + * with waiting thread N-1 pending. + * + */ + /* + * Note that we always clear count before blocking. + */ + if (ev->waiting_thread == THREAD_NULL) { + ev->count = -1; + ev->waiting_thread = current_thread(); + assert_wait((event_t) 0, TRUE); /* ifnot race */ + simple_unlock(&ev->lock); + thread_block(evc_continue); + /* NOTREACHED */ + } + + simple_unlock(&ev->lock); + splx(s); + ret = KERN_NO_SPACE; /* XX */ +} + +/* + * Called exclusively from interrupt context + */ +void +evc_signal(evc_t ev) +{ + register volatile thread_t thread; + register int state; + spl_t s; + if (ev->sanity != ev) + return; + + s = splsched(); + simple_lock(&ev->lock); + ev->count++; + if (thread = ev->waiting_thread, thread != THREAD_NULL) + { + ev->waiting_thread = 0; + +#if (NCPUS > 1) + retry: + while((thread->state & TH_RUN) || thread->lock.lock_data) + ; +#endif + thread_lock(thread); + + /* make thread runnable on this processor */ + /* taken from clear_wait */ + switch ((state = thread->state) & TH_SCHED_STATE) + { + case TH_WAIT | TH_SUSP | TH_UNINT: + case TH_WAIT | TH_UNINT: + case TH_WAIT: + /* + * Sleeping and not suspendable - put + * on run queue. + */ + thread->state = (state &~ TH_WAIT) | TH_RUN; + thread_unlock(thread); +#if NCPUS > 1 + thread_setrun(thread, TRUE); +#else + simpler_thread_setrun(thread, TRUE); +#endif + break; + + case TH_RUN | TH_WAIT: +#if (NCPUS > 1) + /* + * Legal on MP: between assert_wait() + * and thread_block(), in evc_wait() above. + * + * Mmm. Maybe don't need now that the while(..) check is + * done before the thread lock is grabbed..... + */ + thread_unlock(thread); + goto retry; +#else + /*FALLTHROUGH*/ +#endif + case TH_WAIT | TH_SUSP: + case TH_RUN | TH_WAIT | TH_SUSP: + case TH_RUN | TH_WAIT | TH_UNINT: + case TH_RUN | TH_WAIT | TH_SUSP | TH_UNINT: + + /* + * Either already running, or suspended. + * Just clear the wait. + */ + thread->state = state &~ TH_WAIT; + thread_unlock(thread); + break; + + default: + /* + * Not waiting. + */ + panic("evc_signal.3"); + thread_unlock(thread); + break; + } + } + + simple_unlock(&ev->lock); + splx(s); +} + +#if NCPUS <= 1 +/* + * The scheduler is too messy for my old little brain + */ +void +simpler_thread_setrun( + thread_t th, + boolean_t may_preempt) +{ + register struct run_queue *rq; + register whichq; + + /* + * XXX should replace queue with a boolean in this case. 
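+	 *	(Below: if an idle processor is available in the default
+	 *	pset, the thread is handed to it directly; otherwise it is
+	 *	queued at its sched_pri on the master processor's run queue,
+	 *	an AST_BLOCK is posted, and first_quantum is cleared so a
+	 *	context switch can happen.)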
+ */ + if (default_pset.idle_count > 0) { + processor_t processor; + + processor = (processor_t) queue_first(&default_pset.idle_queue); + queue_remove(&default_pset.idle_queue, processor, + processor_t, processor_queue); + default_pset.idle_count--; + processor->next_thread = th; + processor->state = PROCESSOR_DISPATCHING; + return; + } + rq = &(master_processor->runq); + ast_on(cpu_number(), AST_BLOCK); + + whichq = (th)->sched_pri; + simple_lock(&(rq)->lock); /* lock the run queue */ + enqueue_head(&(rq)->runq[whichq], (queue_entry_t) (th)); + + if (whichq < (rq)->low || (rq)->count == 0) + (rq)->low = whichq; /* minimize */ + (rq)->count++; +#ifdef MIGRATING_THREADS + (th)->shuttle.runq = (rq); +#else + (th)->runq = (rq); +#endif + simple_unlock(&(rq)->lock); + + /* + * Turn off first_quantum to allow context switch. + */ + current_processor()->first_quantum = FALSE; +} +#endif /* NCPUS > 1 */ + diff --git a/kern/eventcount.h b/kern/eventcount.h new file mode 100644 index 0000000..e2001de --- /dev/null +++ b/kern/eventcount.h @@ -0,0 +1,57 @@ +/* + * Mach Operating System + * Copyright (c) 1993-1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: eventcount.c + * Author: Alessandro Forin + * Date: 10/91 + * + * Eventcounters, for user-level drivers synchronization + * + */ + +#ifndef _KERN_EVENTCOUNT_H_ +#define _KERN_EVENTCOUNT_H_ 1 + +/* kernel visible only */ + +typedef struct evc { + int count; + thread_t waiting_thread; + natural_t ev_id; + struct evc *sanity; + decl_simple_lock_data(, lock) +} *evc_t; + +extern void evc_init(evc_t ev), + evc_destroy(evc_t ev), + evc_signal(evc_t ev), + evc_notify_abort(thread_t thread); + +/* kernel and user visible */ + +extern kern_return_t evc_wait(natural_t ev_id); + +#endif /* _KERN_EVENTCOUNT_H_ */ diff --git a/kern/exception.c b/kern/exception.c new file mode 100644 index 0000000..ebd9e5b --- /dev/null +++ b/kern/exception.c @@ -0,0 +1,1003 @@ +/* + * Mach Operating System + * Copyright (c) 1993,1992,1991,1990,1989,1988,1987 Carnegie Mellon University. + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#include <norma_ipc.h> +#include <mach_kdb.h> + +#include <mach/boolean.h> +#include <mach/kern_return.h> +#include <mach/message.h> +#include <mach/port.h> +#include <mach/mig_errors.h> +#include <ipc/port.h> +#include <ipc/ipc_entry.h> +#include <ipc/ipc_object.h> +#include <ipc/ipc_space.h> +#include <ipc/ipc_port.h> +#include <ipc/ipc_pset.h> +#include <ipc/mach_msg.h> +#include <ipc/ipc_machdep.h> +#include <kern/counters.h> +#include <kern/ipc_tt.h> +#include <kern/task.h> +#include <kern/thread.h> +#include <kern/processor.h> +#include <kern/sched.h> +#include <kern/sched_prim.h> +#include <mach/machine/vm_types.h> + + + +extern void exception(); +extern void exception_try_task(); +extern void exception_no_server(); + +extern void exception_raise(); +extern kern_return_t exception_parse_reply(); +extern void exception_raise_continue(); +extern void exception_raise_continue_slow(); +extern void exception_raise_continue_fast(); + +#if MACH_KDB +extern void thread_kdb_return(); +extern void db_printf(); + +boolean_t debug_user_with_kdb = FALSE; +#endif /* MACH_KDB */ + +#ifdef KEEP_STACKS +/* + * Some obsolete architectures don't support kernel stack discarding + * or the thread_exception_return, thread_syscall_return continuations. + * For these architectures, the NOTREACHED comments below are incorrect. + * The exception function is expected to return. + * So the return statements along the slow paths are important. + */ +#endif KEEP_STACKS + +/* + * Routine: exception + * Purpose: + * The current thread caught an exception. + * We make an up-call to the thread's exception server. + * Conditions: + * Nothing locked and no resources held. + * Called from an exception context, so + * thread_exception_return and thread_kdb_return + * are possible. + * Returns: + * Doesn't return. + */ + +void +exception(_exception, code, subcode) + integer_t _exception, code, subcode; +{ + register ipc_thread_t self = current_thread(); + register ipc_port_t exc_port; + + if (_exception == KERN_SUCCESS) + panic("exception"); + + /* + * Optimized version of retrieve_thread_exception. + */ + + ith_lock(self); + assert(self->ith_self != IP_NULL); + exc_port = self->ith_exception; + if (!IP_VALID(exc_port)) { + ith_unlock(self); + exception_try_task(_exception, code, subcode); + /*NOTREACHED*/ + return; + } + + ip_lock(exc_port); + ith_unlock(self); + if (!ip_active(exc_port)) { + ip_unlock(exc_port); + exception_try_task(_exception, code, subcode); + /*NOTREACHED*/ + return; + } + + /* + * Make a naked send right for the exception port. + */ + + ip_reference(exc_port); + exc_port->ip_srights++; + ip_unlock(exc_port); + + /* + * If this exception port doesn't work, + * we will want to try the task's exception port. + * Indicate this by saving the exception state. 
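+	 * (The saved state is consulted on the reply path:
+	 * exception_raise_continue_slow() re-drives the exception via
+	 * exception_try_task() when ith_exc is still set after the
+	 * thread-level server reports failure.)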
+ */ + + self->ith_exc = _exception; + self->ith_exc_code = code; + self->ith_exc_subcode = subcode; + + exception_raise(exc_port, + retrieve_thread_self_fast(self), + retrieve_task_self_fast(self->task), + _exception, code, subcode); + /*NOTREACHED*/ +} + +/* + * Routine: exception_try_task + * Purpose: + * The current thread caught an exception. + * We make an up-call to the task's exception server. + * Conditions: + * Nothing locked and no resources held. + * Called from an exception context, so + * thread_exception_return and thread_kdb_return + * are possible. + * Returns: + * Doesn't return. + */ + +void +exception_try_task(_exception, code, subcode) + integer_t _exception, code, subcode; +{ + ipc_thread_t self = current_thread(); + register task_t task = self->task; + register ipc_port_t exc_port; + + /* + * Optimized version of retrieve_task_exception. + */ + + itk_lock(task); + assert(task->itk_self != IP_NULL); + exc_port = task->itk_exception; + if (!IP_VALID(exc_port)) { + itk_unlock(task); + exception_no_server(); + /*NOTREACHED*/ + return; + } + + ip_lock(exc_port); + itk_unlock(task); + if (!ip_active(exc_port)) { + ip_unlock(exc_port); + exception_no_server(); + /*NOTREACHED*/ + return; + } + + /* + * Make a naked send right for the exception port. + */ + + ip_reference(exc_port); + exc_port->ip_srights++; + ip_unlock(exc_port); + + /* + * This is the thread's last chance. + * Clear the saved exception state. + */ + + self->ith_exc = KERN_SUCCESS; + + exception_raise(exc_port, + retrieve_thread_self_fast(self), + retrieve_task_self_fast(task), + _exception, code, subcode); + /*NOTREACHED*/ +} + +/* + * Routine: exception_no_server + * Purpose: + * The current thread took an exception, + * and no exception server took responsibility + * for the exception. So good bye, charlie. + * Conditions: + * Nothing locked and no resources held. + * Called from an exception context, so + * thread_kdb_return is possible. + * Returns: + * Doesn't return. + */ + +void +exception_no_server() +{ + register ipc_thread_t self = current_thread(); + + /* + * If this thread is being terminated, cooperate. + */ + + while (thread_should_halt(self)) + thread_halt_self(); + +#if MACH_KDB + if (debug_user_with_kdb) { + /* + * Debug the exception with kdb. + * If kdb handles the exception, + * then thread_kdb_return won't return. + */ + + db_printf("No exception server, calling kdb...\n"); + thread_kdb_return(); + } +#endif MACH_KDB + + /* + * All else failed; terminate task. 
+ */ + + (void) task_terminate(self->task); + thread_halt_self(); + /*NOTREACHED*/ +} + +#define MACH_EXCEPTION_ID 2400 /* from mach/exc.defs */ +#define MACH_EXCEPTION_REPLY_ID (MACH_EXCEPTION_ID + 100) + +struct mach_exception { + mach_msg_header_t Head; + mach_msg_type_t threadType; + mach_port_t thread; + mach_msg_type_t taskType; + mach_port_t task; + mach_msg_type_t exceptionType; + integer_t exception; + mach_msg_type_t codeType; + integer_t code; + mach_msg_type_t subcodeType; + integer_t subcode; +}; + +#define INTEGER_T_SIZE_IN_BITS (8 * sizeof(integer_t)) +#define INTEGER_T_TYPE MACH_MSG_TYPE_INTEGER_T + /* in mach/machine/vm_types.h */ + +mach_msg_type_t exc_port_proto = { + /* msgt_name = */ MACH_MSG_TYPE_PORT_SEND, + /* msgt_size = */ PORT_T_SIZE_IN_BITS, + /* msgt_number = */ 1, + /* msgt_inline = */ TRUE, + /* msgt_longform = */ FALSE, + /* msgt_deallocate = */ FALSE, + /* msgt_unused = */ 0 +}; + +mach_msg_type_t exc_code_proto = { + /* msgt_name = */ INTEGER_T_TYPE, + /* msgt_size = */ INTEGER_T_SIZE_IN_BITS, + /* msgt_number = */ 1, + /* msgt_inline = */ TRUE, + /* msgt_longform = */ FALSE, + /* msgt_deallocate = */ FALSE, + /* msgt_unused = */ 0 +}; + +/* + * Routine: exception_raise + * Purpose: + * Make an exception_raise up-call to an exception server. + * + * dest_port must be a valid naked send right. + * thread_port and task_port are naked send rights. + * All three are always consumed. + * + * self->ith_exc, self->ith_exc_code, self->ith_exc_subcode + * must be appropriately initialized. + * Conditions: + * Nothing locked. We are being called in an exception context, + * so thread_exception_return may be called. + * Returns: + * Doesn't return. + */ + +int exception_raise_misses = 0; + +void +exception_raise(dest_port, thread_port, task_port, + _exception, code, subcode) + ipc_port_t dest_port; + ipc_port_t thread_port; + ipc_port_t task_port; + integer_t _exception, code, subcode; +{ + ipc_thread_t self = current_thread(); + ipc_thread_t receiver; + ipc_port_t reply_port; + ipc_mqueue_t dest_mqueue; + ipc_mqueue_t reply_mqueue; + ipc_kmsg_t kmsg; + mach_msg_return_t mr; + + assert(IP_VALID(dest_port)); + + /* + * We will eventually need a message buffer. + * Grab the buffer now, while nothing is locked. + * This buffer will get handed to the exception server, + * and it will give the buffer back with its reply. + */ + + kmsg = ikm_cache(); + if (kmsg != IKM_NULL) { + ikm_cache() = IKM_NULL; + ikm_check_initialized(kmsg, IKM_SAVED_KMSG_SIZE); + } else { + kmsg = ikm_alloc(IKM_SAVED_MSG_SIZE); + if (kmsg == IKM_NULL) + panic("exception_raise"); + ikm_init(kmsg, IKM_SAVED_MSG_SIZE); + } + + /* + * We need a reply port for the RPC. + * Check first for a cached port. + */ + + ith_lock(self); + assert(self->ith_self != IP_NULL); + + reply_port = self->ith_rpc_reply; + if (reply_port == IP_NULL) { + ith_unlock(self); + reply_port = ipc_port_alloc_reply(); + ith_lock(self); + if ((reply_port == IP_NULL) || + (self->ith_rpc_reply != IP_NULL)) + panic("exception_raise"); + self->ith_rpc_reply = reply_port; + } + + ip_lock(reply_port); + assert(ip_active(reply_port)); + ith_unlock(self); + + /* + * Make a naked send-once right for the reply port, + * to hand to the exception server. + * Make an extra reference for the reply port, + * to receive on. This protects us against + * mach_msg_abort_rpc. 
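+	 *	(Below, ip_sorights is incremented together with one port
+	 *	reference for the send-once right, and a second reference
+	 *	is taken and recorded in self->ith_port for the receive
+	 *	side.)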
+ */ + + reply_port->ip_sorights++; + ip_reference(reply_port); + + ip_reference(reply_port); + self->ith_port = reply_port; + + reply_mqueue = &reply_port->ip_messages; + imq_lock(reply_mqueue); + assert(ipc_kmsg_queue_empty(&reply_mqueue->imq_messages)); + ip_unlock(reply_port); + + /* + * Make sure we can queue to the destination port. + */ + + if (!ip_lock_try(dest_port)) { + imq_unlock(reply_mqueue); + goto slow_exception_raise; + } + + if (!ip_active(dest_port) || +#if NORMA_IPC + IP_NORMA_IS_PROXY(dest_port) || +#endif NORMA_IPC + (dest_port->ip_receiver == ipc_space_kernel)) { + imq_unlock(reply_mqueue); + ip_unlock(dest_port); + goto slow_exception_raise; + } + + /* + * Find the destination message queue. + */ + + { + register ipc_pset_t dest_pset; + + dest_pset = dest_port->ip_pset; + if (dest_pset == IPS_NULL) + dest_mqueue = &dest_port->ip_messages; + else + dest_mqueue = &dest_pset->ips_messages; + } + + if (!imq_lock_try(dest_mqueue)) { + imq_unlock(reply_mqueue); + ip_unlock(dest_port); + goto slow_exception_raise; + } + + /* + * Safe to unlock dest_port, because we hold + * dest_mqueue locked. We never bother changing + * dest_port->ip_msgcount. + */ + + ip_unlock(dest_port); + + receiver = ipc_thread_queue_first(&dest_mqueue->imq_threads); + if ((receiver == ITH_NULL) || + !((receiver->swap_func == (void (*)()) mach_msg_continue) || + ((receiver->swap_func == + (void (*)()) mach_msg_receive_continue) && + (sizeof(struct mach_exception) <= receiver->ith_msize) && + ((receiver->ith_option & MACH_RCV_NOTIFY) == 0))) || + !thread_handoff(self, exception_raise_continue, receiver)) { + imq_unlock(reply_mqueue); + imq_unlock(dest_mqueue); + goto slow_exception_raise; + } + counter(c_exception_raise_block++); + + assert(current_thread() == receiver); + + /* + * We need to finish preparing self for its + * time asleep in reply_mqueue. self is left + * holding the extra ref for reply_port. + */ + + ipc_thread_enqueue_macro(&reply_mqueue->imq_threads, self); + self->ith_state = MACH_RCV_IN_PROGRESS; + self->ith_msize = MACH_MSG_SIZE_MAX; + imq_unlock(reply_mqueue); + + /* + * Finish extracting receiver from dest_mqueue. + */ + + ipc_thread_rmqueue_first_macro( + &dest_mqueue->imq_threads, receiver); + imq_unlock(dest_mqueue); + + /* + * Release the receiver's reference for his object. + */ + { + register ipc_object_t object = receiver->ith_object; + + io_lock(object); + io_release(object); + io_check_unlock(object); + } + + { + register struct mach_exception *exc = + (struct mach_exception *) &kmsg->ikm_header; + ipc_space_t space = receiver->task->itk_space; + + /* + * We are running as the receiver now. We hold + * the following resources, which must be consumed: + * kmsg, send-once right for reply_port + * send rights for dest_port, thread_port, task_port + * Synthesize a kmsg for copyout to the receiver. + */ + + exc->Head.msgh_bits = (MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND_ONCE, + MACH_MSG_TYPE_PORT_SEND) | + MACH_MSGH_BITS_COMPLEX); + exc->Head.msgh_size = sizeof *exc; + /* exc->Head.msgh_remote_port later */ + /* exc->Head.msgh_local_port later */ + exc->Head.msgh_seqno = 0; + exc->Head.msgh_id = MACH_EXCEPTION_ID; + exc->threadType = exc_port_proto; + /* exc->thread later */ + exc->taskType = exc_port_proto; + /* exc->task later */ + exc->exceptionType = exc_code_proto; + exc->exception = _exception; + exc->codeType = exc_code_proto; + exc->code = code; + exc->subcodeType = exc_code_proto; + exc->subcode = subcode; + + /* + * Check that the receiver can handle the message. 
+ */ + + if (receiver->ith_rcv_size < sizeof(struct mach_exception)) { + /* + * ipc_kmsg_destroy is a handy way to consume + * the resources we hold, but it requires setup. + */ + + exc->Head.msgh_bits = + (MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND, + MACH_MSG_TYPE_PORT_SEND_ONCE) | + MACH_MSGH_BITS_COMPLEX); + exc->Head.msgh_remote_port = (mach_port_t) dest_port; + exc->Head.msgh_local_port = (mach_port_t) reply_port; + exc->thread = (mach_port_t) thread_port; + exc->task = (mach_port_t) task_port; + + ipc_kmsg_destroy(kmsg); + thread_syscall_return(MACH_RCV_TOO_LARGE); + /*NOTREACHED*/ + } + + is_write_lock(space); + assert(space->is_active); + + /* + * To do an atomic copyout, need simultaneous + * locks on both ports and the space. + */ + + ip_lock(dest_port); + if (!ip_active(dest_port) || + !ip_lock_try(reply_port)) { + abort_copyout: + ip_unlock(dest_port); + is_write_unlock(space); + + /* + * Oh well, we have to do the header the slow way. + * First make it look like it's in-transit. + */ + + exc->Head.msgh_bits = + (MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND, + MACH_MSG_TYPE_PORT_SEND_ONCE) | + MACH_MSGH_BITS_COMPLEX); + exc->Head.msgh_remote_port = (mach_port_t) dest_port; + exc->Head.msgh_local_port = (mach_port_t) reply_port; + + mr = ipc_kmsg_copyout_header(&exc->Head, space, + MACH_PORT_NULL); + if (mr == MACH_MSG_SUCCESS) + goto copyout_body; + + /* + * Ack! Prepare for ipc_kmsg_copyout_dest. + * It will consume thread_port and task_port. + */ + + exc->thread = (mach_port_t) thread_port; + exc->task = (mach_port_t) task_port; + + ipc_kmsg_copyout_dest(kmsg, space); + (void) ipc_kmsg_put(receiver->ith_msg, kmsg, + sizeof(mach_msg_header_t)); + thread_syscall_return(mr); + /*NOTREACHED*/ + } + + if (!ip_active(reply_port)) { + ip_unlock(reply_port); + goto abort_copyout; + } + + assert(reply_port->ip_sorights > 0); + ip_unlock(reply_port); + + { + register ipc_entry_t table; + register ipc_entry_t entry; + register mach_port_index_t index; + + /* optimized ipc_entry_get */ + + table = space->is_table; + index = table->ie_next; + + if (index == 0) + goto abort_copyout; + + entry = &table[index]; + table->ie_next = entry->ie_next; + entry->ie_request = 0; + + { + register mach_port_gen_t gen; + + assert((entry->ie_bits &~ IE_BITS_GEN_MASK) == 0); + gen = entry->ie_bits + IE_BITS_GEN_ONE; + + exc->Head.msgh_remote_port = MACH_PORT_MAKE(index, gen); + + /* optimized ipc_right_copyout */ + + entry->ie_bits = gen | (MACH_PORT_TYPE_SEND_ONCE | 1); + } + + entry->ie_object = (ipc_object_t) reply_port; + is_write_unlock(space); + } + + /* optimized ipc_object_copyout_dest */ + + assert(dest_port->ip_srights > 0); + ip_release(dest_port); + + exc->Head.msgh_local_port = + ((dest_port->ip_receiver == space) ? + dest_port->ip_receiver_name : MACH_PORT_NULL); + + if ((--dest_port->ip_srights == 0) && + (dest_port->ip_nsrequest != IP_NULL)) { + ipc_port_t nsrequest; + mach_port_mscount_t mscount; + + /* a rather rare case */ + + nsrequest = dest_port->ip_nsrequest; + mscount = dest_port->ip_mscount; + dest_port->ip_nsrequest = IP_NULL; + ip_unlock(dest_port); + + ipc_notify_no_senders(nsrequest, mscount); + } else + ip_unlock(dest_port); + + copyout_body: + /* + * Optimized version of ipc_kmsg_copyout_body, + * to handle the two ports in the body. 
+ */ + + mr = (ipc_kmsg_copyout_object(space, (ipc_object_t) thread_port, + MACH_MSG_TYPE_PORT_SEND, &exc->thread) | + ipc_kmsg_copyout_object(space, (ipc_object_t) task_port, + MACH_MSG_TYPE_PORT_SEND, &exc->task)); + if (mr != MACH_MSG_SUCCESS) { + (void) ipc_kmsg_put(receiver->ith_msg, kmsg, + kmsg->ikm_header.msgh_size); + thread_syscall_return(mr | MACH_RCV_BODY_ERROR); + /*NOTREACHED*/ + } + } + + /* + * Optimized version of ipc_kmsg_put. + * We must check ikm_cache after copyoutmsg. + */ + + ikm_check_initialized(kmsg, kmsg->ikm_size); + assert(kmsg->ikm_size == IKM_SAVED_KMSG_SIZE); + + if (copyoutmsg((vm_offset_t) &kmsg->ikm_header, (vm_offset_t)receiver->ith_msg, + sizeof(struct mach_exception)) || + (ikm_cache() != IKM_NULL)) { + mr = ipc_kmsg_put(receiver->ith_msg, kmsg, + kmsg->ikm_header.msgh_size); + thread_syscall_return(mr); + /*NOTREACHED*/ + } + + ikm_cache() = kmsg; + thread_syscall_return(MACH_MSG_SUCCESS); + /*NOTREACHED*/ +#ifndef __GNUC__ + return; /* help for the compiler */ +#endif + + slow_exception_raise: { + register struct mach_exception *exc = + (struct mach_exception *) &kmsg->ikm_header; + ipc_kmsg_t reply_kmsg; + mach_port_seqno_t reply_seqno; + + exception_raise_misses++; + + /* + * We hold the following resources, which must be consumed: + * kmsg, send-once right and ref for reply_port + * send rights for dest_port, thread_port, task_port + * Synthesize a kmsg to send. + */ + + exc->Head.msgh_bits = (MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND, + MACH_MSG_TYPE_PORT_SEND_ONCE) | + MACH_MSGH_BITS_COMPLEX); + exc->Head.msgh_size = sizeof *exc; + exc->Head.msgh_remote_port = (mach_port_t) dest_port; + exc->Head.msgh_local_port = (mach_port_t) reply_port; + exc->Head.msgh_seqno = 0; + exc->Head.msgh_id = MACH_EXCEPTION_ID; + exc->threadType = exc_port_proto; + exc->thread = (mach_port_t) thread_port; + exc->taskType = exc_port_proto; + exc->task = (mach_port_t) task_port; + exc->exceptionType = exc_code_proto; + exc->exception = _exception; + exc->codeType = exc_code_proto; + exc->code = code; + exc->subcodeType = exc_code_proto; + exc->subcode = subcode; + + ipc_mqueue_send_always(kmsg); + + /* + * We are left with a ref for reply_port, + * which we use to receive the reply message. + */ + + ip_lock(reply_port); + if (!ip_active(reply_port)) { + ip_unlock(reply_port); + exception_raise_continue_slow(MACH_RCV_PORT_DIED, IKM_NULL, /*dummy*/0); + /*NOTREACHED*/ + return; + } + + imq_lock(reply_mqueue); + ip_unlock(reply_port); + + mr = ipc_mqueue_receive(reply_mqueue, MACH_MSG_OPTION_NONE, + MACH_MSG_SIZE_MAX, + MACH_MSG_TIMEOUT_NONE, + FALSE, exception_raise_continue, + &reply_kmsg, &reply_seqno); + /* reply_mqueue is unlocked */ + + exception_raise_continue_slow(mr, reply_kmsg, reply_seqno); + /*NOTREACHED*/ + } +} + +mach_msg_type_t exc_RetCode_proto = { + /* msgt_name = */ MACH_MSG_TYPE_INTEGER_32, + /* msgt_size = */ 32, + /* msgt_number = */ 1, + /* msgt_inline = */ TRUE, + /* msgt_longform = */ FALSE, + /* msgt_deallocate = */ FALSE, + /* msgt_unused = */ 0 +}; + +/* + * Routine: exception_parse_reply + * Purpose: + * Parse and consume an exception reply message. + * Conditions: + * The destination port right has already been consumed. + * The message buffer and anything else in it is consumed. + * Returns: + * The reply return code. 
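+ *		(A well-formed reply is a mig_reply_header_t carrying a
+ *		send-once disposition, MACH_EXCEPTION_REPLY_ID and a 32-bit
+ *		RetCode; anything else is destroyed and reported as
+ *		MIG_REPLY_MISMATCH.)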
+ */ + +kern_return_t +exception_parse_reply(kmsg) + ipc_kmsg_t kmsg; +{ + register mig_reply_header_t *msg = + (mig_reply_header_t *) &kmsg->ikm_header; + kern_return_t kr; + + if ((msg->Head.msgh_bits != + MACH_MSGH_BITS(MACH_MSG_TYPE_PORT_SEND_ONCE, 0)) || + (msg->Head.msgh_size != sizeof *msg) || + (msg->Head.msgh_id != MACH_EXCEPTION_REPLY_ID) || + (* (int *) &msg->RetCodeType != * (int *) &exc_RetCode_proto)) { + /* + * Bozo user sent us a misformatted reply. + */ + + kmsg->ikm_header.msgh_remote_port = MACH_PORT_NULL; + ipc_kmsg_destroy(kmsg); + return MIG_REPLY_MISMATCH; + } + + kr = msg->RetCode; + + if ((kmsg->ikm_size == IKM_SAVED_KMSG_SIZE) && + (ikm_cache() == IKM_NULL)) + ikm_cache() = kmsg; + else + ikm_free(kmsg); + + return kr; +} + +/* + * Routine: exception_raise_continue + * Purpose: + * Continue after blocking for an exception. + * Conditions: + * Nothing locked. We are running on a new kernel stack, + * with the exception state saved in the thread. From here + * control goes back to user space. + * Returns: + * Doesn't return. + */ + +void +exception_raise_continue() +{ + ipc_thread_t self = current_thread(); + ipc_port_t reply_port = self->ith_port; + ipc_mqueue_t reply_mqueue = &reply_port->ip_messages; + ipc_kmsg_t kmsg; + mach_port_seqno_t seqno; + mach_msg_return_t mr; + + mr = ipc_mqueue_receive(reply_mqueue, MACH_MSG_OPTION_NONE, + MACH_MSG_SIZE_MAX, + MACH_MSG_TIMEOUT_NONE, + TRUE, exception_raise_continue, + &kmsg, &seqno); + /* reply_mqueue is unlocked */ + + exception_raise_continue_slow(mr, kmsg, seqno); + /*NOTREACHED*/ +} + +/* + * Routine: exception_raise_continue_slow + * Purpose: + * Continue after finishing an ipc_mqueue_receive + * for an exception reply message. + * Conditions: + * Nothing locked. We hold a ref for reply_port. + * Returns: + * Doesn't return. + */ + +void +exception_raise_continue_slow(mr, kmsg, seqno) + mach_msg_return_t mr; + ipc_kmsg_t kmsg; + mach_port_seqno_t seqno; +{ + ipc_thread_t self = current_thread(); + ipc_port_t reply_port = self->ith_port; + ipc_mqueue_t reply_mqueue = &reply_port->ip_messages; + + while (mr == MACH_RCV_INTERRUPTED) { + /* + * Somebody is trying to force this thread + * to a clean point. We must cooperate + * and then resume the receive. + */ + + while (thread_should_halt(self)) { + /* don't terminate while holding a reference */ + if (self->ast & AST_TERMINATE) + ipc_port_release(reply_port); + thread_halt_self(); + } + + ip_lock(reply_port); + if (!ip_active(reply_port)) { + ip_unlock(reply_port); + mr = MACH_RCV_PORT_DIED; + break; + } + + imq_lock(reply_mqueue); + ip_unlock(reply_port); + + mr = ipc_mqueue_receive(reply_mqueue, MACH_MSG_OPTION_NONE, + MACH_MSG_SIZE_MAX, + MACH_MSG_TIMEOUT_NONE, + FALSE, exception_raise_continue, + &kmsg, &seqno); + /* reply_mqueue is unlocked */ + } + ipc_port_release(reply_port); + + assert((mr == MACH_MSG_SUCCESS) || + (mr == MACH_RCV_PORT_DIED)); + + if (mr == MACH_MSG_SUCCESS) { + /* + * Consume the reply message. + */ + + ipc_port_release_sonce(reply_port); + mr = exception_parse_reply(kmsg); + } + + if ((mr == KERN_SUCCESS) || + (mr == MACH_RCV_PORT_DIED)) { + thread_exception_return(); + /*NOTREACHED*/ + return; + } + + if (self->ith_exc != KERN_SUCCESS) { + exception_try_task(self->ith_exc, + self->ith_exc_code, + self->ith_exc_subcode); + /*NOTREACHED*/ + return; + } + + exception_no_server(); + /*NOTREACHED*/ +} + +/* + * Routine: exception_raise_continue_fast + * Purpose: + * Special-purpose fast continuation for exceptions. 
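+ *		Called with the reply message already in hand (kmsg) and the
+ *		reply port locked: the send-once right and the reference
+ *		saved in self->ith_port are both released, then the reply is
+ *		parsed exactly as on the slow path.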
+ * Conditions: + * reply_port is locked and alive. + * kmsg is our reply message. + * Returns: + * Doesn't return. + */ + +void +exception_raise_continue_fast(reply_port, kmsg) + ipc_port_t reply_port; + ipc_kmsg_t kmsg; +{ + ipc_thread_t self = current_thread(); + kern_return_t kr; + + assert(ip_active(reply_port)); + assert(reply_port == self->ith_port); + assert(reply_port == (ipc_port_t) kmsg->ikm_header.msgh_remote_port); + assert(MACH_MSGH_BITS_REMOTE(kmsg->ikm_header.msgh_bits) == + MACH_MSG_TYPE_PORT_SEND_ONCE); + + /* + * Release the send-once right (from the message header) + * and the saved reference (from self->ith_port). + */ + + reply_port->ip_sorights--; + ip_release(reply_port); + ip_release(reply_port); + ip_unlock(reply_port); + + /* + * Consume the reply message. + */ + + kr = exception_parse_reply(kmsg); + if (kr == KERN_SUCCESS) { + thread_exception_return(); + /*NOTREACHED*/ + return; /* help for the compiler */ + } + + if (self->ith_exc != KERN_SUCCESS) { + exception_try_task(self->ith_exc, + self->ith_exc_code, + self->ith_exc_subcode); + /*NOTREACHED*/ + } + + exception_no_server(); + /*NOTREACHED*/ +} diff --git a/kern/host.c b/kern/host.c new file mode 100644 index 0000000..062f923 --- /dev/null +++ b/kern/host.c @@ -0,0 +1,380 @@ +/* + * Mach Operating System + * Copyright (c) 1993,1992,1991,1990,1989,1988 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * host.c + * + * Non-ipc host functions. + */ + +#include <cpus.h> +#include <mach_host.h> + +#include <kern/assert.h> +#include <kern/kalloc.h> +#include <kern/host.h> +#include <mach/host_info.h> +#include <mach/kern_return.h> +#include <mach/machine.h> +#include <mach/port.h> +#include <kern/processor.h> +#include <kern/ipc_host.h> + +#include <mach/vm_param.h> + + + +host_data_t realhost; + +kern_return_t host_processors( + host_t host, + processor_array_t *processor_list, + natural_t *countp) +{ + register int i; + register processor_t *tp; + vm_offset_t addr; + unsigned int count; + + if (host == HOST_NULL) + return KERN_INVALID_ARGUMENT; + + /* + * Determine how many processors we have. + * (This number shouldn't change.) 
+ */ + + count = 0; + for (i = 0; i < NCPUS; i++) + if (machine_slot[i].is_cpu) + count++; + + if (count == 0) + panic("host_processors"); + + addr = kalloc((vm_size_t) (count * sizeof(mach_port_t))); + if (addr == 0) + return KERN_RESOURCE_SHORTAGE; + + tp = (processor_t *) addr; + for (i = 0; i < NCPUS; i++) + if (machine_slot[i].is_cpu) + *tp++ = cpu_to_processor(i); + + *countp = count; + *processor_list = (mach_port_t *) addr; + + /* do the conversion that Mig should handle */ + + tp = (processor_t *) addr; + for (i = 0; i < count; i++) + ((mach_port_t *) tp)[i] = + (mach_port_t)convert_processor_to_port(tp[i]); + + return KERN_SUCCESS; +} + +kern_return_t host_info( + host_t host, + int flavor, + host_info_t info, + natural_t *count) +{ + register integer_t i, *slot_ptr; + + if (host == HOST_NULL) + return KERN_INVALID_ARGUMENT; + + switch(flavor) { + + case HOST_BASIC_INFO: + { + register host_basic_info_t basic_info; + + /* + * Basic information about this host. + */ + if (*count < HOST_BASIC_INFO_COUNT) + return KERN_FAILURE; + + basic_info = (host_basic_info_t) info; + + basic_info->max_cpus = machine_info.max_cpus; + basic_info->avail_cpus = machine_info.avail_cpus; + basic_info->memory_size = machine_info.memory_size; + basic_info->cpu_type = + machine_slot[master_processor->slot_num].cpu_type; + basic_info->cpu_subtype = + machine_slot[master_processor->slot_num].cpu_subtype; + + *count = HOST_BASIC_INFO_COUNT; + return KERN_SUCCESS; + } + + case HOST_PROCESSOR_SLOTS: + /* + * Return numbers of slots with active processors + * in them. + */ + if (*count < NCPUS) + return KERN_INVALID_ARGUMENT; + + slot_ptr = (integer_t *)info; + *count = 0; + for (i = 0; i < NCPUS; i++) { + if (machine_slot[i].is_cpu && + machine_slot[i].running) { + *slot_ptr++ = i; + (*count)++; + } + } + return KERN_SUCCESS; + + case HOST_SCHED_INFO: + { + register host_sched_info_t sched_info; + extern int tick; /* microseconds per clock tick */ + extern int min_quantum; + /* minimum quantum, in microseconds */ + + /* + * Return scheduler information. + */ + if (*count < HOST_SCHED_INFO_COUNT) + return(KERN_FAILURE); + + sched_info = (host_sched_info_t) info; + + sched_info->min_timeout = tick / 1000; + sched_info->min_quantum = min_quantum / 1000; + /* convert microseconds to milliseconds */ + + *count = HOST_SCHED_INFO_COUNT; + return KERN_SUCCESS; + } + + case HOST_LOAD_INFO: + { + register host_load_info_t load_info; + extern long avenrun[3], mach_factor[3]; + + if (*count < HOST_LOAD_INFO_COUNT) + return KERN_FAILURE; + + load_info = (host_load_info_t) info; + + bcopy((char *) avenrun, + (char *) load_info->avenrun, + sizeof avenrun); + bcopy((char *) mach_factor, + (char *) load_info->mach_factor, + sizeof mach_factor); + + *count = HOST_LOAD_INFO_COUNT; + return KERN_SUCCESS; + } + + default: + return KERN_INVALID_ARGUMENT; + } +} + +/* + * Return kernel version string (more than you ever + * wanted to know about what version of the kernel this is). + */ + +kern_return_t host_kernel_version( + host_t host, + kernel_version_t out_version) +{ + extern char version[]; + + if (host == HOST_NULL) + return KERN_INVALID_ARGUMENT; + + (void) strncpy(out_version, version, sizeof(kernel_version_t)); + + return KERN_SUCCESS; +} + +/* + * host_processor_sets: + * + * List all processor sets on the host. 
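+ *	(The MACH_HOST version below uses the usual size-then-retry
+ *	pattern: compute the space needed under all_psets_lock, drop the
+ *	lock to kalloc a larger buffer when necessary, and loop until the
+ *	buffer fits; an over-sized result is then copied to an exact-size
+ *	buffer and converted to port names.)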
+ */ +#if MACH_HOST +kern_return_t +host_processor_sets( + host_t host, + processor_set_name_array_t *pset_list, + natural_t *count) +{ + unsigned int actual; /* this many psets */ + processor_set_t pset; + processor_set_t *psets; + int i; + + vm_size_t size; + vm_size_t size_needed; + vm_offset_t addr; + + if (host == HOST_NULL) + return KERN_INVALID_ARGUMENT; + + size = 0; addr = 0; + + for (;;) { + simple_lock(&all_psets_lock); + actual = all_psets_count; + + /* do we have the memory we need? */ + + size_needed = actual * sizeof(mach_port_t); + if (size_needed <= size) + break; + + /* unlock and allocate more memory */ + simple_unlock(&all_psets_lock); + + if (size != 0) + kfree(addr, size); + + assert(size_needed > 0); + size = size_needed; + + addr = kalloc(size); + if (addr == 0) + return KERN_RESOURCE_SHORTAGE; + } + + /* OK, have memory and the all_psets_lock */ + + psets = (processor_set_t *) addr; + + for (i = 0, pset = (processor_set_t) queue_first(&all_psets); + i < actual; + i++, pset = (processor_set_t) queue_next(&pset->all_psets)) { + /* take ref for convert_pset_name_to_port */ + pset_reference(pset); + psets[i] = pset; + } + assert(queue_end(&all_psets, (queue_entry_t) pset)); + + /* can unlock now that we've got the pset refs */ + simple_unlock(&all_psets_lock); + + /* + * Always have default port. + */ + + assert(actual > 0); + + /* if we allocated too much, must copy */ + + if (size_needed < size) { + vm_offset_t newaddr; + + newaddr = kalloc(size_needed); + if (newaddr == 0) { + for (i = 0; i < actual; i++) + pset_deallocate(psets[i]); + kfree(addr, size); + return KERN_RESOURCE_SHORTAGE; + } + + bcopy((char *) addr, (char *) newaddr, size_needed); + kfree(addr, size); + psets = (processor_set_t *) newaddr; + } + + *pset_list = (mach_port_t *) psets; + *count = actual; + + /* do the conversion that Mig should handle */ + + for (i = 0; i < actual; i++) + ((mach_port_t *) psets)[i] = + (mach_port_t)convert_pset_name_to_port(psets[i]); + + return KERN_SUCCESS; +} +#else /* MACH_HOST */ +/* + * Only one processor set, the default processor set, in this case. + */ +kern_return_t +host_processor_sets( + host_t host, + processor_set_name_array_t *pset_list, + natural_t *count) +{ + vm_offset_t addr; + + if (host == HOST_NULL) + return KERN_INVALID_ARGUMENT; + + /* + * Allocate memory. Can be pageable because it won't be + * touched while holding a lock. + */ + + addr = kalloc((vm_size_t) sizeof(mach_port_t)); + if (addr == 0) + return KERN_RESOURCE_SHORTAGE; + + /* take for for convert_pset_name_to_port */ + pset_reference(&default_pset); + /* do the conversion that Mig should handle */ + *((mach_port_t *) addr) = + (mach_port_t) convert_pset_name_to_port(&default_pset); + + *pset_list = (mach_port_t *) addr; + *count = 1; + + return KERN_SUCCESS; +} +#endif /* MACH_HOST */ + +/* + * host_processor_set_priv: + * + * Return control port for given processor set. + */ +kern_return_t +host_processor_set_priv( + host_t host, + processor_set_t pset_name, + processor_set_t *pset) +{ + if ((host == HOST_NULL) || (pset_name == PROCESSOR_SET_NULL)) { + *pset = PROCESSOR_SET_NULL; + return KERN_INVALID_ARGUMENT; + } + + *pset = pset_name; + pset_reference(*pset); + return KERN_SUCCESS; +} diff --git a/kern/host.h b/kern/host.h new file mode 100644 index 0000000..0807f99 --- /dev/null +++ b/kern/host.h @@ -0,0 +1,48 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University + * All Rights Reserved. 
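/*
 * Illustrative user-level sketch (not from the original sources) for
 * host_processor_sets() above.  The MIG stub typically returns the
 * name array in freshly vm_allocate()d memory, so the caller releases
 * both the individual port names and the array itself.
 */
#include <mach.h>

void
list_and_release_psets(void)
{
	processor_set_name_array_t psets;
	natural_t count, i;

	if (host_processor_sets(mach_host_self(), &psets, &count)
	    != KERN_SUCCESS)
		return;

	for (i = 0; i < count; i++)
		(void) mach_port_deallocate(mach_task_self(), psets[i]);

	(void) vm_deallocate(mach_task_self(), (vm_offset_t) psets,
			     count * sizeof *psets);
}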
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * kern/host.h + * + * Definitions for host data structures. + * + */ + +#ifndef _KERN_HOST_H_ +#define _KERN_HOST_H_ + +struct host { + struct ipc_port *host_self; + struct ipc_port *host_priv_self; +}; + +typedef struct host *host_t; +typedef struct host host_data_t; + +#define HOST_NULL ((host_t)0) + +extern host_data_t realhost; + +#endif _KERN_HOST_H_ diff --git a/kern/ipc_host.c b/kern/ipc_host.c new file mode 100644 index 0000000..4dbf9fc --- /dev/null +++ b/kern/ipc_host.c @@ -0,0 +1,488 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * kern/ipc_host.c + * + * Routines to implement host ports. + */ + +#include <mach/message.h> +#include <kern/host.h> +#include <kern/processor.h> +#include <kern/task.h> +#include <kern/thread.h> +#include <kern/ipc_host.h> +#include <kern/ipc_kobject.h> +#include <ipc/ipc_port.h> +#include <ipc/ipc_space.h> + +#include <machine/machspl.h> /* for spl */ + + + +/* + * ipc_host_init: set up various things. + */ + +void ipc_host_init(void) +{ + ipc_port_t port; + /* + * Allocate and set up the two host ports. 
+ */ + port = ipc_port_alloc_kernel(); + if (port == IP_NULL) + panic("ipc_host_init"); + + ipc_kobject_set(port, (ipc_kobject_t) &realhost, IKOT_HOST); + realhost.host_self = port; + + port = ipc_port_alloc_kernel(); + if (port == IP_NULL) + panic("ipc_host_init"); + + ipc_kobject_set(port, (ipc_kobject_t) &realhost, IKOT_HOST_PRIV); + realhost.host_priv_self = port; + + /* + * Set up ipc for default processor set. + */ + ipc_pset_init(&default_pset); + ipc_pset_enable(&default_pset); + + /* + * And for master processor + */ + ipc_processor_init(master_processor); +} + +/* + * Routine: mach_host_self [mach trap] + * Purpose: + * Give the caller send rights for his own host port. + * Conditions: + * Nothing locked. + * Returns: + * MACH_PORT_NULL if there are any resource failures + * or other errors. + */ + +mach_port_t +mach_host_self(void) +{ + ipc_port_t sright; + + sright = ipc_port_make_send(realhost.host_self); + return ipc_port_copyout_send(sright, current_space()); +} + +#if MACH_IPC_COMPAT + +/* + * Routine: host_self [mach trap] + * Purpose: + * Give the caller send rights for his own host port. + * If new, the send right is marked with IE_BITS_COMPAT. + * Conditions: + * Nothing locked. + * Returns: + * MACH_PORT_NULL if there are any resource failures + * or other errors. + */ + +port_name_t +host_self(void) +{ + ipc_port_t sright; + + sright = ipc_port_make_send(realhost.host_self); + return (port_name_t) + ipc_port_copyout_send_compat(sright, current_space()); +} + +#endif MACH_IPC_COMPAT + +/* + * ipc_processor_init: + * + * Initialize ipc access to processor by allocating port. + * Enable ipc control of processor by setting port object. + */ + +void +ipc_processor_init( + processor_t processor) +{ + ipc_port_t port; + + port = ipc_port_alloc_kernel(); + if (port == IP_NULL) + panic("ipc_processor_init"); + processor->processor_self = port; + ipc_kobject_set(port, (ipc_kobject_t) processor, IKOT_PROCESSOR); +} + + +/* + * ipc_pset_init: + * + * Initialize ipc control of a processor set by allocating its ports. + */ + +void +ipc_pset_init( + processor_set_t pset) +{ + ipc_port_t port; + + port = ipc_port_alloc_kernel(); + if (port == IP_NULL) + panic("ipc_pset_init"); + pset->pset_self = port; + + port = ipc_port_alloc_kernel(); + if (port == IP_NULL) + panic("ipc_pset_init"); + pset->pset_name_self = port; +} + +/* + * ipc_pset_enable: + * + * Enable ipc access to a processor set. + */ +void +ipc_pset_enable( + processor_set_t pset) +{ + pset_lock(pset); + if (pset->active) { + ipc_kobject_set(pset->pset_self, + (ipc_kobject_t) pset, IKOT_PSET); + ipc_kobject_set(pset->pset_name_self, + (ipc_kobject_t) pset, IKOT_PSET_NAME); + pset_ref_lock(pset); + pset->ref_count += 2; + pset_ref_unlock(pset); + } + pset_unlock(pset); +} + +/* + * ipc_pset_disable: + * + * Disable ipc access to a processor set by clearing the port objects. + * Caller must hold pset lock and a reference to the pset. Ok to + * just decrement pset reference count as a result. + */ +void +ipc_pset_disable( + processor_set_t pset) +{ + ipc_kobject_set(pset->pset_self, IKO_NULL, IKOT_NONE); + ipc_kobject_set(pset->pset_name_self, IKO_NULL, IKOT_NONE); + pset->ref_count -= 2; +} + +/* + * ipc_pset_terminate: + * + * Processor set is dead. Deallocate the ipc control structures. 
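/*
 * Illustrative user-level sketch (not from the original sources):
 * combine the mach_host_self() trap above with host_kernel_version()
 * from host.c earlier in this commit.  Assumes the standard Mach user
 * headers and MIG stubs.
 */
#include <mach.h>
#include <stdio.h>

void
show_kernel_version(void)
{
	kernel_version_t version;

	if (host_kernel_version(mach_host_self(), version) == KERN_SUCCESS)
		printf("%s\n", version);
}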
+ */ +void +ipc_pset_terminate( + processor_set_t pset) +{ + ipc_port_dealloc_kernel(pset->pset_self); + ipc_port_dealloc_kernel(pset->pset_name_self); +} + +/* + * processor_set_default, processor_set_default_priv: + * + * Return ports for manipulating default_processor set. MiG code + * differentiates between these two routines. + */ +kern_return_t +processor_set_default( + host_t host, + processor_set_t *pset) +{ + if (host == HOST_NULL) + return KERN_INVALID_ARGUMENT; + + *pset = &default_pset; + pset_reference(*pset); + return KERN_SUCCESS; +} + +kern_return_t +xxx_processor_set_default_priv( + host_t host, + processor_set_t *pset) +{ + if (host == HOST_NULL) + return KERN_INVALID_ARGUMENT; + + *pset = &default_pset; + pset_reference(*pset); + return KERN_SUCCESS; +} + +/* + * Routine: convert_port_to_host + * Purpose: + * Convert from a port to a host. + * Doesn't consume the port ref; the host produced may be null. + * Conditions: + * Nothing locked. + */ + +host_t +convert_port_to_host( + ipc_port_t port) +{ + host_t host = HOST_NULL; + + if (IP_VALID(port)) { + ip_lock(port); + if (ip_active(port) && + ((ip_kotype(port) == IKOT_HOST) || + (ip_kotype(port) == IKOT_HOST_PRIV))) + host = (host_t) port->ip_kobject; + ip_unlock(port); + } + + return host; +} + +/* + * Routine: convert_port_to_host_priv + * Purpose: + * Convert from a port to a host. + * Doesn't consume the port ref; the host produced may be null. + * Conditions: + * Nothing locked. + */ + +host_t +convert_port_to_host_priv( + ipc_port_t port) +{ + host_t host = HOST_NULL; + + if (IP_VALID(port)) { + ip_lock(port); + if (ip_active(port) && + (ip_kotype(port) == IKOT_HOST_PRIV)) + host = (host_t) port->ip_kobject; + ip_unlock(port); + } + + return host; +} + +/* + * Routine: convert_port_to_processor + * Purpose: + * Convert from a port to a processor. + * Doesn't consume the port ref; + * the processor produced may be null. + * Conditions: + * Nothing locked. + */ + +processor_t +convert_port_to_processor( + ipc_port_t port) +{ + processor_t processor = PROCESSOR_NULL; + + if (IP_VALID(port)) { + ip_lock(port); + if (ip_active(port) && + (ip_kotype(port) == IKOT_PROCESSOR)) + processor = (processor_t) port->ip_kobject; + ip_unlock(port); + } + + return processor; +} + +/* + * Routine: convert_port_to_pset + * Purpose: + * Convert from a port to a pset. + * Doesn't consume the port ref; produces a pset ref, + * which may be null. + * Conditions: + * Nothing locked. + */ + +processor_set_t +convert_port_to_pset( + ipc_port_t port) +{ + processor_set_t pset = PROCESSOR_SET_NULL; + + if (IP_VALID(port)) { + ip_lock(port); + if (ip_active(port) && + (ip_kotype(port) == IKOT_PSET)) { + pset = (processor_set_t) port->ip_kobject; + pset_reference(pset); + } + ip_unlock(port); + } + + return pset; +} + +/* + * Routine: convert_port_to_pset_name + * Purpose: + * Convert from a port to a pset. + * Doesn't consume the port ref; produces a pset ref, + * which may be null. + * Conditions: + * Nothing locked. + */ + +processor_set_t +convert_port_to_pset_name( + ipc_port_t port) +{ + processor_set_t pset = PROCESSOR_SET_NULL; + + if (IP_VALID(port)) { + ip_lock(port); + if (ip_active(port) && + ((ip_kotype(port) == IKOT_PSET) || + (ip_kotype(port) == IKOT_PSET_NAME))) { + pset = (processor_set_t) port->ip_kobject; + pset_reference(pset); + } + ip_unlock(port); + } + + return pset; +} + +/* + * Routine: convert_host_to_port + * Purpose: + * Convert from a host to a port. + * Produces a naked send right which may be invalid. 
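/*
 * Sketch of the conversion pattern used by the convert_port_to_*
 * routines above, applied to a hypothetical kernel object type.
 * IKOT_WIDGET, widget_t and WIDGET_NULL are invented for illustration
 * and do not exist in this commit.  The port is locked only long
 * enough to inspect its kobject binding, and the caller's port
 * reference is not consumed.
 */
widget_t
convert_port_to_widget(ipc_port_t port)
{
	widget_t widget = WIDGET_NULL;

	if (IP_VALID(port)) {
		ip_lock(port);
		if (ip_active(port) &&
		    (ip_kotype(port) == IKOT_WIDGET))
			widget = (widget_t) port->ip_kobject;
		ip_unlock(port);
	}

	return widget;
}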
+ * Conditions: + * Nothing locked. + */ + +ipc_port_t +convert_host_to_port( + host_t host) +{ + ipc_port_t port; + + port = ipc_port_make_send(host->host_self); + + return port; +} + +/* + * Routine: convert_processor_to_port + * Purpose: + * Convert from a processor to a port. + * Produces a naked send right which is always valid. + * Conditions: + * Nothing locked. + */ + +ipc_port_t +convert_processor_to_port(processor_t processor) +{ + ipc_port_t port; + + port = ipc_port_make_send(processor->processor_self); + + return port; +} + +/* + * Routine: convert_pset_to_port + * Purpose: + * Convert from a pset to a port. + * Consumes a pset ref; produces a naked send right + * which may be invalid. + * Conditions: + * Nothing locked. + */ + +ipc_port_t +convert_pset_to_port( + processor_set_t pset) +{ + ipc_port_t port; + + pset_lock(pset); + if (pset->active) + port = ipc_port_make_send(pset->pset_self); + else + port = IP_NULL; + pset_unlock(pset); + + pset_deallocate(pset); + return port; +} + +/* + * Routine: convert_pset_name_to_port + * Purpose: + * Convert from a pset to a port. + * Consumes a pset ref; produces a naked send right + * which may be invalid. + * Conditions: + * Nothing locked. + */ + +ipc_port_t +convert_pset_name_to_port( + processor_set_t pset) +{ + ipc_port_t port; + + pset_lock(pset); + if (pset->active) + port = ipc_port_make_send(pset->pset_name_self); + else + port = IP_NULL; + pset_unlock(pset); + + pset_deallocate(pset); + return port; +} diff --git a/kern/ipc_host.h b/kern/ipc_host.h new file mode 100644 index 0000000..13c54cf --- /dev/null +++ b/kern/ipc_host.h @@ -0,0 +1,72 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. 
+ */ + +#ifndef _KERN_IPC_HOST_H_ +#define _KERN_IPC_HOST_H_ + +#include <mach/port.h> +#include <kern/processor.h> + +extern void ipc_host_init(void); + +extern void ipc_processor_init(processor_t); + +extern void ipc_pset_init(processor_set_t); +extern void ipc_pset_enable(processor_set_t); +extern void ipc_pset_disable(processor_set_t); +extern void ipc_pset_terminate(processor_set_t); + +extern struct host * +convert_port_to_host(struct ipc_port *); + +extern struct ipc_port * +convert_host_to_port(struct host *); + +extern struct host * +convert_port_to_host_priv(struct ipc_port *); + +extern processor_t +convert_port_to_processor(struct ipc_port *); + +extern struct ipc_port * +convert_processor_to_port(processor_t); + +extern processor_set_t +convert_port_to_pset(struct ipc_port *); + +extern struct ipc_port * +convert_pset_to_port(processor_set_t); + +extern processor_set_t +convert_port_to_pset_name(struct ipc_port *); + +extern struct ipc_port * +convert_pset_name_to_port(processor_set_t); + +#endif _KERN_IPC_HOST_H_ diff --git a/kern/ipc_kobject.c b/kern/ipc_kobject.c new file mode 100644 index 0000000..2b37205 --- /dev/null +++ b/kern/ipc_kobject.c @@ -0,0 +1,391 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + */ +/* + * File: kern/ipc_kobject.c + * Author: Rich Draves + * Date: 1989 + * + * Functions for letting a port represent a kernel object. + */ + +#include <mach_debug.h> +#include <mach_ipc_test.h> +#include <mach_machine_routines.h> +#include <norma_task.h> +#include <norma_vm.h> + +#include <mach/port.h> +#include <mach/kern_return.h> +#include <mach/message.h> +#include <mach/mig_errors.h> +#include <mach/notify.h> +#include <kern/ipc_kobject.h> +#include <ipc/ipc_object.h> +#include <ipc/ipc_kmsg.h> +#include <ipc/ipc_port.h> +#include <ipc/ipc_thread.h> + +#if MACH_MACHINE_ROUTINES +#include <machine/machine_routines.h> +#endif + + +/* + * Routine: ipc_kobject_server + * Purpose: + * Handle a message sent to the kernel. + * Generates a reply message. + * Conditions: + * Nothing locked. + */ + +ipc_kmsg_t +ipc_kobject_server(request) + ipc_kmsg_t request; +{ + mach_msg_size_t reply_size = ikm_less_overhead(8192); + ipc_kmsg_t reply; + kern_return_t kr; + mig_routine_t routine; + ipc_port_t *destp; + + reply = ikm_alloc(reply_size); + if (reply == IKM_NULL) { + printf("ipc_kobject_server: dropping request\n"); + ipc_kmsg_destroy(request); + return IKM_NULL; + } + ikm_init(reply, reply_size); + + /* + * Initialize reply message. 
+ */ + { +#define InP ((mach_msg_header_t *) &request->ikm_header) +#define OutP ((mig_reply_header_t *) &reply->ikm_header) + + static mach_msg_type_t RetCodeType = { + /* msgt_name = */ MACH_MSG_TYPE_INTEGER_32, + /* msgt_size = */ 32, + /* msgt_number = */ 1, + /* msgt_inline = */ TRUE, + /* msgt_longform = */ FALSE, + /* msgt_unused = */ 0 + }; + OutP->Head.msgh_bits = + MACH_MSGH_BITS(MACH_MSGH_BITS_LOCAL(InP->msgh_bits), 0); + OutP->Head.msgh_size = sizeof(mig_reply_header_t); + OutP->Head.msgh_remote_port = InP->msgh_local_port; + OutP->Head.msgh_local_port = MACH_PORT_NULL; + OutP->Head.msgh_seqno = 0; + OutP->Head.msgh_id = InP->msgh_id + 100; +#if 0 + if (InP->msgh_id) { + static long _calls; + static struct { long id, count; } _counts[512]; + int i, id; + + id = InP->msgh_id; + for (i = 0; i < 511; i++) { + if (_counts[i].id == 0) { + _counts[i].id = id; + _counts[i].count++; + break; + } + if (_counts[i].id == id) { + _counts[i].count++; + break; + } + } + if (i == 511) { + _counts[i].id = id; + _counts[i].count++; + } + if ((++_calls & 0x7fff) == 0) + for (i = 0; i < 512; i++) { + if (_counts[i].id == 0) + break; + printf("%d: %d\n", + _counts[i].id, _counts[i].count); + } + } +#endif + + OutP->RetCodeType = RetCodeType; + +#undef InP +#undef OutP + } + + /* + * Find the server routine to call, and call it + * to perform the kernel function + */ + { + extern mig_routine_t mach_server_routine(), + mach_port_server_routine(), + mach_host_server_routine(), + device_server_routine(), + device_pager_server_routine(), + mach4_server_routine(); +#if MACH_DEBUG + extern mig_routine_t mach_debug_server_routine(); +#endif +#if NORMA_TASK + extern mig_routine_t mach_norma_server_routine(); + extern mig_routine_t norma_internal_server_routine(); +#endif +#if NORMA_VM + extern mig_routine_t proxy_server_routine(); +#endif + +#if MACH_MACHINE_ROUTINES + extern mig_routine_t MACHINE_SERVER_ROUTINE(); +#endif + + check_simple_locks(); + if ((routine = mach_server_routine(&request->ikm_header)) != 0 + || (routine = mach_port_server_routine(&request->ikm_header)) != 0 + || (routine = mach_host_server_routine(&request->ikm_header)) != 0 + || (routine = device_server_routine(&request->ikm_header)) != 0 + || (routine = device_pager_server_routine(&request->ikm_header)) != 0 +#if MACH_DEBUG + || (routine = mach_debug_server_routine(&request->ikm_header)) != 0 +#endif MACH_DEBUG +#if NORMA_TASK + || (routine = mach_norma_server_routine(&request->ikm_header)) != 0 + || (routine = norma_internal_server_routine(&request->ikm_header)) != 0 +#endif NORMA_TASK +#if NORMA_VM + || (routine = proxy_server_routine(&request->ikm_header)) != 0 +#endif NORMA_VM + || (routine = mach4_server_routine(&request->ikm_header)) != 0 +#if MACH_MACHINE_ROUTINES + || (routine = MACHINE_SERVER_ROUTINE(&request->ikm_header)) != 0 +#endif MACH_MACHINE_ROUTINES + ) { + (*routine)(&request->ikm_header, &reply->ikm_header); + } + else if (!ipc_kobject_notify(&request->ikm_header,&reply->ikm_header)){ + ((mig_reply_header_t *) &reply->ikm_header)->RetCode + = MIG_BAD_ID; +#if MACH_IPC_TEST + printf("ipc_kobject_server: bogus kernel message, id=%d\n", + request->ikm_header.msgh_id); +#endif MACH_IPC_TEST + } + } + check_simple_locks(); + + /* + * Destroy destination. The following code differs from + * ipc_object_destroy in that we release the send-once + * right instead of generating a send-once notification + * (which would bring us here again, creating a loop). 
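/*
 * The reply header initialized above follows the MIG convention: the
 * reply msgh_id is the request msgh_id plus 100, and the body starts
 * with a 32-bit return code.  Sketch (not from the original sources)
 * of how a hand-rolled client might validate such a reply; the
 * request_id value and the mach_msg() exchange that produced 'reply'
 * are assumed to come from the caller.
 */
kern_return_t
check_mig_reply(mig_reply_header_t *reply, mach_msg_id_t request_id)
{
	if (reply->Head.msgh_id != request_id + 100)
		return MIG_REPLY_MISMATCH;

	return reply->RetCode;
}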
+ * It also differs in that we only expect send or + * send-once rights, never receive rights. + * + * We set msgh_remote_port to IP_NULL so that the kmsg + * destroy routines don't try to destroy the port twice. + */ + destp = (ipc_port_t *) &request->ikm_header.msgh_remote_port; + switch (MACH_MSGH_BITS_REMOTE(request->ikm_header.msgh_bits)) { + case MACH_MSG_TYPE_PORT_SEND: + ipc_port_release_send(*destp); + break; + + case MACH_MSG_TYPE_PORT_SEND_ONCE: + ipc_port_release_sonce(*destp); + break; + + default: +#if MACH_ASSERT + assert(!"ipc_object_destroy: strange destination rights"); +#else + panic("ipc_object_destroy: strange destination rights"); +#endif + } + *destp = IP_NULL; + + kr = ((mig_reply_header_t *) &reply->ikm_header)->RetCode; + if ((kr == KERN_SUCCESS) || (kr == MIG_NO_REPLY)) { + /* + * The server function is responsible for the contents + * of the message. The reply port right is moved + * to the reply message, and we have deallocated + * the destination port right, so we just need + * to free the kmsg. + */ + + /* like ipc_kmsg_put, but without the copyout */ + + ikm_check_initialized(request, request->ikm_size); + if ((request->ikm_size == IKM_SAVED_KMSG_SIZE) && + (ikm_cache() == IKM_NULL)) + ikm_cache() = request; + else + ikm_free(request); + } else { + /* + * The message contents of the request are intact. + * Destroy everthing except the reply port right, + * which is needed in the reply message. + */ + + request->ikm_header.msgh_local_port = MACH_PORT_NULL; + ipc_kmsg_destroy(request); + } + + if (kr == MIG_NO_REPLY) { + /* + * The server function will send a reply message + * using the reply port right, which it has saved. + */ + + ikm_free(reply); + return IKM_NULL; + } else if (!IP_VALID((ipc_port_t)reply->ikm_header.msgh_remote_port)) { + /* + * Can't queue the reply message if the destination + * (the reply port) isn't valid. + */ + + ipc_kmsg_destroy(reply); + return IKM_NULL; + } + + return reply; +} + +/* + * Routine: ipc_kobject_set + * Purpose: + * Make a port represent a kernel object of the given type. + * The caller is responsible for handling refs for the + * kernel object, if necessary. + * Conditions: + * Nothing locked. The port must be active. + */ + +void +ipc_kobject_set(port, kobject, type) + ipc_port_t port; + ipc_kobject_t kobject; + ipc_kobject_type_t type; +{ + ip_lock(port); + assert(ip_active(port)); + port->ip_bits = (port->ip_bits &~ IO_BITS_KOTYPE) | type; + port->ip_kobject = kobject; + ip_unlock(port); +} + +/* + * Routine: ipc_kobject_destroy + * Purpose: + * Release any kernel object resources associated + * with the port, which is being destroyed. + * + * This should only be needed when resources are + * associated with a user's port. In the normal case, + * when the kernel is the receiver, the code calling + * ipc_port_dealloc_kernel should clean up the resources. + * Conditions: + * The port is not locked, but it is dead. + */ + +void +ipc_kobject_destroy( + ipc_port_t port) +{ + switch (ip_kotype(port)) { + case IKOT_PAGER: + vm_object_destroy(port); + break; + + case IKOT_PAGER_TERMINATING: + vm_object_pager_wakeup(port); + break; + + default: +#if MACH_ASSERT + printf("ipc_kobject_destroy: port 0x%x, kobj 0x%x, type %d\n", + port, port->ip_kobject, ip_kotype(port)); +#endif MACH_ASSERT + break; + } +} + +/* + * Routine: ipc_kobject_notify + * Purpose: + * Deliver notifications to kobjects that care about them. 
+ */ + +boolean_t +ipc_kobject_notify(request_header, reply_header) + mach_msg_header_t *request_header; + mach_msg_header_t *reply_header; +{ + ipc_port_t port = (ipc_port_t) request_header->msgh_remote_port; + + ((mig_reply_header_t *) reply_header)->RetCode = MIG_NO_REPLY; + switch (request_header->msgh_id) { + case MACH_NOTIFY_PORT_DELETED: + case MACH_NOTIFY_MSG_ACCEPTED: + case MACH_NOTIFY_PORT_DESTROYED: + case MACH_NOTIFY_NO_SENDERS: + case MACH_NOTIFY_SEND_ONCE: + case MACH_NOTIFY_DEAD_NAME: + break; + + default: + return FALSE; + } + switch (ip_kotype(port)) { +#if NORMA_VM + case IKOT_XMM_OBJECT: + return xmm_object_notify(request_header); + + case IKOT_XMM_PAGER: + return xmm_pager_notify(request_header); + + case IKOT_XMM_KERNEL: + return xmm_kernel_notify(request_header); + + case IKOT_XMM_REPLY: + return xmm_reply_notify(request_header); +#endif NORMA_VM + + case IKOT_DEVICE: + return ds_notify(request_header); + + default: + return FALSE; + } +} diff --git a/kern/ipc_kobject.h b/kern/ipc_kobject.h new file mode 100644 index 0000000..91eb30f --- /dev/null +++ b/kern/ipc_kobject.h @@ -0,0 +1,118 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + */ +/* + * File: kern/ipc_kobject.h + * Author: Rich Draves + * Date: 1989 + * + * Declarations for letting a port represent a kernel object. 
+ */ + +#include <ipc/ipc_kmsg.h> +#include <ipc/ipc_types.h> + +#ifndef _KERN_IPC_KOBJECT_H_ +#define _KERN_IPC_KOBJECT_H_ + +#include <mach/machine/vm_types.h> + +typedef vm_offset_t ipc_kobject_t; + +#define IKO_NULL ((ipc_kobject_t) 0) + +typedef unsigned int ipc_kobject_type_t; + +#define IKOT_NONE 0 +#define IKOT_THREAD 1 +#define IKOT_TASK 2 +#define IKOT_HOST 3 +#define IKOT_HOST_PRIV 4 +#define IKOT_PROCESSOR 5 +#define IKOT_PSET 6 +#define IKOT_PSET_NAME 7 +#define IKOT_PAGER 8 +#define IKOT_PAGING_REQUEST 9 +#define IKOT_DEVICE 10 +#define IKOT_XMM_OBJECT 11 +#define IKOT_XMM_PAGER 12 +#define IKOT_XMM_KERNEL 13 +#define IKOT_XMM_REPLY 14 +#define IKOT_PAGER_TERMINATING 15 +#define IKOT_PAGING_NAME 16 +#define IKOT_HOST_SECURITY 17 +#define IKOT_LEDGER 18 +#define IKOT_MASTER_DEVICE 19 +#define IKOT_ACT 20 +#define IKOT_SUBSYSTEM 21 +#define IKOT_IO_DONE_QUEUE 22 +#define IKOT_SEMAPHORE 23 +#define IKOT_LOCK_SET 24 +#define IKOT_CLOCK 25 +#define IKOT_CLOCK_CTRL 26 + /* << new entries here */ +#define IKOT_UNKNOWN 27 /* magic catchall */ +#define IKOT_MAX_TYPE 28 /* # of IKOT_ types */ + /* Please keep ipc/ipc_object.c:ikot_print_array up to date */ + +#define is_ipc_kobject(ikot) (ikot != IKOT_NONE) + +/* + * Define types of kernel objects that use page lists instead + * of entry lists for copyin of out of line memory. + */ + +#define ipc_kobject_vm_page_list(ikot) \ + ((ikot == IKOT_PAGING_REQUEST) || (ikot == IKOT_DEVICE)) + +#define ipc_kobject_vm_page_steal(ikot) (ikot == IKOT_PAGING_REQUEST) + +/* Initialize kernel server dispatch table */ +/* XXX +extern void mig_init(void); +*/ + +/* Dispatch a kernel server function */ +extern ipc_kmsg_t ipc_kobject_server( + ipc_kmsg_t request); + +/* Make a port represent a kernel object of the given type */ +extern void ipc_kobject_set( + ipc_port_t port, + ipc_kobject_t kobject, + ipc_kobject_type_t type); + +/* Release any kernel object resources associated with a port */ +extern void ipc_kobject_destroy( + ipc_port_t port); + +#define null_conversion(port) (port) + +#endif /* _KERN_IPC_KOBJECT_H_ */ diff --git a/kern/ipc_mig.c b/kern/ipc_mig.c new file mode 100644 index 0000000..ed5df1f --- /dev/null +++ b/kern/ipc_mig.c @@ -0,0 +1,1134 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. 
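/*
 * Sketch of how a new kernel object type would be bound to a port,
 * mirroring ipc_processor_init() in ipc_host.c and the IKOT_ list and
 * ipc_kobject_set() interface declared above.  struct widget, its
 * widget_self field and IKOT_WIDGET are invented for illustration
 * (the same hypothetical type as in the conversion sketch earlier);
 * a real type would also need its own IKOT_ value and an entry in
 * ipc/ipc_object.c:ikot_print_array.
 */
struct widget {
	ipc_port_t widget_self;		/* invented example field */
	/* ... other object state ... */
};

void
ipc_widget_init(struct widget *w)
{
	ipc_port_t port;

	port = ipc_port_alloc_kernel();
	if (port == IP_NULL)
		panic("ipc_widget_init");

	w->widget_self = port;
	ipc_kobject_set(port, (ipc_kobject_t) w, IKOT_WIDGET);
}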
+ */ + +#include <norma_vm.h> + +#include <mach/boolean.h> +#include <mach/port.h> +#include <mach/message.h> +#include <mach/thread_status.h> +#include <kern/ast.h> +#include <kern/ipc_tt.h> +#include <kern/thread.h> +#include <kern/task.h> +#include <kern/ipc_kobject.h> +#include <vm/vm_map.h> +#include <vm/vm_user.h> +#include <ipc/port.h> +#include <ipc/ipc_kmsg.h> +#include <ipc/ipc_entry.h> +#include <ipc/ipc_object.h> +#include <ipc/ipc_mqueue.h> +#include <ipc/ipc_space.h> +#include <ipc/ipc_port.h> +#include <ipc/ipc_pset.h> +#include <ipc/ipc_thread.h> +#include <device/device_types.h> + + +/* + * Routine: mach_msg_send_from_kernel + * Purpose: + * Send a message from the kernel. + * + * This is used by the client side of KernelUser interfaces + * to implement SimpleRoutines. Currently, this includes + * device_reply and memory_object messages. + * Conditions: + * Nothing locked. + * Returns: + * MACH_MSG_SUCCESS Sent the message. + * MACH_SEND_INVALID_DATA Bad destination port. + */ + +mach_msg_return_t +mach_msg_send_from_kernel( + mach_msg_header_t *msg, + mach_msg_size_t send_size) +{ + ipc_kmsg_t kmsg; + mach_msg_return_t mr; + + if (!MACH_PORT_VALID(msg->msgh_remote_port)) + return MACH_SEND_INVALID_DEST; + + mr = ipc_kmsg_get_from_kernel(msg, send_size, &kmsg); + if (mr != MACH_MSG_SUCCESS) + panic("mach_msg_send_from_kernel"); + + ipc_kmsg_copyin_from_kernel(kmsg); + ipc_mqueue_send_always(kmsg); + + return MACH_MSG_SUCCESS; +} + +mach_msg_return_t +mach_msg_rpc_from_kernel(msg, send_size, reply_size) + mach_msg_header_t *msg; + mach_msg_size_t send_size; + mach_msg_size_t reply_size; +{ + panic("mach_msg_rpc_from_kernel"); /*XXX*/ +} + +#if NORMA_VM +/* + * Routine: mach_msg_rpc_from_kernel + * Purpose: + * Send a message from the kernel and receive a reply. + * Uses ith_rpc_reply for the reply port. + * + * This is used by the client side of KernelUser interfaces + * to implement Routines. + * Conditions: + * Nothing locked. + * Returns: + * MACH_MSG_SUCCESS Sent the message. + * MACH_RCV_PORT_DIED The reply port was deallocated. 
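/*
 * Kernel-internal sketch (not from the original sources) of a caller
 * of mach_msg_send_from_kernel() above, sending a headers-only
 * SimpleRoutine-style message.  It assumes the caller holds a naked
 * send right for notify_port and that, per the kernel-internal
 * convention, the header's port field carries the ipc_port_t pointer
 * itself; the msgh_id value is arbitrary for illustration.
 */
void
send_example_notification(ipc_port_t notify_port)
{
	mach_msg_header_t msg;

	msg.msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, 0);
	msg.msgh_size = sizeof msg;
	msg.msgh_remote_port = (mach_port_t) notify_port;
	msg.msgh_local_port = MACH_PORT_NULL;
	msg.msgh_seqno = 0;
	msg.msgh_id = 2400;		/* arbitrary example id */

	(void) mach_msg_send_from_kernel(&msg, sizeof msg);
}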
+ */ + +mach_msg_return_t +mach_msg_rpc_from_kernel( + mach_msg_header_t *msg, + mach_msg_size_t send_size, + mach_msg_size_t rcv_size) +{ + ipc_thread_t self = current_thread(); + ipc_port_t reply; + ipc_kmsg_t kmsg; + mach_port_seqno_t seqno; + mach_msg_return_t mr; + + assert(MACH_PORT_VALID(msg->msgh_remote_port)); + assert(msg->msgh_local_port == MACH_PORT_NULL); + + mr = ipc_kmsg_get_from_kernel(msg, send_size, &kmsg); + if (mr != MACH_MSG_SUCCESS) + panic("mach_msg_rpc_from_kernel"); + + ipc_kmsg_copyin_from_kernel(kmsg); + + ith_lock(self); + assert(self->ith_self != IP_NULL); + + reply = self->ith_rpc_reply; + if (reply == IP_NULL) { + ith_unlock(self); + reply = ipc_port_alloc_reply(); + ith_lock(self); + if ((reply == IP_NULL) || + (self->ith_rpc_reply != IP_NULL)) + panic("mach_msg_rpc_from_kernel"); + self->ith_rpc_reply = reply; + } + + /* insert send-once right for the reply port */ + kmsg->ikm_header.msgh_local_port = + (mach_port_t) ipc_port_make_sonce(reply); + + ipc_port_reference(reply); + ith_unlock(self); + + ipc_mqueue_send_always(kmsg); + + for (;;) { + ipc_mqueue_t mqueue; + + ip_lock(reply); + if (!ip_active(reply)) { + ip_unlock(reply); + ipc_port_release(reply); + return MACH_RCV_PORT_DIED; + } + + assert(reply->ip_pset == IPS_NULL); + mqueue = &reply->ip_messages; + imq_lock(mqueue); + ip_unlock(reply); + + mr = ipc_mqueue_receive(mqueue, MACH_MSG_OPTION_NONE, + MACH_MSG_SIZE_MAX, + MACH_MSG_TIMEOUT_NONE, + FALSE, IMQ_NULL_CONTINUE, + &kmsg, &seqno); + /* mqueue is unlocked */ + if (mr == MACH_MSG_SUCCESS) + break; + + assert((mr == MACH_RCV_INTERRUPTED) || + (mr == MACH_RCV_PORT_DIED)); + + while (thread_should_halt(self)) { + /* don't terminate while holding a reference */ + if (self->ast & AST_TERMINATE) + ipc_port_release(reply); + thread_halt_self(); + } + } + ipc_port_release(reply); + + kmsg->ikm_header.msgh_seqno = seqno; + + if (rcv_size < kmsg->ikm_header.msgh_size) { + ipc_kmsg_copyout_dest(kmsg, ipc_space_reply); + ipc_kmsg_put_to_kernel(msg, kmsg, kmsg->ikm_header.msgh_size); + return MACH_RCV_TOO_LARGE; + } + + /* + * We want to preserve rights and memory in reply! + * We don't have to put them anywhere; just leave them + * as they are. + */ + + ipc_kmsg_copyout_to_kernel(kmsg, ipc_space_reply); + ipc_kmsg_put_to_kernel(msg, kmsg, kmsg->ikm_header.msgh_size); + return MACH_MSG_SUCCESS; +} +#endif NORMA_VM + +/* + * Routine: mach_msg_abort_rpc + * Purpose: + * Destroy the thread's ith_rpc_reply port. + * This will interrupt a mach_msg_rpc_from_kernel + * with a MACH_RCV_PORT_DIED return code. + * Conditions: + * Nothing locked. + */ + +void +mach_msg_abort_rpc(thread) + ipc_thread_t thread; +{ + ipc_port_t reply = IP_NULL; + + ith_lock(thread); + if (thread->ith_self != IP_NULL) { + reply = thread->ith_rpc_reply; + thread->ith_rpc_reply = IP_NULL; + } + ith_unlock(thread); + + if (reply != IP_NULL) + ipc_port_dealloc_reply(reply); +} + +/* + * Routine: mach_msg + * Purpose: + * Like mach_msg_trap except that message buffers + * live in kernel space. Doesn't handle any options. + * + * This is used by in-kernel server threads to make + * kernel calls, to receive request messages, and + * to send reply messages. + * Conditions: + * Nothing locked. 
+ * Returns: + */ + +mach_msg_return_t +mach_msg(msg, option, send_size, rcv_size, rcv_name, time_out, notify) + mach_msg_header_t *msg; + mach_msg_option_t option; + mach_msg_size_t send_size; + mach_msg_size_t rcv_size; + mach_port_t rcv_name; + mach_msg_timeout_t time_out; + mach_port_t notify; +{ + ipc_space_t space = current_space(); + vm_map_t map = current_map(); + ipc_kmsg_t kmsg; + mach_port_seqno_t seqno; + mach_msg_return_t mr; + + if (option & MACH_SEND_MSG) { + mr = ipc_kmsg_get_from_kernel(msg, send_size, &kmsg); + if (mr != MACH_MSG_SUCCESS) + panic("mach_msg"); + + mr = ipc_kmsg_copyin(kmsg, space, map, MACH_PORT_NULL); + if (mr != MACH_MSG_SUCCESS) { + ikm_free(kmsg); + return mr; + } + + do + mr = ipc_mqueue_send(kmsg, MACH_MSG_OPTION_NONE, + MACH_MSG_TIMEOUT_NONE); + while (mr == MACH_SEND_INTERRUPTED); + assert(mr == MACH_MSG_SUCCESS); + } + + if (option & MACH_RCV_MSG) { + do { + ipc_object_t object; + ipc_mqueue_t mqueue; + + mr = ipc_mqueue_copyin(space, rcv_name, + &mqueue, &object); + if (mr != MACH_MSG_SUCCESS) + return mr; + /* hold ref for object; mqueue is locked */ + + mr = ipc_mqueue_receive(mqueue, MACH_MSG_OPTION_NONE, + MACH_MSG_SIZE_MAX, + MACH_MSG_TIMEOUT_NONE, + FALSE, IMQ_NULL_CONTINUE, + &kmsg, &seqno); + /* mqueue is unlocked */ + ipc_object_release(object); + } while (mr == MACH_RCV_INTERRUPTED); + if (mr != MACH_MSG_SUCCESS) + return mr; + + kmsg->ikm_header.msgh_seqno = seqno; + + if (rcv_size < kmsg->ikm_header.msgh_size) { + ipc_kmsg_copyout_dest(kmsg, space); + ipc_kmsg_put_to_kernel(msg, kmsg, sizeof *msg); + return MACH_RCV_TOO_LARGE; + } + + mr = ipc_kmsg_copyout(kmsg, space, map, MACH_PORT_NULL); + if (mr != MACH_MSG_SUCCESS) { + if ((mr &~ MACH_MSG_MASK) == MACH_RCV_BODY_ERROR) { + ipc_kmsg_put_to_kernel(msg, kmsg, + kmsg->ikm_header.msgh_size); + } else { + ipc_kmsg_copyout_dest(kmsg, space); + ipc_kmsg_put_to_kernel(msg, kmsg, sizeof *msg); + } + + return mr; + } + + ipc_kmsg_put_to_kernel(msg, kmsg, kmsg->ikm_header.msgh_size); + } + + return MACH_MSG_SUCCESS; +} + +/* + * Routine: mig_get_reply_port + * Purpose: + * Called by client side interfaces living in the kernel + * to get a reply port. This port is used for + * mach_msg() calls which are kernel calls. + */ + +mach_port_t +mig_get_reply_port(void) +{ + ipc_thread_t self = current_thread(); + + if (self->ith_mig_reply == MACH_PORT_NULL) + self->ith_mig_reply = mach_reply_port(); + + return self->ith_mig_reply; +} + +/* + * Routine: mig_dealloc_reply_port + * Purpose: + * Called by client side interfaces to get rid of a reply port. + * Shouldn't ever be called inside the kernel, because + * kernel calls shouldn't prompt Mig to call it. + */ + +void +mig_dealloc_reply_port( + mach_port_t reply_port) +{ + panic("mig_dealloc_reply_port"); +} + +/* + * Routine: mig_put_reply_port + * Purpose: + * Called by client side interfaces after each RPC to + * let the client recycle the reply port if it wishes. + */ +void +mig_put_reply_port( + mach_port_t reply_port) +{ +} + +/* + * mig_strncpy.c - by Joshua Block + * + * mig_strncp -- Bounded string copy. Does what the library routine strncpy + * OUGHT to do: Copies the (null terminated) string in src into dest, a + * buffer of length len. Assures that the copy is still null terminated + * and doesn't overflow the buffer, truncating the copy if necessary. + * + * Parameters: + * + * dest - Pointer to destination buffer. + * + * src - Pointer to source string. + * + * len - Length of destination buffer. 
+ */ +void mig_strncpy(dest, src, len) +char *dest, *src; +int len; +{ + int i; + + if (len <= 0) + return; + + for (i=1; i<len; i++) + if (! (*dest++ = *src++)) + return; + + *dest = '\0'; + return; +} + +#define fast_send_right_lookup(name, port, abort) \ +MACRO_BEGIN \ + register ipc_space_t space = current_space(); \ + register ipc_entry_t entry; \ + register mach_port_index_t index = MACH_PORT_INDEX(name); \ + \ + is_read_lock(space); \ + assert(space->is_active); \ + \ + if ((index >= space->is_table_size) || \ + (((entry = &space->is_table[index])->ie_bits & \ + (IE_BITS_GEN_MASK|MACH_PORT_TYPE_SEND)) != \ + (MACH_PORT_GEN(name) | MACH_PORT_TYPE_SEND))) { \ + is_read_unlock(space); \ + abort; \ + } \ + \ + port = (ipc_port_t) entry->ie_object; \ + assert(port != IP_NULL); \ + \ + ip_lock(port); \ + /* can safely unlock space now that port is locked */ \ + is_read_unlock(space); \ +MACRO_END + +device_t +port_name_to_device(name) + mach_port_t name; +{ + register ipc_port_t port; + register device_t device; + + fast_send_right_lookup(name, port, goto abort); + /* port is locked */ + + /* + * Now map the port object to a device object. + * This is an inline version of dev_port_lookup(). + */ + if (ip_active(port) && (ip_kotype(port) == IKOT_DEVICE)) { + device = (device_t) port->ip_kobject; + device_reference(device); + ip_unlock(port); + return device; + } + + ip_unlock(port); + return DEVICE_NULL; + + /* + * The slow case. The port wasn't easily accessible. + */ + abort: { + ipc_port_t kern_port; + kern_return_t kr; + + kr = ipc_object_copyin(current_space(), name, + MACH_MSG_TYPE_COPY_SEND, + (ipc_object_t *) &kern_port); + if (kr != KERN_SUCCESS) + return DEVICE_NULL; + + device = dev_port_lookup(kern_port); + if (IP_VALID(kern_port)) + ipc_port_release_send(kern_port); + return device; + } +} + +thread_t +port_name_to_thread(name) + mach_port_t name; +{ + register ipc_port_t port; + + fast_send_right_lookup(name, port, goto abort); + /* port is locked */ + + if (ip_active(port) && + (ip_kotype(port) == IKOT_THREAD)) { + register thread_t thread; + + thread = (thread_t) port->ip_kobject; + assert(thread != THREAD_NULL); + + /* thread referencing is a bit complicated, + so don't bother to expand inline */ + thread_reference(thread); + ip_unlock(port); + + return thread; + } + + ip_unlock(port); + return THREAD_NULL; + + abort: { + thread_t thread; + ipc_port_t kern_port; + kern_return_t kr; + + kr = ipc_object_copyin(current_space(), name, + MACH_MSG_TYPE_COPY_SEND, + (ipc_object_t *) &kern_port); + if (kr != KERN_SUCCESS) + return THREAD_NULL; + + thread = convert_port_to_thread(kern_port); + if (IP_VALID(kern_port)) + ipc_port_release_send(kern_port); + + return thread; + } +} + +task_t +port_name_to_task(name) + mach_port_t name; +{ + register ipc_port_t port; + + fast_send_right_lookup(name, port, goto abort); + /* port is locked */ + + if (ip_active(port) && + (ip_kotype(port) == IKOT_TASK)) { + register task_t task; + + task = (task_t) port->ip_kobject; + assert(task != TASK_NULL); + + task_lock(task); + /* can safely unlock port now that task is locked */ + ip_unlock(port); + + task->ref_count++; + task_unlock(task); + + return task; + } + + ip_unlock(port); + return TASK_NULL; + + abort: { + task_t task; + ipc_port_t kern_port; + kern_return_t kr; + + kr = ipc_object_copyin(current_space(), name, + MACH_MSG_TYPE_COPY_SEND, + (ipc_object_t *) &kern_port); + if (kr != KERN_SUCCESS) + return TASK_NULL; + + task = convert_port_to_task(kern_port); + if (IP_VALID(kern_port)) + 
ipc_port_release_send(kern_port); + + return task; + } +} + +vm_map_t +port_name_to_map( + mach_port_t name) +{ + register ipc_port_t port; + + fast_send_right_lookup(name, port, goto abort); + /* port is locked */ + + if (ip_active(port) && + (ip_kotype(port) == IKOT_TASK)) { + register vm_map_t map; + + map = ((task_t) port->ip_kobject)->map; + assert(map != VM_MAP_NULL); + + simple_lock(&map->ref_lock); + /* can safely unlock port now that map is locked */ + ip_unlock(port); + + map->ref_count++; + simple_unlock(&map->ref_lock); + + return map; + } + + ip_unlock(port); + return VM_MAP_NULL; + + abort: { + vm_map_t map; + ipc_port_t kern_port; + kern_return_t kr; + + kr = ipc_object_copyin(current_space(), name, + MACH_MSG_TYPE_COPY_SEND, + (ipc_object_t *) &kern_port); + if (kr != KERN_SUCCESS) + return VM_MAP_NULL; + + map = convert_port_to_map(kern_port); + if (IP_VALID(kern_port)) + ipc_port_release_send(kern_port); + + return map; + } +} + +ipc_space_t +port_name_to_space(name) + mach_port_t name; +{ + register ipc_port_t port; + + fast_send_right_lookup(name, port, goto abort); + /* port is locked */ + + if (ip_active(port) && + (ip_kotype(port) == IKOT_TASK)) { + register ipc_space_t space; + + space = ((task_t) port->ip_kobject)->itk_space; + assert(space != IS_NULL); + + simple_lock(&space->is_ref_lock_data); + /* can safely unlock port now that space is locked */ + ip_unlock(port); + + space->is_references++; + simple_unlock(&space->is_ref_lock_data); + + return space; + } + + ip_unlock(port); + return IS_NULL; + + abort: { + ipc_space_t space; + ipc_port_t kern_port; + kern_return_t kr; + + kr = ipc_object_copyin(current_space(), name, + MACH_MSG_TYPE_COPY_SEND, + (ipc_object_t *) &kern_port); + if (kr != KERN_SUCCESS) + return IS_NULL; + + space = convert_port_to_space(kern_port); + if (IP_VALID(kern_port)) + ipc_port_release_send(kern_port); + + return space; + } +} + +/* + * Hack to translate a thread port to a thread pointer for calling + * thread_get_state and thread_set_state. This is only necessary + * because the IPC message for these two operations overflows the + * kernel stack. + * + * AARGH! + */ + +kern_return_t thread_get_state_KERNEL(thread_port, flavor, + old_state, old_state_count) + mach_port_t thread_port; /* port right for thread */ + int flavor; + thread_state_t old_state; /* pointer to OUT array */ + natural_t *old_state_count; /* IN/OUT */ +{ + thread_t thread; + kern_return_t result; + + thread = port_name_to_thread(thread_port); + result = thread_get_state(thread, flavor, old_state, old_state_count); + thread_deallocate(thread); + + return result; +} + +kern_return_t thread_set_state_KERNEL(thread_port, flavor, + new_state, new_state_count) + mach_port_t thread_port; /* port right for thread */ + int flavor; + thread_state_t new_state; + natural_t new_state_count; +{ + thread_t thread; + kern_return_t result; + + thread = port_name_to_thread(thread_port); + result = thread_set_state(thread, flavor, new_state, new_state_count); + thread_deallocate(thread); + + return result; +} + +/* + * Things to keep in mind: + * + * The idea here is to duplicate the semantics of the true kernel RPC. + * The destination port/object should be checked first, before anything + * that the user might notice (like ipc_object_copyin). Return + * MACH_SEND_INTERRUPTED if it isn't correct, so that the user stub + * knows to fall back on an RPC. For other return values, it won't + * retry with an RPC. The retry might get a different (incorrect) rc. 
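/*
 * Sketch of a hypothetical new shortcut trap following the convention
 * described above; the shape matches the syscall_task_* routines that
 * follow.  syscall_task_example and task_example are invented names.
 * The destination is translated first; if that fails, the trap returns
 * MACH_SEND_INTERRUPTED so the user-side stub falls back to the full
 * RPC instead of reporting an error.
 */
kern_return_t syscall_task_example(task)
	mach_port_t task;
{
	task_t t;
	kern_return_t result;

	t = port_name_to_task(task);
	if (t == TASK_NULL)
		return MACH_SEND_INTERRUPTED;

	result = task_example(t);	/* invented kernel routine */
	task_deallocate(t);

	return result;
}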
+ * Return values are only set (and should only be set, with copyout) + * on successfull calls. + */ + +kern_return_t +syscall_vm_map( + mach_port_t target_map, + vm_offset_t *address, + vm_size_t size, + vm_offset_t mask, + boolean_t anywhere, + mach_port_t memory_object, + vm_offset_t offset, + boolean_t copy, + vm_prot_t cur_protection, + vm_prot_t max_protection, + vm_inherit_t inheritance) +{ + vm_map_t map; + ipc_port_t port; + vm_offset_t addr; + kern_return_t result; + + map = port_name_to_map(target_map); + if (map == VM_MAP_NULL) + return MACH_SEND_INTERRUPTED; + + if (MACH_PORT_VALID(memory_object)) { + result = ipc_object_copyin(current_space(), memory_object, + MACH_MSG_TYPE_COPY_SEND, + (ipc_object_t *) &port); + if (result != KERN_SUCCESS) { + vm_map_deallocate(map); + return result; + } + } else + port = (ipc_port_t) memory_object; + + copyin((char *)address, (char *)&addr, sizeof(vm_offset_t)); + result = vm_map(map, &addr, size, mask, anywhere, + port, offset, copy, + cur_protection, max_protection, inheritance); + if (result == KERN_SUCCESS) + copyout((char *)&addr, (char *)address, sizeof(vm_offset_t)); + if (IP_VALID(port)) + ipc_port_release_send(port); + vm_map_deallocate(map); + + return result; +} + +kern_return_t syscall_vm_allocate(target_map, address, size, anywhere) + mach_port_t target_map; + vm_offset_t *address; + vm_size_t size; + boolean_t anywhere; +{ + vm_map_t map; + vm_offset_t addr; + kern_return_t result; + + map = port_name_to_map(target_map); + if (map == VM_MAP_NULL) + return MACH_SEND_INTERRUPTED; + + copyin((char *)address, (char *)&addr, sizeof(vm_offset_t)); + result = vm_allocate(map, &addr, size, anywhere); + if (result == KERN_SUCCESS) + copyout((char *)&addr, (char *)address, sizeof(vm_offset_t)); + vm_map_deallocate(map); + + return result; +} + +kern_return_t syscall_vm_deallocate(target_map, start, size) + mach_port_t target_map; + vm_offset_t start; + vm_size_t size; +{ + vm_map_t map; + kern_return_t result; + + map = port_name_to_map(target_map); + if (map == VM_MAP_NULL) + return MACH_SEND_INTERRUPTED; + + result = vm_deallocate(map, start, size); + vm_map_deallocate(map); + + return result; +} + +kern_return_t syscall_task_create(parent_task, inherit_memory, child_task) + mach_port_t parent_task; + boolean_t inherit_memory; + mach_port_t *child_task; /* OUT */ +{ + task_t t, c; + ipc_port_t port; + mach_port_t name; + kern_return_t result; + + t = port_name_to_task(parent_task); + if (t == TASK_NULL) + return MACH_SEND_INTERRUPTED; + + result = task_create(t, inherit_memory, &c); + if (result == KERN_SUCCESS) { + port = (ipc_port_t) convert_task_to_port(c); + /* always returns a name, even for non-success return codes */ + (void) ipc_kmsg_copyout_object(current_space(), + (ipc_object_t) port, + MACH_MSG_TYPE_PORT_SEND, &name); + copyout((char *)&name, (char *)child_task, + sizeof(mach_port_t)); + } + task_deallocate(t); + + return result; +} + +kern_return_t syscall_task_terminate(task) + mach_port_t task; +{ + task_t t; + kern_return_t result; + + t = port_name_to_task(task); + if (t == TASK_NULL) + return MACH_SEND_INTERRUPTED; + + result = task_terminate(t); + task_deallocate(t); + + return result; +} + +kern_return_t syscall_task_suspend(task) + mach_port_t task; +{ + task_t t; + kern_return_t result; + + t = port_name_to_task(task); + if (t == TASK_NULL) + return MACH_SEND_INTERRUPTED; + + result = task_suspend(t); + task_deallocate(t); + + return result; +} + +kern_return_t syscall_task_set_special_port(task, which_port, 
port_name) + mach_port_t task; + int which_port; + mach_port_t port_name; +{ + task_t t; + ipc_port_t port; + kern_return_t result; + + t = port_name_to_task(task); + if (t == TASK_NULL) + return MACH_SEND_INTERRUPTED; + + if (MACH_PORT_VALID(port_name)) { + result = ipc_object_copyin(current_space(), port_name, + MACH_MSG_TYPE_COPY_SEND, + (ipc_object_t *) &port); + if (result != KERN_SUCCESS) { + task_deallocate(t); + return result; + } + } else + port = (ipc_port_t) port_name; + + result = task_set_special_port(t, which_port, port); + if ((result != KERN_SUCCESS) && IP_VALID(port)) + ipc_port_release_send(port); + task_deallocate(t); + + return result; +} + +kern_return_t +syscall_mach_port_allocate(task, right, namep) + mach_port_t task; + mach_port_right_t right; + mach_port_t *namep; +{ + ipc_space_t space; + mach_port_t name; + kern_return_t kr; + + space = port_name_to_space(task); + if (space == IS_NULL) + return MACH_SEND_INTERRUPTED; + + kr = mach_port_allocate(space, right, &name); + if (kr == KERN_SUCCESS) + copyout((char *)&name, (char *)namep, sizeof(mach_port_t)); + is_release(space); + + return kr; +} + +kern_return_t +syscall_mach_port_allocate_name(task, right, name) + mach_port_t task; + mach_port_right_t right; + mach_port_t name; +{ + ipc_space_t space; + kern_return_t kr; + + space = port_name_to_space(task); + if (space == IS_NULL) + return MACH_SEND_INTERRUPTED; + + kr = mach_port_allocate_name(space, right, name); + is_release(space); + + return kr; +} + +kern_return_t +syscall_mach_port_deallocate(task, name) + mach_port_t task; + mach_port_t name; +{ + ipc_space_t space; + kern_return_t kr; + + space = port_name_to_space(task); + if (space == IS_NULL) + return MACH_SEND_INTERRUPTED; + + kr = mach_port_deallocate(space, name); + is_release(space); + + return kr; +} + +kern_return_t +syscall_mach_port_insert_right(task, name, right, rightType) + mach_port_t task; + mach_port_t name; + mach_port_t right; + mach_msg_type_name_t rightType; +{ + ipc_space_t space; + ipc_object_t object; + mach_msg_type_name_t newtype; + kern_return_t kr; + + space = port_name_to_space(task); + if (space == IS_NULL) + return MACH_SEND_INTERRUPTED; + + if (!MACH_MSG_TYPE_PORT_ANY(rightType)) { + is_release(space); + return KERN_INVALID_VALUE; + } + + if (MACH_PORT_VALID(right)) { + kr = ipc_object_copyin(current_space(), right, rightType, + &object); + if (kr != KERN_SUCCESS) { + is_release(space); + return kr; + } + } else + object = (ipc_object_t) right; + newtype = ipc_object_copyin_type(rightType); + + kr = mach_port_insert_right(space, name, (ipc_port_t) object, newtype); + if ((kr != KERN_SUCCESS) && IO_VALID(object)) + ipc_object_destroy(object, newtype); + is_release(space); + + return kr; +} + +kern_return_t syscall_thread_depress_abort(thread) + mach_port_t thread; +{ + thread_t t; + kern_return_t result; + + t = port_name_to_thread(thread); + if (t == THREAD_NULL) + return MACH_SEND_INTERRUPTED; + + result = thread_depress_abort(t); + thread_deallocate(t); + + return result; +} + +/* + * Device traps -- these are way experimental. + */ + +extern io_return_t ds_device_write_trap(); +extern io_return_t ds_device_writev_trap(); + +io_return_t +syscall_device_write_request(mach_port_t device_name, + mach_port_t reply_name, + dev_mode_t mode, + recnum_t recnum, + vm_offset_t data, + vm_size_t data_count) +{ + device_t dev; + ipc_port_t reply_port; + io_return_t res; + + /* + * First try to translate the device name. + * + * If this fails, return KERN_INVALID_CAPABILITY. 
+ * Caller knows that this most likely means that + * device is not local to node and IPC should be used. + * + * If kernel doesn't do device traps, kern_invalid() + * will be called instead of this function which will + * return KERN_INVALID_ARGUMENT. + */ + dev = port_name_to_device(device_name); + if (dev == DEVICE_NULL) + return KERN_INVALID_CAPABILITY; + + /* + * Translate reply port. + */ + if (reply_name == MACH_PORT_NULL) + reply_port = IP_NULL; + else { + /* Homey don't play that. */ + device_deallocate(dev); + return KERN_INVALID_RIGHT; + } + + /* note: doesn't take reply_port arg yet. */ + res = ds_device_write_trap(dev, /*reply_port,*/ + mode, recnum, + data, data_count); + + /* + * Give up reference from port_name_to_device. + */ + device_deallocate(dev); + return res; +} + +io_return_t +syscall_device_writev_request(mach_port_t device_name, + mach_port_t reply_name, + dev_mode_t mode, + recnum_t recnum, + io_buf_vec_t *iovec, + vm_size_t iocount) +{ + device_t dev; + ipc_port_t reply_port; + io_return_t res; + + /* + * First try to translate the device name. + * + * If this fails, return KERN_INVALID_CAPABILITY. + * Caller knows that this most likely means that + * device is not local to node and IPC should be used. + * + * If kernel doesn't do device traps, kern_invalid() + * will be called instead of this function which will + * return KERN_INVALID_ARGUMENT. + */ + dev = port_name_to_device(device_name); + if (dev == DEVICE_NULL) + return KERN_INVALID_CAPABILITY; + + /* + * Translate reply port. + */ + if (reply_name == MACH_PORT_NULL) + reply_port = IP_NULL; + else { + /* Homey don't play that. */ + device_deallocate(dev); + return KERN_INVALID_RIGHT; + } + + /* note: doesn't take reply_port arg yet. */ + res = ds_device_writev_trap(dev, /*reply_port,*/ + mode, recnum, + iovec, iocount); + + /* + * Give up reference from port_name_to_device. + */ + device_deallocate(dev); + return res; +} + + diff --git a/kern/ipc_sched.c b/kern/ipc_sched.c new file mode 100644 index 0000000..a2f4c35 --- /dev/null +++ b/kern/ipc_sched.c @@ -0,0 +1,287 @@ +/* + * Mach Operating System + * Copyright (c) 1993, 1992,1991,1990 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. 
+ */ + +#include <cpus.h> +#include <mach_host.h> + +#include <mach/message.h> +#include <kern/counters.h> +#include "cpu_number.h" +#include <kern/lock.h> +#include <kern/thread.h> +#include <kern/sched_prim.h> +#include <kern/processor.h> +#include <kern/time_out.h> +#include <kern/thread_swap.h> +#include <kern/ipc_sched.h> +#include <machine/machspl.h> /* for splsched/splx */ +#include <machine/pmap.h> + + + +/* + * These functions really belong in kern/sched_prim.c. + */ + +/* + * Routine: thread_go + * Purpose: + * Start a thread running. + * Conditions: + * IPC locks may be held. + */ + +void +thread_go( + thread_t thread) +{ + int state; + spl_t s; + + s = splsched(); + thread_lock(thread); + + reset_timeout_check(&thread->timer); + + state = thread->state; + switch (state & TH_SCHED_STATE) { + + case TH_WAIT | TH_SUSP | TH_UNINT: + case TH_WAIT | TH_UNINT: + case TH_WAIT: + /* + * Sleeping and not suspendable - put + * on run queue. + */ + thread->state = (state &~ TH_WAIT) | TH_RUN; + thread->wait_result = THREAD_AWAKENED; + thread_setrun(thread, TRUE); + break; + + case TH_WAIT | TH_SUSP: + case TH_RUN | TH_WAIT: + case TH_RUN | TH_WAIT | TH_SUSP: + case TH_RUN | TH_WAIT | TH_UNINT: + case TH_RUN | TH_WAIT | TH_SUSP | TH_UNINT: + /* + * Either already running, or suspended. + */ + thread->state = state & ~TH_WAIT; + thread->wait_result = THREAD_AWAKENED; + break; + + default: + /* + * Not waiting. + */ + break; + } + + thread_unlock(thread); + splx(s); +} + +/* + * Routine: thread_will_wait + * Purpose: + * Assert that the thread intends to block. + */ + +void +thread_will_wait( + thread_t thread) +{ + spl_t s; + + s = splsched(); + thread_lock(thread); + + assert(thread->wait_result = -1); /* for later assertions */ + thread->state |= TH_WAIT; + + thread_unlock(thread); + splx(s); +} + +/* + * Routine: thread_will_wait_with_timeout + * Purpose: + * Assert that the thread intends to block, + * with a timeout. + */ + +void +thread_will_wait_with_timeout( + thread_t thread, + mach_msg_timeout_t msecs) +{ + natural_t ticks = convert_ipc_timeout_to_ticks(msecs); + spl_t s; + + s = splsched(); + thread_lock(thread); + + assert(thread->wait_result = -1); /* for later assertions */ + thread->state |= TH_WAIT; + + set_timeout(&thread->timer, ticks); + + thread_unlock(thread); + splx(s); +} + +#if MACH_HOST +#define check_processor_set(thread) \ + (current_processor()->processor_set == (thread)->processor_set) +#else /* MACH_HOST */ +#define check_processor_set(thread) TRUE +#endif /* MACH_HOST */ + +#if NCPUS > 1 +#define check_bound_processor(thread) \ + ((thread)->bound_processor == PROCESSOR_NULL || \ + (thread)->bound_processor == current_processor()) +#else /* NCPUS > 1 */ +#define check_bound_processor(thread) TRUE +#endif /* NCPUS > 1 */ + +#ifdef CONTINUATIONS +/* + * Routine: thread_handoff + * Purpose: + * Switch to a new thread (new), leaving the current + * thread (old) blocked. If successful, moves the + * kernel stack from old to new and returns as the + * new thread. An explicit continuation for the old thread + * must be supplied. + * + * NOTE: Although we wakeup new, we don't set new->wait_result. + * Returns: + * TRUE if the handoff happened. + */ + +boolean_t +thread_handoff( + register thread_t old, + register continuation_t continuation, + register thread_t new) +{ + spl_t s; + + assert(current_thread() == old); + + /* + * XXX Dubious things here: + * I don't check the idle_count on the processor set. + * No scheduling priority or policy checks. 
+ * I assume the new thread is interruptible. + */ + + s = splsched(); + thread_lock(new); + + /* + * The first thing we must do is check the state + * of the threads, to ensure we can handoff. + * This check uses current_processor()->processor_set, + * which we can read without locking. + */ + + if ((old->stack_privilege == current_stack()) || + (new->state != (TH_WAIT|TH_SWAPPED)) || + !check_processor_set(new) || + !check_bound_processor(new)) { + thread_unlock(new); + (void) splx(s); + + counter_always(c_thread_handoff_misses++); + return FALSE; + } + + reset_timeout_check(&new->timer); + + new->state = TH_RUN; + thread_unlock(new); + +#if NCPUS > 1 + new->last_processor = current_processor(); +#endif /* NCPUS > 1 */ + + ast_context(new, cpu_number()); + timer_switch(&new->system_timer); + + /* + * stack_handoff is machine-dependent. It does the + * machine-dependent components of a context-switch, like + * changing address spaces. It updates active_threads. + */ + + stack_handoff(old, new); + + /* + * Now we must dispose of the old thread. + * This is like thread_continue, except + * that the old thread isn't waiting yet. + */ + + thread_lock(old); + old->swap_func = continuation; + assert(old->wait_result = -1); /* for later assertions */ + + if (old->state == TH_RUN) { + /* + * This is our fast path. + */ + + old->state = TH_WAIT|TH_SWAPPED; + } + else if (old->state == (TH_RUN|TH_SUSP)) { + /* + * Somebody is trying to suspend the thread. + */ + + old->state = TH_WAIT|TH_SUSP|TH_SWAPPED; + if (old->wake_active) { + /* + * Someone wants to know when the thread + * really stops. + */ + old->wake_active = FALSE; + thread_unlock(old); + thread_wakeup((event_t)&old->wake_active); + goto after_old_thread; + } + } else + panic("thread_handoff"); + + thread_unlock(old); + after_old_thread: + (void) splx(s); + + counter_always(c_thread_handoff_hits++); + return TRUE; +} +#endif /* CONTINUATIONS */ diff --git a/kern/ipc_sched.h b/kern/ipc_sched.h new file mode 100644 index 0000000..bdee832 --- /dev/null +++ b/kern/ipc_sched.h @@ -0,0 +1,32 @@ +/* + * Mach Operating System + * Copyright (c) 1992,1991,1990 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#ifndef _KERN_IPC_SCHED_H_ +#define _KERN_IPC_SCHED_H_ + +#include <kern/sched_prim.h> + +#endif /* _KERN_IPC_SCHED_H_ */ diff --git a/kern/ipc_tt.c b/kern/ipc_tt.c new file mode 100644 index 0000000..b2e02d8 --- /dev/null +++ b/kern/ipc_tt.c @@ -0,0 +1,1398 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. 
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: ipc_tt.c + * Purpose: + * Task and thread related IPC functions. + */ + +#include <mach_ipc_compat.h> + +#include <mach/boolean.h> +#include <mach/kern_return.h> +#include <mach/mach_param.h> +#include <mach/task_special_ports.h> +#include <mach/thread_special_ports.h> +#include <vm/vm_kern.h> +#include <kern/task.h> +#include <kern/thread.h> +#include <kern/ipc_kobject.h> +#include <kern/ipc_tt.h> +#include <ipc/ipc_space.h> +#include <ipc/ipc_table.h> +#include <ipc/ipc_port.h> +#include <ipc/ipc_right.h> +#include <ipc/ipc_entry.h> +#include <ipc/ipc_object.h> + + + +/* + * Routine: ipc_task_init + * Purpose: + * Initialize a task's IPC state. + * + * If non-null, some state will be inherited from the parent. + * The parent must be appropriately initialized. + * Conditions: + * Nothing locked. + */ + +void +ipc_task_init( + task_t task, + task_t parent) +{ + ipc_space_t space; + ipc_port_t kport; + kern_return_t kr; + int i; + + + kr = ipc_space_create(&ipc_table_entries[0], &space); + if (kr != KERN_SUCCESS) + panic("ipc_task_init"); + + + kport = ipc_port_alloc_kernel(); + if (kport == IP_NULL) + panic("ipc_task_init"); + + itk_lock_init(task); + task->itk_self = kport; + task->itk_sself = ipc_port_make_send(kport); + task->itk_space = space; + + if (parent == TASK_NULL) { + task->itk_exception = IP_NULL; + task->itk_bootstrap = IP_NULL; + for (i = 0; i < TASK_PORT_REGISTER_MAX; i++) + task->itk_registered[i] = IP_NULL; + } else { + itk_lock(parent); + assert(parent->itk_self != IP_NULL); + + /* inherit registered ports */ + + for (i = 0; i < TASK_PORT_REGISTER_MAX; i++) + task->itk_registered[i] = + ipc_port_copy_send(parent->itk_registered[i]); + + /* inherit exception and bootstrap ports */ + + task->itk_exception = + ipc_port_copy_send(parent->itk_exception); + task->itk_bootstrap = + ipc_port_copy_send(parent->itk_bootstrap); + + itk_unlock(parent); + } +} + +/* + * Routine: ipc_task_enable + * Purpose: + * Enable a task for IPC access. + * Conditions: + * Nothing locked. + */ + +void +ipc_task_enable( + task_t task) +{ + ipc_port_t kport; + + itk_lock(task); + kport = task->itk_self; + if (kport != IP_NULL) + ipc_kobject_set(kport, (ipc_kobject_t) task, IKOT_TASK); + itk_unlock(task); +} + +/* + * Routine: ipc_task_disable + * Purpose: + * Disable IPC access to a task. + * Conditions: + * Nothing locked. 
+ */ + +void +ipc_task_disable( + task_t task) +{ + ipc_port_t kport; + + itk_lock(task); + kport = task->itk_self; + if (kport != IP_NULL) + ipc_kobject_set(kport, IKO_NULL, IKOT_NONE); + itk_unlock(task); +} + +/* + * Routine: ipc_task_terminate + * Purpose: + * Clean up and destroy a task's IPC state. + * Conditions: + * Nothing locked. The task must be suspended. + * (Or the current thread must be in the task.) + */ + +void +ipc_task_terminate( + task_t task) +{ + ipc_port_t kport; + int i; + + itk_lock(task); + kport = task->itk_self; + + if (kport == IP_NULL) { + /* the task is already terminated (can this happen?) */ + itk_unlock(task); + return; + } + + task->itk_self = IP_NULL; + itk_unlock(task); + + /* release the naked send rights */ + + if (IP_VALID(task->itk_sself)) + ipc_port_release_send(task->itk_sself); + if (IP_VALID(task->itk_exception)) + ipc_port_release_send(task->itk_exception); + if (IP_VALID(task->itk_bootstrap)) + ipc_port_release_send(task->itk_bootstrap); + + for (i = 0; i < TASK_PORT_REGISTER_MAX; i++) + if (IP_VALID(task->itk_registered[i])) + ipc_port_release_send(task->itk_registered[i]); + + /* destroy the space, leaving just a reference for it */ + + ipc_space_destroy(task->itk_space); + + /* destroy the kernel port */ + + ipc_port_dealloc_kernel(kport); +} + +/* + * Routine: ipc_thread_init + * Purpose: + * Initialize a thread's IPC state. + * Conditions: + * Nothing locked. + */ + +void +ipc_thread_init(thread) + thread_t thread; +{ + ipc_port_t kport; + + kport = ipc_port_alloc_kernel(); + if (kport == IP_NULL) + panic("ipc_thread_init"); + + ipc_thread_links_init(thread); + ipc_kmsg_queue_init(&thread->ith_messages); + + ith_lock_init(thread); + thread->ith_self = kport; + thread->ith_sself = ipc_port_make_send(kport); + thread->ith_exception = IP_NULL; + + thread->ith_mig_reply = MACH_PORT_NULL; + thread->ith_rpc_reply = IP_NULL; + +#if MACH_IPC_COMPAT + { + ipc_space_t space = thread->task->itk_space; + ipc_port_t port; + mach_port_t name; + kern_return_t kr; + + kr = ipc_port_alloc_compat(space, &name, &port); + if (kr != KERN_SUCCESS) + panic("ipc_thread_init"); + /* port is locked and active */ + + /* + * Now we have a reply port. We need to make a naked + * send right to stash in ith_reply. We can't use + * ipc_port_make_send, because we can't unlock the port + * before making the right. Also we don't want to + * increment ip_mscount. The net effect of all this + * is the same as doing + * ipc_port_alloc_kernel get the port + * ipc_port_make_send make the send right + * ipc_object_copyin_from_kernel grab receive right + * ipc_object_copyout_compat and give to user + */ + + port->ip_srights++; + ip_reference(port); + ip_unlock(port); + + thread->ith_reply = port; + } +#endif MACH_IPC_COMPAT +} + +/* + * Routine: ipc_thread_enable + * Purpose: + * Enable a thread for IPC access. + * Conditions: + * Nothing locked. + */ + +void +ipc_thread_enable(thread) + thread_t thread; +{ + ipc_port_t kport; + + ith_lock(thread); + kport = thread->ith_self; + if (kport != IP_NULL) + ipc_kobject_set(kport, (ipc_kobject_t) thread, IKOT_THREAD); + ith_unlock(thread); +} + +/* + * Routine: ipc_thread_disable + * Purpose: + * Disable IPC access to a thread. + * Conditions: + * Nothing locked. 
+ */ + +void +ipc_thread_disable(thread) + thread_t thread; +{ + ipc_port_t kport; + + ith_lock(thread); + kport = thread->ith_self; + if (kport != IP_NULL) + ipc_kobject_set(kport, IKO_NULL, IKOT_NONE); + ith_unlock(thread); +} + +/* + * Routine: ipc_thread_terminate + * Purpose: + * Clean up and destroy a thread's IPC state. + * Conditions: + * Nothing locked. The thread must be suspended. + * (Or be the current thread.) + */ + +void +ipc_thread_terminate(thread) + thread_t thread; +{ + ipc_port_t kport; + + ith_lock(thread); + kport = thread->ith_self; + + if (kport == IP_NULL) { + /* the thread is already terminated (can this happen?) */ + ith_unlock(thread); + return; + } + + thread->ith_self = IP_NULL; + ith_unlock(thread); + + assert(ipc_kmsg_queue_empty(&thread->ith_messages)); + + /* release the naked send rights */ + + if (IP_VALID(thread->ith_sself)) + ipc_port_release_send(thread->ith_sself); + if (IP_VALID(thread->ith_exception)) + ipc_port_release_send(thread->ith_exception); + +#if MACH_IPC_COMPAT + if (IP_VALID(thread->ith_reply)) { + ipc_space_t space = thread->task->itk_space; + ipc_port_t port = thread->ith_reply; + ipc_entry_t entry; + mach_port_t name; + + /* destroy any rights the task may have for the port */ + + is_write_lock(space); + if (space->is_active && + ipc_right_reverse(space, (ipc_object_t) port, + &name, &entry)) { + /* reply port is locked and active */ + ip_unlock(port); + + (void) ipc_right_destroy(space, name, entry); + /* space is unlocked */ + } else + is_write_unlock(space); + + ipc_port_release_send(port); + } + + /* + * Note we do *not* destroy any rights the space may have + * for the thread's kernel port. The old IPC code did this, + * to avoid generating a notification when the port is + * destroyed. However, this isn't a good idea when + * the kernel port is interposed, because then it doesn't + * happen, exposing the interposition to the task. + * Because we don't need the efficiency hack, I flushed + * this behaviour, introducing a small incompatibility + * with the old IPC code. + */ +#endif MACH_IPC_COMPAT + + /* destroy the kernel port */ + + ipc_port_dealloc_kernel(kport); +} + +#if 0 +/* + * Routine: retrieve_task_self + * Purpose: + * Return a send right (possibly null/dead) + * for the task's user-visible self port. + * Conditions: + * Nothing locked. + */ + +ipc_port_t +retrieve_task_self(task) + task_t task; +{ + ipc_port_t port; + + assert(task != TASK_NULL); + + itk_lock(task); + if (task->itk_self != IP_NULL) + port = ipc_port_copy_send(task->itk_sself); + else + port = IP_NULL; + itk_unlock(task); + + return port; +} + +/* + * Routine: retrieve_thread_self + * Purpose: + * Return a send right (possibly null/dead) + * for the thread's user-visible self port. + * Conditions: + * Nothing locked. + */ + +ipc_port_t +retrieve_thread_self(thread) + thread_t thread; +{ + ipc_port_t port; + + assert(thread != ITH_NULL); + + ith_lock(thread); + if (thread->ith_self != IP_NULL) + port = ipc_port_copy_send(thread->ith_sself); + else + port = IP_NULL; + ith_unlock(thread); + + return port; +} +#endif 0 + +/* + * Routine: retrieve_task_self_fast + * Purpose: + * Optimized version of retrieve_task_self, + * that only works for the current task. + * + * Return a send right (possibly null/dead) + * for the task's user-visible self port. + * Conditions: + * Nothing locked. 
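+ *
+ *	In the common case nothing has interposed on the task's
+ *	kernel port, so itk_sself still names itk_self and the send
+ *	right is made by taking the port lock and bumping ip_srights
+ *	directly; only an interposed port pays for ipc_port_copy_send.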
+ */ + +ipc_port_t +retrieve_task_self_fast( + register task_t task) +{ + register ipc_port_t port; + + assert(task == current_task()); + + itk_lock(task); + assert(task->itk_self != IP_NULL); + + if ((port = task->itk_sself) == task->itk_self) { + /* no interposing */ + + ip_lock(port); + assert(ip_active(port)); + ip_reference(port); + port->ip_srights++; + ip_unlock(port); + } else + port = ipc_port_copy_send(port); + itk_unlock(task); + + return port; +} + +/* + * Routine: retrieve_thread_self_fast + * Purpose: + * Optimized version of retrieve_thread_self, + * that only works for the current thread. + * + * Return a send right (possibly null/dead) + * for the thread's user-visible self port. + * Conditions: + * Nothing locked. + */ + +ipc_port_t +retrieve_thread_self_fast(thread) + register thread_t thread; +{ + register ipc_port_t port; + + assert(thread == current_thread()); + + ith_lock(thread); + assert(thread->ith_self != IP_NULL); + + if ((port = thread->ith_sself) == thread->ith_self) { + /* no interposing */ + + ip_lock(port); + assert(ip_active(port)); + ip_reference(port); + port->ip_srights++; + ip_unlock(port); + } else + port = ipc_port_copy_send(port); + ith_unlock(thread); + + return port; +} + +#if 0 +/* + * Routine: retrieve_task_exception + * Purpose: + * Return a send right (possibly null/dead) + * for the task's exception port. + * Conditions: + * Nothing locked. + */ + +ipc_port_t +retrieve_task_exception(task) + task_t task; +{ + ipc_port_t port; + + assert(task != TASK_NULL); + + itk_lock(task); + if (task->itk_self != IP_NULL) + port = ipc_port_copy_send(task->itk_exception); + else + port = IP_NULL; + itk_unlock(task); + + return port; +} + +/* + * Routine: retrieve_thread_exception + * Purpose: + * Return a send right (possibly null/dead) + * for the thread's exception port. + * Conditions: + * Nothing locked. + */ + +ipc_port_t +retrieve_thread_exception(thread) + thread_t thread; +{ + ipc_port_t port; + + assert(thread != ITH_NULL); + + ith_lock(thread); + if (thread->ith_self != IP_NULL) + port = ipc_port_copy_send(thread->ith_exception); + else + port = IP_NULL; + ith_unlock(thread); + + return port; +} +#endif 0 + +/* + * Routine: mach_task_self [mach trap] + * Purpose: + * Give the caller send rights for his own task port. + * Conditions: + * Nothing locked. + * Returns: + * MACH_PORT_NULL if there are any resource failures + * or other errors. + */ + +mach_port_t +mach_task_self(void) +{ + task_t task = current_task(); + ipc_port_t sright; + + sright = retrieve_task_self_fast(task); + return ipc_port_copyout_send(sright, task->itk_space); +} + +/* + * Routine: mach_thread_self [mach trap] + * Purpose: + * Give the caller send rights for his own thread port. + * Conditions: + * Nothing locked. + * Returns: + * MACH_PORT_NULL if there are any resource failures + * or other errors. + */ + +mach_port_t +mach_thread_self() +{ + thread_t thread = current_thread(); + task_t task = thread->task; + ipc_port_t sright; + + sright = retrieve_thread_self_fast(thread); + return ipc_port_copyout_send(sright, task->itk_space); +} + +/* + * Routine: mach_reply_port [mach trap] + * Purpose: + * Allocate a port for the caller. + * Conditions: + * Nothing locked. + * Returns: + * MACH_PORT_NULL if there are any resource failures + * or other errors. 
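+ *
+ *	The receive right named here is what user code typically
+ *	supplies as the reply port (msgh_local_port) of a later
+ *	mach_msg request.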
+ */ + +mach_port_t +mach_reply_port(void) +{ + ipc_port_t port; + mach_port_t name; + kern_return_t kr; + + kr = ipc_port_alloc(current_task()->itk_space, &name, &port); + if (kr == KERN_SUCCESS) + ip_unlock(port); + else + name = MACH_PORT_NULL; + + return name; +} + +#if MACH_IPC_COMPAT + +/* + * Routine: retrieve_task_notify + * Purpose: + * Return a reference (or null) for + * the task's notify port. + * Conditions: + * Nothing locked. + */ + +ipc_port_t +retrieve_task_notify(task) + task_t task; +{ + ipc_space_t space = task->itk_space; + ipc_port_t port; + + is_read_lock(space); + if (space->is_active) { + port = space->is_notify; + if (IP_VALID(port)) + ipc_port_reference(port); + } else + port = IP_NULL; + is_read_unlock(space); + + return port; +} + +/* + * Routine: retrieve_thread_reply + * Purpose: + * Return a reference (or null) for + * the thread's reply port. + * Conditions: + * Nothing locked. + */ + +ipc_port_t +retrieve_thread_reply(thread) + thread_t thread; +{ + ipc_port_t port; + + ith_lock(thread); + if (thread->ith_self != IP_NULL) { + port = thread->ith_reply; + if (IP_VALID(port)) + ipc_port_reference(port); + } else + port = IP_NULL; + ith_unlock(thread); + + return port; +} + +/* + * Routine: task_self [mach trap] + * Purpose: + * Give the caller send rights for his task port. + * If new, the send right is marked with IE_BITS_COMPAT. + * Conditions: + * Nothing locked. + * Returns: + * MACH_PORT_NULL if there are any resource failures + * or other errors. + */ + +port_name_t +task_self() +{ + task_t task = current_task(); + ipc_port_t sright; + mach_port_t name; + + sright = retrieve_task_self_fast(task); + name = ipc_port_copyout_send_compat(sright, task->itk_space); + return (port_name_t) name; +} + +/* + * Routine: task_notify [mach trap] + * Purpose: + * Give the caller the name of his own notify port. + * Conditions: + * Nothing locked. + * Returns: + * MACH_PORT_NULL if there isn't a notify port, + * if it is dead, or if the caller doesn't hold + * receive rights for it. + */ + +port_name_t +task_notify() +{ + task_t task = current_task(); + ipc_port_t notify; + mach_port_t name; + + notify = retrieve_task_notify(task); + name = ipc_port_copyout_receiver(notify, task->itk_space); + return (port_name_t) name; +} + +/* + * Routine: thread_self [mach trap] + * Purpose: + * Give the caller send rights for his own thread port. + * If new, the send right is marked with IE_BITS_COMPAT. + * Conditions: + * Nothing locked. + * Returns: + * MACH_PORT_NULL if there are any resource failures + * or other errors. + */ + +port_name_t +thread_self() +{ + thread_t thread = current_thread(); + task_t task = thread->task; + ipc_port_t sright; + mach_port_t name; + + sright = retrieve_thread_self_fast(thread); + name = ipc_port_copyout_send_compat(sright, task->itk_space); + return (port_name_t) name; +} + +/* + * Routine: thread_reply [mach trap] + * Purpose: + * Give the caller the name of his own reply port. + * Conditions: + * Nothing locked. + * Returns: + * MACH_PORT_NULL if there isn't a reply port, + * if it is dead, or if the caller doesn't hold + * receive rights for it. 
+ */ + +port_name_t +thread_reply() +{ + task_t task = current_task(); + thread_t thread = current_thread(); + ipc_port_t reply; + mach_port_t name; + + reply = retrieve_thread_reply(thread); + name = ipc_port_copyout_receiver(reply, task->itk_space); + return (port_name_t) name; +} + +#endif MACH_IPC_COMPAT + +/* + * Routine: task_get_special_port [kernel call] + * Purpose: + * Clones a send right for one of the task's + * special ports. + * Conditions: + * Nothing locked. + * Returns: + * KERN_SUCCESS Extracted a send right. + * KERN_INVALID_ARGUMENT The task is null. + * KERN_FAILURE The task/space is dead. + * KERN_INVALID_ARGUMENT Invalid special port. + */ + +kern_return_t +task_get_special_port( + task_t task, + int which, + ipc_port_t *portp) +{ + ipc_port_t *whichp; + ipc_port_t port; + + if (task == TASK_NULL) + return KERN_INVALID_ARGUMENT; + + switch (which) { +#if MACH_IPC_COMPAT + case TASK_NOTIFY_PORT: { + ipc_space_t space = task->itk_space; + + is_read_lock(space); + if (!space->is_active) { + is_read_unlock(space); + return KERN_FAILURE; + } + + port = ipc_port_copy_send(space->is_notify); + is_read_unlock(space); + + *portp = port; + return KERN_SUCCESS; + } +#endif MACH_IPC_COMPAT + + case TASK_KERNEL_PORT: + whichp = &task->itk_sself; + break; + + case TASK_EXCEPTION_PORT: + whichp = &task->itk_exception; + break; + + case TASK_BOOTSTRAP_PORT: + whichp = &task->itk_bootstrap; + break; + + default: + return KERN_INVALID_ARGUMENT; + } + + itk_lock(task); + if (task->itk_self == IP_NULL) { + itk_unlock(task); + return KERN_FAILURE; + } + + port = ipc_port_copy_send(*whichp); + itk_unlock(task); + + *portp = port; + return KERN_SUCCESS; +} + +/* + * Routine: task_set_special_port [kernel call] + * Purpose: + * Changes one of the task's special ports, + * setting it to the supplied send right. + * Conditions: + * Nothing locked. If successful, consumes + * the supplied send right. + * Returns: + * KERN_SUCCESS Changed the special port. + * KERN_INVALID_ARGUMENT The task is null. + * KERN_FAILURE The task/space is dead. + * KERN_INVALID_ARGUMENT Invalid special port. + */ + +kern_return_t +task_set_special_port( + task_t task, + int which, + ipc_port_t port) +{ + ipc_port_t *whichp; + ipc_port_t old; + + if (task == TASK_NULL) + return KERN_INVALID_ARGUMENT; + + switch (which) { +#if MACH_IPC_COMPAT + case TASK_NOTIFY_PORT: { + ipc_space_t space = task->itk_space; + + is_write_lock(space); + if (!space->is_active) { + is_write_unlock(space); + return KERN_FAILURE; + } + + old = space->is_notify; + space->is_notify = port; + is_write_unlock(space); + + if (IP_VALID(old)) + ipc_port_release_send(old); + return KERN_SUCCESS; + } +#endif MACH_IPC_COMPAT + + case TASK_KERNEL_PORT: + whichp = &task->itk_sself; + break; + + case TASK_EXCEPTION_PORT: + whichp = &task->itk_exception; + break; + + case TASK_BOOTSTRAP_PORT: + whichp = &task->itk_bootstrap; + break; + + default: + return KERN_INVALID_ARGUMENT; + } + + itk_lock(task); + if (task->itk_self == IP_NULL) { + itk_unlock(task); + return KERN_FAILURE; + } + + old = *whichp; + *whichp = port; + itk_unlock(task); + + if (IP_VALID(old)) + ipc_port_release_send(old); + return KERN_SUCCESS; +} + +/* + * Routine: thread_get_special_port [kernel call] + * Purpose: + * Clones a send right for one of the thread's + * special ports. + * Conditions: + * Nothing locked. + * Returns: + * KERN_SUCCESS Extracted a send right. + * KERN_INVALID_ARGUMENT The thread is null. + * KERN_FAILURE The thread is dead. 
+ * KERN_INVALID_ARGUMENT Invalid special port. + */ + +kern_return_t +thread_get_special_port(thread, which, portp) + thread_t thread; + int which; + ipc_port_t *portp; +{ + ipc_port_t *whichp; + ipc_port_t port; + + if (thread == ITH_NULL) + return KERN_INVALID_ARGUMENT; + + switch (which) { +#if MACH_IPC_COMPAT + case THREAD_REPLY_PORT: + whichp = &thread->ith_reply; + break; +#endif MACH_IPC_COMPAT + + case THREAD_KERNEL_PORT: + whichp = &thread->ith_sself; + break; + + case THREAD_EXCEPTION_PORT: + whichp = &thread->ith_exception; + break; + + default: + return KERN_INVALID_ARGUMENT; + } + + ith_lock(thread); + if (thread->ith_self == IP_NULL) { + ith_unlock(thread); + return KERN_FAILURE; + } + + port = ipc_port_copy_send(*whichp); + ith_unlock(thread); + + *portp = port; + return KERN_SUCCESS; +} + +/* + * Routine: thread_set_special_port [kernel call] + * Purpose: + * Changes one of the thread's special ports, + * setting it to the supplied send right. + * Conditions: + * Nothing locked. If successful, consumes + * the supplied send right. + * Returns: + * KERN_SUCCESS Changed the special port. + * KERN_INVALID_ARGUMENT The thread is null. + * KERN_FAILURE The thread is dead. + * KERN_INVALID_ARGUMENT Invalid special port. + */ + +kern_return_t +thread_set_special_port(thread, which, port) + thread_t thread; + int which; + ipc_port_t port; +{ + ipc_port_t *whichp; + ipc_port_t old; + + if (thread == ITH_NULL) + return KERN_INVALID_ARGUMENT; + + switch (which) { +#if MACH_IPC_COMPAT + case THREAD_REPLY_PORT: + whichp = &thread->ith_reply; + break; +#endif MACH_IPC_COMPAT + + case THREAD_KERNEL_PORT: + whichp = &thread->ith_sself; + break; + + case THREAD_EXCEPTION_PORT: + whichp = &thread->ith_exception; + break; + + default: + return KERN_INVALID_ARGUMENT; + } + + ith_lock(thread); + if (thread->ith_self == IP_NULL) { + ith_unlock(thread); + return KERN_FAILURE; + } + + old = *whichp; + *whichp = port; + ith_unlock(thread); + + if (IP_VALID(old)) + ipc_port_release_send(old); + return KERN_SUCCESS; +} + +/* + * Routine: mach_ports_register [kernel call] + * Purpose: + * Stash a handful of port send rights in the task. + * Child tasks will inherit these rights, but they + * must use mach_ports_lookup to acquire them. + * + * The rights are supplied in a (wired) kalloc'd segment. + * Rights which aren't supplied are assumed to be null. + * Conditions: + * Nothing locked. If successful, consumes + * the supplied rights and memory. + * Returns: + * KERN_SUCCESS Stashed the port rights. + * KERN_INVALID_ARGUMENT The task is null. + * KERN_INVALID_ARGUMENT The task is dead. + * KERN_INVALID_ARGUMENT Too many port rights supplied. + */ + +kern_return_t +mach_ports_register( + task_t task, + mach_port_array_t memory, + mach_msg_type_number_t portsCnt) +{ + ipc_port_t ports[TASK_PORT_REGISTER_MAX]; + int i; + + if ((task == TASK_NULL) || + (portsCnt > TASK_PORT_REGISTER_MAX)) + return KERN_INVALID_ARGUMENT; + + /* + * Pad the port rights with nulls. + */ + + for (i = 0; i < portsCnt; i++) + ports[i] = memory[i]; + for (; i < TASK_PORT_REGISTER_MAX; i++) + ports[i] = IP_NULL; + + itk_lock(task); + if (task->itk_self == IP_NULL) { + itk_unlock(task); + return KERN_INVALID_ARGUMENT; + } + + /* + * Replace the old send rights with the new. + * Release the old rights after unlocking. 
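+ *
+ *	The swap itself is done under the task's itk lock so the
+ *	registered set stays consistent; the releases are deferred
+ *	until after itk_unlock because ipc_port_release_send may take
+ *	port locks and generate a no-senders notification.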
+ */ + + for (i = 0; i < TASK_PORT_REGISTER_MAX; i++) { + ipc_port_t old; + + old = task->itk_registered[i]; + task->itk_registered[i] = ports[i]; + ports[i] = old; + } + + itk_unlock(task); + + for (i = 0; i < TASK_PORT_REGISTER_MAX; i++) + if (IP_VALID(ports[i])) + ipc_port_release_send(ports[i]); + + /* + * Now that the operation is known to be successful, + * we can free the memory. + */ + + if (portsCnt != 0) + kfree((vm_offset_t) memory, + (vm_size_t) (portsCnt * sizeof(mach_port_t))); + + return KERN_SUCCESS; +} + +/* + * Routine: mach_ports_lookup [kernel call] + * Purpose: + * Retrieves (clones) the stashed port send rights. + * Conditions: + * Nothing locked. If successful, the caller gets + * rights and memory. + * Returns: + * KERN_SUCCESS Retrieved the send rights. + * KERN_INVALID_ARGUMENT The task is null. + * KERN_INVALID_ARGUMENT The task is dead. + * KERN_RESOURCE_SHORTAGE Couldn't allocate memory. + */ + +kern_return_t +mach_ports_lookup(task, portsp, portsCnt) + task_t task; + ipc_port_t **portsp; + mach_msg_type_number_t *portsCnt; +{ + vm_offset_t memory; + vm_size_t size; + ipc_port_t *ports; + int i; + + if (task == TASK_NULL) + return KERN_INVALID_ARGUMENT; + + size = (vm_size_t) (TASK_PORT_REGISTER_MAX * sizeof(ipc_port_t)); + + memory = kalloc(size); + if (memory == 0) + return KERN_RESOURCE_SHORTAGE; + + itk_lock(task); + if (task->itk_self == IP_NULL) { + itk_unlock(task); + + kfree(memory, size); + return KERN_INVALID_ARGUMENT; + } + + ports = (ipc_port_t *) memory; + + /* + * Clone port rights. Because kalloc'd memory + * is wired, we won't fault while holding the task lock. + */ + + for (i = 0; i < TASK_PORT_REGISTER_MAX; i++) + ports[i] = ipc_port_copy_send(task->itk_registered[i]); + + itk_unlock(task); + + *portsp = (mach_port_array_t) ports; + *portsCnt = TASK_PORT_REGISTER_MAX; + return KERN_SUCCESS; +} + +/* + * Routine: convert_port_to_task + * Purpose: + * Convert from a port to a task. + * Doesn't consume the port ref; produces a task ref, + * which may be null. + * Conditions: + * Nothing locked. + */ + +task_t +convert_port_to_task( + ipc_port_t port) +{ + task_t task = TASK_NULL; + + if (IP_VALID(port)) { + ip_lock(port); + if (ip_active(port) && + (ip_kotype(port) == IKOT_TASK)) { + task = (task_t) port->ip_kobject; + task_reference(task); + } + ip_unlock(port); + } + + return task; +} + +/* + * Routine: convert_port_to_space + * Purpose: + * Convert from a port to a space. + * Doesn't consume the port ref; produces a space ref, + * which may be null. + * Conditions: + * Nothing locked. + */ + +ipc_space_t +convert_port_to_space( + ipc_port_t port) +{ + ipc_space_t space = IS_NULL; + + if (IP_VALID(port)) { + ip_lock(port); + if (ip_active(port) && + (ip_kotype(port) == IKOT_TASK)) { + space = ((task_t) port->ip_kobject)->itk_space; + is_reference(space); + } + ip_unlock(port); + } + + return space; +} + +/* + * Routine: convert_port_to_map + * Purpose: + * Convert from a port to a map. + * Doesn't consume the port ref; produces a map ref, + * which may be null. + * Conditions: + * Nothing locked. + */ + +vm_map_t +convert_port_to_map(port) + ipc_port_t port; +{ + vm_map_t map = VM_MAP_NULL; + + if (IP_VALID(port)) { + ip_lock(port); + if (ip_active(port) && + (ip_kotype(port) == IKOT_TASK)) { + map = ((task_t) port->ip_kobject)->map; + vm_map_reference(map); + } + ip_unlock(port); + } + + return map; +} + +/* + * Routine: convert_port_to_thread + * Purpose: + * Convert from a port to a thread. 
+ * Doesn't consume the port ref; produces a thread ref, + * which may be null. + * Conditions: + * Nothing locked. + */ + +thread_t +convert_port_to_thread(port) + ipc_port_t port; +{ + thread_t thread = THREAD_NULL; + + if (IP_VALID(port)) { + ip_lock(port); + if (ip_active(port) && + (ip_kotype(port) == IKOT_THREAD)) { + thread = (thread_t) port->ip_kobject; + thread_reference(thread); + } + ip_unlock(port); + } + + return thread; +} + +/* + * Routine: convert_task_to_port + * Purpose: + * Convert from a task to a port. + * Consumes a task ref; produces a naked send right + * which may be invalid. + * Conditions: + * Nothing locked. + */ + +ipc_port_t +convert_task_to_port(task) + task_t task; +{ + ipc_port_t port; + + itk_lock(task); + if (task->itk_self != IP_NULL) + port = ipc_port_make_send(task->itk_self); + else + port = IP_NULL; + itk_unlock(task); + + task_deallocate(task); + return port; +} + +/* + * Routine: convert_thread_to_port + * Purpose: + * Convert from a thread to a port. + * Consumes a thread ref; produces a naked send right + * which may be invalid. + * Conditions: + * Nothing locked. + */ + +ipc_port_t +convert_thread_to_port(thread) + thread_t thread; +{ + ipc_port_t port; + + ith_lock(thread); + if (thread->ith_self != IP_NULL) + port = ipc_port_make_send(thread->ith_self); + else + port = IP_NULL; + ith_unlock(thread); + + thread_deallocate(thread); + return port; +} + +/* + * Routine: space_deallocate + * Purpose: + * Deallocate a space ref produced by convert_port_to_space. + * Conditions: + * Nothing locked. + */ + +void +space_deallocate(space) + ipc_space_t space; +{ + if (space != IS_NULL) + is_release(space); +} diff --git a/kern/ipc_tt.h b/kern/ipc_tt.h new file mode 100644 index 0000000..d53fb7c --- /dev/null +++ b/kern/ipc_tt.h @@ -0,0 +1,88 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. 
+ */ + +#ifndef _KERN_IPC_TT_H_ +#define _KERN_IPC_TT_H_ + +#include <mach/boolean.h> +#include <mach/port.h> + +extern void ipc_task_init(); +extern void ipc_task_enable(); +extern void ipc_task_disable(); +extern void ipc_task_terminate(); + +extern void ipc_thread_init(); +extern void ipc_thread_enable(); +extern void ipc_thread_disable(); +extern void ipc_thread_terminate(); + +extern struct ipc_port * +retrieve_task_self(/* task_t */); + +extern struct ipc_port * +retrieve_task_self_fast(/* task_t */); + +extern struct ipc_port * +retrieve_thread_self(/* thread_t */); + +extern struct ipc_port * +retrieve_thread_self_fast(/* thread_t */); + +extern struct ipc_port * +retrieve_task_exception(/* task_t */); + +extern struct ipc_port * +retrieve_thread_exception(/* thread_t */); + +extern struct task * +convert_port_to_task(/* struct ipc_port * */); + +extern struct ipc_port * +convert_task_to_port(/* task_t */); + +extern void +task_deallocate(/* task_t */); + +extern struct thread * +convert_port_to_thread(/* struct ipc_port * */); + +extern struct ipc_port * +convert_thread_to_port(/* thread_t */); + +extern void +thread_deallocate(/* thread_t */); + +extern struct vm_map * +convert_port_to_map(/* struct ipc_port * */); + +extern struct ipc_space * +convert_port_to_space(/* struct ipc_port * */); + +extern void +space_deallocate(/* ipc_space_t */); + +#endif _KERN_IPC_TT_H_ diff --git a/kern/kalloc.c b/kern/kalloc.c new file mode 100644 index 0000000..5390139 --- /dev/null +++ b/kern/kalloc.c @@ -0,0 +1,237 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: kern/kalloc.c + * Author: Avadis Tevanian, Jr. + * Date: 1985 + * + * General kernel memory allocator. This allocator is designed + * to be used by the kernel to manage dynamic memory fast. + */ + +#include <mach/machine/vm_types.h> +#include <mach/vm_param.h> + +#include <kern/zalloc.h> +#include <kern/kalloc.h> +#include <vm/vm_kern.h> +#include <vm/vm_object.h> +#include <vm/vm_map.h> + + + +vm_map_t kalloc_map; +vm_size_t kalloc_map_size = 8 * 1024 * 1024; +vm_size_t kalloc_max; + +/* + * All allocations of size less than kalloc_max are rounded to the + * next highest power of 2. This allocator is built on top of + * the zone allocator. A zone is created for each potential size + * that we are willing to get in small blocks. 
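+ *
+ * For example, a 200-byte request is rounded up to 256 bytes and
+ * satisfied from the "kalloc.256" zone, while a request of kalloc_max
+ * bytes or more bypasses the zones and is wired straight out of
+ * kalloc_map with kmem_alloc_wired.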
+ * + * We assume that kalloc_max is not greater than 64K; + * thus 16 is a safe array size for k_zone and k_zone_name. + */ + +int first_k_zone = -1; +struct zone *k_zone[16]; +static char *k_zone_name[16] = { + "kalloc.1", "kalloc.2", + "kalloc.4", "kalloc.8", + "kalloc.16", "kalloc.32", + "kalloc.64", "kalloc.128", + "kalloc.256", "kalloc.512", + "kalloc.1024", "kalloc.2048", + "kalloc.4096", "kalloc.8192", + "kalloc.16384", "kalloc.32768" +}; + +/* + * Max number of elements per zone. zinit rounds things up correctly + * Doing things this way permits each zone to have a different maximum size + * based on need, rather than just guessing; it also + * means its patchable in case you're wrong! + */ +unsigned long k_zone_max[16] = { + 1024, /* 1 Byte */ + 1024, /* 2 Byte */ + 1024, /* 4 Byte */ + 1024, /* 8 Byte */ + 1024, /* 16 Byte */ + 4096, /* 32 Byte */ + 4096, /* 64 Byte */ + 4096, /* 128 Byte */ + 4096, /* 256 Byte */ + 1024, /* 512 Byte */ + 1024, /* 1024 Byte */ + 1024, /* 2048 Byte */ + 1024, /* 4096 Byte */ + 4096, /* 8192 Byte */ + 64, /* 16384 Byte */ + 64, /* 32768 Byte */ +}; + +/* + * Initialize the memory allocator. This should be called only + * once on a system wide basis (i.e. first processor to get here + * does the initialization). + * + * This initializes all of the zones. + */ + +void kalloc_init() +{ + vm_offset_t min, max; + vm_size_t size; + register int i; + + kalloc_map = kmem_suballoc(kernel_map, &min, &max, + kalloc_map_size, FALSE); + + /* + * Ensure that zones up to size 8192 bytes exist. + * This is desirable because messages are allocated + * with kalloc, and messages up through size 8192 are common. + */ + + if (PAGE_SIZE < 16*1024) + kalloc_max = 16*1024; + else + kalloc_max = PAGE_SIZE; + + /* + * Allocate a zone for each size we are going to handle. + * We specify non-paged memory. + */ + for (i = 0, size = 1; size < kalloc_max; i++, size <<= 1) { + if (size < MINSIZE) { + k_zone[i] = 0; + continue; + } + if (size == MINSIZE) { + first_k_zone = i; + } + k_zone[i] = zinit(size, k_zone_max[i] * size, size, + size >= PAGE_SIZE ? ZONE_COLLECTABLE : 0, + k_zone_name[i]); + } +} + +vm_offset_t kalloc(size) + vm_size_t size; +{ + register int zindex; + register vm_size_t allocsize; + vm_offset_t addr; + + /* compute the size of the block that we will actually allocate */ + + allocsize = size; + if (size < kalloc_max) { + allocsize = MINSIZE; + zindex = first_k_zone; + while (allocsize < size) { + allocsize <<= 1; + zindex++; + } + } + + /* + * If our size is still small enough, check the queue for that size + * and allocate. + */ + + if (allocsize < kalloc_max) { + addr = zalloc(k_zone[zindex]); + } else { + if (kmem_alloc_wired(kalloc_map, &addr, allocsize) + != KERN_SUCCESS) + addr = 0; + } + return(addr); +} + +vm_offset_t kget(size) + vm_size_t size; +{ + register int zindex; + register vm_size_t allocsize; + vm_offset_t addr; + + /* compute the size of the block that we will actually allocate */ + + allocsize = size; + if (size < kalloc_max) { + allocsize = MINSIZE; + zindex = first_k_zone; + while (allocsize < size) { + allocsize <<= 1; + zindex++; + } + } + + /* + * If our size is still small enough, check the queue for that size + * and allocate. 
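+ *
+ * Unlike kalloc, kget is the non-blocking variant: zget is expected
+ * to hand back 0 rather than wait when the zone is empty, and
+ * requests too large for a zone are not supported at all.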
+ */ + + if (allocsize < kalloc_max) { + addr = zget(k_zone[zindex]); + } else { + /* This will never work, so we might as well panic */ + panic("kget"); + } + return(addr); +} + +void +kfree(data, size) + vm_offset_t data; + vm_size_t size; +{ + register int zindex; + register vm_size_t freesize; + + freesize = size; + if (size < kalloc_max) { + freesize = MINSIZE; + zindex = first_k_zone; + while (freesize < size) { + freesize <<= 1; + zindex++; + } + } + + if (freesize < kalloc_max) { + zfree(k_zone[zindex], data); + } else { + kmem_free(kalloc_map, data, freesize); + } +} diff --git a/kern/kalloc.h b/kern/kalloc.h new file mode 100644 index 0000000..f36e4dc --- /dev/null +++ b/kern/kalloc.h @@ -0,0 +1,40 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#ifndef _KERN_KALLOC_H_ +#define _KERN_KALLOC_H_ + +#include <mach/machine/vm_types.h> + +#define MINSIZE 16 + +extern vm_offset_t kalloc(); +extern vm_offset_t kget(); +extern void kfree(); + +extern void kalloc_init(); + +#endif _KERN_KALLOC_H_ diff --git a/kern/kern_types.h b/kern/kern_types.h new file mode 100644 index 0000000..f715cb1 --- /dev/null +++ b/kern/kern_types.h @@ -0,0 +1,70 @@ +/* + * Mach Operating System + * Copyright (c) 1992 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#ifndef _KERN_KERN_TYPES_H_ +#define _KERN_KERN_TYPES_H_ + +#include <mach/port.h> /* for mach_port_t */ + +/* + * Common kernel type declarations. + * These are handles to opaque data structures defined elsewhere. + * + * These types are recursively included in each other`s definitions. 
+ * This file exists to export the common declarations to each + * of the definitions, and to other files that need only the + * type declarations. + */ + +/* + * Task structure, from kern/task.h + */ +typedef struct task * task_t; +#define TASK_NULL ((task_t) 0) + +typedef mach_port_t * task_array_t; /* should be task_t * */ + +/* + * Thread structure, from kern/thread.h + */ +typedef struct thread * thread_t; +#define THREAD_NULL ((thread_t) 0) + +typedef mach_port_t * thread_array_t; /* should be thread_t * */ + +/* + * Processor structure, from kern/processor.h + */ +typedef struct processor * processor_t; +#define PROCESSOR_NULL ((processor_t) 0) + +/* + * Processor set structure, from kern/processor.h + */ +typedef struct processor_set * processor_set_t; +#define PROCESSOR_SET_NULL ((processor_set_t) 0) + +#endif /* _KERN_KERN_TYPES_H_ */ diff --git a/kern/lock.c b/kern/lock.c new file mode 100644 index 0000000..4d88153 --- /dev/null +++ b/kern/lock.c @@ -0,0 +1,637 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: kern/lock.c + * Author: Avadis Tevanian, Jr., Michael Wayne Young + * Date: 1985 + * + * Locking primitives implementation + */ + +#include <cpus.h> +#include <mach_kdb.h> + +#include <kern/lock.h> +#include <kern/thread.h> +#include <kern/sched_prim.h> +#if MACH_KDB +#include <machine/db_machdep.h> +#include <ddb/db_sym.h> +#endif + + +#if NCPUS > 1 + +/* + * Module: lock + * Function: + * Provide reader/writer sychronization. + * Implementation: + * Simple interlock on a bit. Readers first interlock, + * increment the reader count, then let go. Writers hold + * the interlock (thus preventing further readers), and + * wait for already-accepted readers to go away. + */ + +/* + * The simple-lock routines are the primitives out of which + * the lock package is built. The implementation is left + * to the machine-dependent code. + */ + +#ifdef notdef +/* + * A sample implementation of simple locks. + * assumes: + * boolean_t test_and_set(boolean_t *) + * indivisibly sets the boolean to TRUE + * and returns its old value + * and that setting a boolean to FALSE is indivisible. + */ +/* + * simple_lock_init initializes a simple lock. A simple lock + * may only be used for exclusive locks. 
+ */ + +void simple_lock_init(simple_lock_t l) +{ + *(boolean_t *)l = FALSE; +} + +void simple_lock(simple_lock_t l) +{ + while (test_and_set((boolean_t *)l)) + continue; +} + +void simple_unlock(simple_lock_t l) +{ + *(boolean_t *)l = FALSE; +} + +boolean_t simple_lock_try(simple_lock_t l) +{ + return (!test_and_set((boolean_t *)l)); +} +#endif /* notdef */ +#endif /* NCPUS > 1 */ + +#if NCPUS > 1 +int lock_wait_time = 100; +#else /* NCPUS > 1 */ + + /* + * It is silly to spin on a uni-processor as if we + * thought something magical would happen to the + * want_write bit while we are executing. + */ +int lock_wait_time = 0; +#endif /* NCPUS > 1 */ + +#if MACH_SLOCKS && NCPUS == 1 +/* + * This code does not protect simple_locks_taken and simple_locks_info. + * It works despite the fact that interrupt code does use simple locks. + * This is because interrupts use locks in a stack-like manner. + * Each interrupt releases all the locks it acquires, so the data + * structures end up in the same state after the interrupt as before. + * The only precaution necessary is that simple_locks_taken be + * incremented first and decremented last, so that interrupt handlers + * don't over-write active slots in simple_locks_info. + */ + +unsigned int simple_locks_taken = 0; + +#define NSLINFO 1000 /* maximum number of locks held */ + +struct simple_locks_info { + simple_lock_t l; + unsigned int ra; +} simple_locks_info[NSLINFO]; + +void check_simple_locks(void) +{ + assert(simple_locks_taken == 0); +} + +/* Need simple lock sanity checking code if simple locks are being + compiled in, and we are compiling for a uniprocessor. */ + +void simple_lock_init( + simple_lock_t l) +{ + l->lock_data = 0; +} + +void simple_lock( + simple_lock_t l) +{ + struct simple_locks_info *info; + + assert(l->lock_data == 0); + + l->lock_data = 1; + + info = &simple_locks_info[simple_locks_taken++]; + info->l = l; + /* XXX we want our return address, if possible */ +#ifdef i386 + info->ra = *((unsigned int *)&l - 1); +#endif /* i386 */ +} + +boolean_t simple_lock_try( + simple_lock_t l) +{ + struct simple_locks_info *info; + + if (l->lock_data != 0) + return FALSE; + + l->lock_data = 1; + + info = &simple_locks_info[simple_locks_taken++]; + info->l = l; + /* XXX we want our return address, if possible */ +#ifdef i386 + info->ra = *((unsigned int *)&l - 1); +#endif /* i386 */ + + return TRUE; +} + +void simple_unlock( + simple_lock_t l) +{ + assert(l->lock_data != 0); + + l->lock_data = 0; + + if (simple_locks_info[simple_locks_taken-1].l != l) { + unsigned int i = simple_locks_taken; + + /* out-of-order unlocking */ + + do + if (i == 0) + panic("simple_unlock"); + while (simple_locks_info[--i].l != l); + + simple_locks_info[i] = simple_locks_info[simple_locks_taken-1]; + } + simple_locks_taken--; +} + +#endif /* MACH_SLOCKS && NCPUS == 1 */ + +/* + * Routine: lock_init + * Function: + * Initialize a lock; required before use. + * Note that clients declare the "struct lock" + * variables and then initialize them, rather + * than getting a new one from this module. 
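+ *
+ *	Illustrative use (hypothetical client code):
+ *
+ *		lock_data_t	map_lock;
+ *
+ *		lock_init(&map_lock, TRUE);	(a sleepable lock)
+ *		lock_write(&map_lock);
+ *		... modify the protected data ...
+ *		lock_done(&map_lock);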
+ */ +void lock_init( + lock_t l, + boolean_t can_sleep) +{ + bzero((char *)l, sizeof(lock_data_t)); + simple_lock_init(&l->interlock); + l->want_write = FALSE; + l->want_upgrade = FALSE; + l->read_count = 0; + l->can_sleep = can_sleep; + l->thread = (struct thread *)-1; /* XXX */ + l->recursion_depth = 0; +} + +void lock_sleepable( + lock_t l, + boolean_t can_sleep) +{ + simple_lock(&l->interlock); + l->can_sleep = can_sleep; + simple_unlock(&l->interlock); +} + + +/* + * Sleep locks. These use the same data structure and algorithm + * as the spin locks, but the process sleeps while it is waiting + * for the lock. These work on uniprocessor systems. + */ + +void lock_write( + register lock_t l) +{ + register int i; + + check_simple_locks(); + simple_lock(&l->interlock); + + if (l->thread == current_thread()) { + /* + * Recursive lock. + */ + l->recursion_depth++; + simple_unlock(&l->interlock); + return; + } + + /* + * Try to acquire the want_write bit. + */ + while (l->want_write) { + if ((i = lock_wait_time) > 0) { + simple_unlock(&l->interlock); + while (--i > 0 && l->want_write) + continue; + simple_lock(&l->interlock); + } + + if (l->can_sleep && l->want_write) { + l->waiting = TRUE; + thread_sleep(l, + simple_lock_addr(l->interlock), FALSE); + simple_lock(&l->interlock); + } + } + l->want_write = TRUE; + + /* Wait for readers (and upgrades) to finish */ + + while ((l->read_count != 0) || l->want_upgrade) { + if ((i = lock_wait_time) > 0) { + simple_unlock(&l->interlock); + while (--i > 0 && (l->read_count != 0 || + l->want_upgrade)) + continue; + simple_lock(&l->interlock); + } + + if (l->can_sleep && (l->read_count != 0 || l->want_upgrade)) { + l->waiting = TRUE; + thread_sleep(l, + simple_lock_addr(l->interlock), FALSE); + simple_lock(&l->interlock); + } + } + simple_unlock(&l->interlock); +} + +void lock_done( + register lock_t l) +{ + simple_lock(&l->interlock); + + if (l->read_count != 0) + l->read_count--; + else + if (l->recursion_depth != 0) + l->recursion_depth--; + else + if (l->want_upgrade) + l->want_upgrade = FALSE; + else + l->want_write = FALSE; + + /* + * There is no reason to wakeup a waiting thread + * if the read-count is non-zero. Consider: + * we must be dropping a read lock + * threads are waiting only if one wants a write lock + * if there are still readers, they can't proceed + */ + + if (l->waiting && (l->read_count == 0)) { + l->waiting = FALSE; + thread_wakeup(l); + } + + simple_unlock(&l->interlock); +} + +void lock_read( + register lock_t l) +{ + register int i; + + check_simple_locks(); + simple_lock(&l->interlock); + + if (l->thread == current_thread()) { + /* + * Recursive lock. + */ + l->read_count++; + simple_unlock(&l->interlock); + return; + } + + while (l->want_write || l->want_upgrade) { + if ((i = lock_wait_time) > 0) { + simple_unlock(&l->interlock); + while (--i > 0 && (l->want_write || l->want_upgrade)) + continue; + simple_lock(&l->interlock); + } + + if (l->can_sleep && (l->want_write || l->want_upgrade)) { + l->waiting = TRUE; + thread_sleep(l, + simple_lock_addr(l->interlock), FALSE); + simple_lock(&l->interlock); + } + } + + l->read_count++; + simple_unlock(&l->interlock); +} + +/* + * Routine: lock_read_to_write + * Function: + * Improves a read-only lock to one with + * write permission. If another reader has + * already requested an upgrade to a write lock, + * no lock is held upon return. + * + * Returns TRUE if the upgrade *failed*. 
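+ *
+ *	Illustrative caller pattern (hypothetical):
+ *
+ *		lock_read(&map_lock);
+ *		... decide the data must change ...
+ *		if (lock_read_to_write(&map_lock)) {
+ *			lock_write(&map_lock);	(upgrade lost, lock dropped)
+ *			... re-examine the data ...
+ *		}
+ *		... make the change ...
+ *		lock_done(&map_lock);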
+ */ +boolean_t lock_read_to_write( + register lock_t l) +{ + register int i; + + check_simple_locks(); + simple_lock(&l->interlock); + + l->read_count--; + + if (l->thread == current_thread()) { + /* + * Recursive lock. + */ + l->recursion_depth++; + simple_unlock(&l->interlock); + return(FALSE); + } + + if (l->want_upgrade) { + /* + * Someone else has requested upgrade. + * Since we've released a read lock, wake + * him up. + */ + if (l->waiting && (l->read_count == 0)) { + l->waiting = FALSE; + thread_wakeup(l); + } + + simple_unlock(&l->interlock); + return TRUE; + } + + l->want_upgrade = TRUE; + + while (l->read_count != 0) { + if ((i = lock_wait_time) > 0) { + simple_unlock(&l->interlock); + while (--i > 0 && l->read_count != 0) + continue; + simple_lock(&l->interlock); + } + + if (l->can_sleep && l->read_count != 0) { + l->waiting = TRUE; + thread_sleep(l, + simple_lock_addr(l->interlock), FALSE); + simple_lock(&l->interlock); + } + } + + simple_unlock(&l->interlock); + return FALSE; +} + +void lock_write_to_read( + register lock_t l) +{ + simple_lock(&l->interlock); + + l->read_count++; + if (l->recursion_depth != 0) + l->recursion_depth--; + else + if (l->want_upgrade) + l->want_upgrade = FALSE; + else + l->want_write = FALSE; + + if (l->waiting) { + l->waiting = FALSE; + thread_wakeup(l); + } + + simple_unlock(&l->interlock); +} + + +/* + * Routine: lock_try_write + * Function: + * Tries to get a write lock. + * + * Returns FALSE if the lock is not held on return. + */ + +boolean_t lock_try_write( + register lock_t l) +{ + simple_lock(&l->interlock); + + if (l->thread == current_thread()) { + /* + * Recursive lock + */ + l->recursion_depth++; + simple_unlock(&l->interlock); + return TRUE; + } + + if (l->want_write || l->want_upgrade || l->read_count) { + /* + * Can't get lock. + */ + simple_unlock(&l->interlock); + return FALSE; + } + + /* + * Have lock. + */ + + l->want_write = TRUE; + simple_unlock(&l->interlock); + return TRUE; +} + +/* + * Routine: lock_try_read + * Function: + * Tries to get a read lock. + * + * Returns FALSE if the lock is not held on return. + */ + +boolean_t lock_try_read( + register lock_t l) +{ + simple_lock(&l->interlock); + + if (l->thread == current_thread()) { + /* + * Recursive lock + */ + l->read_count++; + simple_unlock(&l->interlock); + return TRUE; + } + + if (l->want_write || l->want_upgrade) { + simple_unlock(&l->interlock); + return FALSE; + } + + l->read_count++; + simple_unlock(&l->interlock); + return TRUE; +} + +/* + * Routine: lock_try_read_to_write + * Function: + * Improves a read-only lock to one with + * write permission. If another reader has + * already requested an upgrade to a write lock, + * the read lock is still held upon return. + * + * Returns FALSE if the upgrade *failed*. + */ +boolean_t lock_try_read_to_write( + register lock_t l) +{ + check_simple_locks(); + simple_lock(&l->interlock); + + if (l->thread == current_thread()) { + /* + * Recursive lock + */ + l->read_count--; + l->recursion_depth++; + simple_unlock(&l->interlock); + return TRUE; + } + + if (l->want_upgrade) { + simple_unlock(&l->interlock); + return FALSE; + } + l->want_upgrade = TRUE; + l->read_count--; + + while (l->read_count != 0) { + l->waiting = TRUE; + thread_sleep(l, + simple_lock_addr(l->interlock), FALSE); + simple_lock(&l->interlock); + } + + simple_unlock(&l->interlock); + return TRUE; +} + +/* + * Allow a process that has a lock for write to acquire it + * recursively (for read, write, or update). 
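+ *
+ *	Sketch of the intended pattern (hypothetical caller):
+ *
+ *		lock_write(&l);
+ *		lock_set_recursive(&l);
+ *		...	nested lock_read()/lock_write() calls by this
+ *			thread now just bump read_count or
+ *			recursion_depth instead of blocking ...
+ *		lock_clear_recursive(&l);
+ *		lock_done(&l);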
+ */ +void lock_set_recursive( + lock_t l) +{ + simple_lock(&l->interlock); + if (!l->want_write) { + panic("lock_set_recursive: don't have write lock"); + } + l->thread = current_thread(); + simple_unlock(&l->interlock); +} + +/* + * Prevent a lock from being re-acquired. + */ +void lock_clear_recursive( + lock_t l) +{ + simple_lock(&l->interlock); + if (l->thread != current_thread()) { + panic("lock_clear_recursive: wrong thread"); + } + if (l->recursion_depth == 0) + l->thread = (struct thread *)-1; /* XXX */ + simple_unlock(&l->interlock); +} + +#if MACH_KDB +#if MACH_SLOCKS && NCPUS == 1 +void db_show_all_slocks(void) +{ + int i; + struct simple_locks_info *info; + simple_lock_t l; + + for (i = 0; i < simple_locks_taken; i++) { + info = &simple_locks_info[i]; + db_printf("%d: ", i); + db_printsym(info->l, DB_STGY_ANY); +#if i386 + db_printf(" locked by "); + db_printsym(info->ra, DB_STGY_PROC); +#endif + db_printf("\n"); + } +} +#else /* MACH_SLOCKS && NCPUS == 1 */ +void db_show_all_slocks(void) +{ + db_printf("simple lock info not available\n"); +} +#endif /* MACH_SLOCKS && NCPUS == 1 */ +#endif /* MACH_KDB */ diff --git a/kern/lock.h b/kern/lock.h new file mode 100644 index 0000000..9be63c5 --- /dev/null +++ b/kern/lock.h @@ -0,0 +1,177 @@ +/* + * Mach Operating System + * Copyright (c) 1993-1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: kern/lock.h + * Author: Avadis Tevanian, Jr., Michael Wayne Young + * Date: 1985 + * + * Locking primitives definitions + */ + +#ifndef _KERN_LOCK_H_ +#define _KERN_LOCK_H_ + +#include <cpus.h> +#include <mach_ldebug.h> + +#include <mach/boolean.h> +#include <mach/machine/vm_types.h> + +#if NCPUS > 1 +#include <machine/lock.h>/*XXX*/ +#endif + +#define MACH_SLOCKS ((NCPUS > 1) || MACH_LDEBUG) + +/* + * A simple spin lock. + */ + +struct slock { + volatile natural_t lock_data; /* in general 1 bit is sufficient */ +}; + +typedef struct slock simple_lock_data_t; +typedef struct slock *simple_lock_t; + +#if MACH_SLOCKS +/* + * Use the locks. + */ + +#define decl_simple_lock_data(class,name) \ +class simple_lock_data_t name; + +#define simple_lock_addr(lock) (&(lock)) + +#if (NCPUS > 1) + +/* + * The single-CPU debugging routines are not valid + * on a multiprocessor. + */ +#define simple_lock_taken(lock) (1) /* always succeeds */ +#define check_simple_locks() + +#else /* NCPUS > 1 */ +/* + * Use our single-CPU locking test routines. 
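+ *
+ * (These are the sanity-checking versions defined in kern/lock.c:
+ * they record each lock held in simple_locks_info[] so that
+ * check_simple_locks() can assert that none are held and the
+ * kernel debugger's db_show_all_slocks() can list any that are.
+ * The locking semantics themselves are unchanged.)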
+ */ + +extern void simple_lock_init(simple_lock_t); +extern void simple_lock(simple_lock_t); +extern void simple_unlock(simple_lock_t); +extern boolean_t simple_lock_try(simple_lock_t); + +#define simple_lock_pause() +#define simple_lock_taken(lock) ((lock)->lock_data) + +extern void check_simple_locks(void); + +#endif /* NCPUS > 1 */ + +#else /* MACH_SLOCKS */ +/* + * Do not allocate storage for locks if not needed. + */ +#define decl_simple_lock_data(class,name) +#define simple_lock_addr(lock) ((simple_lock_t)0) + +/* + * No multiprocessor locking is necessary. + */ +#define simple_lock_init(l) +#define simple_lock(l) +#define simple_unlock(l) +#define simple_lock_try(l) (TRUE) /* always succeeds */ +#define simple_lock_taken(l) (1) /* always succeeds */ +#define check_simple_locks() +#define simple_lock_pause() + +#endif /* MACH_SLOCKS */ + + +#define decl_mutex_data(class,name) decl_simple_lock_data(class,name) +#define mutex_try(l) simple_lock_try(l) +#define mutex_lock(l) simple_lock(l) +#define mutex_unlock(l) simple_unlock(l) +#define mutex_init(l) simple_lock_init(l) + + +/* + * The general lock structure. Provides for multiple readers, + * upgrading from read to write, and sleeping until the lock + * can be gained. + * + * On some architectures, assembly language code in the 'inline' + * program fiddles the lock structures. It must be changed in + * concert with the structure layout. + * + * Only the "interlock" field is used for hardware exclusion; + * other fields are modified with normal instructions after + * acquiring the interlock bit. + */ +struct lock { + struct thread *thread; /* Thread that has lock, if + recursive locking allowed */ + unsigned int read_count:16, /* Number of accepted readers */ + /* boolean_t */ want_upgrade:1, /* Read-to-write upgrade waiting */ + /* boolean_t */ want_write:1, /* Writer is waiting, or + locked for write */ + /* boolean_t */ waiting:1, /* Someone is sleeping on lock */ + /* boolean_t */ can_sleep:1, /* Can attempts to lock go to sleep? */ + recursion_depth:12, /* Depth of recursion */ + :0; + decl_simple_lock_data(,interlock) + /* Hardware interlock field. + Last in the structure so that + field offsets are the same whether + or not it is present. */ +}; + +typedef struct lock lock_data_t; +typedef struct lock *lock_t; + +/* Sleep locks must work even if no multiprocessing */ + +extern void lock_init(lock_t, boolean_t); +extern void lock_sleepable(lock_t, boolean_t); +extern void lock_write(lock_t); +extern void lock_read(lock_t); +extern void lock_done(lock_t); +extern boolean_t lock_read_to_write(lock_t); +extern void lock_write_to_read(lock_t); +extern boolean_t lock_try_write(lock_t); +extern boolean_t lock_try_read(lock_t); +extern boolean_t lock_try_read_to_write(lock_t); + +#define lock_read_done(l) lock_done(l) +#define lock_write_done(l) lock_done(l) + +extern void lock_set_recursive(lock_t); +extern void lock_clear_recursive(lock_t); + +#endif /* _KERN_LOCK_H_ */ diff --git a/kern/lock_mon.c b/kern/lock_mon.c new file mode 100644 index 0000000..ef44329 --- /dev/null +++ b/kern/lock_mon.c @@ -0,0 +1,375 @@ +/* + * Mach Operating System + * Copyright (c) 1990 Carnegie-Mellon University + * Copyright (c) 1989 Carnegie-Mellon University + * All rights reserved. The CMU software License Agreement specifies + * the terms and conditions for use and redistribution. 
+ */ +/* + * Copyright 1990 by Open Software Foundation, + * Grenoble, FRANCE + * + * All Rights Reserved + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby granted, + * provided that the above copyright notice appears in all copies and + * that both the copyright notice and this permission notice appear in + * supporting documentation, and that the name of OSF or Open Software + * Foundation not be used in advertising or publicity pertaining to + * distribution of the software without specific, written prior + * permission. + * + * OSF DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, + * IN NO EVENT SHALL OSF BE LIABLE FOR ANY SPECIAL, INDIRECT, OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT, + * NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Support For MP Debugging + * if MACH_MP_DEBUG is on, we use alternate locking + * routines do detect dealocks + * Support for MP lock monitoring (MACH_LOCK_MON). + * Registers use of locks, contention. + * Depending on hardware also records time spent with locks held + */ + +#include <cpus.h> +#include <mach_mp_debug.h> +#include <mach_lock_mon.h> +#include <time_stamp.h> + +#include <sys/types.h> +#include <mach/machine/vm_types.h> +#include <mach/boolean.h> +#include <kern/thread.h> +#include <kern/lock.h> +#include <kern/time_stamp.h> + + +decl_simple_lock_data(extern , kdb_lock) +decl_simple_lock_data(extern , printf_lock) + +#if NCPUS > 1 && MACH_LOCK_MON + +#if TIME_STAMP +extern time_stamp_t time_stamp; +#else TIME_STAMP +typedef unsigned int time_stamp_t; +#define time_stamp 0 +#endif TIME_STAMP + +#define LOCK_INFO_MAX (1024*32) +#define LOCK_INFO_HASH_COUNT 1024 +#define LOCK_INFO_PER_BUCKET (LOCK_INFO_MAX/LOCK_INFO_HASH_COUNT) + + +#define HASH_LOCK(lock) ((long)lock>>5 & (LOCK_INFO_HASH_COUNT-1)) + +struct lock_info { + unsigned int success; + unsigned int fail; + unsigned int masked; + unsigned int stack; + time_stamp_t time; + decl_simple_lock_data(, *lock) + vm_offset_t caller; +}; + +struct lock_info_bucket { + struct lock_info info[LOCK_INFO_PER_BUCKET]; +}; + +struct lock_info_bucket lock_info[LOCK_INFO_HASH_COUNT]; +struct lock_info default_lock_info; +unsigned default_lock_stack = 0; + +extern int curr_ipl[]; + + + +struct lock_info * +locate_lock_info(lock) +decl_simple_lock_data(, **lock) +{ + struct lock_info *li = &(lock_info[HASH_LOCK(*lock)].info[0]); + register i; + register my_cpu = cpu_number(); + + for (i=0; i < LOCK_INFO_PER_BUCKET; i++, li++) + if (li->lock) { + if (li->lock == *lock) + return(li); + } else { + li->lock = *lock; + li->caller = *((vm_offset_t *)lock - 1); + return(li); + } + db_printf("out of lock_info slots\n"); + li = &default_lock_info; + return(li); +} + + +simple_lock(lock) +decl_simple_lock_data(, *lock) +{ + register struct lock_info *li = locate_lock_info(&lock); + register my_cpu = cpu_number(); + + if (current_thread()) + li->stack = current_thread()->lock_stack++; + if (curr_ipl[my_cpu]) + li->masked++; + if (_simple_lock_try(lock)) + li->success++; + else { + _simple_lock(lock); + li->fail++; + } + li->time = time_stamp - li->time; +} + +simple_lock_try(lock) +decl_simple_lock_data(, *lock) +{ + register struct lock_info *li = locate_lock_info(&lock); + register 
my_cpu = cpu_number(); + + if (curr_ipl[my_cpu]) + li->masked++; + if (_simple_lock_try(lock)) { + li->success++; + li->time = time_stamp - li->time; + if (current_thread()) + li->stack = current_thread()->lock_stack++; + return(1); + } else { + li->fail++; + return(0); + } +} + +simple_unlock(lock) +decl_simple_lock_data(, *lock) +{ + register time_stamp_t stamp = time_stamp; + register time_stamp_t *time = &locate_lock_info(&lock)->time; + register unsigned *lock_stack; + + *time = stamp - *time; + _simple_unlock(lock); + if (current_thread()) { + lock_stack = &current_thread()->lock_stack; + if (*lock_stack) + (*lock_stack)--; + } +} + +lip() { + lis(4, 1, 0); +} + +#define lock_info_sort lis + +unsigned scurval, ssum; +struct lock_info *sli; + +lock_info_sort(arg, abs, count) +{ + struct lock_info *li, mean; + int bucket = 0; + int i; + unsigned max_val; + unsigned old_val = (unsigned)-1; + struct lock_info *target_li = &lock_info[0].info[0]; + unsigned sum; + unsigned empty, total; + unsigned curval; + + printf("\nSUCCESS FAIL MASKED STACK TIME LOCK/CALLER\n"); + if (!count) + count = 8 ; + while (count && target_li) { + empty = LOCK_INFO_HASH_COUNT; + target_li = 0; + total = 0; + max_val = 0; + mean.success = 0; + mean.fail = 0; + mean.masked = 0; + mean.stack = 0; + mean.time = 0; + mean.lock = (simple_lock_data_t *) &lock_info; + mean.caller = (vm_offset_t) &lock_info; + for (bucket = 0; bucket < LOCK_INFO_HASH_COUNT; bucket++) { + li = &lock_info[bucket].info[0]; + if (li->lock) + empty--; + for (i= 0; i< LOCK_INFO_PER_BUCKET && li->lock; i++, li++) { + if (li->lock == &kdb_lock || li->lock == &printf_lock) + continue; + total++; + curval = *((int *)li + arg); + sum = li->success + li->fail; + if(!sum && !abs) + continue; + scurval = curval; + ssum = sum; + sli = li; + if (!abs) switch(arg) { + case 0: + break; + case 1: + case 2: + curval = (curval*100) / sum; + break; + case 3: + case 4: + curval = curval / sum; + break; + } + if (curval > max_val && curval < old_val) { + max_val = curval; + target_li = li; + } + if (curval == old_val && count != 0) { + print_lock_info(li); + count--; + } + mean.success += li->success; + mean.fail += li->fail; + mean.masked += li->masked; + mean.stack += li->stack; + mean.time += li->time; + } + } + if (target_li) + old_val = max_val; + } + db_printf("\n%d total locks, %d empty buckets", total, empty ); + if (default_lock_info.success) + db_printf(", default: %d", default_lock_info.success + default_lock_info.fail); + db_printf("\n"); + print_lock_info(&mean); +} + +#define lock_info_clear lic + +lock_info_clear() +{ + struct lock_info *li; + int bucket = 0; + int i; + for (bucket = 0; bucket < LOCK_INFO_HASH_COUNT; bucket++) { + li = &lock_info[bucket].info[0]; + for (i= 0; i< LOCK_INFO_PER_BUCKET; i++, li++) { + bzero(li, sizeof(struct lock_info)); + } + } + bzero(&default_lock_info, sizeof(struct lock_info)); +} + +print_lock_info(li) +struct lock_info *li; +{ + int off; + int sum = li->success + li->fail; + db_printf("%d %d/%d %d/%d %d/%d %d/%d ", li->success, + li->fail, (li->fail*100)/sum, + li->masked, (li->masked*100)/sum, + li->stack, li->stack/sum, + li->time, li->time/sum); + db_search_symbol(li->lock, 0, &off); + if (off < 1024) + db_printsym(li->lock, 0); + else { + db_printsym(li->caller, 0); + db_printf("(%X)", li->lock); + } + db_printf("\n"); +} + +#endif NCPUS > 1 && MACH_LOCK_MON + +#if TIME_STAMP + +/* + * Measure lock/unlock operations + */ + +time_lock(loops) +{ + decl_simple_lock_data(, lock) + register time_stamp_t stamp; +
register int i; + + + if (!loops) + loops = 1000; + simple_lock_init(&lock); + stamp = time_stamp; + for (i = 0; i < loops; i++) { + simple_lock(&lock); + simple_unlock(&lock); + } + stamp = time_stamp - stamp; + db_printf("%d stamps for simple_locks\n", stamp/loops); +#if MACH_LOCK_MON + stamp = time_stamp; + for (i = 0; i < loops; i++) { + _simple_lock(&lock); + _simple_unlock(&lock); + } + stamp = time_stamp - stamp; + db_printf("%d stamps for _simple_locks\n", stamp/loops); +#endif MACH_LOCK_MON +} +#endif TIME_STAMP + +#if MACH_MP_DEBUG + +/* + * Arrange in the lock routines to call the following + * routines. This way, when locks are free there is no performance + * penalty + */ + +void +retry_simple_lock(lock) +decl_simple_lock_data(, *lock) +{ + register count = 0; + + while(!simple_lock_try(lock)) + if (count++ > 1000000 && lock != &kdb_lock) { + if (lock == &printf_lock) + return; + db_printf("cpu %d looping on simple_lock(%x) called by %x\n", + cpu_number(), lock, *(((int *)&lock) -1)); + Debugger(); + count = 0; + } +} + +void +retry_bit_lock(index, addr) +{ + register count = 0; + + while(!bit_lock_try(index, addr)) + if (count++ > 1000000) { + db_printf("cpu %d looping on bit_lock(%x, %x) called by %x\n", + cpu_number(), index, addr, *(((int *)&index) -1)); + Debugger(); + count = 0; + } +} +#endif MACH_MP_DEBUG + + + diff --git a/kern/mach.srv b/kern/mach.srv new file mode 100644 index 0000000..3ed9259 --- /dev/null +++ b/kern/mach.srv @@ -0,0 +1,42 @@ +/* + * Copyright (c) 1994 The University of Utah and + * the Computer Systems Laboratory at the University of Utah (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software is hereby + * granted provided that (1) source code retains these copyright, permission, + * and disclaimer notices, and (2) redistributions including binaries + * reproduce the notices in supporting documentation, and (3) all advertising + * materials mentioning features or use of this software display the following + * acknowledgement: ``This product includes software developed by the + * Computer Systems Laboratory at the University of Utah.'' + * + * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS + * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF + * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * CSL requests users of this software to return to csl-dist@cs.utah.edu any + * improvements that they make and grant CSL redistribution rights. + * + * Author: Bryan Ford, University of Utah CSL + */ +/* This is a server presentation file. */ + +#define KERNEL_SERVER 1 + +#ifdef MIGRATING_THREADS +#define task_threads task_acts +#define thread_terminate act_terminate +#define thread_set_state act_set_state_immediate +#define thread_get_state act_get_state_immediate +#define thread_info act_thread_info +#define thread_suspend act_suspend +#define thread_resume act_resume +#define thread_abort act_abort +#define thread_set_special_port act_set_special_port +#define thread_get_special_port act_get_special_port +#endif /* MIGRATING_THREADS */ + +simport <kern/compat_xxx_defs.h>; /* for obsolete routines */ + +#include <mach/mach.defs> diff --git a/kern/mach4.srv b/kern/mach4.srv new file mode 100644 index 0000000..ead5484 --- /dev/null +++ b/kern/mach4.srv @@ -0,0 +1,32 @@ +/* + * Copyright (c) 1994 The University of Utah and + * the Computer Systems Laboratory (CSL). All rights reserved. 
+ * + * Permission to use, copy, modify and distribute this software is hereby + * granted provided that (1) source code retains these copyright, permission, + * and disclaimer notices, and (2) redistributions including binaries + * reproduce the notices in supporting documentation, and (3) all advertising + * materials mentioning features or use of this software display the following + * acknowledgement: ``This product includes software developed by the + * Computer Systems Laboratory at the University of Utah.'' + * + * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS + * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF + * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * CSL requests users of this software to return to csl-dist@cs.utah.edu any + * improvements that they make and grant CSL redistribution rights. + * + * Author: Bryan Ford, University of Utah CSL + */ +/* This is a server presentation file. */ + +#define KERNEL_SERVER 1 + +#ifdef MIGRATING_THREADS +#define thread_enable_pc_sampling act_enable_pc_sampling +#define thread_disable_pc_sampling act_disable_pc_sampling +#define thread_get_sampled_pcs act_get_sampled_pcs +#endif /* MIGRATING_THREADS */ + +#include <mach/mach4.defs> diff --git a/kern/mach_clock.c b/kern/mach_clock.c new file mode 100644 index 0000000..667b211 --- /dev/null +++ b/kern/mach_clock.c @@ -0,0 +1,569 @@ +/* + * Mach Operating System + * Copyright (c) 1994-1988 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: clock_prim.c + * Author: Avadis Tevanian, Jr. + * Date: 1986 + * + * Clock primitives. 
+ */ +#include <cpus.h> +#include <mach_pcsample.h> +#include <stat_time.h> + +#include <mach/boolean.h> +#include <mach/machine.h> +#include <mach/time_value.h> +#include <mach/vm_param.h> +#include <mach/vm_prot.h> +#include <kern/counters.h> +#include "cpu_number.h" +#include <kern/host.h> +#include <kern/lock.h> +#include <kern/mach_param.h> +#include <kern/processor.h> +#include <kern/sched.h> +#include <kern/sched_prim.h> +#include <kern/thread.h> +#include <kern/time_out.h> +#include <kern/time_stamp.h> +#include <vm/vm_kern.h> +#include <sys/time.h> +#include <machine/mach_param.h> /* HZ */ +#include <machine/machspl.h> + +#if MACH_PCSAMPLE +#include <kern/pc_sample.h> +#endif + + +void softclock(); /* forward */ + +int hz = HZ; /* number of ticks per second */ +int tick = (1000000 / HZ); /* number of usec per tick */ +time_value_t time = { 0, 0 }; /* time since bootup (uncorrected) */ +unsigned long elapsed_ticks = 0; /* ticks elapsed since bootup */ + +int timedelta = 0; +int tickdelta = 0; + +#if HZ > 500 +int tickadj = 1; /* can adjust HZ usecs per second */ +#else +int tickadj = 500 / HZ; /* can adjust 100 usecs per second */ +#endif +int bigadj = 1000000; /* adjust 10*tickadj if adjustment + > bigadj */ + +/* + * This update protocol, with a check value, allows + * do { + * secs = mtime->seconds; + * usecs = mtime->microseconds; + * } while (secs != mtime->check_seconds); + * to read the time correctly. (On a multiprocessor this assumes + * that processors see each other's writes in the correct order. + * We may have to insert fence operations.) + */ + +mapped_time_value_t *mtime = 0; + +#define update_mapped_time(time) \ +MACRO_BEGIN \ + if (mtime != 0) { \ + mtime->check_seconds = (time)->seconds; \ + mtime->microseconds = (time)->microseconds; \ + mtime->seconds = (time)->seconds; \ + } \ +MACRO_END + +decl_simple_lock_data(, timer_lock) /* lock for ... */ +timer_elt_data_t timer_head; /* ordered list of timeouts */ + /* (doubles as end-of-list) */ + +/* + * Handle clock interrupts. + * + * The clock interrupt is assumed to be called at a (more or less) + * constant rate. The rate must be identical on all CPUS (XXX - fix). + * + * Usec is the number of microseconds that have elapsed since the + * last clock tick. It may be constant or computed, depending on + * the accuracy of the hardware clock. + * + */ +void clock_interrupt(usec, usermode, basepri) + register int usec; /* microseconds per tick */ + boolean_t usermode; /* executing user code */ + boolean_t basepri; /* at base priority */ +{ + register int my_cpu = cpu_number(); + register thread_t thread = current_thread(); + + counter(c_clock_ticks++); + counter(c_threads_total += c_threads_current); + counter(c_stacks_total += c_stacks_current); + +#if STAT_TIME + /* + * Increment the thread time, if using + * statistical timing. + */ + if (usermode) { + timer_bump(&thread->user_timer, usec); + } + else { + timer_bump(&thread->system_timer, usec); + } +#endif STAT_TIME + + /* + * Increment the CPU time statistics. + */ + { + extern void thread_quantum_update(); /* in priority.c */ + register int state; + + if (usermode) + state = CPU_STATE_USER; + else if (!cpu_idle(my_cpu)) + state = CPU_STATE_SYSTEM; + else + state = CPU_STATE_IDLE; + + machine_slot[my_cpu].cpu_ticks[state]++; + + /* + * Adjust the thread's priority and check for + * quantum expiration. + */ + + thread_quantum_update(my_cpu, thread, 1, state); + } + +#if MACH_SAMPLE + /* + * Take a sample of pc for the user if required. 
+ * This had better be MP safe. It might be interesting + * to keep track of cpu in the sample. + */ + if (usermode) { + take_pc_sample_macro(thread, SAMPLED_PC_PERIODIC); + } +#endif /* MACH_PCSAMPLE */ + + /* + * Time-of-day and time-out list are updated only + * on the master CPU. + */ + if (my_cpu == master_cpu) { + + register spl_t s; + register timer_elt_t telt; + boolean_t needsoft = FALSE; + +#if TS_FORMAT == 1 + /* + * Increment the tick count for the timestamping routine. + */ + ts_tick_count++; +#endif TS_FORMAT == 1 + + /* + * Update the tick count since bootup, and handle + * timeouts. + */ + + s = splsched(); + simple_lock(&timer_lock); + + elapsed_ticks++; + + telt = (timer_elt_t)queue_first(&timer_head.chain); + if (telt->ticks <= elapsed_ticks) + needsoft = TRUE; + simple_unlock(&timer_lock); + splx(s); + + /* + * Increment the time-of-day clock. + */ + if (timedelta == 0) { + time_value_add_usec(&time, usec); + } + else { + register int delta; + + if (timedelta < 0) { + delta = usec - tickdelta; + timedelta += tickdelta; + } + else { + delta = usec + tickdelta; + timedelta -= tickdelta; + } + time_value_add_usec(&time, delta); + } + update_mapped_time(&time); + + /* + * Schedule soft-interupt for timeout if needed + */ + if (needsoft) { + if (basepri) { + (void) splsoftclock(); + softclock(); + } + else { + setsoftclock(); + } + } + } +} + +/* + * There is a nasty race between softclock and reset_timeout. + * For example, scheduling code looks at timer_set and calls + * reset_timeout, thinking the timer is set. However, softclock + * has already removed the timer but hasn't called thread_timeout + * yet. + * + * Interim solution: We initialize timers after pulling + * them out of the queue, so a race with reset_timeout won't + * hurt. The timeout functions (eg, thread_timeout, + * thread_depress_timeout) check timer_set/depress_priority + * to see if the timer has been cancelled and if so do nothing. + * + * This still isn't correct. For example, softclock pulls a + * timer off the queue, then thread_go resets timer_set (but + * reset_timeout does nothing), then thread_set_timeout puts the + * timer back on the queue and sets timer_set, then + * thread_timeout finally runs and clears timer_set, then + * thread_set_timeout tries to put the timer on the queue again + * and corrupts it. + */ + +void softclock() +{ + /* + * Handle timeouts. + */ + spl_t s; + register timer_elt_t telt; + register int (*fcn)(); + register char *param; + + while (TRUE) { + s = splsched(); + simple_lock(&timer_lock); + telt = (timer_elt_t) queue_first(&timer_head.chain); + if (telt->ticks > elapsed_ticks) { + simple_unlock(&timer_lock); + splx(s); + break; + } + fcn = telt->fcn; + param = telt->param; + + remqueue(&timer_head.chain, (queue_entry_t)telt); + telt->set = TELT_UNSET; + simple_unlock(&timer_lock); + splx(s); + + assert(fcn != 0); + (*fcn)(param); + } +} + +/* + * Set timeout. + * + * Parameters: + * telt timer element. Function and param are already set. + * interval time-out interval, in hz. 
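+ *
+ *	Illustrative use (the caller fills in the element first;
+ *	thread_timeout and the thread argument are just examples):
+ *
+ *		telt->fcn = thread_timeout;
+ *		telt->param = (char *) thread;
+ *		set_timeout(telt, (unsigned int) (5 * hz));	5 seconds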
+ */ +void set_timeout(telt, interval) + register timer_elt_t telt; /* already loaded */ + register unsigned int interval; +{ + spl_t s; + register timer_elt_t next; + + s = splsched(); + simple_lock(&timer_lock); + + interval += elapsed_ticks; + + for (next = (timer_elt_t)queue_first(&timer_head.chain); + ; + next = (timer_elt_t)queue_next((queue_entry_t)next)) { + + if (next->ticks > interval) + break; + } + telt->ticks = interval; + /* + * Insert new timer element before 'next' + * (after 'next'->prev) + */ + insque((queue_entry_t) telt, ((queue_entry_t)next)->prev); + telt->set = TELT_SET; + simple_unlock(&timer_lock); + splx(s); +} + +boolean_t reset_timeout(telt) + register timer_elt_t telt; +{ + spl_t s; + + s = splsched(); + simple_lock(&timer_lock); + if (telt->set) { + remqueue(&timer_head.chain, (queue_entry_t)telt); + telt->set = TELT_UNSET; + simple_unlock(&timer_lock); + splx(s); + return TRUE; + } + else { + simple_unlock(&timer_lock); + splx(s); + return FALSE; + } +} + +void init_timeout() +{ + simple_lock_init(&timer_lock); + queue_init(&timer_head.chain); + timer_head.ticks = ~0; /* MAXUINT - sentinel */ + + elapsed_ticks = 0; +} + +/* + * Read the time. + */ +kern_return_t +host_get_time(host, current_time) + host_t host; + time_value_t *current_time; /* OUT */ +{ + if (host == HOST_NULL) + return(KERN_INVALID_HOST); + + do { + current_time->seconds = mtime->seconds; + current_time->microseconds = mtime->microseconds; + } while (current_time->seconds != mtime->check_seconds); + + return (KERN_SUCCESS); +} + +/* + * Set the time. Only available to privileged users. + */ +kern_return_t +host_set_time(host, new_time) + host_t host; + time_value_t new_time; +{ + spl_t s; + + if (host == HOST_NULL) + return(KERN_INVALID_HOST); + +#if NCPUS > 1 + /* + * Switch to the master CPU to synchronize correctly. + */ + thread_bind(current_thread(), master_processor); + if (current_processor() != master_processor) + thread_block((void (*)) 0); +#endif NCPUS > 1 + + s = splhigh(); + time = new_time; + update_mapped_time(&time); + resettodr(); + splx(s); + +#if NCPUS > 1 + /* + * Switch off the master CPU. + */ + thread_bind(current_thread(), PROCESSOR_NULL); +#endif NCPUS > 1 + + return (KERN_SUCCESS); +} + +/* + * Adjust the time gradually. 
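+ *
+ *	The delta is folded into the clock tickdelta microseconds per
+ *	tick (10*tickadj per tick if the request exceeds bigadj).
+ *	Purely as an illustration: with HZ = 100, tickadj is 5, so a
+ *	one-second adjustment is applied at about 500 usec per second
+ *	of real time and takes roughly half an hour to complete.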
+ */ +kern_return_t +host_adjust_time(host, new_adjustment, old_adjustment) + host_t host; + time_value_t new_adjustment; + time_value_t *old_adjustment; /* OUT */ +{ + time_value_t oadj; + unsigned int ndelta; + spl_t s; + + if (host == HOST_NULL) + return (KERN_INVALID_HOST); + + ndelta = new_adjustment.seconds * 1000000 + + new_adjustment.microseconds; + +#if NCPUS > 1 + thread_bind(current_thread(), master_processor); + if (current_processor() != master_processor) + thread_block((void (*)) 0); +#endif NCPUS > 1 + + s = splclock(); + + oadj.seconds = timedelta / 1000000; + oadj.microseconds = timedelta % 1000000; + + if (timedelta == 0) { + if (ndelta > bigadj) + tickdelta = 10 * tickadj; + else + tickdelta = tickadj; + } + if (ndelta % tickdelta) + ndelta = ndelta / tickdelta * tickdelta; + + timedelta = ndelta; + + splx(s); +#if NCPUS > 1 + thread_bind(current_thread(), PROCESSOR_NULL); +#endif NCPUS > 1 + + *old_adjustment = oadj; + + return (KERN_SUCCESS); +} + +void mapable_time_init() +{ + if (kmem_alloc_wired(kernel_map, (vm_offset_t *) &mtime, PAGE_SIZE) + != KERN_SUCCESS) + panic("mapable_time_init"); + bzero((char *)mtime, PAGE_SIZE); + update_mapped_time(&time); +} + +int timeopen() +{ + return(0); +} +int timeclose() +{ + return(0); +} + +/* + * Compatibility for device drivers. + * New code should use set_timeout/reset_timeout and private timers. + * These code can't use a zone to allocate timers, because + * it can be called from interrupt handlers. + */ + +#define NTIMERS 20 + +timer_elt_data_t timeout_timers[NTIMERS]; + +/* + * Set timeout. + * + * fcn: function to call + * param: parameter to pass to function + * interval: timeout interval, in hz. + */ +void timeout(fcn, param, interval) + int (*fcn)(/* char * param */); + char * param; + int interval; +{ + spl_t s; + register timer_elt_t elt; + + s = splsched(); + simple_lock(&timer_lock); + for (elt = &timeout_timers[0]; elt < &timeout_timers[NTIMERS]; elt++) + if (elt->set == TELT_UNSET) + break; + if (elt == &timeout_timers[NTIMERS]) + panic("timeout"); + elt->fcn = fcn; + elt->param = param; + elt->set = TELT_ALLOC; + simple_unlock(&timer_lock); + splx(s); + + set_timeout(elt, (unsigned int)interval); +} + +/* + * Returns a boolean indicating whether the timeout element was found + * and removed. + */ +boolean_t untimeout(fcn, param) + register int (*fcn)(); + register char * param; +{ + spl_t s; + register timer_elt_t elt; + + s = splsched(); + simple_lock(&timer_lock); + queue_iterate(&timer_head.chain, elt, timer_elt_t, chain) { + + if ((fcn == elt->fcn) && (param == elt->param)) { + /* + * Found it. + */ + remqueue(&timer_head.chain, (queue_entry_t)elt); + elt->set = TELT_UNSET; + + simple_unlock(&timer_lock); + splx(s); + return (TRUE); + } + } + simple_unlock(&timer_lock); + splx(s); + return (FALSE); +} diff --git a/kern/mach_debug.srv b/kern/mach_debug.srv new file mode 100644 index 0000000..c78b9a4 --- /dev/null +++ b/kern/mach_debug.srv @@ -0,0 +1,26 @@ +/* + * Copyright (c) 1994 The University of Utah and + * the Computer Systems Laboratory (CSL). All rights reserved. 
+ * + * Permission to use, copy, modify and distribute this software is hereby + * granted provided that (1) source code retains these copyright, permission, + * and disclaimer notices, and (2) redistributions including binaries + * reproduce the notices in supporting documentation, and (3) all advertising + * materials mentioning features or use of this software display the following + * acknowledgement: ``This product includes software developed by the + * Computer Systems Laboratory at the University of Utah.'' + * + * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS + * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF + * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * CSL requests users of this software to return to csl-dist@cs.utah.edu any + * improvements that they make and grant CSL redistribution rights. + * + * Author: Bryan Ford, University of Utah CSL + */ +/* This is a server presentation file. */ + +#define KERNEL_SERVER 1 + +#include <mach_debug/mach_debug.defs> diff --git a/kern/mach_factor.c b/kern/mach_factor.c new file mode 100644 index 0000000..1a17213 --- /dev/null +++ b/kern/mach_factor.c @@ -0,0 +1,153 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: kern/mach_factor.c + * Author: Avadis Tevanian, Jr. + * Date: 1986 + * + * Compute the Mach Factor. + */ + +#include <cpus.h> + +#include <mach/machine.h> +#include <mach/processor_info.h> +#include <kern/sched.h> +#include <kern/processor.h> +#include <kern/time_out.h> +#if MACH_KERNEL +#include <mach/kern_return.h> +#include <mach/port.h> +#endif MACH_KERNEL + + +long avenrun[3] = {0, 0, 0}; +long mach_factor[3] = {0, 0, 0}; + +/* + * Values are scaled by LOAD_SCALE, defined in processor_info.h + */ +static long fract[3] = { + 800, /* (4.0/5.0) 5 second average */ + 966, /* (29.0/30.0) 30 second average */ + 983, /* (59.0/60.) 1 minute average */ +}; + +void compute_mach_factor() +{ + register processor_set_t pset; + register processor_t processor; + register int ncpus; + register int nthreads; + register long factor_now; + register long average_now; + register long load_now; + + simple_lock(&all_psets_lock); + pset = (processor_set_t) queue_first(&all_psets); + while (!queue_end(&all_psets, (queue_entry_t)pset)) { + + /* + * If no processors, this pset is in suspended animation. + * No load calculations are performed. + */ + pset_lock(pset); + if((ncpus = pset->processor_count) > 0) { + + /* + * Count number of threads. 
+ */ + nthreads = pset->runq.count; + processor = (processor_t) queue_first(&pset->processors); + while (!queue_end(&pset->processors, + (queue_entry_t)processor)) { + nthreads += processor->runq.count; + processor = + (processor_t) queue_next(&processor->processors); + } + + /* + * account for threads on cpus. + */ + nthreads += ncpus - pset->idle_count; + + /* + * The current thread (running this calculation) + * doesn't count; it's always in the default pset. + */ + if (pset == &default_pset) + nthreads -= 1; + + if (nthreads > ncpus) { + factor_now = (ncpus * LOAD_SCALE) / (nthreads + 1); + load_now = (nthreads << SCHED_SHIFT) / ncpus; + } + else { + factor_now = (ncpus - nthreads) * LOAD_SCALE; + load_now = SCHED_SCALE; + } + + /* + * Load average and mach factor calculations for + * those that ask about these things. + */ + + average_now = nthreads * LOAD_SCALE; + + pset->mach_factor = + ((pset->mach_factor << 2) + factor_now)/5; + pset->load_average = + ((pset->load_average << 2) + average_now)/5; + + /* + * And some ugly stuff to keep w happy. + */ + if (pset == &default_pset) { + register int i; + + for (i = 0; i < 3; i++) { + mach_factor[i] = ( (mach_factor[i]*fract[i]) + + (factor_now*(LOAD_SCALE-fract[i])) ) + / LOAD_SCALE; + avenrun[i] = ( (avenrun[i]*fract[i]) + + (average_now*(LOAD_SCALE-fract[i])) ) + / LOAD_SCALE; + } + } + + /* + * sched_load is the only thing used by scheduler. + * It is always at least 1 (i.e. SCHED_SCALE). + */ + pset->sched_load = (pset->sched_load + load_now) >> 1; + } + + pset_unlock(pset); + pset = (processor_set_t) queue_next(&pset->all_psets); + } + + simple_unlock(&all_psets_lock); +} diff --git a/kern/mach_host.srv b/kern/mach_host.srv new file mode 100644 index 0000000..30d78db --- /dev/null +++ b/kern/mach_host.srv @@ -0,0 +1,39 @@ +/* + * Copyright (c) 1994 The University of Utah and + * the Computer Systems Laboratory (CSL). All rights reserved. + * + * Permission to use, copy, modify and distribute this software is hereby + * granted provided that (1) source code retains these copyright, permission, + * and disclaimer notices, and (2) redistributions including binaries + * reproduce the notices in supporting documentation, and (3) all advertising + * materials mentioning features or use of this software display the following + * acknowledgement: ``This product includes software developed by the + * Computer Systems Laboratory at the University of Utah.'' + * + * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS + * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF + * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * CSL requests users of this software to return to csl-dist@cs.utah.edu any + * improvements that they make and grant CSL redistribution rights. + * + * Author: Bryan Ford, University of Utah CSL + */ +/* This is a server presentation file. 
*/ + +#define KERNEL_SERVER 1 + +simport <kern/compat_xxx_defs.h>; /* for obsolete routines */ + +#ifdef MIGRATING_THREADS +#define thread_assign act_thread_assign +#define thread_assign_default act_thread_assign_default +#define thread_get_assignment act_thread_get_assignment +#define thread_priority act_thread_priority +#define thread_max_priority act_thread_max_priority +#define thread_policy act_thread_policy +#define thread_depress_abort act_thread_depress_abort +#define thread_wire act_thread_wire +#endif /* MIGRATING_THREADS */ + +#include <mach/mach_host.defs> diff --git a/kern/mach_param.h b/kern/mach_param.h new file mode 100644 index 0000000..5fc2063 --- /dev/null +++ b/kern/mach_param.h @@ -0,0 +1,67 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: kern/mach_param.h + * Author: Avadis Tevanian, Jr., Michael Wayne Young + * Date: 1986 + * + * Mach system sizing parameters + * + */ + +#ifndef _KERN_MACH_PARAM_H_ +#define _KERN_MACH_PARAM_H_ + +#define THREAD_MAX 1024 /* Max number of threads */ +#define THREAD_CHUNK 64 /* Allocation chunk */ + +#define TASK_MAX 1024 /* Max number of tasks */ +#define TASK_CHUNK 64 /* Allocation chunk */ + +#define ACT_MAX 1024 /* Max number of acts */ +#define ACT_CHUNK 64 /* Allocation chunk */ + +#define ACTPOOL_MAX 1024 +#define ACTPOOL_CHUNK 64 + +#define PORT_MAX ((TASK_MAX * 3 + THREAD_MAX) /* kernel */ \ + + (THREAD_MAX * 2) /* user */ \ + + 40000) /* slop for objects */ + /* Number of ports, system-wide */ + +#define SET_MAX (TASK_MAX + THREAD_MAX + 200) + /* Max number of port sets */ + +#define ITE_MAX (1 << 16) /* Max number of splay tree entries */ + +#define SPACE_MAX (TASK_MAX + 5) /* Max number of IPC spaces */ + +#define IMAR_MAX (1 << 10) /* Max number of msg-accepted reqs */ + +#endif _KERN_MACH_PARAM_H_ diff --git a/kern/machine.c b/kern/machine.c new file mode 100644 index 0000000..fef541e --- /dev/null +++ b/kern/machine.c @@ -0,0 +1,765 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. 
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: kern/machine.c + * Author: Avadis Tevanian, Jr. + * Date: 1987 + * + * Support for machine independent machine abstraction. + */ + +#include <norma_ether.h> +#include <cpus.h> +#include <mach_host.h> + +#include <mach/boolean.h> +#include <mach/kern_return.h> +#include <mach/mach_types.h> +#include <mach/machine.h> +#include <mach/host_info.h> +#include <kern/counters.h> +#include <kern/ipc_host.h> +#include <kern/host.h> +#include <kern/lock.h> +#include <kern/processor.h> +#include <kern/queue.h> +#include <kern/sched.h> +#include <kern/task.h> +#include <kern/thread.h> +#include <machine/machspl.h> /* for splsched */ +#include <sys/reboot.h> + + + +/* + * Exported variables: + */ + +struct machine_info machine_info; +struct machine_slot machine_slot[NCPUS]; + +queue_head_t action_queue; /* assign/shutdown queue */ +decl_simple_lock_data(,action_lock); + +/* + * xxx_host_info: + * + * Return the host_info structure. This routine is exported to the + * user level. + */ +kern_return_t xxx_host_info(task, info) + task_t task; + machine_info_t info; +{ +#ifdef lint + task++; +#endif /* lint */ + *info = machine_info; + return(KERN_SUCCESS); +} + +/* + * xxx_slot_info: + * + * Return the slot_info structure for the specified slot. This routine + * is exported to the user level. + */ +kern_return_t xxx_slot_info(task, slot, info) + task_t task; + int slot; + machine_slot_t info; +{ +#ifdef lint + task++; +#endif /* lint */ + if ((slot < 0) || (slot >= NCPUS)) + return(KERN_INVALID_ARGUMENT); + *info = machine_slot[slot]; + return(KERN_SUCCESS); +} + +/* + * xxx_cpu_control: + * + * Support for user control of cpus. The user indicates which cpu + * he is interested in, and whether or not that cpu should be running. + */ +kern_return_t xxx_cpu_control(task, cpu, runnable) + task_t task; + int cpu; + boolean_t runnable; +{ +#ifdef lint + task++; cpu++; runnable++; +#endif /* lint */ + return(KERN_FAILURE); +} + +/* + * cpu_up: + * + * Flag specified cpu as up and running. Called when a processor comes + * online. 
+ */ +void cpu_up(cpu) + int cpu; +{ + register struct machine_slot *ms; + register processor_t processor; + register spl_t s; + + processor = cpu_to_processor(cpu); + pset_lock(&default_pset); + s = splsched(); + processor_lock(processor); +#if NCPUS > 1 + init_ast_check(processor); +#endif /* NCPUS > 1 */ + ms = &machine_slot[cpu]; + ms->running = TRUE; + machine_info.avail_cpus++; + pset_add_processor(&default_pset, processor); + processor->state = PROCESSOR_RUNNING; + processor_unlock(processor); + splx(s); + pset_unlock(&default_pset); +} + +/* + * cpu_down: + * + * Flag specified cpu as down. Called when a processor is about to + * go offline. + */ +void cpu_down(cpu) + int cpu; +{ + register struct machine_slot *ms; + register processor_t processor; + register spl_t s; + + s = splsched(); + processor = cpu_to_processor(cpu); + processor_lock(processor); + ms = &machine_slot[cpu]; + ms->running = FALSE; + machine_info.avail_cpus--; + /* + * processor has already been removed from pset. + */ + processor->processor_set_next = PROCESSOR_SET_NULL; + processor->state = PROCESSOR_OFF_LINE; + processor_unlock(processor); + splx(s); +} + +kern_return_t +host_reboot(host, options) + host_t host; + int options; +{ + if (host == HOST_NULL) + return (KERN_INVALID_HOST); + + if (options & RB_DEBUGGER) { + extern void Debugger(); + Debugger("Debugger"); + } else { +#ifdef parisc +/* XXX this could be made common */ + halt_all_cpus(options); +#else + halt_all_cpus(!(options & RB_HALT)); +#endif + } + return (KERN_SUCCESS); +} + +#if NCPUS > 1 +/* + * processor_request_action - common internals of processor_assign + * and processor_shutdown. If new_pset is null, this is + * a shutdown, else it's an assign and caller must donate + * a reference. + */ +void +processor_request_action(processor, new_pset) +processor_t processor; +processor_set_t new_pset; +{ + register processor_set_t pset; + + /* + * Processor must be in a processor set. Must lock its idle lock to + * get at processor state. + */ + pset = processor->processor_set; + simple_lock(&pset->idle_lock); + + /* + * If the processor is dispatching, let it finish - it will set its + * state to running very soon. + */ + while (*(volatile int *)&processor->state == PROCESSOR_DISPATCHING) + continue; + + /* + * Now lock the action queue and do the dirty work. + */ + simple_lock(&action_lock); + + switch (processor->state) { + case PROCESSOR_IDLE: + /* + * Remove from idle queue. + */ + queue_remove(&pset->idle_queue, processor, processor_t, + processor_queue); + pset->idle_count--; + + /* fall through ... */ + case PROCESSOR_RUNNING: + /* + * Put it on the action queue. + */ + queue_enter(&action_queue, processor, processor_t, + processor_queue); + + /* fall through ... */ + case PROCESSOR_ASSIGN: + /* + * And ask the action_thread to do the work. + */ + + if (new_pset == PROCESSOR_SET_NULL) { + processor->state = PROCESSOR_SHUTDOWN; + } + else { + assert(processor->state != PROCESSOR_ASSIGN); + processor->state = PROCESSOR_ASSIGN; + processor->processor_set_next = new_pset; + } + break; + + default: + printf("state: %d\n", processor->state); + panic("processor_request_action: bad state"); + } + simple_unlock(&action_lock); + simple_unlock(&pset->idle_lock); + + thread_wakeup((event_t)&action_queue); +} + +#if MACH_HOST +/* + * processor_assign() changes the processor set that a processor is + * assigned to. Any previous assignment in progress is overridden. + * Synchronizes with assignment completion if wait is TRUE. 
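+ *
+ *	E.g. (illustrative): processor_assign(processor, new_pset, TRUE)
+ *	normally returns only after the reassignment has completed,
+ *	while wait == FALSE just hands the request to the action
+ *	thread and returns immediately.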
+ */ +kern_return_t +processor_assign(processor, new_pset, wait) +processor_t processor; +processor_set_t new_pset; +boolean_t wait; +{ + spl_t s; + + /* + * Check for null arguments. + * XXX Can't assign master processor. + */ + if (processor == PROCESSOR_NULL || new_pset == PROCESSOR_SET_NULL || + processor == master_processor) { + return(KERN_INVALID_ARGUMENT); + } + + /* + * Get pset reference to donate to processor_request_action. + */ + pset_reference(new_pset); + + /* + * Check processor status. + * If shutdown or being shutdown, can`t reassign. + * If being assigned, wait for assignment to finish. + */ +Retry: + s = splsched(); + processor_lock(processor); + if(processor->state == PROCESSOR_OFF_LINE || + processor->state == PROCESSOR_SHUTDOWN) { + /* + * Already shutdown or being shutdown -- Can't reassign. + */ + processor_unlock(processor); + (void) splx(s); + pset_deallocate(new_pset); + return(KERN_FAILURE); + } + + if (processor->state == PROCESSOR_ASSIGN) { + assert_wait((event_t) processor, TRUE); + processor_unlock(processor); + splx(s); + thread_block((void(*)()) 0); + goto Retry; + } + + /* + * Avoid work if processor is already in this processor set. + */ + if (processor->processor_set == new_pset) { + processor_unlock(processor); + (void) splx(s); + /* clean up dangling ref */ + pset_deallocate(new_pset); + return(KERN_SUCCESS); + } + + /* + * OK to start processor assignment. + */ + processor_request_action(processor, new_pset); + + /* + * Synchronization with completion. + */ + if (wait) { + while (processor->state == PROCESSOR_ASSIGN || + processor->state == PROCESSOR_SHUTDOWN) { + assert_wait((event_t)processor, TRUE); + processor_unlock(processor); + splx(s); + thread_block((void (*)()) 0); + s = splsched(); + processor_lock(processor); + } + } + processor_unlock(processor); + splx(s); + + return(KERN_SUCCESS); +} + +#else /* MACH_HOST */ + +kern_return_t +processor_assign(processor, new_pset, wait) +processor_t processor; +processor_set_t new_pset; +boolean_t wait; +{ +#ifdef lint + processor++; new_pset++; wait++; +#endif + return KERN_FAILURE; +} + +#endif /* MACH_HOST */ + +/* + * processor_shutdown() queues a processor up for shutdown. + * Any assignment in progress is overriden. It does not synchronize + * with the shutdown (can be called from interrupt level). + */ +kern_return_t +processor_shutdown(processor) +processor_t processor; +{ + spl_t s; + + if (processor == PROCESSOR_NULL) + return KERN_INVALID_ARGUMENT; + + s = splsched(); + processor_lock(processor); + if(processor->state == PROCESSOR_OFF_LINE || + processor->state == PROCESSOR_SHUTDOWN) { + /* + * Already shutdown or being shutdown -- nothing to do. + */ + processor_unlock(processor); + splx(s); + return(KERN_SUCCESS); + } + + processor_request_action(processor, PROCESSOR_SET_NULL); + processor_unlock(processor); + splx(s); + + return(KERN_SUCCESS); +} + +/* + * action_thread() shuts down processors or changes their assignment. 
+ */ +void processor_doaction(); /* forward */ + +void action_thread_continue() +{ + register processor_t processor; + register spl_t s; + + while (TRUE) { + s = splsched(); + simple_lock(&action_lock); + while ( !queue_empty(&action_queue)) { + processor = (processor_t) queue_first(&action_queue); + queue_remove(&action_queue, processor, processor_t, + processor_queue); + simple_unlock(&action_lock); + (void) splx(s); + + processor_doaction(processor); + + s = splsched(); + simple_lock(&action_lock); + } + + assert_wait((event_t) &action_queue, FALSE); + simple_unlock(&action_lock); + (void) splx(s); + counter(c_action_thread_block++); + thread_block(action_thread_continue); + } +} + +void action_thread() +{ + action_thread_continue(); + /*NOTREACHED*/ +} + +/* + * processor_doaction actually does the shutdown. The trick here + * is to schedule ourselves onto a cpu and then save our + * context back into the runqs before taking out the cpu. + */ +#ifdef __GNUC__ +__volatile__ +#endif +void processor_doshutdown(); /* forward */ + +void processor_doaction(processor) +register processor_t processor; +{ + thread_t this_thread; + spl_t s; + register processor_set_t pset; +#if MACH_HOST + register processor_set_t new_pset; + register thread_t thread; + register thread_t prev_thread = THREAD_NULL; + boolean_t have_pset_ref = FALSE; +#endif /* MACH_HOST */ + + /* + * Get onto the processor to shutdown + */ + this_thread = current_thread(); + thread_bind(this_thread, processor); + thread_block((void (*)()) 0); + + pset = processor->processor_set; +#if MACH_HOST + /* + * If this is the last processor in the processor_set, + * stop all the threads first. + */ + pset_lock(pset); + if (pset->processor_count == 1) { + /* + * First suspend all of them. + */ + queue_iterate(&pset->threads, thread, thread_t, pset_threads) { + thread_hold(thread); + } + pset->empty = TRUE; + /* + * Now actually stop them. Need a pset reference. + */ + pset->ref_count++; + have_pset_ref = TRUE; + +Restart_thread: + prev_thread = THREAD_NULL; + queue_iterate(&pset->threads, thread, thread_t, pset_threads) { + thread_reference(thread); + pset_unlock(pset); + if (prev_thread != THREAD_NULL) + thread_deallocate(prev_thread); + + /* + * Only wait for threads still in the pset. + */ + thread_freeze(thread); + if (thread->processor_set != pset) { + /* + * It got away - start over. + */ + thread_unfreeze(thread); + thread_deallocate(thread); + pset_lock(pset); + goto Restart_thread; + } + + (void) thread_dowait(thread, TRUE); + prev_thread = thread; + pset_lock(pset); + thread_unfreeze(prev_thread); + } + } + pset_unlock(pset); + + /* + * At this point, it is ok to remove the processor from the pset. + * We can use processor->processor_set_next without locking the + * processor, since it cannot change while processor->state is + * PROCESSOR_ASSIGN or PROCESSOR_SHUTDOWN. + */ + + new_pset = processor->processor_set_next; + +Restart_pset: + if (new_pset) { + /* + * Reassigning processor. + */ + + if ((integer_t) pset < (integer_t) new_pset) { + pset_lock(pset); + pset_lock(new_pset); + } + else { + pset_lock(new_pset); + pset_lock(pset); + } + if (!(new_pset->active)) { + pset_unlock(new_pset); + pset_unlock(pset); + pset_deallocate(new_pset); + new_pset = &default_pset; + pset_reference(new_pset); + goto Restart_pset; + } + + /* + * Handle remove last / assign first race. + * Only happens if there is more than one action thread. 
+ */ + while (new_pset->empty && new_pset->processor_count > 0) { + pset_unlock(new_pset); + pset_unlock(pset); + while (*(volatile boolean_t *)&new_pset->empty && + *(volatile int *)&new_pset->processor_count > 0) + /* spin */; + goto Restart_pset; + } + + /* + * Lock the processor. new_pset should not have changed. + */ + s = splsched(); + processor_lock(processor); + assert(processor->processor_set_next == new_pset); + + /* + * Shutdown may have been requested while this assignment + * was in progress. + */ + if (processor->state == PROCESSOR_SHUTDOWN) { + processor->processor_set_next = PROCESSOR_SET_NULL; + pset_unlock(new_pset); + goto shutdown; /* releases pset reference */ + } + + /* + * Do assignment, then wakeup anyone waiting for it. + */ + pset_remove_processor(pset, processor); + pset_unlock(pset); + + pset_add_processor(new_pset, processor); + if (new_pset->empty) { + /* + * Set all the threads loose. + * + * NOTE: this appears to violate the locking + * order, since the processor lock should + * be taken AFTER a thread lock. However, + * thread_setrun (called by thread_release) + * only takes the processor lock if the + * processor is idle. The processor is + * not idle here. + */ + queue_iterate(&new_pset->threads, thread, thread_t, + pset_threads) { + thread_release(thread); + } + new_pset->empty = FALSE; + } + processor->processor_set_next = PROCESSOR_SET_NULL; + processor->state = PROCESSOR_RUNNING; + thread_wakeup((event_t)processor); + processor_unlock(processor); + splx(s); + pset_unlock(new_pset); + + /* + * Clean up dangling references, and release our binding. + */ + pset_deallocate(new_pset); + if (have_pset_ref) + pset_deallocate(pset); + if (prev_thread != THREAD_NULL) + thread_deallocate(prev_thread); + thread_bind(this_thread, PROCESSOR_NULL); + + thread_block((void (*)()) 0); + return; + } + +#endif /* MACH_HOST */ + + /* + * Do shutdown, make sure we live when processor dies. + */ + if (processor->state != PROCESSOR_SHUTDOWN) { + printf("state: %d\n", processor->state); + panic("action_thread -- bad processor state"); + } + + s = splsched(); + processor_lock(processor); + + shutdown: + pset_remove_processor(pset, processor); + processor_unlock(processor); + pset_unlock(pset); + splx(s); + + /* + * Clean up dangling references, and release our binding. + */ +#if MACH_HOST + if (new_pset != PROCESSOR_SET_NULL) + pset_deallocate(new_pset); + if (have_pset_ref) + pset_deallocate(pset); + if (prev_thread != THREAD_NULL) + thread_deallocate(prev_thread); +#endif /* MACH_HOST */ + + thread_bind(this_thread, PROCESSOR_NULL); + switch_to_shutdown_context(this_thread, + processor_doshutdown, + processor); + +} + +/* + * Actually do the processor shutdown. This is called at splsched, + * running on the processor's shutdown stack. + */ + +#ifdef __GNUC__ +extern __volatile__ void halt_cpu(); +#endif + +#ifdef __GNUC__ +__volatile__ +#endif +void processor_doshutdown(processor) +register processor_t processor; +{ + register int cpu = processor->slot_num; + + timer_switch(&kernel_timer[cpu]); + + /* + * Ok, now exit this cpu. + */ + PMAP_DEACTIVATE_KERNEL(cpu); +#ifndef MIGRATING_THREADS + active_threads[cpu] = THREAD_NULL; +#endif + cpu_down(cpu); + thread_wakeup((event_t)processor); + halt_cpu(); + /* + * The action thread returns to life after the call to + * switch_to_shutdown_context above, on some other cpu. 
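+ * halt_cpu() does not return; the thread_wakeup just above lets any
+ * waiter on the processor event (e.g. a processor_assign() call with
+ * wait == TRUE) continue once the cpu is down.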
+ */ + + /*NOTREACHED*/ +} +#else /* NCPUS > 1 */ + +kern_return_t +processor_assign(processor, new_pset, wait) +processor_t processor; +processor_set_t new_pset; +boolean_t wait; +{ +#ifdef lint + processor++; new_pset++; wait++; +#endif lint + return(KERN_FAILURE); +} + +#endif /* NCPUS > 1 */ + +kern_return_t +host_get_boot_info(priv_host, boot_info) + host_t priv_host; + kernel_boot_info_t boot_info; +{ + char *src = ""; + + if (priv_host == HOST_NULL) { + return KERN_INVALID_HOST; + } + +#if NORMA_ETHER +{ + extern char *norma_ether_boot_info(); + src = norma_ether_boot_info(); +} +#endif /* NORMA_ETHER */ +#if defined(iPSC386) || defined(iPSC860) +{ + extern char *ipsc_boot_environ(); + src = ipsc_boot_environ(); +} +#endif /* defined(iPSC386) || defined(iPSC860) */ + + (void) strncpy(boot_info, src, KERNEL_BOOT_INFO_MAX); + return KERN_SUCCESS; +} diff --git a/kern/macro_help.h b/kern/macro_help.h new file mode 100644 index 0000000..e13b01d --- /dev/null +++ b/kern/macro_help.h @@ -0,0 +1,55 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: kern/macro_help.h + * + * Provide help in making lint-free macro routines + * + */ + +#ifndef _KERN_MACRO_HELP_H_ +#define _KERN_MACRO_HELP_H_ + +#if !defined(MACRO_BEGIN) + +#include <mach/boolean.h> + +#ifdef lint +boolean_t NEVER; +boolean_t ALWAYS; +#else /* lint */ +#define NEVER FALSE +#define ALWAYS TRUE +#endif /* lint */ + +#define MACRO_BEGIN do { +#define MACRO_END } while (NEVER) + +#define MACRO_RETURN if (ALWAYS) return + +#endif /* !MACRO_BEGIN */ + +#endif /* _KERN_MACRO_HELP_H_ */ diff --git a/kern/pc_sample.c b/kern/pc_sample.c new file mode 100644 index 0000000..01b9acb --- /dev/null +++ b/kern/pc_sample.c @@ -0,0 +1,299 @@ +/* + * Mach Operating System + * Copyright (c) 1993,1992 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + + + +#include <mach_pcsample.h> + +#include <mach/mach_types.h> /* vm_address_t */ +#include <mach/std_types.h> /* pointer_t */ +#include <mach/pc_sample.h> +#include <kern/host.h> +#include <kern/thread.h> +#include <kern/pc_sample.h> + +#if MACH_PCSAMPLE + +#define MAX_PC_SAMPLES 512 + +typedef sampled_pc_t sampled_pcs[MAX_PC_SAMPLES]; + +int pc_sampling_enabled = 0; +decl_simple_lock_data(, pc_sampling_lock) /* lock for enabling */ + +void take_pc_sample( + register thread_t t, + register sample_control_t *cp, + sampled_pc_flavor_t flavor) +{ + vm_offset_t pc; + struct sampled_pc *sample; + + pc = interrupted_pc(t); + cp->seqno++; + sample = &((sampled_pc_t *)cp->buffer)[cp->seqno % MAX_PC_SAMPLES]; + sample->id = (natural_t)t; + sample->pc = pc; + sample->sampletype = flavor; +} + +kern_return_t +thread_enable_pc_sampling( + thread_t thread, + int *tickp, + sampled_pc_flavor_t flavors) +{ + vm_offset_t buf; + extern int tick; + + if (thread == THREAD_NULL) { + return KERN_INVALID_ARGUMENT; + } + if (thread->pc_sample.buffer == 0) { + buf = (vm_offset_t) kalloc(sizeof (sampled_pcs)); + if (buf == 0) { + printf("thread_enable_pc_sampling: kalloc failed\n"); + return KERN_INVALID_ARGUMENT; + } + thread->pc_sample.buffer = buf; + thread->pc_sample.seqno = 0; + } + *tickp = tick; + thread->pc_sample.sampletypes = flavors; + return KERN_SUCCESS; +} + +kern_return_t +task_enable_pc_sampling( + task_t task, + int *tickp, + sampled_pc_flavor_t flavors) +{ + vm_offset_t buf; + extern int tick; + + if (task == TASK_NULL) { + return KERN_INVALID_ARGUMENT; + } + if (task->pc_sample.buffer == 0) { + buf = (vm_offset_t) kalloc(sizeof (sampled_pcs)); + if (buf == 0) { + printf("task_enable_pc_sampling: kalloc failed\n"); + return KERN_INVALID_ARGUMENT; + } + task->pc_sample.buffer = buf; + task->pc_sample.seqno = 0; + } + *tickp = tick; + task->pc_sample.sampletypes = flavors; + return KERN_SUCCESS; +} + +kern_return_t +thread_disable_pc_sampling( + thread_t thread, + int *samplecntp) +{ + vm_offset_t buf; + + if (thread == THREAD_NULL) { + return KERN_INVALID_ARGUMENT; + } + if ((buf = thread->pc_sample.buffer) != 0) + kfree(buf, sizeof (sampled_pcs)); + thread->pc_sample.buffer = (vm_offset_t) 0; + thread->pc_sample.seqno = 0; + thread->pc_sample.sampletypes = 0; /* shut off sampling */ + + return KERN_SUCCESS; +} + +kern_return_t +task_disable_pc_sampling( + task_t task, + int *samplecntp) +{ + vm_offset_t buf; + + if (task == TASK_NULL) { + return KERN_INVALID_ARGUMENT; + } + if ((buf = task->pc_sample.buffer) != 0) + kfree(buf, sizeof (sampled_pcs)); + task->pc_sample.buffer = (vm_offset_t) 0; + task->pc_sample.seqno = 0; + task->pc_sample.sampletypes = 0; /* shut off sampling */ + + return KERN_SUCCESS; +} + +static kern_return_t +get_sampled_pcs( + sample_control_t *cp, + sampled_pc_seqno_t *seqnop, + sampled_pc_array_t sampled_pcs_out, + int *sampled_pcs_cntp) +{ + int nsamples; + sampled_pc_seqno_t seqidx1, seqidx2; + + nsamples = cp->seqno - *seqnop; + seqidx1 = *seqnop % MAX_PC_SAMPLES; /* index of *seqnop */ + seqidx2 = cp->seqno % MAX_PC_SAMPLES; /* index of cp->seqno */ + + if (nsamples > MAX_PC_SAMPLES) { + nsamples = 
MAX_PC_SAMPLES; + seqidx1 = (seqidx2 + 1) % MAX_PC_SAMPLES; + } + + if (nsamples > 0) { + /* + * Carefully copy sampled_pcs into sampled_pcs_msgbuf IN ORDER. + */ + if (seqidx1 < seqidx2) { + /* + * Simple case: no wraparound. + * Copy from seqidx1 to seqidx2. + */ + bcopy((sampled_pc_array_t)cp->buffer + seqidx1 + 1, + sampled_pcs_out, + nsamples * sizeof(sampled_pc_t)); + } else { + /* seqidx1 > seqidx2 -- Handle wraparound. */ + + bcopy((sampled_pc_array_t)cp->buffer + seqidx1 + 1, + sampled_pcs_out, + (MAX_PC_SAMPLES - seqidx1 - 1) * sizeof(sampled_pc_t)); + + bcopy((sampled_pc_array_t)cp->buffer, + sampled_pcs_out + (MAX_PC_SAMPLES - seqidx1 - 1), + (seqidx2 + 1) * sizeof(sampled_pc_t)); + } + } else { + /* could either be zero because of overflow, or because + * we are being lied to. In either case, return nothing. + * If overflow, only once in a blue moon. If being lied to, + * then we have no obligation to return anything useful anyway. + */ + ; + } + + *sampled_pcs_cntp = nsamples; + *seqnop = cp->seqno; + return KERN_SUCCESS; +} + +kern_return_t +thread_get_sampled_pcs( + thread_t thread, + sampled_pc_seqno_t *seqnop, + sampled_pc_array_t sampled_pcs_out, + int *sampled_pcs_cntp) +{ + if (thread == THREAD_NULL) + return KERN_INVALID_ARGUMENT; + + if (thread->pc_sample.buffer == 0) + return KERN_FAILURE; + + return get_sampled_pcs(&thread->pc_sample, seqnop, sampled_pcs_out, + sampled_pcs_cntp); +} + +kern_return_t +task_get_sampled_pcs( + task_t task, + sampled_pc_seqno_t *seqnop, + sampled_pc_array_t sampled_pcs_out, + int *sampled_pcs_cntp) +{ + if (task == TASK_NULL) + return KERN_INVALID_ARGUMENT; + + if (task->pc_sample.buffer == 0) + return KERN_FAILURE; + + return get_sampled_pcs(&task->pc_sample, seqnop, sampled_pcs_out, + sampled_pcs_cntp); +} + +#else /* MACH_PCSAMPLE */ + +kern_return_t +thread_enable_pc_sampling( + thread_t thread, + int *tickp, + sampled_pc_flavor_t flavors) +{ + return KERN_FAILURE; /* not implemented */ +} + +kern_return_t +task_enable_pc_sampling( + task_t task, + int *tickp, + sampled_pc_flavor_t flavors) +{ + return KERN_FAILURE; /* not implemented */ +} + +kern_return_t +thread_disable_pc_sampling( + thread_t thread, + int *samplecntp) +{ + return KERN_FAILURE; /* not implemented */ +} + +kern_return_t +task_disable_pc_sampling( + task_t task, + int *samplecntp) +{ + return KERN_FAILURE; /* not implemented */ +} + +kern_return_t +thread_get_sampled_pcs( + thread_t thread, + sampled_pc_seqno_t *seqnop, + sampled_pc_array_t sampled_pcs_out, + int *sampled_pcs_cntp) +{ + return KERN_FAILURE; /* not implemented */ +} + +kern_return_t +task_get_sampled_pcs( + task_t task, + sampled_pc_seqno_t *seqnop, + sampled_pc_array_t sampled_pcs_out, + int *sampled_pcs_cntp) +{ + return KERN_FAILURE; /* not implemented */ +} + +#endif /* MACH_PCSAMPLE */ diff --git a/kern/pc_sample.h b/kern/pc_sample.h new file mode 100644 index 0000000..02891e0 --- /dev/null +++ b/kern/pc_sample.h @@ -0,0 +1,90 @@ +/* + * Mach Operating System + * Copyright (c) 1993 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * HISTORY + * $Log: pc_sample.h,v $ + * Revision 1.1.1.1 1996/10/30 01:38:13 thomas + * Imported from UK22 + * + * Revision 1.1 1994/11/02 02:24:15 law + * Initial revision + * + * Revision 2.2 93/11/17 19:06:01 dbg + * Moved kernel internal definitions here from mach/pc_sample.h. + * [93/09/24 dbg] + * + */ + +/* + * Kernel definitions for PC sampling. + */ +#ifndef _KERN_PC_SAMPLE_H_ +#define _KERN_PC_SAMPLE_H_ + +#include <mach/pc_sample.h> +#include <mach/machine/vm_types.h> +#include <kern/kern_types.h> +#include <kern/macro_help.h> + +/* + * Control structure for sampling, included in + * threads and tasks. If sampletypes is 0, no + * sampling is done. + */ + +struct sample_control { + vm_offset_t buffer; + unsigned int seqno; + sampled_pc_flavor_t sampletypes; +}; + +typedef struct sample_control sample_control_t; + +/* + * Routines to take PC samples. + */ +extern void take_pc_sample( + thread_t thread, + sample_control_t *cp, + sampled_pc_flavor_t flavor); + +/* + * Macro to do quick flavor check for sampling, + * on both threads and tasks. + */ +#define take_pc_sample_macro(thread, flavor) \ + MACRO_BEGIN \ + task_t task; \ + \ + if ((thread)->pc_sample.sampletypes & (flavor)) \ + take_pc_sample((thread), &(thread)->pc_sample, (flavor)); \ + \ + task = (thread)->task; \ + if (task->pc_sample.sampletypes & (flavor)) \ + take_pc_sample((thread), &task->pc_sample, (flavor)); \ + MACRO_END + +#endif /* _KERN_PC_SAMPLE_H_ */ diff --git a/kern/printf.c b/kern/printf.c new file mode 100644 index 0000000..693c660 --- /dev/null +++ b/kern/printf.c @@ -0,0 +1,637 @@ +/* + * Mach Operating System + * Copyright (c) 1993 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * Common code for printf et al. + * + * The calling routine typically takes a variable number of arguments, + * and passes the address of the first one. This implementation + * assumes a straightforward, stack implementation, aligned to the + * machine's wordsize. 
Increasing addresses are assumed to point to + * successive arguments (left-to-right), as is the case for a machine + * with a downward-growing stack with arguments pushed right-to-left. + * + * To write, for example, fprintf() using this routine, the code + * + * fprintf(fd, format, args) + * FILE *fd; + * char *format; + * { + * _doprnt(format, &args, fd); + * } + * + * would suffice. (This example does not handle the fprintf's "return + * value" correctly, but who looks at the return value of fprintf + * anyway?) + * + * This version implements the following printf features: + * + * %d decimal conversion + * %u unsigned conversion + * %x hexadecimal conversion + * %X hexadecimal conversion with capital letters + * %o octal conversion + * %c character + * %s string + * %m.n field width, precision + * %-m.n left adjustment + * %0m.n zero-padding + * %*.* width and precision taken from arguments + * + * This version does not implement %f, %e, or %g. It accepts, but + * ignores, an `l' as in %ld, %lo, %lx, and %lu, and therefore will not + * work correctly on machines for which sizeof(long) != sizeof(int). + * It does not even parse %D, %O, or %U; you should be using %ld, %o and + * %lu if you mean long conversion. + * + * As mentioned, this version does not return any reasonable value. + * + * Permission is granted to use, modify, or propagate this code as + * long as this notice is incorporated. + * + * Steve Summit 3/25/87 + */ + +/* + * Added formats for decoding device registers: + * + * printf("reg = %b", regval, "<base><arg>*") + * + * where <base> is the output base expressed as a control character: + * i.e. '\10' gives octal, '\20' gives hex. Each <arg> is a sequence of + * characters, the first of which gives the bit number to be inspected + * (origin 1), and the rest (up to a control character (<= 32)) give the + * name of the register. Thus + * printf("reg = %b\n", 3, "\10\2BITTWO\1BITONE") + * would produce + * reg = 3<BITTWO,BITONE> + * + * If the second character in <arg> is also a control character, it + * indicates the last bit of a bit field. In this case, printf will extract + * bits <1> to <2> and print it. Characters following the second control + * character are printed before the bit field. + * printf("reg = %b\n", 0xb, "\10\4\3FIELD1=\2BITTWO\1BITONE") + * would produce + * reg = b<FIELD1=2,BITONE> + */ +/* + * Added for general use: + * # prefix for alternate format: + * 0x (0X) for hex + * leading 0 for octal + * + print '+' if positive + * blank print ' ' if positive + * + * z signed hexadecimal + * r signed, 'radix' + * n unsigned, 'radix' + * + * D,U,O,Z same as corresponding lower-case versions + * (compatibility) + */ + +#include <mach/boolean.h> +#include <kern/lock.h> +#include <kern/strings.h> +#include <sys/varargs.h> + +#define isdigit(d) ((d) >= '0' && (d) <= '9') +#define Ctod(c) ((c) - '0') + +#define MAXBUF (sizeof(long int) * 8) /* enough for binary */ + + +void printnum( + register unsigned long u, + register int base, + void (*putc)( char, vm_offset_t ), + vm_offset_t putc_arg) +{ + char buf[MAXBUF]; /* build number here */ + register char * p = &buf[MAXBUF-1]; + static char digs[] = "0123456789abcdef"; + + do { + *p-- = digs[u % base]; + u /= base; + } while (u != 0); + + while (++p != &buf[MAXBUF]) + (*putc)(*p, putc_arg); + +} + +boolean_t _doprnt_truncates = FALSE; + +/* printf could be called at _any_ point during system initialization, + including before printf_init() gets called from the "normal" place + in kern/startup.c. 
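+ * To cope with that, _doprnt() calls printf_init() on every
+ * invocation, and printf_init() uses _doprnt_lock_initialized below
+ * so the simple lock is only set up once.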
*/ +boolean_t _doprnt_lock_initialized = FALSE; +decl_simple_lock_data(,_doprnt_lock) + +void printf_init() +{ + if (!_doprnt_lock_initialized) + { + _doprnt_lock_initialized = TRUE; + simple_lock_init(&_doprnt_lock); + } +} + +void _doprnt( + register char *fmt, + va_list *argp, + /* character output routine */ + void (*putc)( char, vm_offset_t), + int radix, /* default radix - for '%r' */ + vm_offset_t putc_arg) +{ + int length; + int prec; + boolean_t ladjust; + char padc; + long n; + unsigned long u; + int plus_sign; + int sign_char; + boolean_t altfmt, truncate; + int base; + register char c; + + printf_init(); + +#if 0 + /* Make sure that we get *some* printout, no matter what */ + simple_lock(&_doprnt_lock); +#else + { + register int i = 0; + while (i < 1*1024*1024) { + if (simple_lock_try(&_doprnt_lock)) + break; + i++; + } + } +#endif + + while ((c = *fmt) != '\0') { + if (c != '%') { + (*putc)(c, putc_arg); + fmt++; + continue; + } + + fmt++; + + length = 0; + prec = -1; + ladjust = FALSE; + padc = ' '; + plus_sign = 0; + sign_char = 0; + altfmt = FALSE; + + while (TRUE) { + c = *fmt; + if (c == '#') { + altfmt = TRUE; + } + else if (c == '-') { + ladjust = TRUE; + } + else if (c == '+') { + plus_sign = '+'; + } + else if (c == ' ') { + if (plus_sign == 0) + plus_sign = ' '; + } + else + break; + fmt++; + } + + if (c == '0') { + padc = '0'; + c = *++fmt; + } + + if (isdigit(c)) { + while(isdigit(c)) { + length = 10 * length + Ctod(c); + c = *++fmt; + } + } + else if (c == '*') { + length = va_arg(*argp, int); + c = *++fmt; + if (length < 0) { + ladjust = !ladjust; + length = -length; + } + } + + if (c == '.') { + c = *++fmt; + if (isdigit(c)) { + prec = 0; + while(isdigit(c)) { + prec = 10 * prec + Ctod(c); + c = *++fmt; + } + } + else if (c == '*') { + prec = va_arg(*argp, int); + c = *++fmt; + } + } + + if (c == 'l') + c = *++fmt; /* need it if sizeof(int) < sizeof(long) */ + + truncate = FALSE; + + switch(c) { + case 'b': + case 'B': + { + register char *p; + boolean_t any; + register int i; + + u = va_arg(*argp, unsigned long); + p = va_arg(*argp, char *); + base = *p++; + printnum(u, base, putc, putc_arg); + + if (u == 0) + break; + + any = FALSE; + while (i = *p++) { + /* NOTE: The '32' here is because ascii space */ + if (*p <= 32) { + /* + * Bit field + */ + register int j; + if (any) + (*putc)(',', putc_arg); + else { + (*putc)('<', putc_arg); + any = TRUE; + } + j = *p++; + for (; (c = *p) > 32; p++) + (*putc)(c, putc_arg); + printnum((unsigned)( (u>>(j-1)) & ((2<<(i-j))-1)), + base, putc, putc_arg); + } + else if (u & (1<<(i-1))) { + if (any) + (*putc)(',', putc_arg); + else { + (*putc)('<', putc_arg); + any = TRUE; + } + for (; (c = *p) > 32; p++) + (*putc)(c, putc_arg); + } + else { + for (; *p > 32; p++) + continue; + } + } + if (any) + (*putc)('>', putc_arg); + break; + } + + case 'c': + c = va_arg(*argp, int); + (*putc)(c, putc_arg); + break; + + case 's': + { + register char *p; + register char *p2; + + if (prec == -1) + prec = 0x7fffffff; /* MAXINT */ + + p = va_arg(*argp, char *); + + if (p == (char *)0) + p = ""; + + if (length > 0 && !ladjust) { + n = 0; + p2 = p; + + for (; *p != '\0' && n < prec; p++) + n++; + + p = p2; + + while (n < length) { + (*putc)(' ', putc_arg); + n++; + } + } + + n = 0; + + while (*p != '\0') { + if (++n > prec) + break; + + (*putc)(*p++, putc_arg); + } + + if (n < length && ladjust) { + while (n < length) { + (*putc)(' ', putc_arg); + n++; + } + } + + break; + } + + case 'o': + truncate = _doprnt_truncates; + case 'O': + base = 8; 
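+			/* The lower-case formats fall through into the
+			   corresponding upper-case case after setting
+			   "truncate"; both then jump to the shared code at
+			   print_signed/print_unsigned below.  */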
+ goto print_unsigned; + + case 'd': + truncate = _doprnt_truncates; + case 'D': + base = 10; + goto print_signed; + + case 'u': + truncate = _doprnt_truncates; + case 'U': + base = 10; + goto print_unsigned; + + case 'x': + truncate = _doprnt_truncates; + case 'X': + base = 16; + goto print_unsigned; + + case 'z': + truncate = _doprnt_truncates; + case 'Z': + base = 16; + goto print_signed; + + case 'r': + truncate = _doprnt_truncates; + case 'R': + base = radix; + goto print_signed; + + case 'n': + truncate = _doprnt_truncates; + case 'N': + base = radix; + goto print_unsigned; + + print_signed: + n = va_arg(*argp, long); + if (n >= 0) { + u = n; + sign_char = plus_sign; + } + else { + u = -n; + sign_char = '-'; + } + goto print_num; + + print_unsigned: + u = va_arg(*argp, unsigned long); + goto print_num; + + print_num: + { + char buf[MAXBUF]; /* build number here */ + register char * p = &buf[MAXBUF-1]; + static char digits[] = "0123456789abcdef"; + char *prefix = 0; + + if (truncate) u = (long)((int)(u)); + + if (u != 0 && altfmt) { + if (base == 8) + prefix = "0"; + else if (base == 16) + prefix = "0x"; + } + + do { + *p-- = digits[u % base]; + u /= base; + } while (u != 0); + + length -= (&buf[MAXBUF-1] - p); + if (sign_char) + length--; + if (prefix) + length -= strlen(prefix); + + if (padc == ' ' && !ladjust) { + /* blank padding goes before prefix */ + while (--length >= 0) + (*putc)(' ', putc_arg); + } + if (sign_char) + (*putc)(sign_char, putc_arg); + if (prefix) + while (*prefix) + (*putc)(*prefix++, putc_arg); + if (padc == '0') { + /* zero padding goes after sign and prefix */ + while (--length >= 0) + (*putc)('0', putc_arg); + } + while (++p != &buf[MAXBUF]) + (*putc)(*p, putc_arg); + + if (ladjust) { + while (--length >= 0) + (*putc)(' ', putc_arg); + } + break; + } + + case '\0': + fmt--; + break; + + default: + (*putc)(c, putc_arg); + } + fmt++; + } + + simple_unlock(&_doprnt_lock); +} + +/* + * Printing (to console) + */ +extern void cnputc( char, /*not really*/vm_offset_t); + +void vprintf(fmt, listp) + char * fmt; + va_list listp; +{ + _doprnt(fmt, &listp, cnputc, 16, 0); +} + +/*VARARGS1*/ +void printf(fmt, va_alist) + char * fmt; + va_dcl +{ + va_list listp; + va_start(listp); + vprintf(fmt, listp); + va_end(listp); +} + +int indent = 0; + +/* + * Printing (to console) with indentation. + */ +/*VARARGS1*/ +void iprintf(fmt, va_alist) + char * fmt; + va_dcl +{ + va_list listp; + register int i; + + for (i = indent; i > 0; ){ + if (i >= 8) { + printf("\t"); + i -= 8; + } + else { + printf(" "); + i--; + } + } + va_start(listp); + _doprnt(fmt, &listp, cnputc, 16, 0); + va_end(listp); +} + +/* + * Printing to generic buffer + * Returns #bytes printed. + * Strings are zero-terminated. 
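+ *
+ * The caller supplies the buffer and must size it generously, since
+ * sputc() below does no bounds checking.  A typical use is roughly
+ *
+ *	char buf[128];
+ *	int len;
+ *
+ *	len = sprintf(buf, "cpu %d: state %d", cpu, state);
+ *
+ * where "cpu" and "state" stand for whatever the caller has to print.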
+ */ +static void +sputc( + char c, + vm_offset_t arg) +{ + register char **bufp = (char **) arg; + register char *p = *bufp; + *p++ = c; + *bufp = p; +} + +int +sprintf( buf, fmt, va_alist) + char *buf; + char *fmt; + va_dcl +{ + va_list listp; + char *start = buf; + + va_start(listp); + _doprnt(fmt, &listp, sputc, 16, (vm_offset_t)&buf); + va_end(listp); + + *buf = 0; + return (buf - start); +} + + +void safe_gets(str, maxlen) + char *str; + int maxlen; +{ + register char *lp; + register int c; + char *strmax = str + maxlen - 1; /* allow space for trailing 0 */ + + lp = str; + for (;;) { + c = cngetc(); + switch (c) { + case '\n': + case '\r': + printf("\n"); + *lp++ = 0; + return; + + case '\b': + case '#': + case '\177': + if (lp > str) { + printf("\b \b"); + lp--; + } + continue; + + case '@': + case 'u'&037: + lp = str; + printf("\n\r"); + continue; + + default: + if (c >= ' ' && c < '\177') { + if (lp < strmax) { + *lp++ = c; + printf("%c", c); + } + else { + printf("%c", '\007'); /* beep */ + } + } + } + } +} diff --git a/kern/priority.c b/kern/priority.c new file mode 100644 index 0000000..f9a4091 --- /dev/null +++ b/kern/priority.c @@ -0,0 +1,225 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: clock_prim.c + * Author: Avadis Tevanian, Jr. + * Date: 1986 + * + * Clock primitives. + */ + +#include <cpus.h> + +#include <mach/boolean.h> +#include <mach/kern_return.h> +#include <mach/machine.h> +#include <kern/host.h> +#include <kern/mach_param.h> +#include <kern/sched.h> +#include <kern/thread.h> +#include <kern/processor.h> +#include <kern/timer.h> +#include <kern/time_out.h> +#include <kern/time_stamp.h> +#include <machine/machspl.h> + + + +/* + * USAGE_THRESHOLD is the amount by which usage must change to + * cause a priority shift that moves a thread between run queues. + */ + +#ifdef PRI_SHIFT_2 +#if PRI_SHIFT_2 > 0 +#define USAGE_THRESHOLD (((1 << PRI_SHIFT) + (1 << PRI_SHIFT_2)) << (2 + SCHED_SHIFT)) +#else /* PRI_SHIFT_2 > 0 */ +#define USAGE_THRESHOLD (((1 << PRI_SHIFT) - (1 << -(PRI_SHIFT_2))) << (2 + SCHED_SHIFT)) +#endif /* PRI_SHIFT_2 > 0 */ +#else /* PRI_SHIFT_2 */ +#define USAGE_THRESHOLD (1 << (PRI_SHIFT + 2 + SCHED_SHIFT)) +#endif /* PRI_SHIFT_2 */ + +/* + * thread_quantum_update: + * + * Recalculate the quantum and priority for a thread. + * The number of ticks that has elapsed since we were last called + * is passed as "nticks." 
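+ * The quantum handed out comes from the pset's machine_quantum[]
+ * table (filled in by quantum_set() in processor.c), indexed by the
+ * number of runnable threads and capped at the processor count.  For
+ * instance, with 4 processors and 2 runnable threads each quantum is
+ * roughly (min_quantum * 4) / 2, about twice min_quantum; once there
+ * are at least as many runnable threads as processors it drops back
+ * to min_quantum.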
+ * + * Called only from clock_interrupt(). + */ + +void thread_quantum_update(mycpu, thread, nticks, state) + register int mycpu; + register thread_t thread; + int nticks; + int state; +{ + register int quantum; + register processor_t myprocessor; +#if NCPUS > 1 + register processor_set_t pset; +#endif + spl_t s; + + myprocessor = cpu_to_processor(mycpu); +#if NCPUS > 1 + pset = myprocessor->processor_set; + if (pset == 0) { + /* + * Processor is being reassigned. + * Should rewrite processor assignment code to + * block clock interrupts. + */ + return; + } +#endif /* NCPUS > 1 */ + + /* + * Account for thread's utilization of these ticks. + * This assumes that there is *always* a current thread. + * When the processor is idle, it should be the idle thread. + */ + + /* + * Update set_quantum and calculate the current quantum. + */ +#if NCPUS > 1 + pset->set_quantum = pset->machine_quantum[ + ((pset->runq.count > pset->processor_count) ? + pset->processor_count : pset->runq.count)]; + + if (myprocessor->runq.count != 0) + quantum = min_quantum; + else + quantum = pset->set_quantum; +#else /* NCPUS > 1 */ + quantum = min_quantum; + default_pset.set_quantum = quantum; +#endif /* NCPUS > 1 */ + + /* + * Now recompute the priority of the thread if appropriate. + */ + + if (state != CPU_STATE_IDLE) { + myprocessor->quantum -= nticks; +#if NCPUS > 1 + /* + * Runtime quantum adjustment. Use quantum_adj_index + * to avoid synchronizing quantum expirations. + */ + if ((quantum != myprocessor->last_quantum) && + (pset->processor_count > 1)) { + myprocessor->last_quantum = quantum; + simple_lock(&pset->quantum_adj_lock); + quantum = min_quantum + (pset->quantum_adj_index * + (quantum - min_quantum)) / + (pset->processor_count - 1); + if (++(pset->quantum_adj_index) >= + pset->processor_count) + pset->quantum_adj_index = 0; + simple_unlock(&pset->quantum_adj_lock); + } +#endif /* NCPUS > 1 */ + if (myprocessor->quantum <= 0) { + s = splsched(); + thread_lock(thread); + if (thread->sched_stamp != sched_tick) { + update_priority(thread); + } + else { + if ( +#if MACH_FIXPRI + (thread->policy == POLICY_TIMESHARE) && +#endif /* MACH_FIXPRI */ + (thread->depress_priority < 0)) { + thread_timer_delta(thread); + thread->sched_usage += + thread->sched_delta; + thread->sched_delta = 0; + compute_my_priority(thread); + } + } + thread_unlock(thread); + (void) splx(s); + /* + * This quantum is up, give this thread another. + */ + myprocessor->first_quantum = FALSE; +#if MACH_FIXPRI + if (thread->policy == POLICY_TIMESHARE) { +#endif /* MACH_FIXPRI */ + myprocessor->quantum += quantum; +#if MACH_FIXPRI + } + else { + /* + * Fixed priority has per-thread quantum. + * + */ + myprocessor->quantum += thread->sched_data; + } +#endif /* MACH_FIXPRI */ + } + /* + * Recompute priority if appropriate. + */ + else { + s = splsched(); + thread_lock(thread); + if (thread->sched_stamp != sched_tick) { + update_priority(thread); + } + else { + if ( +#if MACH_FIXPRI + (thread->policy == POLICY_TIMESHARE) && +#endif /* MACH_FIXPRI */ + (thread->depress_priority < 0)) { + thread_timer_delta(thread); + if (thread->sched_delta >= USAGE_THRESHOLD) { + thread->sched_usage += + thread->sched_delta; + thread->sched_delta = 0; + compute_my_priority(thread); + } + } + } + thread_unlock(thread); + (void) splx(s); + } + /* + * Check for and schedule ast if needed. 
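+ * (The recomputation above may mean that some other thread should
+ * now be running on this processor.)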
+ */ + ast_check(); + } +} + diff --git a/kern/processor.c b/kern/processor.c new file mode 100644 index 0000000..ad788a9 --- /dev/null +++ b/kern/processor.c @@ -0,0 +1,1039 @@ +/* + * Mach Operating System + * Copyright (c) 1993-1988 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * processor.c: processor and processor_set manipulation routines. + */ + +#include <cpus.h> +#include <mach_fixpri.h> +#include <mach_host.h> + +#include <mach/boolean.h> +#include <mach/policy.h> +#include <mach/processor_info.h> +#include <mach/vm_param.h> +#include <kern/cpu_number.h> +#include <kern/lock.h> +#include <kern/host.h> +#include <kern/processor.h> +#include <kern/sched.h> +#include <kern/task.h> +#include <kern/thread.h> +#include <kern/ipc_host.h> +#include <ipc/ipc_port.h> + +#if MACH_HOST +#include <kern/zalloc.h> +zone_t pset_zone; +#endif /* MACH_HOST */ + + +/* + * Exported variables. + */ +struct processor_set default_pset; +struct processor processor_array[NCPUS]; + +queue_head_t all_psets; +int all_psets_count; +decl_simple_lock_data(, all_psets_lock); + +processor_t master_processor; +processor_t processor_ptr[NCPUS]; + +/* + * Forward declarations. + */ +void quantum_set(processor_set_t); +void pset_init(processor_set_t); +void processor_init(processor_t, int); + +/* + * Bootstrap the processor/pset system so the scheduler can run. + */ +void pset_sys_bootstrap(void) +{ + register int i; + + pset_init(&default_pset); + default_pset.empty = FALSE; + for (i = 0; i < NCPUS; i++) { + /* + * Initialize processor data structures. + * Note that cpu_to_processor(i) is processor_ptr[i]. + */ + processor_ptr[i] = &processor_array[i]; + processor_init(processor_ptr[i], i); + } + master_processor = cpu_to_processor(master_cpu); + queue_init(&all_psets); + simple_lock_init(&all_psets_lock); + queue_enter(&all_psets, &default_pset, processor_set_t, all_psets); + all_psets_count = 1; + default_pset.active = TRUE; + default_pset.empty = FALSE; + + /* + * Note: the default_pset has a max_priority of BASEPRI_USER. + * Internal kernel threads override this in kernel_thread. + */ +} + +#if MACH_HOST +/* + * Rest of pset system initializations. + */ +void pset_sys_init(void) +{ + register int i; + register processor_t processor; + + /* + * Allocate the zone for processor sets. + */ + pset_zone = zinit(sizeof(struct processor_set), 128*PAGE_SIZE, + PAGE_SIZE, 0, "processor sets"); + + /* + * Give each processor a control port. + * The master processor already has one. 
+ */ + for (i = 0; i < NCPUS; i++) { + processor = cpu_to_processor(i); + if (processor != master_processor && + machine_slot[i].is_cpu) + { + ipc_processor_init(processor); + } + } +} +#endif /* MACH_HOST */ + +/* + * Initialize the given processor_set structure. + */ + +void pset_init( + register processor_set_t pset) +{ + int i; + + simple_lock_init(&pset->runq.lock); + pset->runq.low = 0; + pset->runq.count = 0; + for (i = 0; i < NRQS; i++) { + queue_init(&(pset->runq.runq[i])); + } + queue_init(&pset->idle_queue); + pset->idle_count = 0; + simple_lock_init(&pset->idle_lock); + queue_init(&pset->processors); + pset->processor_count = 0; + pset->empty = TRUE; + queue_init(&pset->tasks); + pset->task_count = 0; + queue_init(&pset->threads); + pset->thread_count = 0; + pset->ref_count = 1; + simple_lock_init(&pset->ref_lock); + queue_init(&pset->all_psets); + pset->active = FALSE; + simple_lock_init(&pset->lock); + pset->pset_self = IP_NULL; + pset->pset_name_self = IP_NULL; + pset->max_priority = BASEPRI_USER; +#if MACH_FIXPRI + pset->policies = POLICY_TIMESHARE; +#endif /* MACH_FIXPRI */ + pset->set_quantum = min_quantum; +#if NCPUS > 1 + pset->quantum_adj_index = 0; + simple_lock_init(&pset->quantum_adj_lock); + + for (i = 0; i <= NCPUS; i++) { + pset->machine_quantum[i] = min_quantum; + } +#endif /* NCPUS > 1 */ + pset->mach_factor = 0; + pset->load_average = 0; + pset->sched_load = SCHED_SCALE; /* i.e. 1 */ +} + +/* + * Initialize the given processor structure for the processor in + * the slot specified by slot_num. + */ + +void processor_init( + register processor_t pr, + int slot_num) +{ + int i; + + simple_lock_init(&pr->runq.lock); + pr->runq.low = 0; + pr->runq.count = 0; + for (i = 0; i < NRQS; i++) { + queue_init(&(pr->runq.runq[i])); + } + queue_init(&pr->processor_queue); + pr->state = PROCESSOR_OFF_LINE; + pr->next_thread = THREAD_NULL; + pr->idle_thread = THREAD_NULL; + pr->quantum = 0; + pr->first_quantum = FALSE; + pr->last_quantum = 0; + pr->processor_set = PROCESSOR_SET_NULL; + pr->processor_set_next = PROCESSOR_SET_NULL; + queue_init(&pr->processors); + simple_lock_init(&pr->lock); + pr->processor_self = IP_NULL; + pr->slot_num = slot_num; +} + +/* + * pset_remove_processor() removes a processor from a processor_set. + * It can only be called on the current processor. Caller must + * hold lock on current processor and processor set. + */ + +void pset_remove_processor( + processor_set_t pset, + processor_t processor) +{ + if (pset != processor->processor_set) + panic("pset_remove_processor: wrong pset"); + + queue_remove(&pset->processors, processor, processor_t, processors); + processor->processor_set = PROCESSOR_SET_NULL; + pset->processor_count--; + quantum_set(pset); +} + +/* + * pset_add_processor() adds a processor to a processor_set. + * It can only be called on the current processor. Caller must + * hold lock on curent processor and on pset. No reference counting on + * processors. Processor reference to pset is implicit. + */ + +void pset_add_processor( + processor_set_t pset, + processor_t processor) +{ + queue_enter(&pset->processors, processor, processor_t, processors); + processor->processor_set = pset; + pset->processor_count++; + quantum_set(pset); +} + +/* + * pset_remove_task() removes a task from a processor_set. + * Caller must hold locks on pset and task. Pset reference count + * is not decremented; caller must explicitly pset_deallocate. 
+ */ + +void pset_remove_task( + processor_set_t pset, + task_t task) +{ + if (pset != task->processor_set) + return; + + queue_remove(&pset->tasks, task, task_t, pset_tasks); + task->processor_set = PROCESSOR_SET_NULL; + pset->task_count--; +} + +/* + * pset_add_task() adds a task to a processor_set. + * Caller must hold locks on pset and task. Pset references to + * tasks are implicit. + */ + +void pset_add_task( + processor_set_t pset, + task_t task) +{ + queue_enter(&pset->tasks, task, task_t, pset_tasks); + task->processor_set = pset; + pset->task_count++; +} + +/* + * pset_remove_thread() removes a thread from a processor_set. + * Caller must hold locks on pset and thread. Pset reference count + * is not decremented; caller must explicitly pset_deallocate. + */ + +void pset_remove_thread( + processor_set_t pset, + thread_t thread) +{ + queue_remove(&pset->threads, thread, thread_t, pset_threads); + thread->processor_set = PROCESSOR_SET_NULL; + pset->thread_count--; +} + +/* + * pset_add_thread() adds a thread to a processor_set. + * Caller must hold locks on pset and thread. Pset references to + * threads are implicit. + */ + +void pset_add_thread( + processor_set_t pset, + thread_t thread) +{ + queue_enter(&pset->threads, thread, thread_t, pset_threads); + thread->processor_set = pset; + pset->thread_count++; +} + +/* + * thread_change_psets() changes the pset of a thread. Caller must + * hold locks on both psets and thread. The old pset must be + * explicitly pset_deallocat()'ed by caller. + */ + +void thread_change_psets( + thread_t thread, + processor_set_t old_pset, + processor_set_t new_pset) +{ + queue_remove(&old_pset->threads, thread, thread_t, pset_threads); + old_pset->thread_count--; + queue_enter(&new_pset->threads, thread, thread_t, pset_threads); + thread->processor_set = new_pset; + new_pset->thread_count++; +} + +/* + * pset_deallocate: + * + * Remove one reference to the processor set. Destroy processor_set + * if this was the last reference. + */ +void pset_deallocate( + processor_set_t pset) +{ + if (pset == PROCESSOR_SET_NULL) + return; + + pset_ref_lock(pset); + if (--pset->ref_count > 0) { + pset_ref_unlock(pset); + return; + } +#if !MACH_HOST + panic("pset_deallocate: default_pset destroyed"); +#endif /* !MACH_HOST */ + +#if MACH_HOST + /* + * Reference count is zero, however the all_psets list + * holds an implicit reference and may make new ones. + * Its lock also dominates the pset lock. To check for this, + * temporarily restore one reference, and then lock the + * other structures in the right order. + */ + pset->ref_count = 1; + pset_ref_unlock(pset); + + simple_lock(&all_psets_lock); + pset_ref_lock(pset); + if (--pset->ref_count > 0) { + /* + * Made an extra reference. + */ + pset_ref_unlock(pset); + simple_unlock(&all_psets_lock); + return; + } + + /* + * Ok to destroy pset. Make a few paranoia checks. + */ + + if ((pset == &default_pset) || (pset->thread_count > 0) || + (pset->task_count > 0) || pset->processor_count > 0) { + panic("pset_deallocate: destroy default or active pset"); + } + /* + * Remove from all_psets queue. + */ + queue_remove(&all_psets, pset, processor_set_t, all_psets); + all_psets_count--; + + pset_ref_unlock(pset); + simple_unlock(&all_psets_lock); + + /* + * That's it, free data structure. + */ + zfree(pset_zone, (vm_offset_t)pset); +#endif /* MACH_HOST */ +} + +/* + * pset_reference: + * + * Add one reference to the processor set. 
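+ *
+ * A common pattern (cf. processor_assign()) is to take a reference
+ * before handing the pset to an asynchronous operation and to
+ * pset_deallocate() it again on every failure or early-return path.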
+ */ +void pset_reference( + processor_set_t pset) +{ + pset_ref_lock(pset); + pset->ref_count++; + pset_ref_unlock(pset); +} + +kern_return_t +processor_info( + register processor_t processor, + int flavor, + host_t *host, + processor_info_t info, + natural_t *count) +{ + register int slot_num, state; + register processor_basic_info_t basic_info; + + if (processor == PROCESSOR_NULL) + return KERN_INVALID_ARGUMENT; + + if (flavor != PROCESSOR_BASIC_INFO || + *count < PROCESSOR_BASIC_INFO_COUNT) + return KERN_FAILURE; + + basic_info = (processor_basic_info_t) info; + + slot_num = processor->slot_num; + basic_info->cpu_type = machine_slot[slot_num].cpu_type; + basic_info->cpu_subtype = machine_slot[slot_num].cpu_subtype; + state = processor->state; + if (state == PROCESSOR_SHUTDOWN || state == PROCESSOR_OFF_LINE) + basic_info->running = FALSE; + else + basic_info->running = TRUE; + basic_info->slot_num = slot_num; + if (processor == master_processor) + basic_info->is_master = TRUE; + else + basic_info->is_master = FALSE; + + *count = PROCESSOR_BASIC_INFO_COUNT; + *host = &realhost; + return KERN_SUCCESS; +} + +kern_return_t processor_start( + processor_t processor) +{ + if (processor == PROCESSOR_NULL) + return KERN_INVALID_ARGUMENT; +#if NCPUS > 1 + return cpu_start(processor->slot_num); +#else /* NCPUS > 1 */ + return KERN_FAILURE; +#endif /* NCPUS > 1 */ +} + +kern_return_t processor_exit( + processor_t processor) +{ + if (processor == PROCESSOR_NULL) + return KERN_INVALID_ARGUMENT; + +#if NCPUS > 1 + return processor_shutdown(processor); +#else /* NCPUS > 1 */ + return KERN_FAILURE; +#endif /* NCPUS > 1 */ +} + +kern_return_t +processor_control( + processor_t processor, + processor_info_t info, + natural_t count) +{ + if (processor == PROCESSOR_NULL) + return KERN_INVALID_ARGUMENT; + +#if NCPUS > 1 + return cpu_control(processor->slot_num, (int *)info, count); +#else /* NCPUS > 1 */ + return KERN_FAILURE; +#endif /* NCPUS > 1 */ +} + +/* + * Precalculate the appropriate system quanta based on load. The + * index into machine_quantum is the number of threads on the + * processor set queue. It is limited to the number of processors in + * the set. + */ + +void quantum_set( + processor_set_t pset) +{ +#if NCPUS > 1 + register int i,ncpus; + + ncpus = pset->processor_count; + + for ( i=1 ; i <= ncpus ; i++) { + pset->machine_quantum[i] = + ((min_quantum * ncpus) + (i/2)) / i ; + } + pset->machine_quantum[0] = 2 * pset->machine_quantum[1]; + + i = ((pset->runq.count > pset->processor_count) ? + pset->processor_count : pset->runq.count); + pset->set_quantum = pset->machine_quantum[i]; +#else /* NCPUS > 1 */ + default_pset.set_quantum = min_quantum; +#endif /* NCPUS > 1 */ +} + +#if MACH_HOST +/* + * processor_set_create: + * + * Create and return a new processor set. + */ + +kern_return_t +processor_set_create( + host_t host, + processor_set_t *new_set, + processor_set_t *new_name) +{ + processor_set_t pset; + + if (host == HOST_NULL) + return KERN_INVALID_ARGUMENT; + + pset = (processor_set_t) zalloc(pset_zone); + pset_init(pset); + pset_reference(pset); /* for new_set out argument */ + pset_reference(pset); /* for new_name out argument */ + ipc_pset_init(pset); + pset->active = TRUE; + + simple_lock(&all_psets_lock); + queue_enter(&all_psets, pset, processor_set_t, all_psets); + all_psets_count++; + simple_unlock(&all_psets_lock); + + ipc_pset_enable(pset); + + *new_set = pset; + *new_name = pset; + return KERN_SUCCESS; +} + +/* + * processor_set_destroy: + * + * destroy a processor set. 
Any tasks, threads or processors + * currently assigned to it are reassigned to the default pset. + */ +kern_return_t processor_set_destroy( + processor_set_t pset) +{ + register queue_entry_t elem; + register queue_head_t *list; + + if (pset == PROCESSOR_SET_NULL || pset == &default_pset) + return KERN_INVALID_ARGUMENT; + + /* + * Handle multiple termination race. First one through sets + * active to FALSE and disables ipc access. + */ + pset_lock(pset); + if (!(pset->active)) { + pset_unlock(pset); + return KERN_FAILURE; + } + + pset->active = FALSE; + ipc_pset_disable(pset); + + + /* + * Now reassign everything in this set to the default set. + */ + + if (pset->task_count > 0) { + list = &pset->tasks; + while (!queue_empty(list)) { + elem = queue_first(list); + task_reference((task_t) elem); + pset_unlock(pset); + task_assign((task_t) elem, &default_pset, FALSE); + task_deallocate((task_t) elem); + pset_lock(pset); + } + } + + if (pset->thread_count > 0) { + list = &pset->threads; + while (!queue_empty(list)) { + elem = queue_first(list); + thread_reference((thread_t) elem); + pset_unlock(pset); + thread_assign((thread_t) elem, &default_pset); + thread_deallocate((thread_t) elem); + pset_lock(pset); + } + } + + if (pset->processor_count > 0) { + list = &pset->processors; + while(!queue_empty(list)) { + elem = queue_first(list); + pset_unlock(pset); + processor_assign((processor_t) elem, &default_pset, TRUE); + pset_lock(pset); + } + } + + pset_unlock(pset); + + /* + * Destroy ipc state. + */ + ipc_pset_terminate(pset); + + /* + * Deallocate pset's reference to itself. + */ + pset_deallocate(pset); + return KERN_SUCCESS; +} + +#else /* MACH_HOST */ + +kern_return_t +processor_set_create( + host_t host, + processor_set_t *new_set, + processor_set_t *new_name) +{ +#ifdef lint + host++; new_set++; new_name++; +#endif /* lint */ + return KERN_FAILURE; +} + +kern_return_t processor_set_destroy( + processor_set_t pset) +{ +#ifdef lint + pset++; +#endif /* lint */ + return KERN_FAILURE; +} + +#endif MACH_HOST + +kern_return_t +processor_get_assignment( + processor_t processor, + processor_set_t *pset) +{ + int state; + + state = processor->state; + if (state == PROCESSOR_SHUTDOWN || state == PROCESSOR_OFF_LINE) + return KERN_FAILURE; + + *pset = processor->processor_set; + pset_reference(*pset); + return KERN_SUCCESS; +} + +kern_return_t +processor_set_info( + processor_set_t pset, + int flavor, + host_t *host, + processor_set_info_t info, + natural_t *count) +{ + if (pset == PROCESSOR_SET_NULL) + return KERN_INVALID_ARGUMENT; + + if (flavor == PROCESSOR_SET_BASIC_INFO) { + register processor_set_basic_info_t basic_info; + + if (*count < PROCESSOR_SET_BASIC_INFO_COUNT) + return KERN_FAILURE; + + basic_info = (processor_set_basic_info_t) info; + + pset_lock(pset); + basic_info->processor_count = pset->processor_count; + basic_info->task_count = pset->task_count; + basic_info->thread_count = pset->thread_count; + basic_info->mach_factor = pset->mach_factor; + basic_info->load_average = pset->load_average; + pset_unlock(pset); + + *count = PROCESSOR_SET_BASIC_INFO_COUNT; + *host = &realhost; + return KERN_SUCCESS; + } + else if (flavor == PROCESSOR_SET_SCHED_INFO) { + register processor_set_sched_info_t sched_info; + + if (*count < PROCESSOR_SET_SCHED_INFO_COUNT) + return KERN_FAILURE; + + sched_info = (processor_set_sched_info_t) info; + + pset_lock(pset); +#if MACH_FIXPRI + sched_info->policies = pset->policies; +#else /* MACH_FIXPRI */ + sched_info->policies = POLICY_TIMESHARE; +#endif /* 
MACH_FIXPRI */ + sched_info->max_priority = pset->max_priority; + pset_unlock(pset); + + *count = PROCESSOR_SET_SCHED_INFO_COUNT; + *host = &realhost; + return KERN_SUCCESS; + } + + *host = HOST_NULL; + return KERN_INVALID_ARGUMENT; +} + +/* + * processor_set_max_priority: + * + * Specify max priority permitted on processor set. This affects + * newly created and assigned threads. Optionally change existing + * ones. + */ +kern_return_t +processor_set_max_priority( + processor_set_t pset, + int max_priority, + boolean_t change_threads) +{ + if (pset == PROCESSOR_SET_NULL || invalid_pri(max_priority)) + return KERN_INVALID_ARGUMENT; + + pset_lock(pset); + pset->max_priority = max_priority; + + if (change_threads) { + register queue_head_t *list; + register thread_t thread; + + list = &pset->threads; + queue_iterate(list, thread, thread_t, pset_threads) { + if (thread->max_priority < max_priority) + thread_max_priority(thread, pset, max_priority); + } + } + + pset_unlock(pset); + + return KERN_SUCCESS; +} + +/* + * processor_set_policy_enable: + * + * Allow indicated policy on processor set. + */ + +kern_return_t +processor_set_policy_enable( + processor_set_t pset, + int policy) +{ + if ((pset == PROCESSOR_SET_NULL) || invalid_policy(policy)) + return KERN_INVALID_ARGUMENT; + +#if MACH_FIXPRI + pset_lock(pset); + pset->policies |= policy; + pset_unlock(pset); + + return KERN_SUCCESS; +#else /* MACH_FIXPRI */ + if (policy == POLICY_TIMESHARE) + return KERN_SUCCESS; + else + return KERN_FAILURE; +#endif /* MACH_FIXPRI */ +} + +/* + * processor_set_policy_disable: + * + * Forbid indicated policy on processor set. Time sharing cannot + * be forbidden. + */ + +kern_return_t +processor_set_policy_disable( + processor_set_t pset, + int policy, + boolean_t change_threads) +{ + if ((pset == PROCESSOR_SET_NULL) || policy == POLICY_TIMESHARE || + invalid_policy(policy)) + return KERN_INVALID_ARGUMENT; + +#if MACH_FIXPRI + pset_lock(pset); + + /* + * Check if policy enabled. Disable if so, then handle + * change_threads. + */ + if (pset->policies & policy) { + pset->policies &= ~policy; + + if (change_threads) { + register queue_head_t *list; + register thread_t thread; + + list = &pset->threads; + queue_iterate(list, thread, thread_t, pset_threads) { + if (thread->policy == policy) + thread_policy(thread, POLICY_TIMESHARE, 0); + } + } + } + pset_unlock(pset); +#endif /* MACH_FIXPRI */ + + return KERN_SUCCESS; +} + +#define THING_TASK 0 +#define THING_THREAD 1 + +/* + * processor_set_things: + * + * Common internals for processor_set_{threads,tasks} + */ +kern_return_t +processor_set_things( + processor_set_t pset, + mach_port_t **thing_list, + natural_t *count, + int type) +{ + unsigned int actual; /* this many things */ + int i; + + vm_size_t size, size_needed; + vm_offset_t addr; + + if (pset == PROCESSOR_SET_NULL) + return KERN_INVALID_ARGUMENT; + + size = 0; addr = 0; + + for (;;) { + pset_lock(pset); + if (!pset->active) { + pset_unlock(pset); + return KERN_FAILURE; + } + + if (type == THING_TASK) + actual = pset->task_count; + else + actual = pset->thread_count; + + /* do we have the memory we need? 
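+		   (The counts are only stable while the pset is locked, so
+		   when the buffer is too small the loop unlocks, kallocs a
+		   larger one, and goes around again to re-check.)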
*/ + + size_needed = actual * sizeof(mach_port_t); + if (size_needed <= size) + break; + + /* unlock the pset and allocate more memory */ + pset_unlock(pset); + + if (size != 0) + kfree(addr, size); + + assert(size_needed > 0); + size = size_needed; + + addr = kalloc(size); + if (addr == 0) + return KERN_RESOURCE_SHORTAGE; + } + + /* OK, have memory and the processor_set is locked & active */ + + switch (type) { + case THING_TASK: { + task_t *tasks = (task_t *) addr; + task_t task; + + for (i = 0, task = (task_t) queue_first(&pset->tasks); + i < actual; + i++, task = (task_t) queue_next(&task->pset_tasks)) { + /* take ref for convert_task_to_port */ + task_reference(task); + tasks[i] = task; + } + assert(queue_end(&pset->tasks, (queue_entry_t) task)); + break; + } + + case THING_THREAD: { + thread_t *threads = (thread_t *) addr; + thread_t thread; + + for (i = 0, thread = (thread_t) queue_first(&pset->threads); + i < actual; + i++, + thread = (thread_t) queue_next(&thread->pset_threads)) { + /* take ref for convert_thread_to_port */ + thread_reference(thread); + threads[i] = thread; + } + assert(queue_end(&pset->threads, (queue_entry_t) thread)); + break; + } + } + + /* can unlock processor set now that we have the task/thread refs */ + pset_unlock(pset); + + if (actual == 0) { + /* no things, so return null pointer and deallocate memory */ + *thing_list = 0; + *count = 0; + + if (size != 0) + kfree(addr, size); + } else { + /* if we allocated too much, must copy */ + + if (size_needed < size) { + vm_offset_t newaddr; + + newaddr = kalloc(size_needed); + if (newaddr == 0) { + switch (type) { + case THING_TASK: { + task_t *tasks = (task_t *) addr; + + for (i = 0; i < actual; i++) + task_deallocate(tasks[i]); + break; + } + + case THING_THREAD: { + thread_t *threads = (thread_t *) addr; + + for (i = 0; i < actual; i++) + thread_deallocate(threads[i]); + break; + } + } + kfree(addr, size); + return KERN_RESOURCE_SHORTAGE; + } + + bcopy((char *) addr, (char *) newaddr, size_needed); + kfree(addr, size); + addr = newaddr; + } + + *thing_list = (mach_port_t *) addr; + *count = actual; + + /* do the conversion that Mig should handle */ + + switch (type) { + case THING_TASK: { + task_t *tasks = (task_t *) addr; + + for (i = 0; i < actual; i++) + ((mach_port_t *) tasks)[i] = + (mach_port_t)convert_task_to_port(tasks[i]); + break; + } + + case THING_THREAD: { + thread_t *threads = (thread_t *) addr; + + for (i = 0; i < actual; i++) + ((mach_port_t *) threads)[i] = + (mach_port_t)convert_thread_to_port(threads[i]); + break; + } + } + } + + return KERN_SUCCESS; +} + + +/* + * processor_set_tasks: + * + * List all tasks in the processor set. + */ +kern_return_t +processor_set_tasks( + processor_set_t pset, + task_array_t *task_list, + natural_t *count) +{ + return processor_set_things(pset, task_list, count, THING_TASK); +} + +/* + * processor_set_threads: + * + * List all threads in the processor set. + */ +kern_return_t +processor_set_threads( + processor_set_t pset, + thread_array_t *thread_list, + natural_t *count) +{ + return processor_set_things(pset, thread_list, count, THING_THREAD); +} diff --git a/kern/processor.h b/kern/processor.h new file mode 100644 index 0000000..8de7a68 --- /dev/null +++ b/kern/processor.h @@ -0,0 +1,327 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. 
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * processor.h: Processor and processor-set definitions. + */ + +#ifndef _KERN_PROCESSOR_H_ +#define _KERN_PROCESSOR_H_ + +/* + * Data structures for managing processors and sets of processors. + */ + +#include <cpus.h> +#include <mach_fixpri.h> +#include <mach_host.h> + +#include <mach/boolean.h> +#include <mach/kern_return.h> +#include <mach/port.h> +#include <mach/processor_info.h> +#include <kern/cpu_number.h> +#include <kern/lock.h> +#include <kern/queue.h> +#include <kern/sched.h> +#include <kern/kern_types.h> +#include <kern/host.h> + +#if NCPUS > 1 +#include <machine/ast_types.h> +#endif /* NCPUS > 1 */ + +struct processor_set { + struct run_queue runq; /* runq for this set */ + queue_head_t idle_queue; /* idle processors */ + int idle_count; /* how many ? */ + decl_simple_lock_data(, idle_lock) /* lock for above */ + queue_head_t processors; /* all processors here */ + int processor_count; /* how many ? */ + boolean_t empty; /* true if no processors */ + queue_head_t tasks; /* tasks assigned */ + int task_count; /* how many */ + queue_head_t threads; /* threads in this set */ + int thread_count; /* how many */ + int ref_count; /* structure ref count */ + decl_simple_lock_data(, ref_lock) /* lock for ref count */ + queue_chain_t all_psets; /* link for all_psets */ + boolean_t active; /* is pset in use */ + decl_simple_lock_data(, lock) /* lock for everything else */ + struct ipc_port * pset_self; /* port for operations */ + struct ipc_port * pset_name_self; /* port for information */ + int max_priority; /* maximum priority */ +#if MACH_FIXPRI + int policies; /* bit vector for policies */ +#endif /* MACH_FIXPRI */ + int set_quantum; /* current default quantum */ +#if NCPUS > 1 + int quantum_adj_index; /* runtime quantum adj. */ + decl_simple_lock_data(, quantum_adj_lock) /* lock for above */ + int machine_quantum[NCPUS+1]; /* ditto */ +#endif /* NCPUS > 1 */ + long mach_factor; /* mach_factor */ + long load_average; /* load_average */ + long sched_load; /* load avg for scheduler */ +}; +extern struct processor_set default_pset; + +struct processor { + struct run_queue runq; /* local runq for this processor */ + /* XXX want to do this round robin eventually */ + queue_chain_t processor_queue; /* idle/assign/shutdown queue link */ + int state; /* See below */ + struct thread *next_thread; /* next thread to run if dispatched */ + struct thread *idle_thread; /* this processor's idle thread. 
*/ + int quantum; /* quantum for current thread */ + boolean_t first_quantum; /* first quantum in succession */ + int last_quantum; /* last quantum assigned */ + + processor_set_t processor_set; /* processor set I belong to */ + processor_set_t processor_set_next; /* set I will belong to */ + queue_chain_t processors; /* all processors in set */ + decl_simple_lock_data(, lock) + struct ipc_port *processor_self; /* port for operations */ + int slot_num; /* machine-indep slot number */ +#if NCPUS > 1 + ast_check_t ast_check_data; /* for remote ast_check invocation */ +#endif /* NCPUS > 1 */ + /* punt id data temporarily */ +}; +typedef struct processor Processor; +extern struct processor processor_array[NCPUS]; + +/* + * Chain of all processor sets. + */ +extern queue_head_t all_psets; +extern int all_psets_count; +decl_simple_lock_data(extern, all_psets_lock); + +/* + * The lock ordering is: + * + * all_psets_lock + * | + * | + * V + * pset_lock + * | + * +-----------+---------------+-------------------+ + * | | | | + * | | | | + * | | V V + * | | task_lock pset_self->ip_lock + * | | | | + * | | +-----------+---------------+ | + * | | | | | + * | V V V V + * | thread_lock* pset_ref_lock + * | | + * | +-------+ + * | | | + * | | V + * | | runq_lock* + * | | + * V V + * processor_lock* + * | + * | + * V + * pset_idle_lock* + * | + * | + * V + * action_lock* + * + * Locks marked with "*" are taken at splsched. + */ + +/* + * XXX need a pointer to the master processor structure + */ + +extern processor_t master_processor; + +/* + * NOTE: The processor->processor_set link is needed in one of the + * scheduler's critical paths. [Figure out where to look for another + * thread to run on this processor.] It is accessed without locking. + * The following access protocol controls this field. + * + * Read from own processor - just read. + * Read from another processor - lock processor structure during read. + * Write from own processor - lock processor structure during write. + * Write from another processor - NOT PERMITTED. + * + */ + +/* + * Processor state locking: + * + * Values for the processor state are defined below. If the processor + * is off-line or being shutdown, then it is only necessary to lock + * the processor to change its state. Otherwise it is only necessary + * to lock its processor set's idle_lock. Scheduler code will + * typically lock only the idle_lock, but processor manipulation code + * will often lock both. + */ + +#define PROCESSOR_OFF_LINE 0 /* Not in system */ +#define PROCESSOR_RUNNING 1 /* Running normally */ +#define PROCESSOR_IDLE 2 /* idle */ +#define PROCESSOR_DISPATCHING 3 /* dispatching (idle -> running) */ +#define PROCESSOR_ASSIGN 4 /* Assignment is changing */ +#define PROCESSOR_SHUTDOWN 5 /* Being shutdown */ + +/* + * Use processor ptr array to find current processor's data structure. + * This replaces a multiplication (index into processor_array) with + * an array lookup and a memory reference. It also allows us to save + * space if processor numbering gets too sparse. 
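 *
 * (Editorial illustration, not part of the original source; the
 *  local variable names are made up.  With the macros defined just
 *  below, the usual idiom is
 *
 *	processor_t	myprocessor = cpu_to_processor(cpu_number());
 *	processor_set_t	pset = current_processor_set();
 *
 *  rather than indexing a processor array directly.)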
+ */ + +extern processor_t processor_ptr[NCPUS]; + +#define cpu_to_processor(i) (processor_ptr[i]) + +#define current_processor() (processor_ptr[cpu_number()]) +#define current_processor_set() (current_processor()->processor_set) + +/* Compatibility -- will go away */ + +#define cpu_state(slot_num) (processor_ptr[slot_num]->state) +#define cpu_idle(slot_num) (cpu_state(slot_num) == PROCESSOR_IDLE) + +/* Useful lock macros */ + +#define pset_lock(pset) simple_lock(&(pset)->lock) +#define pset_unlock(pset) simple_unlock(&(pset)->lock) +#define pset_ref_lock(pset) simple_lock(&(pset)->ref_lock) +#define pset_ref_unlock(pset) simple_unlock(&(pset)->ref_lock) + +#define processor_lock(pr) simple_lock(&(pr)->lock) +#define processor_unlock(pr) simple_unlock(&(pr)->lock) + +typedef mach_port_t *processor_array_t; +typedef mach_port_t *processor_set_array_t; +typedef mach_port_t *processor_set_name_array_t; + + +/* + * Exported functions + */ + +/* Initialization */ + +#ifdef KERNEL +#if MACH_HOST +extern void pset_sys_bootstrap(void); +extern void pset_sys_init(void); +#endif /* MACH_HOST */ + +/* Pset internal functions */ + +extern void pset_reference(processor_set_t); +extern void pset_deallocate(processor_set_t); +extern void pset_remove_processor(processor_set_t, processor_t); +extern void pset_add_processor(processor_set_t, processor_t); +extern void pset_remove_task(processor_set_t, struct task *); +extern void pset_add_task(processor_set_t, struct task *); +extern void pset_remove_thread(processor_set_t, struct thread *); +extern void pset_add_thread(processor_set_t, struct thread *); +extern void thread_change_psets(struct thread *, + processor_set_t, processor_set_t); + +/* Processor interface */ + +extern kern_return_t processor_get_assignment( + processor_t processor, + processor_set_t *processor_set); + +extern kern_return_t processor_info( + processor_t processor, + int flavor, + host_t * host, + processor_info_t info, + natural_t * count); + +extern kern_return_t processor_start( + processor_t processor); + +extern kern_return_t processor_exit( + processor_t processor); + +extern kern_return_t processor_control( + processor_t processor, + processor_info_t info, + natural_t count); + +/* Pset interface */ + +extern kern_return_t processor_set_create( + host_t host, + processor_set_t *new_set, + processor_set_t *new_name); + +extern kern_return_t processor_set_destroy( + processor_set_t pset); + +extern kern_return_t processor_set_info( + processor_set_t pset, + int flavor, + host_t *host, + processor_set_info_t info, + natural_t *count); + +extern kern_return_t processor_set_max_priority( + processor_set_t pset, + int max_priority, + boolean_t change_threads); + +extern kern_return_t processor_set_policy_enable( + processor_set_t pset, + int policy); + +extern kern_return_t processor_set_policy_disable( + processor_set_t pset, + int policy, + boolean_t change_threads); + +extern kern_return_t processor_set_tasks( + processor_set_t pset, + task_array_t *task_list, + natural_t *count); + +extern kern_return_t processor_set_threads( + processor_set_t pset, + thread_array_t *thread_list, + natural_t *count); +#endif + +#endif /* _KERN_PROCESSOR_H_ */ diff --git a/kern/profile.c b/kern/profile.c new file mode 100644 index 0000000..7513934 --- /dev/null +++ b/kern/profile.c @@ -0,0 +1,413 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). 
+ * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * Copyright 1991 by Open Software Foundation, + * Grenoble, FRANCE + * + * All Rights Reserved + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose and without fee is hereby granted, + * provided that the above copyright notice appears in all copies and + * that both the copyright notice and this permission notice appear in + * supporting documentation, and that the name of OSF or Open Software + * Foundation not be used in advertising or publicity pertaining to + * distribution of the software without specific, written prior + * permission. + * + * OSF DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, + * IN NO EVENT SHALL OSF BE LIABLE FOR ANY SPECIAL, INDIRECT, OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT, + * NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#if 0 + +#include <kern/thread.h> +#include <kern/queue.h> +#include <mach/profil.h> +#include <kern/sched_prim.h> +#include <ipc/ipc_space.h> + +extern vm_map_t kernel_map; /* can be discarded, defined in <vm/vm_kern.h> */ + +thread_t profile_thread_id = THREAD_NULL; + + +void profile_thread() +{ + struct message { + mach_msg_header_t head; + mach_msg_type_t type; + int arg[SIZE_PROF_BUFFER+1]; + } msg; + + register spl_t s; + buf_to_send_t buf_entry; + queue_entry_t prof_queue_entry; + prof_data_t pbuf; + simple_lock_t lock; + msg_return_t mr; + int j; + + /* Initialise the queue header for the prof_queue */ + mpqueue_init(&prof_queue); + + /* Template initialisation of header and type structures */ + msg.head.msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, MACH_MSG_TYPE_MAKE_SEND_ONCE); + msg.head.msgh_size = sizeof(msg); + msg.head.msgh_local_port = MACH_PORT_NULL; + msg.head.msgh_kind = MACH_MSGH_KIND_NORMAL; + msg.head.msgh_id = 666666; + + msg.type.msgt_name = MACH_MSG_TYPE_INTEGER_32; + msg.type.msgt_size = 32; + msg.type.msgt_number = SIZE_PROF_BUFFER+1; + msg.type.msgt_inline = TRUE; + msg.type.msgt_longform = FALSE; + msg.type.msgt_deallocate = FALSE; + msg.type.msgt_unused = 0; + + while (TRUE) { + + /* Dequeue the first buffer. 
*/ + s = splsched(); + mpdequeue_head(&prof_queue, &prof_queue_entry); + splx(s); + + if ((buf_entry = (buf_to_send_t) prof_queue_entry) == NULLBTS) + { + thread_sleep((event_t) profile_thread, lock, TRUE); + if (current_thread()->wait_result != THREAD_AWAKENED) + break; + } + else { + task_t curr_task; + thread_t curr_th; + register int *sample; + int curr_buf; + int imax; + + curr_th = (thread_t) buf_entry->thread; + curr_buf = (int) buf_entry->number; + pbuf = curr_th->profil_buffer; + + /* Set the remote port */ + msg.head.msgh_remote_port = (mach_port_t) pbuf->prof_port; + + + sample = pbuf->prof_area[curr_buf].p_zone; + imax = pbuf->prof_area[curr_buf].p_index; + for(j=0 ;j<imax; j++,sample++) + msg.arg[j] = *sample; + + /* Let hardclock() know you've finished the dirty job */ + pbuf->prof_area[curr_buf].p_full = FALSE; + + /* + * Store the number of samples actually sent + * as the last element of the array. + */ + msg.arg[SIZE_PROF_BUFFER] = imax; + + mr = mach_msg(&(msg.head), MACH_SEND_MSG, + sizeof(struct message), 0, + MACH_PORT_NULL, MACH_MSG_TIMEOUT_NONE, + MACH_PORT_NULL); + + if (mr != MACH_MSG_SUCCESS) { +printf("profile_thread: mach_msg failed returned %x\n",(int)mr); + } + + if (buf_entry->wakeme) + thread_wakeup((event_t) &buf_entry->wakeme); + kmem_free(kernel_map, (buf_to_send_t) buf_entry, + sizeof(struct buf_to_send)); + + } + + } + /* The profile thread has been signalled to exit. There may still + be sample data queued for us, which we must now throw away. + Once we set profile_thread_id to null, hardclock() will stop + queueing any additional samples, so we do not need to alter + the interrupt level. */ + profile_thread_id = THREAD_NULL; + while (1) { + mpdequeue_head(&prof_queue, &prof_queue_entry); + if ((buf_entry = (buf_to_send_t) prof_queue_entry) == NULLBTS) + break; + if (buf_entry->wakeme) + thread_wakeup((event_t) &buf_entry->wakeme); + kmem_free(kernel_map, (buf_to_send_t) buf_entry, + sizeof(struct buf_to_send)); + } + + thread_halt_self(); +} + + + +#include <mach/message.h> + +void +send_last_sample_buf(th) +thread_t th; +{ + register spl_t s; + buf_to_send_t buf_entry; + vm_offset_t vm_buf_entry; + + if (th->profil_buffer == NULLPBUF) + return; + + /* Ask for the sending of the last PC buffer. + * Make a request to the profile_thread by inserting + * the buffer in the send queue, and wake it up. + * The last buffer must be inserted at the head of the + * send queue, so the profile_thread handles it immediately. + */ + if (kmem_alloc( kernel_map, &vm_buf_entry, + sizeof(struct buf_to_send)) != KERN_SUCCESS) + return; + buf_entry = (buf_to_send_t) vm_buf_entry; + buf_entry->thread = (int *) th; + buf_entry->number = th->profil_buffer->prof_index; + + /* Watch out in case profile thread exits while we are about to + queue data for it. */ + s = splsched(); + if (profile_thread_id != THREAD_NULL) { + simple_lock_t lock; + buf_entry->wakeme = 1; + mpenqueue_tail( &prof_queue, &(buf_entry->list)); + thread_wakeup((event_t) profile_thread); + assert_wait((event_t) &buf_entry->wakeme, TRUE); + splx(s); + thread_block((void (*)()) 0); + } else { + splx(s); + kmem_free(kernel_map, vm_buf_entry, sizeof(struct buf_to_send)); + } +} + +/* + * Profile current thread + */ + +profile(pc) { + + /* Find out which thread has been interrupted. */
+ thread_t it_thread = current_thread(); + int inout_val = pc; + buf_to_send_t buf_entry; + vm_offset_t vm_buf_entry; + int *val; + /* + * Test if the current thread is to be sampled + */ + if (it_thread->thread_profiled) { + /* Inserts the PC value in the buffer of the thread */ + set_pbuf_value(it_thread->profil_buffer, &inout_val); + switch(inout_val) { + case 0: + if (profile_thread_id == THREAD_NULL) { + reset_pbuf_area(it_thread->profil_buffer); + } else printf("ERROR : hardclock : full buffer unsent\n"); + break; + case 1: + /* Normal case, value successfully inserted */ + break; + case 2 : + /* + * The value we have just inserted caused the + * buffer to be full, and ready to be sent. + * If profile_thread_id is null, the profile + * thread has been killed. Since this generally + * happens only when the O/S server task of which + * it is a part is killed, it is not a great loss + * to throw away the data. + */ + if (profile_thread_id == THREAD_NULL || + kmem_alloc(kernel_map, + &vm_buf_entry , + sizeof(struct buf_to_send)) != + KERN_SUCCESS) { + reset_pbuf_area(it_thread->profil_buffer); + break; + } + buf_entry = (buf_to_send_t) vm_buf_entry; + buf_entry->thread = (int *)it_thread; + buf_entry->number = + (it_thread->profil_buffer)->prof_index; + mpenqueue_tail(&prof_queue, &(buf_entry->list)); + + /* Switch to another buffer */ + reset_pbuf_area(it_thread->profil_buffer); + + /* Wake up the profile thread */ + if (profile_thread_id != THREAD_NULL) + thread_wakeup((event_t) profile_thread); + break; + + default: + printf("ERROR: profile : unexpected case\n"); + } + } +} + + +/* The task parameter in this and the subsequent routine is needed for + MiG, even though it is not used in the function itself. */ + +kern_return_t +mach_sample_thread (task, reply, cur_thread) +ipc_space_t task; +ipc_object_t reply; +thread_t cur_thread; +{ +/* + * This routine is called every time that a new thread has made + * a request for the sampling service. We must keep track of the + * correspondence between its identity (cur_thread) and the port + * we are going to use as a reply port to send out the samples resulting + * from its execution. + */ + prof_data_t pbuf; + vm_offset_t vmpbuf; + + if (reply != MACH_PORT_NULL) { + if (cur_thread->thread_profiled && cur_thread->thread_profiled_own) { + if (reply == cur_thread->profil_buffer->prof_port) + return KERN_SUCCESS; + mach_sample_thread(task, MACH_PORT_NULL, cur_thread); + } + /* Start profiling this thread, do the initialization. */ + alloc_pbuf_area(pbuf, vmpbuf); + if ((cur_thread->profil_buffer = pbuf) == NULLPBUF) { +printf("ERROR:mach_sample_thread:cannot allocate pbuf\n"); + return KERN_RESOURCE_SHORTAGE; + } else { + if (!set_pbuf_nb(pbuf, NB_PROF_BUFFER-1)) { +printf("ERROR:mach_sample_thread:cannot set pbuf_nb\n"); + return KERN_FAILURE; + } + reset_pbuf_area(pbuf); + } + + pbuf->prof_port = reply; + cur_thread->thread_profiled = TRUE; + cur_thread->thread_profiled_own = TRUE; + if (profile_thread_id == THREAD_NULL) + profile_thread_id = kernel_thread(current_task(), profile_thread); + } else { + if (!cur_thread->thread_profiled_own) + cur_thread->thread_profiled = FALSE; + if (!cur_thread->thread_profiled) + return KERN_SUCCESS; + + send_last_sample_buf(cur_thread); + + /* Stop profiling this thread, do the cleanup. 
*/ + + cur_thread->thread_profiled_own = FALSE; + cur_thread->thread_profiled = FALSE; + dealloc_pbuf_area(cur_thread->profil_buffer); + cur_thread->profil_buffer = NULLPBUF; + } + + return KERN_SUCCESS; +} + +kern_return_t +mach_sample_task (task, reply, cur_task) +ipc_space_t task; +ipc_object_t reply; +task_t cur_task; +{ + prof_data_t pbuf=cur_task->profil_buffer; + vm_offset_t vmpbuf; + int turnon = (reply != MACH_PORT_NULL); + + if (turnon) { + if (cur_task->task_profiled) { + if (cur_task->profil_buffer->prof_port == reply) + return KERN_SUCCESS; + (void) mach_sample_task(task, MACH_PORT_NULL, cur_task); + } + if (pbuf == NULLPBUF) { + alloc_pbuf_area(pbuf, vmpbuf); + if (pbuf == NULLPBUF) { + return KERN_RESOURCE_SHORTAGE; + } + cur_task->profil_buffer = pbuf; + } + if (!set_pbuf_nb(pbuf, NB_PROF_BUFFER-1)) { + return KERN_FAILURE; + } + reset_pbuf_area(pbuf); + pbuf->prof_port = reply; + } + + if (turnon != cur_task->task_profiled) { + int actual,i,sentone; + thread_t thread; + + if (turnon && profile_thread_id == THREAD_NULL) + profile_thread_id = + kernel_thread(current_task(), profile_thread); + cur_task->task_profiled = turnon; + actual = cur_task->thread_count; + sentone = 0; + for (i=0, thread=(thread_t) queue_first(&cur_task->thread_list); + i < actual; + i++, thread=(thread_t) queue_next(&thread->thread_list)) { + if (!thread->thread_profiled_own) { + thread->thread_profiled = turnon; + if (turnon) + thread->profil_buffer = cur_task->profil_buffer; + else if (!sentone) { + send_last_sample_buf(thread); + sentone = 1; + } + } + } + if (!turnon) { + dealloc_pbuf_area(pbuf); + cur_task->profil_buffer = NULLPBUF; + } + } + + return KERN_SUCCESS; +} + +#endif 0 diff --git a/kern/queue.c b/kern/queue.c new file mode 100644 index 0000000..98b74c2 --- /dev/null +++ b/kern/queue.c @@ -0,0 +1,131 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * Routines to implement queue package. + */ + +#include <kern/queue.h> + + + +/* + * Insert element at head of queue. + */ +void enqueue_head( + register queue_t que, + register queue_entry_t elt) +{ + elt->next = que->next; + elt->prev = que; + elt->next->prev = elt; + que->next = elt; +} + +/* + * Insert element at tail of queue. + */ +void enqueue_tail( + register queue_t que, + register queue_entry_t elt) +{ + elt->next = que; + elt->prev = que->prev; + elt->prev->next = elt; + que->prev = elt; +} + +/* + * Remove and return element at head of queue. 
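 *
 * (Editorial note, not part of the original source: the function
 *  returns (queue_entry_t) 0 when the queue is empty, so a typical
 *  drain loop over a hypothetical queue_head_t "q" looks like
 *
 *	queue_entry_t e;
 *
 *	while ((e = dequeue_head(&q)) != (queue_entry_t) 0)
 *		;	// process e here
 *  )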
+ */ +queue_entry_t dequeue_head( + register queue_t que) +{ + register queue_entry_t elt; + + if (que->next == que) + return((queue_entry_t)0); + + elt = que->next; + elt->next->prev = que; + que->next = elt->next; + return(elt); +} + +/* + * Remove and return element at tail of queue. + */ +queue_entry_t dequeue_tail( + register queue_t que) +{ + register queue_entry_t elt; + + if (que->prev == que) + return((queue_entry_t)0); + + elt = que->prev; + elt->prev->next = que; + que->prev = elt->prev; + return(elt); +} + +/* + * Remove arbitrary element from queue. + * Does not check whether element is on queue - the world + * will go haywire if it isn't. + */ + +/*ARGSUSED*/ +void remqueue( + queue_t que, + register queue_entry_t elt) +{ + elt->next->prev = elt->prev; + elt->prev->next = elt->next; +} + +/* + * Routines to directly imitate the VAX hardware queue + * package. + */ +void insque( + register struct queue_entry *entry, + register struct queue_entry *pred) +{ + entry->next = pred->next; + entry->prev = pred; + (pred->next)->prev = entry; + pred->next = entry; +} + +struct queue_entry +*remque( + register struct queue_entry *elt) +{ + (elt->next)->prev = elt->prev; + (elt->prev)->next = elt->next; + return(elt); +} + diff --git a/kern/queue.h b/kern/queue.h new file mode 100644 index 0000000..2f8f792 --- /dev/null +++ b/kern/queue.h @@ -0,0 +1,369 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon rights + * to redistribute these changes. + */ +/* + * File: queue.h + * Author: Avadis Tevanian, Jr. + * Date: 1985 + * + * Type definitions for generic queues. + * + */ + +#ifndef _KERN_QUEUE_H_ +#define _KERN_QUEUE_H_ + +#include <kern/lock.h> + +/* + * Queue of abstract objects. Queue is maintained + * within that object. + * + * Supports fast removal from within the queue. + * + * How to declare a queue of elements of type "foo_t": + * In the "*foo_t" type, you must have a field of + * type "queue_chain_t" to hold together this queue. + * There may be more than one chain through a + * "foo_t", for use by different queues. + * + * Declare the queue as a "queue_t" type. + * + * Elements of the queue (of type "foo_t", that is) + * are referred to by reference, and cast to type + * "queue_entry_t" within this module. + */ + +/* + * A generic doubly-linked list (queue). 
+ */ + +struct queue_entry { + struct queue_entry *next; /* next element */ + struct queue_entry *prev; /* previous element */ +}; + +typedef struct queue_entry *queue_t; +typedef struct queue_entry queue_head_t; +typedef struct queue_entry queue_chain_t; +typedef struct queue_entry *queue_entry_t; + +/* + * enqueue puts "elt" on the "queue". + * dequeue returns the first element in the "queue". + * remqueue removes the specified "elt" from the specified "queue". + */ + +#define enqueue(queue,elt) enqueue_tail(queue, elt) +#define dequeue(queue) dequeue_head(queue) + +void enqueue_head(); +void enqueue_tail(); +queue_entry_t dequeue_head(); +queue_entry_t dequeue_tail(); +void remqueue(); + +/* + * Macro: queue_init + * Function: + * Initialize the given queue. + * Header: + * void queue_init(q) + * queue_t q; *MODIFIED* + */ +#define queue_init(q) ((q)->next = (q)->prev = q) + +/* + * Macro: queue_first + * Function: + * Returns the first entry in the queue, + * Header: + * queue_entry_t queue_first(q) + * queue_t q; *IN* + */ +#define queue_first(q) ((q)->next) + +/* + * Macro: queue_next + * Function: + * Returns the entry after an item in the queue. + * Header: + * queue_entry_t queue_next(qc) + * queue_t qc; + */ +#define queue_next(qc) ((qc)->next) + +/* + * Macro: queue_last + * Function: + * Returns the last entry in the queue. + * Header: + * queue_entry_t queue_last(q) + * queue_t q; *IN* + */ +#define queue_last(q) ((q)->prev) + +/* + * Macro: queue_prev + * Function: + * Returns the entry before an item in the queue. + * Header: + * queue_entry_t queue_prev(qc) + * queue_t qc; + */ +#define queue_prev(qc) ((qc)->prev) + +/* + * Macro: queue_end + * Function: + * Tests whether a new entry is really the end of + * the queue. + * Header: + * boolean_t queue_end(q, qe) + * queue_t q; + * queue_entry_t qe; + */ +#define queue_end(q, qe) ((q) == (qe)) + +/* + * Macro: queue_empty + * Function: + * Tests whether a queue is empty. + * Header: + * boolean_t queue_empty(q) + * queue_t q; + */ +#define queue_empty(q) queue_end((q), queue_first(q)) + + +/*----------------------------------------------------------------*/ +/* + * Macros that operate on generic structures. The queue + * chain may be at any location within the structure, and there + * may be more than one chain. + */ + +/* + * Macro: queue_enter + * Function: + * Insert a new element at the tail of the queue. + * Header: + * void queue_enter(q, elt, type, field) + * queue_t q; + * <type> elt; + * <type> is what's in our queue + * <field> is the chain field in (*<type>) + */ +#define queue_enter(head, elt, type, field) \ +{ \ + register queue_entry_t prev; \ + \ + prev = (head)->prev; \ + if ((head) == prev) { \ + (head)->next = (queue_entry_t) (elt); \ + } \ + else { \ + ((type)prev)->field.next = (queue_entry_t)(elt);\ + } \ + (elt)->field.prev = prev; \ + (elt)->field.next = head; \ + (head)->prev = (queue_entry_t) elt; \ +} + +/* + * Macro: queue_enter_first + * Function: + * Insert a new element at the head of the queue. 
+ * Header: + * void queue_enter_first(q, elt, type, field) + * queue_t q; + * <type> elt; + * <type> is what's in our queue + * <field> is the chain field in (*<type>) + */ +#define queue_enter_first(head, elt, type, field) \ +{ \ + register queue_entry_t next; \ + \ + next = (head)->next; \ + if ((head) == next) { \ + (head)->prev = (queue_entry_t) (elt); \ + } \ + else { \ + ((type)next)->field.prev = (queue_entry_t)(elt);\ + } \ + (elt)->field.next = next; \ + (elt)->field.prev = head; \ + (head)->next = (queue_entry_t) elt; \ +} + +/* + * Macro: queue_field [internal use only] + * Function: + * Find the queue_chain_t (or queue_t) for the + * given element (thing) in the given queue (head) + */ +#define queue_field(head, thing, type, field) \ + (((head) == (thing)) ? (head) : &((type)(thing))->field) + +/* + * Macro: queue_remove + * Function: + * Remove an arbitrary item from the queue. + * Header: + * void queue_remove(q, qe, type, field) + * arguments as in queue_enter + */ +#define queue_remove(head, elt, type, field) \ +{ \ + register queue_entry_t next, prev; \ + \ + next = (elt)->field.next; \ + prev = (elt)->field.prev; \ + \ + if ((head) == next) \ + (head)->prev = prev; \ + else \ + ((type)next)->field.prev = prev; \ + \ + if ((head) == prev) \ + (head)->next = next; \ + else \ + ((type)prev)->field.next = next; \ +} + +/* + * Macro: queue_remove_first + * Function: + * Remove and return the entry at the head of + * the queue. + * Header: + * queue_remove_first(head, entry, type, field) + * entry is returned by reference + */ +#define queue_remove_first(head, entry, type, field) \ +{ \ + register queue_entry_t next; \ + \ + (entry) = (type) ((head)->next); \ + next = (entry)->field.next; \ + \ + if ((head) == next) \ + (head)->prev = (head); \ + else \ + ((type)(next))->field.prev = (head); \ + (head)->next = next; \ +} + +/* + * Macro: queue_remove_last + * Function: + * Remove and return the entry at the tail of + * the queue. + * Header: + * queue_remove_last(head, entry, type, field) + * entry is returned by reference + */ +#define queue_remove_last(head, entry, type, field) \ +{ \ + register queue_entry_t prev; \ + \ + (entry) = (type) ((head)->prev); \ + prev = (entry)->field.prev; \ + \ + if ((head) == prev) \ + (head)->next = (head); \ + else \ + ((type)(prev))->field.next = (head); \ + (head)->prev = prev; \ +} + +/* + * Macro: queue_assign + */ +#define queue_assign(to, from, type, field) \ +{ \ + ((type)((from)->prev))->field.next = (to); \ + ((type)((from)->next))->field.prev = (to); \ + *to = *from; \ +} + +/* + * Macro: queue_iterate + * Function: + * iterate over each item in the queue. + * Generates a 'for' loop, setting elt to + * each item in turn (by reference). + * Header: + * queue_iterate(q, elt, type, field) + * queue_t q; + * <type> elt; + * <type> is what's in our queue + * <field> is the chain field in (*<type>) + */ +#define queue_iterate(head, elt, type, field) \ + for ((elt) = (type) queue_first(head); \ + !queue_end((head), (queue_entry_t)(elt)); \ + (elt) = (type) queue_next(&(elt)->field)) + + + +/*----------------------------------------------------------------*/ +/* + * Define macros for queues with locks. 
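 *
 * (Editorial usage sketch of the generic macros above, not part of
 *  the original source; "foo", "data" and "chain" are made-up names,
 *  and "f" is assumed to point at an allocated struct foo:
 *
 *	struct foo {
 *		int		data;
 *		queue_chain_t	chain;		// link field for this queue
 *	};
 *	typedef struct foo *foo_t;
 *
 *	queue_head_t	foo_queue;
 *	foo_t		f;
 *
 *	queue_init(&foo_queue);
 *	queue_enter(&foo_queue, f, foo_t, chain);	// append f
 *	queue_iterate(&foo_queue, f, foo_t, chain) {
 *		// visits every foo on the queue, head to tail
 *	}
 *	queue_remove(&foo_queue, f, foo_t, chain);	// unlink f
 *
 *  The mpqueue macros below perform the same style of operation
 *  under the queue's own simple lock.)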
+ */ +struct mpqueue_head { + struct queue_entry head; /* header for queue */ + struct slock lock; /* lock for queue */ +}; + +typedef struct mpqueue_head mpqueue_head_t; + +#define round_mpq(size) (size) + +#define mpqueue_init(q) \ + { \ + queue_init(&(q)->head); \ + simple_lock_init(&(q)->lock); \ + } + +#define mpenqueue_tail(q, elt) \ + simple_lock(&(q)->lock); \ + enqueue_tail(&(q)->head, elt); \ + simple_unlock(&(q)->lock); + +#define mpdequeue_head(q, elt) \ + simple_lock(&(q)->lock); \ + if (queue_empty(&(q)->head)) \ + *(elt) = 0; \ + else \ + *(elt) = dequeue_head(&(q)->head); \ + simple_unlock(&(q)->lock); + +/* + * Old queue stuff, will go away soon. + */ + +#endif _KERN_QUEUE_H_ diff --git a/kern/refcount.h b/kern/refcount.h new file mode 100644 index 0000000..7fd6cdf --- /dev/null +++ b/kern/refcount.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS + * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF + * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * CSL requests users of this software to return to csl-dist@cs.utah.edu any + * improvements that they make and grant CSL redistribution rights. + * + * Author: Bryan Ford, University of Utah CSL + */ +/* + * File: refcount.h + * + * This defines the system-independent part of the atomic reference count data type. + * + */ + +#ifndef _KERN_REFCOUNT_H_ +#define _KERN_REFCOUNT_H_ + +#include <kern/macro_help.h> + +#include "refcount.h" /*XXX*/ + +/* Unless the above include file specified otherwise, + use the system-independent (unoptimized) atomic reference counter. */ +#ifndef MACHINE_REFCOUNT + +#include <kern/lock.h> + +struct RefCount { + decl_simple_lock_data(,lock) /* lock for reference count */ + int ref_count; /* number of references */ +}; +typedef struct RefCount RefCount; + +#define refcount_init(refcount, refs) \ + MACRO_BEGIN \ + simple_lock_init(&(refcount)->lock); \ + ((refcount)->ref_count = (refs)); \ + MACRO_END + +#define refcount_take(refcount) \ + MACRO_BEGIN \ + simple_lock(&(refcount)->lock); \ + (refcount)->ref_count++; \ + simple_unlock(&(refcount)->lock); \ + MACRO_END + +#define refcount_drop(refcount, func) \ + MACRO_BEGIN \ + int new_value; \ + simple_lock(&(refcount)->lock); \ + new_value = --(refcount)->ref_count; \ + simple_unlock(&(refcount)->lock); \ + if (new_value == 0) { func; } \ + MACRO_END + +#endif + +#endif _KERN_REFCOUNT_H_ diff --git a/kern/sched.h b/kern/sched.h new file mode 100644 index 0000000..756384b --- /dev/null +++ b/kern/sched.h @@ -0,0 +1,181 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. 
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: sched.h + * Author: Avadis Tevanian, Jr. + * Date: 1985 + * + * Header file for scheduler. + * + */ + +#ifndef _KERN_SCHED_H_ +#define _KERN_SCHED_H_ + +#include <cpus.h> +#include <mach_fixpri.h> +#include <simple_clock.h> +#include <stat_time.h> + +#include <kern/queue.h> +#include <kern/lock.h> +#include <kern/macro_help.h> + +#if MACH_FIXPRI +#include <mach/policy.h> +#endif MACH_FIXPRI + +#if STAT_TIME + +/* + * Statistical timing uses microseconds as timer units. 18 bit shift + * yields priorities. PRI_SHIFT_2 isn't needed. + */ +#define PRI_SHIFT 18 + +#else STAT_TIME + +/* + * Otherwise machine provides shift(s) based on time units it uses. + */ +#include <machine/sched_param.h> + +#endif STAT_TIME +#define NRQS 32 /* 32 run queues per cpu */ + +struct run_queue { + queue_head_t runq[NRQS]; /* one for each priority */ + decl_simple_lock_data(, lock) /* one lock for all queues */ + int low; /* low queue value */ + int count; /* count of threads runable */ +}; + +typedef struct run_queue *run_queue_t; +#define RUN_QUEUE_NULL ((run_queue_t) 0) + +#if MACH_FIXPRI +/* + * NOTE: For fixed priority threads, first_quantum indicates + * whether context switch at same priority is ok. For timeshareing + * it indicates whether preempt is ok. + */ + +#define csw_needed(thread, processor) ((thread)->state & TH_SUSP || \ + ((processor)->runq.count > 0) || \ + ((thread)->policy == POLICY_TIMESHARE && \ + (processor)->first_quantum == FALSE && \ + (processor)->processor_set->runq.count > 0 && \ + (processor)->processor_set->runq.low <= \ + (thread)->sched_pri) || \ + ((thread)->policy == POLICY_FIXEDPRI && \ + (processor)->processor_set->runq.count > 0 && \ + ((((processor)->first_quantum == FALSE) && \ + ((processor)->processor_set->runq.low <= \ + (thread)->sched_pri)) || \ + ((processor)->processor_set->runq.low < \ + (thread)->sched_pri)))) + +#else MACH_FIXPRI +#define csw_needed(thread, processor) ((thread)->state & TH_SUSP || \ + ((processor)->runq.count > 0) || \ + ((processor)->first_quantum == FALSE && \ + ((processor)->processor_set->runq.count > 0 && \ + (processor)->processor_set->runq.low <= \ + ((thread)->sched_pri)))) +#endif MACH_FIXPRI + +/* + * Scheduler routines. + */ + +extern struct run_queue *rem_runq(); +extern struct thread *choose_thread(); +extern queue_head_t action_queue; /* assign/shutdown queue */ +decl_simple_lock_data(extern,action_lock); + +extern int min_quantum; /* defines max context switch rate */ + +/* + * Default base priorities for threads. 
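 *
 * (Editorial note, not part of the original source: priorities are
 *  the run-queue indices 0 .. NRQS-1, and a numerically *lower*
 *  value is a *better* priority -- csw_needed() above asks for a
 *  context switch when the run queue's "low" index is <= the running
 *  thread's sched_pri -- so BASEPRI_SYSTEM threads sort ahead of
 *  BASEPRI_USER threads.)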
+ */ +#define BASEPRI_SYSTEM 6 +#define BASEPRI_USER 12 + +/* + * Macro to check for invalid priorities. + */ + +#define invalid_pri(pri) (((pri) < 0) || ((pri) >= NRQS)) + +/* + * Shift structures for holding update shifts. Actual computation + * is usage = (usage >> shift1) +/- (usage >> abs(shift2)) where the + * +/- is determined by the sign of shift 2. + */ +struct shift { + int shift1; + int shift2; +}; + +typedef struct shift *shift_t, shift_data_t; + +/* + * sched_tick increments once a second. Used to age priorities. + */ + +extern unsigned sched_tick; + +#define SCHED_SCALE 128 +#define SCHED_SHIFT 7 + +/* + * thread_timer_delta macro takes care of both thread timers. + */ + +#define thread_timer_delta(thread) \ +MACRO_BEGIN \ + register unsigned delta; \ + \ + delta = 0; \ + TIMER_DELTA((thread)->system_timer, \ + (thread)->system_timer_save, delta); \ + TIMER_DELTA((thread)->user_timer, \ + (thread)->user_timer_save, delta); \ + (thread)->cpu_delta += delta; \ + (thread)->sched_delta += delta * \ + (thread)->processor_set->sched_load; \ +MACRO_END + +#if SIMPLE_CLOCK +/* + * sched_usec is an exponential average of number of microseconds + * in a second for clock drift compensation. + */ + +extern int sched_usec; +#endif SIMPLE_CLOCK + +#endif _KERN_SCHED_H_ diff --git a/kern/sched_prim.c b/kern/sched_prim.c new file mode 100644 index 0000000..b17e612 --- /dev/null +++ b/kern/sched_prim.c @@ -0,0 +1,2062 @@ +/* + * Mach Operating System + * Copyright (c) 1993-1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: sched_prim.c + * Author: Avadis Tevanian, Jr. 
+ * Date: 1986 + * + * Scheduling primitives + * + */ + +#include <cpus.h> +#include <simple_clock.h> +#include <mach_fixpri.h> +#include <mach_host.h> +#include <hw_footprint.h> +#include <fast_tas.h> +#include <power_save.h> + +#include <mach/machine.h> +#include <kern/ast.h> +#include <kern/counters.h> +#include <kern/cpu_number.h> +#include <kern/lock.h> +#include <kern/macro_help.h> +#include <kern/processor.h> +#include <kern/queue.h> +#include <kern/sched.h> +#include <kern/sched_prim.h> +#include <kern/syscall_subr.h> +#include <kern/thread.h> +#include <kern/thread_swap.h> +#include <kern/time_out.h> +#include <vm/pmap.h> +#include <vm/vm_kern.h> +#include <vm/vm_map.h> +#include <machine/machspl.h> /* For def'n of splsched() */ + +#if MACH_FIXPRI +#include <mach/policy.h> +#endif /* MACH_FIXPRI */ + + +extern int hz; + +int min_quantum; /* defines max context switch rate */ + +unsigned sched_tick; + +#if SIMPLE_CLOCK +int sched_usec; +#endif /* SIMPLE_CLOCK */ + +thread_t sched_thread_id; + +void recompute_priorities(void); /* forward */ +void update_priority(thread_t); +void set_pri(thread_t, int, boolean_t); +void do_thread_scan(void); + +thread_t choose_pset_thread(); + +timer_elt_data_t recompute_priorities_timer; + +#if DEBUG +void checkrq(run_queue_t, char *); +void thread_check(thread_t, run_queue_t); +#endif + +/* + * State machine + * + * states are combinations of: + * R running + * W waiting (or on wait queue) + * S suspended (or will suspend) + * N non-interruptible + * + * init action + * assert_wait thread_block clear_wait suspend resume + * + * R RW, RWN R; setrun - RS - + * RS RWS, RWNS S; wake_active - - R + * RN RWN RN; setrun - RNS - + * RNS RWNS RNS; setrun - - RN + * + * RW W R RWS - + * RWN WN RN RWNS - + * RWS WS; wake_active RS - RW + * RWNS WNS RNS - RWN + * + * W R; setrun WS - + * WN RN; setrun WNS - + * WNS RNS; setrun - WN + * + * S - - R + * WS S - W + * + */ + +/* + * Waiting protocols and implementation: + * + * Each thread may be waiting for exactly one event; this event + * is set using assert_wait(). That thread may be awakened either + * by performing a thread_wakeup_prim() on its event, + * or by directly waking that thread up with clear_wait(). + * + * The implementation of wait events uses a hash table. Each + * bucket is queue of threads having the same hash function + * value; the chain for the queue (linked list) is the run queue + * field. [It is not possible to be waiting and runnable at the + * same time.] + * + * Locks on both the thread and on the hash buckets govern the + * wait event field and the queue chain field. Because wakeup + * operations only have the event as an argument, the event hash + * bucket must be locked before any thread. + * + * Scheduling operations may also occur at interrupt level; therefore, + * interrupts below splsched() must be prevented when holding + * thread or hash bucket locks. + * + * The wait event hash table declarations are as follows: + */ + +#define NUMQUEUES 59 + +queue_head_t wait_queue[NUMQUEUES]; +decl_simple_lock_data(, wait_lock[NUMQUEUES]) + +/* NOTE: we want a small positive integer out of this */ +#define wait_hash(event) \ + ((((int)(event) < 0) ? 
~(int)(event) : (int)(event)) % NUMQUEUES) + +void wait_queue_init(void) +{ + register int i; + + for (i = 0; i < NUMQUEUES; i++) { + queue_init(&wait_queue[i]); + simple_lock_init(&wait_lock[i]); + } +} + +void sched_init(void) +{ + recompute_priorities_timer.fcn = (int (*)())recompute_priorities; + recompute_priorities_timer.param = (char *)0; + + min_quantum = hz / 10; /* context switch 10 times/second */ + wait_queue_init(); + pset_sys_bootstrap(); /* initialize processer mgmt. */ + queue_init(&action_queue); + simple_lock_init(&action_lock); + sched_tick = 0; +#if SIMPLE_CLOCK + sched_usec = 0; +#endif /* SIMPLE_CLOCK */ + ast_init(); +} + +/* + * Thread timeout routine, called when timer expires. + * Called at splsoftclock. + */ +void thread_timeout( + thread_t thread) +{ + assert(thread->timer.set == TELT_UNSET); + + clear_wait(thread, THREAD_TIMED_OUT, FALSE); +} + +/* + * thread_set_timeout: + * + * Set a timer for the current thread, if the thread + * is ready to wait. Must be called between assert_wait() + * and thread_block(). + */ + +void thread_set_timeout( + int t) /* timeout interval in ticks */ +{ + register thread_t thread = current_thread(); + register spl_t s; + + s = splsched(); + thread_lock(thread); + if ((thread->state & TH_WAIT) != 0) { + set_timeout(&thread->timer, t); + } + thread_unlock(thread); + splx(s); +} + +/* + * Set up thread timeout element when thread is created. + */ +void thread_timeout_setup( + register thread_t thread) +{ + thread->timer.fcn = (int (*)())thread_timeout; + thread->timer.param = (char *)thread; + thread->depress_timer.fcn = (int (*)())thread_depress_timeout; + thread->depress_timer.param = (char *)thread; +} + +/* + * assert_wait: + * + * Assert that the current thread is about to go to + * sleep until the specified event occurs. + */ +void assert_wait( + event_t event, + boolean_t interruptible) +{ + register queue_t q; + register int index; + register thread_t thread; +#if MACH_SLOCKS + register simple_lock_t lock; +#endif /* MACH_SLOCKS */ + spl_t s; + + thread = current_thread(); + if (thread->wait_event != 0) { + panic("assert_wait: already asserted event %#x\n", + thread->wait_event); + } + s = splsched(); + if (event != 0) { + index = wait_hash(event); + q = &wait_queue[index]; +#if MACH_SLOCKS + lock = &wait_lock[index]; +#endif /* MACH_SLOCKS */ + simple_lock(lock); + thread_lock(thread); + enqueue_tail(q, (queue_entry_t) thread); + thread->wait_event = event; + if (interruptible) + thread->state |= TH_WAIT; + else + thread->state |= TH_WAIT | TH_UNINT; + thread_unlock(thread); + simple_unlock(lock); + } + else { + thread_lock(thread); + if (interruptible) + thread->state |= TH_WAIT; + else + thread->state |= TH_WAIT | TH_UNINT; + thread_unlock(thread); + } + splx(s); +} + +/* + * clear_wait: + * + * Clear the wait condition for the specified thread. Start the thread + * executing if that is appropriate. + * + * parameters: + * thread thread to awaken + * result Wakeup result the thread should see + * interrupt_only Don't wake up the thread if it isn't + * interruptible. 
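 *
 * (Editorial sketch of the waiting protocol these primitives
 *  implement, not part of the original source; "my_lock", "my_event"
 *  and "condition" are made-up names:
 *
 *	// waiting side
 *	simple_lock(&my_lock);
 *	while (!condition) {
 *		assert_wait((event_t) &my_event, TRUE);
 *		simple_unlock(&my_lock);
 *		thread_block((void (*)()) 0);
 *		simple_lock(&my_lock);
 *	}
 *	simple_unlock(&my_lock);
 *
 *	// waking side, after making "condition" true
 *	thread_wakeup((event_t) &my_event);
 *
 *  thread_sleep() below packages the unlock-and-block pair, and
 *  clear_wait() is the targeted form that wakes one named thread
 *  rather than everything waiting on an event.)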
+ */ +void clear_wait( + register thread_t thread, + int result, + boolean_t interrupt_only) +{ + register int index; + register queue_t q; +#if MACH_SLOCKS + register simple_lock_t lock; +#endif /* MACH_SLOCKS */ + register event_t event; + spl_t s; + + s = splsched(); + thread_lock(thread); + if (interrupt_only && (thread->state & TH_UNINT)) { + /* + * can`t interrupt thread + */ + thread_unlock(thread); + splx(s); + return; + } + + event = thread->wait_event; + if (event != 0) { + thread_unlock(thread); + index = wait_hash(event); + q = &wait_queue[index]; +#if MACH_SLOCKS + lock = &wait_lock[index]; +#endif /* MACH_SLOCKS */ + simple_lock(lock); + /* + * If the thread is still waiting on that event, + * then remove it from the list. If it is waiting + * on a different event, or no event at all, then + * someone else did our job for us. + */ + thread_lock(thread); + if (thread->wait_event == event) { + remqueue(q, (queue_entry_t)thread); + thread->wait_event = 0; + event = 0; /* cause to run below */ + } + simple_unlock(lock); + } + if (event == 0) { + register int state = thread->state; + + reset_timeout_check(&thread->timer); + + switch (state & TH_SCHED_STATE) { + case TH_WAIT | TH_SUSP | TH_UNINT: + case TH_WAIT | TH_UNINT: + case TH_WAIT: + /* + * Sleeping and not suspendable - put + * on run queue. + */ + thread->state = (state &~ TH_WAIT) | TH_RUN; + thread->wait_result = result; + thread_setrun(thread, TRUE); + break; + + case TH_WAIT | TH_SUSP: + case TH_RUN | TH_WAIT: + case TH_RUN | TH_WAIT | TH_SUSP: + case TH_RUN | TH_WAIT | TH_UNINT: + case TH_RUN | TH_WAIT | TH_SUSP | TH_UNINT: + /* + * Either already running, or suspended. + */ + thread->state = state &~ TH_WAIT; + thread->wait_result = result; + break; + + default: + /* + * Not waiting. + */ + break; + } + } + thread_unlock(thread); + splx(s); +} + +/* + * thread_wakeup_prim: + * + * Common routine for thread_wakeup, thread_wakeup_with_result, + * and thread_wakeup_one. + * + */ +void thread_wakeup_prim( + event_t event, + boolean_t one_thread, + int result) +{ + register queue_t q; + register int index; + register thread_t thread, next_th; +#if MACH_SLOCKS + register simple_lock_t lock; +#endif /* MACH_SLOCKS */ + spl_t s; + register int state; + + index = wait_hash(event); + q = &wait_queue[index]; + s = splsched(); +#if MACH_SLOCKS + lock = &wait_lock[index]; +#endif /* MACH_SLOCKS */ + simple_lock(lock); + thread = (thread_t) queue_first(q); + while (!queue_end(q, (queue_entry_t)thread)) { + next_th = (thread_t) queue_next((queue_t) thread); + + if (thread->wait_event == event) { + thread_lock(thread); + remqueue(q, (queue_entry_t) thread); + thread->wait_event = 0; + reset_timeout_check(&thread->timer); + + state = thread->state; + switch (state & TH_SCHED_STATE) { + + case TH_WAIT | TH_SUSP | TH_UNINT: + case TH_WAIT | TH_UNINT: + case TH_WAIT: + /* + * Sleeping and not suspendable - put + * on run queue. + */ + thread->state = (state &~ TH_WAIT) | TH_RUN; + thread->wait_result = result; + thread_setrun(thread, TRUE); + break; + + case TH_WAIT | TH_SUSP: + case TH_RUN | TH_WAIT: + case TH_RUN | TH_WAIT | TH_SUSP: + case TH_RUN | TH_WAIT | TH_UNINT: + case TH_RUN | TH_WAIT | TH_SUSP | TH_UNINT: + /* + * Either already running, or suspended. 
+ */ + thread->state = state &~ TH_WAIT; + thread->wait_result = result; + break; + + default: + panic("thread_wakeup"); + break; + } + thread_unlock(thread); + if (one_thread) + break; + } + thread = next_th; + } + simple_unlock(lock); + splx(s); +} + +/* + * thread_sleep: + * + * Cause the current thread to wait until the specified event + * occurs. The specified lock is unlocked before releasing + * the cpu. (This is a convenient way to sleep without manually + * calling assert_wait). + */ +void thread_sleep( + event_t event, + simple_lock_t lock, + boolean_t interruptible) +{ + assert_wait(event, interruptible); /* assert event */ + simple_unlock(lock); /* release the lock */ + thread_block((void (*)()) 0); /* block ourselves */ +} + +/* + * thread_bind: + * + * Force a thread to execute on the specified processor. + * If the thread is currently executing, it may wait until its + * time slice is up before switching onto the specified processor. + * + * A processor of PROCESSOR_NULL causes the thread to be unbound. + * xxx - DO NOT export this to users. + */ +void thread_bind( + register thread_t thread, + processor_t processor) +{ + spl_t s; + + s = splsched(); + thread_lock(thread); + thread->bound_processor = processor; + thread_unlock(thread); + (void) splx(s); +} + +/* + * Select a thread for this processor (the current processor) to run. + * May select the current thread. + * Assumes splsched. + */ + +thread_t thread_select( + register processor_t myprocessor) +{ + register thread_t thread; + + myprocessor->first_quantum = TRUE; + /* + * Check for obvious simple case; local runq is + * empty and global runq has entry at hint. + */ + if (myprocessor->runq.count > 0) { + thread = choose_thread(myprocessor); + myprocessor->quantum = min_quantum; + } + else { + register processor_set_t pset; + +#if MACH_HOST + pset = myprocessor->processor_set; +#else /* MACH_HOST */ + pset = &default_pset; +#endif /* MACH_HOST */ + simple_lock(&pset->runq.lock); +#if DEBUG + checkrq(&pset->runq, "thread_select"); +#endif /* DEBUG */ + if (pset->runq.count == 0) { + /* + * Nothing else runnable. Return if this + * thread is still runnable on this processor. + * Check for priority update if required. + */ + thread = current_thread(); + if ((thread->state == TH_RUN) && +#if MACH_HOST + (thread->processor_set == pset) && +#endif /* MACH_HOST */ + ((thread->bound_processor == PROCESSOR_NULL) || + (thread->bound_processor == myprocessor))) { + + simple_unlock(&pset->runq.lock); + thread_lock(thread); + if (thread->sched_stamp != sched_tick) + update_priority(thread); + thread_unlock(thread); + } + else { + thread = choose_pset_thread(myprocessor, pset); + } + } + else { + register queue_t q; + + /* + * If there is a thread at hint, grab it, + * else call choose_pset_thread. 
+ */ + q = pset->runq.runq + pset->runq.low; + + if (queue_empty(q)) { + pset->runq.low++; + thread = choose_pset_thread(myprocessor, pset); + } + else { + thread = (thread_t) dequeue_head(q); + thread->runq = RUN_QUEUE_NULL; + pset->runq.count--; +#if MACH_FIXPRI + /* + * Cannot lazy evaluate pset->runq.low for + * fixed priority policy + */ + if ((pset->runq.count > 0) && + (pset->policies & POLICY_FIXEDPRI)) { + while (queue_empty(q)) { + pset->runq.low++; + q++; + } + } +#endif /* MACH_FIXPRI */ +#if DEBUG + checkrq(&pset->runq, "thread_select: after"); +#endif /* DEBUG */ + simple_unlock(&pset->runq.lock); + } + } + +#if MACH_FIXPRI + if (thread->policy == POLICY_TIMESHARE) { +#endif /* MACH_FIXPRI */ + myprocessor->quantum = pset->set_quantum; +#if MACH_FIXPRI + } + else { + /* + * POLICY_FIXEDPRI + */ + myprocessor->quantum = thread->sched_data; + } +#endif /* MACH_FIXPRI */ + } + + return thread; +} + +/* + * Stop running the current thread and start running the new thread. + * If continuation is non-zero, and the current thread is blocked, + * then it will resume by executing continuation on a new stack. + * Returns TRUE if the hand-off succeeds. + * Assumes splsched. + */ + +boolean_t thread_invoke( + register thread_t old_thread, + continuation_t continuation, + register thread_t new_thread) +{ + /* + * Check for invoking the same thread. + */ + if (old_thread == new_thread) { + /* + * Mark thread interruptible. + * Run continuation if there is one. + */ + thread_lock(new_thread); + new_thread->state &= ~TH_UNINT; + thread_unlock(new_thread); + + if (continuation != (void (*)()) 0) { + (void) spl0(); + call_continuation(continuation); + /*NOTREACHED*/ + } + return TRUE; + } + + /* + * Check for stack-handoff. + */ + thread_lock(new_thread); + if ((old_thread->stack_privilege != current_stack()) && + (continuation != (void (*)()) 0)) + { + switch (new_thread->state & TH_SWAP_STATE) { + case TH_SWAPPED: + + new_thread->state &= ~(TH_SWAPPED | TH_UNINT); + thread_unlock(new_thread); + +#if NCPUS > 1 + new_thread->last_processor = current_processor(); +#endif /* NCPUS > 1 */ + + /* + * Set up ast context of new thread and + * switch to its timer. + */ + ast_context(new_thread, cpu_number()); + timer_switch(&new_thread->system_timer); + + stack_handoff(old_thread, new_thread); + + /* + * We can dispatch the old thread now. + * This is like thread_dispatch, except + * that the old thread is left swapped + * *without* freeing its stack. + * This path is also much more frequent + * than actual calls to thread_dispatch. + */ + + thread_lock(old_thread); + old_thread->swap_func = continuation; + + switch (old_thread->state) { + case TH_RUN | TH_SUSP: + case TH_RUN | TH_SUSP | TH_HALTED: + case TH_RUN | TH_WAIT | TH_SUSP: + /* + * Suspend the thread + */ + old_thread->state = (old_thread->state & ~TH_RUN) + | TH_SWAPPED; + if (old_thread->wake_active) { + old_thread->wake_active = FALSE; + thread_unlock(old_thread); + thread_wakeup((event_t)&old_thread->wake_active); + + goto after_old_thread; + } + break; + + case TH_RUN | TH_SUSP | TH_UNINT: + case TH_RUN | TH_UNINT: + case TH_RUN: + /* + * We can`t suspend the thread yet, + * or it`s still running. + * Put back on a run queue. + */ + old_thread->state |= TH_SWAPPED; + thread_setrun(old_thread, FALSE); + break; + + case TH_RUN | TH_WAIT | TH_SUSP | TH_UNINT: + case TH_RUN | TH_WAIT | TH_UNINT: + case TH_RUN | TH_WAIT: + /* + * Waiting, and not suspendable. 
+ */ + old_thread->state = (old_thread->state & ~TH_RUN) + | TH_SWAPPED; + break; + + case TH_RUN | TH_IDLE: + /* + * Drop idle thread -- it is already in + * idle_thread_array. + */ + old_thread->state = TH_RUN | TH_IDLE | TH_SWAPPED; + break; + + default: + panic("thread_invoke"); + } + thread_unlock(old_thread); + after_old_thread: + + /* + * call_continuation calls the continuation + * after resetting the current stack pointer + * to recover stack space. If we called + * the continuation directly, we would risk + * running out of stack. + */ + + counter_always(c_thread_invoke_hits++); + (void) spl0(); + call_continuation(new_thread->swap_func); + /*NOTREACHED*/ + return TRUE; /* help for the compiler */ + + case TH_SW_COMING_IN: + /* + * Waiting for a stack + */ + thread_swapin(new_thread); + thread_unlock(new_thread); + counter_always(c_thread_invoke_misses++); + return FALSE; + + case 0: + /* + * Already has a stack - can`t handoff. + */ + break; + } + } + + else { + /* + * Check that the thread is swapped-in. + */ + if (new_thread->state & TH_SWAPPED) { + if ((new_thread->state & TH_SW_COMING_IN) || + !stack_alloc_try(new_thread, thread_continue)) + { + thread_swapin(new_thread); + thread_unlock(new_thread); + counter_always(c_thread_invoke_misses++); + return FALSE; + } + } + } + + new_thread->state &= ~(TH_SWAPPED | TH_UNINT); + thread_unlock(new_thread); + + /* + * Thread is now interruptible. + */ +#if NCPUS > 1 + new_thread->last_processor = current_processor(); +#endif /* NCPUS > 1 */ + + /* + * Set up ast context of new thread and switch to its timer. + */ + ast_context(new_thread, cpu_number()); + timer_switch(&new_thread->system_timer); + + /* + * switch_context is machine-dependent. It does the + * machine-dependent components of a context-switch, like + * changing address spaces. It updates active_threads. + * It returns only if a continuation is not supplied. + */ + counter_always(c_thread_invoke_csw++); + old_thread = switch_context(old_thread, continuation, new_thread); + + /* + * We're back. Now old_thread is the thread that resumed + * us, and we have to dispatch it. + */ + thread_dispatch(old_thread); + + return TRUE; +} + +/* + * thread_continue: + * + * Called when the current thread is given a new stack. + * Called at splsched. + */ +void thread_continue( + register thread_t old_thread) +{ + register continuation_t continuation = current_thread()->swap_func; + + /* + * We must dispatch the old thread and then + * call the current thread's continuation. + * There might not be an old thread, if we are + * the first thread to run on this processor. + */ + + if (old_thread != THREAD_NULL) + thread_dispatch(old_thread); + (void) spl0(); + (*continuation)(); + /*NOTREACHED*/ +} + + +/* + * thread_block: + * + * Block the current thread. If the thread is runnable + * then someone must have woken it up between its request + * to sleep and now. In this case, it goes back on a + * run queue. + * + * If a continuation is specified, then thread_block will + * attempt to discard the thread's kernel stack. When the + * thread resumes, it will execute the continuation function + * on a new kernel stack. 
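+ *
+ *	For illustration, the usual pattern (see sched_thread and the
+ *	idle thread below) is
+ *
+ *		assert_wait(event, FALSE);
+ *		...
+ *		thread_block(my_continue);
+ *
+ *	where my_continue (name illustrative) re-enters the thread's
+ *	main loop when the wakeup arrives.  Since the old stack may be
+ *	discarded, no local variables survive a continuation-style
+ *	block; any state needed afterwards must be kept in the thread
+ *	structure or in global data.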
+ */ + +void thread_block( + continuation_t continuation) +{ + register thread_t thread = current_thread(); + register processor_t myprocessor = cpu_to_processor(cpu_number()); + register thread_t new_thread; + spl_t s; + + check_simple_locks(); + + s = splsched(); + +#if FAST_TAS + { + extern void recover_ras(); + + if (csw_needed(thread, myprocessor)) + recover_ras(thread); + } +#endif /* FAST_TAS */ + + ast_off(cpu_number(), AST_BLOCK); + + do + new_thread = thread_select(myprocessor); + while (!thread_invoke(thread, continuation, new_thread)); + + splx(s); +} + +/* + * thread_run: + * + * Switch directly from the current thread to a specified + * thread. Both the current and new threads must be + * runnable. + * + * If a continuation is specified, then thread_block will + * attempt to discard the current thread's kernel stack. When the + * thread resumes, it will execute the continuation function + * on a new kernel stack. + */ +void thread_run( + continuation_t continuation, + register thread_t new_thread) +{ + register thread_t thread = current_thread(); + register processor_t myprocessor = cpu_to_processor(cpu_number()); + spl_t s; + + check_simple_locks(); + + s = splsched(); + + while (!thread_invoke(thread, continuation, new_thread)) + new_thread = thread_select(myprocessor); + + splx(s); +} + +/* + * Dispatches a running thread that is not on a runq. + * Called at splsched. + */ + +void thread_dispatch( + register thread_t thread) +{ + /* + * If we are discarding the thread's stack, we must do it + * before the thread has a chance to run. + */ + + thread_lock(thread); + + if (thread->swap_func != (void (*)()) 0) { + assert((thread->state & TH_SWAP_STATE) == 0); + thread->state |= TH_SWAPPED; + stack_free(thread); + } + + switch (thread->state &~ TH_SWAP_STATE) { + case TH_RUN | TH_SUSP: + case TH_RUN | TH_SUSP | TH_HALTED: + case TH_RUN | TH_WAIT | TH_SUSP: + /* + * Suspend the thread + */ + thread->state &= ~TH_RUN; + if (thread->wake_active) { + thread->wake_active = FALSE; + thread_unlock(thread); + thread_wakeup((event_t)&thread->wake_active); + return; + } + break; + + case TH_RUN | TH_SUSP | TH_UNINT: + case TH_RUN | TH_UNINT: + case TH_RUN: + /* + * No reason to stop. Put back on a run queue. + */ + thread_setrun(thread, FALSE); + break; + + case TH_RUN | TH_WAIT | TH_SUSP | TH_UNINT: + case TH_RUN | TH_WAIT | TH_UNINT: + case TH_RUN | TH_WAIT: + /* + * Waiting, and not suspended. + */ + thread->state &= ~TH_RUN; + break; + + case TH_RUN | TH_IDLE: + /* + * Drop idle thread -- it is already in + * idle_thread_array. + */ + break; + + default: + panic("thread_dispatch"); + } + thread_unlock(thread); +} + + +/* + * Define shifts for simulating (5/8)**n + */ + +shift_data_t wait_shift[32] = { + {1,1},{1,3},{1,-3},{2,-7},{3,5},{3,-5},{4,-8},{5,7}, + {5,-7},{6,-10},{7,10},{7,-9},{8,-11},{9,12},{9,-11},{10,-13}, + {11,14},{11,-13},{12,-15},{13,17},{13,-15},{14,-17},{15,19},{16,18}, + {16,-19},{17,22},{18,20},{18,-20},{19,26},{20,22},{20,-22},{21,-27}}; + +/* + * do_priority_computation: + * + * Calculate new priority for thread based on its base priority plus + * accumulated usage. PRI_SHIFT and PRI_SHIFT_2 convert from + * usage to priorities. SCHED_SHIFT converts for the scaling + * of the sched_usage field by SCHED_SCALE. This scaling comes + * from the multiplication by sched_load (thread_timer_delta) + * in sched.h. sched_load is calculated as a scaled overload + * factor in compute_mach_factor (mach_factor.c). 
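+ *
+ *	Rough illustration (the shift constants are machine-dependent,
+ *	so the numbers here are only an assumed example): if
+ *	PRI_SHIFT + SCHED_SHIFT came to 18, a thread with base priority
+ *	12 and sched_usage of 1<<20 would get 12 + ((1<<20) >> 18) = 16.
+ *	Larger numbers are less favourable and the result is clamped at
+ *	31, so accumulated usage steadily demotes a compute-bound thread
+ *	until update_priority ages its usage back down.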
+ */ + +#ifdef PRI_SHIFT_2 +#if PRI_SHIFT_2 > 0 +#define do_priority_computation(th, pri) \ + MACRO_BEGIN \ + (pri) = (th)->priority /* start with base priority */ \ + + ((th)->sched_usage >> (PRI_SHIFT + SCHED_SHIFT)) \ + + ((th)->sched_usage >> (PRI_SHIFT_2 + SCHED_SHIFT)); \ + if ((pri) > 31) (pri) = 31; \ + MACRO_END +#else /* PRI_SHIFT_2 */ +#define do_priority_computation(th, pri) \ + MACRO_BEGIN \ + (pri) = (th)->priority /* start with base priority */ \ + + ((th)->sched_usage >> (PRI_SHIFT + SCHED_SHIFT)) \ + - ((th)->sched_usage >> (SCHED_SHIFT - PRI_SHIFT_2)); \ + if ((pri) > 31) (pri) = 31; \ + MACRO_END +#endif /* PRI_SHIFT_2 */ +#else /* defined(PRI_SHIFT_2) */ +#define do_priority_computation(th, pri) \ + MACRO_BEGIN \ + (pri) = (th)->priority /* start with base priority */ \ + + ((th)->sched_usage >> (PRI_SHIFT + SCHED_SHIFT)); \ + if ((pri) > 31) (pri) = 31; \ + MACRO_END +#endif /* defined(PRI_SHIFT_2) */ + +/* + * compute_priority: + * + * Compute the effective priority of the specified thread. + * The effective priority computation is as follows: + * + * Take the base priority for this thread and add + * to it an increment derived from its cpu_usage. + * + * The thread *must* be locked by the caller. + */ + +void compute_priority( + register thread_t thread, + boolean_t resched) +{ + register int pri; + +#if MACH_FIXPRI + if (thread->policy == POLICY_TIMESHARE) { +#endif /* MACH_FIXPRI */ + do_priority_computation(thread, pri); + if (thread->depress_priority < 0) + set_pri(thread, pri, resched); + else + thread->depress_priority = pri; +#if MACH_FIXPRI + } + else { + set_pri(thread, thread->priority, resched); + } +#endif /* MACH_FIXPRI */ +} + +/* + * compute_my_priority: + * + * Version of compute priority for current thread or thread + * being manipulated by scheduler (going on or off a runq). + * Only used for priority updates. Policy or priority changes + * must call compute_priority above. Caller must have thread + * locked and know it is timesharing and not depressed. + */ + +void compute_my_priority( + register thread_t thread) +{ + register int temp_pri; + + do_priority_computation(thread,temp_pri); + thread->sched_pri = temp_pri; +} + +/* + * recompute_priorities: + * + * Update the priorities of all threads periodically. + */ +void recompute_priorities(void) +{ +#if SIMPLE_CLOCK + int new_usec; +#endif /* SIMPLE_CLOCK */ + + sched_tick++; /* age usage one more time */ + set_timeout(&recompute_priorities_timer, hz); +#if SIMPLE_CLOCK + /* + * Compensate for clock drift. sched_usec is an + * exponential average of the number of microseconds in + * a second. It decays in the same fashion as cpu_usage. + */ + new_usec = sched_usec_elapsed(); + sched_usec = (5*sched_usec + 3*new_usec)/8; +#endif /* SIMPLE_CLOCK */ + /* + * Wakeup scheduler thread. + */ + if (sched_thread_id != THREAD_NULL) { + clear_wait(sched_thread_id, THREAD_AWAKENED, FALSE); + } +} + +/* + * update_priority + * + * Cause the priority computation of a thread that has been + * sleeping or suspended to "catch up" with the system. Thread + * *MUST* be locked by caller. If thread is running, then this + * can only be called by the thread on itself. + */ +void update_priority( + register thread_t thread) +{ + register unsigned int ticks; + register shift_t shiftp; + register int temp_pri; + + ticks = sched_tick - thread->sched_stamp; + + assert(ticks != 0); + + /* + * If asleep for more than 30 seconds forget all + * cpu_usage, else catch up on missed aging. 
+ * 5/8 ** n is approximated by the two shifts + * in the wait_shift array. + */ + thread->sched_stamp += ticks; + thread_timer_delta(thread); + if (ticks > 30) { + thread->cpu_usage = 0; + thread->sched_usage = 0; + } + else { + thread->cpu_usage += thread->cpu_delta; + thread->sched_usage += thread->sched_delta; + shiftp = &wait_shift[ticks]; + if (shiftp->shift2 > 0) { + thread->cpu_usage = + (thread->cpu_usage >> shiftp->shift1) + + (thread->cpu_usage >> shiftp->shift2); + thread->sched_usage = + (thread->sched_usage >> shiftp->shift1) + + (thread->sched_usage >> shiftp->shift2); + } + else { + thread->cpu_usage = + (thread->cpu_usage >> shiftp->shift1) - + (thread->cpu_usage >> -(shiftp->shift2)); + thread->sched_usage = + (thread->sched_usage >> shiftp->shift1) - + (thread->sched_usage >> -(shiftp->shift2)); + } + } + thread->cpu_delta = 0; + thread->sched_delta = 0; + /* + * Recompute priority if appropriate. + */ + if ( +#if MACH_FIXPRI + (thread->policy == POLICY_TIMESHARE) && +#endif /* MACH_FIXPRI */ + (thread->depress_priority < 0)) { + do_priority_computation(thread, temp_pri); + thread->sched_pri = temp_pri; + } +} + +/* + * run_queue_enqueue macro for thread_setrun(). + */ +#if DEBUG +#define run_queue_enqueue(rq, th) \ + MACRO_BEGIN \ + register unsigned int whichq; \ + \ + whichq = (th)->sched_pri; \ + if (whichq >= NRQS) { \ + printf("thread_setrun: pri too high (%d)\n", (th)->sched_pri); \ + whichq = NRQS - 1; \ + } \ + \ + simple_lock(&(rq)->lock); /* lock the run queue */ \ + checkrq((rq), "thread_setrun: before adding thread"); \ + enqueue_tail(&(rq)->runq[whichq], (queue_entry_t) (th)); \ + \ + if (whichq < (rq)->low || (rq)->count == 0) \ + (rq)->low = whichq; /* minimize */ \ + \ + (rq)->count++; \ + (th)->runq = (rq); \ + thread_check((th), (rq)); \ + checkrq((rq), "thread_setrun: after adding thread"); \ + simple_unlock(&(rq)->lock); \ + MACRO_END +#else /* DEBUG */ +#define run_queue_enqueue(rq, th) \ + MACRO_BEGIN \ + register unsigned int whichq; \ + \ + whichq = (th)->sched_pri; \ + if (whichq >= NRQS) { \ + printf("thread_setrun: pri too high (%d)\n", (th)->sched_pri); \ + whichq = NRQS - 1; \ + } \ + \ + simple_lock(&(rq)->lock); /* lock the run queue */ \ + enqueue_tail(&(rq)->runq[whichq], (queue_entry_t) (th)); \ + \ + if (whichq < (rq)->low || (rq)->count == 0) \ + (rq)->low = whichq; /* minimize */ \ + \ + (rq)->count++; \ + (th)->runq = (rq); \ + simple_unlock(&(rq)->lock); \ + MACRO_END +#endif /* DEBUG */ +/* + * thread_setrun: + * + * Make thread runnable; dispatch directly onto an idle processor + * if possible. Else put on appropriate run queue (processor + * if bound, else processor set. Caller must have lock on thread. + * This is always called at splsched. + */ + +void thread_setrun( + register thread_t th, + boolean_t may_preempt) +{ + register processor_t processor; + register run_queue_t rq; +#if NCPUS > 1 + register processor_set_t pset; +#endif /* NCPUS > 1 */ + + /* + * Update priority if needed. + */ + if (th->sched_stamp != sched_tick) { + update_priority(th); + } + + assert(th->runq == RUN_QUEUE_NULL); + +#if NCPUS > 1 + /* + * Try to dispatch the thread directly onto an idle processor. + */ + if ((processor = th->bound_processor) == PROCESSOR_NULL) { + /* + * Not bound, any processor in the processor set is ok. + */ + pset = th->processor_set; +#if HW_FOOTPRINT + /* + * But first check the last processor it ran on. 
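+	     * The idle check is made twice on purpose: once without
+	     * locks as a cheap filter, and again after taking the
+	     * processor and idle-queue locks, since the processor may
+	     * have been dispatched in between.  The pset idle queue and
+	     * the bound-processor case below use the same
+	     * check/lock/recheck pattern.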
+ */ + processor = th->last_processor; + if (processor->state == PROCESSOR_IDLE) { + simple_lock(&processor->lock); + simple_lock(&pset->idle_lock); + if ((processor->state == PROCESSOR_IDLE) +#if MACH_HOST + && (processor->processor_set == pset) +#endif /* MACH_HOST */ + ) { + queue_remove(&pset->idle_queue, processor, + processor_t, processor_queue); + pset->idle_count--; + processor->next_thread = th; + processor->state = PROCESSOR_DISPATCHING; + simple_unlock(&pset->idle_lock); + simple_unlock(&processor->lock); + return; + } + simple_unlock(&pset->idle_lock); + simple_unlock(&processor->lock); + } +#endif /* HW_FOOTPRINT */ + + if (pset->idle_count > 0) { + simple_lock(&pset->idle_lock); + if (pset->idle_count > 0) { + processor = (processor_t) queue_first(&pset->idle_queue); + queue_remove(&(pset->idle_queue), processor, processor_t, + processor_queue); + pset->idle_count--; + processor->next_thread = th; + processor->state = PROCESSOR_DISPATCHING; + simple_unlock(&pset->idle_lock); + return; + } + simple_unlock(&pset->idle_lock); + } + rq = &(pset->runq); + run_queue_enqueue(rq,th); + /* + * Preempt check + */ + if (may_preempt && +#if MACH_HOST + (pset == current_processor()->processor_set) && +#endif /* MACH_HOST */ + (current_thread()->sched_pri > th->sched_pri)) { + /* + * Turn off first_quantum to allow csw. + */ + current_processor()->first_quantum = FALSE; + ast_on(cpu_number(), AST_BLOCK); + } + } + else { + /* + * Bound, can only run on bound processor. Have to lock + * processor here because it may not be the current one. + */ + if (processor->state == PROCESSOR_IDLE) { + simple_lock(&processor->lock); + pset = processor->processor_set; + simple_lock(&pset->idle_lock); + if (processor->state == PROCESSOR_IDLE) { + queue_remove(&pset->idle_queue, processor, + processor_t, processor_queue); + pset->idle_count--; + processor->next_thread = th; + processor->state = PROCESSOR_DISPATCHING; + simple_unlock(&pset->idle_lock); + simple_unlock(&processor->lock); + return; + } + simple_unlock(&pset->idle_lock); + simple_unlock(&processor->lock); + } + rq = &(processor->runq); + run_queue_enqueue(rq,th); + + /* + * Cause ast on processor if processor is on line. + * + * XXX Don't do this remotely to master because this will + * XXX send an interprocessor interrupt, and that's too + * XXX expensive for all the unparallelized U*x code. + */ + if (processor == current_processor()) { + ast_on(cpu_number(), AST_BLOCK); + } + else if ((processor != master_processor) && + (processor->state != PROCESSOR_OFF_LINE)) { + cause_ast_check(processor); + } + } +#else /* NCPUS > 1 */ + /* + * XXX should replace queue with a boolean in this case. + */ + if (default_pset.idle_count > 0) { + processor = (processor_t) queue_first(&default_pset.idle_queue); + queue_remove(&default_pset.idle_queue, processor, + processor_t, processor_queue); + default_pset.idle_count--; + processor->next_thread = th; + processor->state = PROCESSOR_DISPATCHING; + return; + } + if (th->bound_processor == PROCESSOR_NULL) { + rq = &(default_pset.runq); + } + else { + rq = &(master_processor->runq); + ast_on(cpu_number(), AST_BLOCK); + } + run_queue_enqueue(rq,th); + + /* + * Preempt check + */ + if (may_preempt && (current_thread()->sched_pri > th->sched_pri)) { + /* + * Turn off first_quantum to allow context switch. 
+ */ + current_processor()->first_quantum = FALSE; + ast_on(cpu_number(), AST_BLOCK); + } +#endif /* NCPUS > 1 */ +} + +/* + * set_pri: + * + * Set the priority of the specified thread to the specified + * priority. This may cause the thread to change queues. + * + * The thread *must* be locked by the caller. + */ + +void set_pri( + thread_t th, + int pri, + boolean_t resched) +{ + register struct run_queue *rq; + + rq = rem_runq(th); + th->sched_pri = pri; + if (rq != RUN_QUEUE_NULL) { + if (resched) + thread_setrun(th, TRUE); + else + run_queue_enqueue(rq, th); + } +} + +/* + * rem_runq: + * + * Remove a thread from its run queue. + * The run queue that the process was on is returned + * (or RUN_QUEUE_NULL if not on a run queue). Thread *must* be locked + * before calling this routine. Unusual locking protocol on runq + * field in thread structure makes this code interesting; see thread.h. + */ + +struct run_queue *rem_runq( + thread_t th) +{ + register struct run_queue *rq; + + rq = th->runq; + /* + * If rq is RUN_QUEUE_NULL, the thread will stay out of the + * run_queues because the caller locked the thread. Otherwise + * the thread is on a runq, but could leave. + */ + if (rq != RUN_QUEUE_NULL) { + simple_lock(&rq->lock); +#if DEBUG + checkrq(rq, "rem_runq: at entry"); +#endif /* DEBUG */ + if (rq == th->runq) { + /* + * Thread is in a runq and we have a lock on + * that runq. + */ +#if DEBUG + checkrq(rq, "rem_runq: before removing thread"); + thread_check(th, rq); +#endif /* DEBUG */ + remqueue(&rq->runq[0], (queue_entry_t) th); + rq->count--; +#if DEBUG + checkrq(rq, "rem_runq: after removing thread"); +#endif /* DEBUG */ + th->runq = RUN_QUEUE_NULL; + simple_unlock(&rq->lock); + } + else { + /* + * The thread left the runq before we could + * lock the runq. It is not on a runq now, and + * can't move again because this routine's + * caller locked the thread. + */ + simple_unlock(&rq->lock); + rq = RUN_QUEUE_NULL; + } + } + + return rq; +} + + +/* + * choose_thread: + * + * Choose a thread to execute. The thread chosen is removed + * from its run queue. Note that this requires only that the runq + * lock be held. + * + * Strategy: + * Check processor runq first; if anything found, run it. + * Else check pset runq; if nothing found, return idle thread. + * + * Second line of strategy is implemented by choose_pset_thread. + * This is only called on processor startup and when thread_block + * thinks there's something in the processor runq. + */ + +thread_t choose_thread( + processor_t myprocessor) +{ + thread_t th; + register queue_t q; + register run_queue_t runq; + register int i; + register processor_set_t pset; + + runq = &myprocessor->runq; + + simple_lock(&runq->lock); + if (runq->count > 0) { + q = runq->runq + runq->low; + for (i = runq->low; i < NRQS ; i++, q++) { + if (!queue_empty(q)) { + th = (thread_t) dequeue_head(q); + th->runq = RUN_QUEUE_NULL; + runq->count--; + runq->low = i; + simple_unlock(&runq->lock); + return th; + } + } + panic("choose_thread"); + /*NOTREACHED*/ + } + simple_unlock(&runq->lock); + + pset = myprocessor->processor_set; + + simple_lock(&pset->runq.lock); + return choose_pset_thread(myprocessor,pset); +} + +/* + * choose_pset_thread: choose a thread from processor_set runq or + * set processor idle and choose its idle thread. + * + * Caller must be at splsched and have a lock on the runq. This + * lock is released by this routine. myprocessor is always the current + * processor, and pset must be its processor set. 
+ * This routine chooses and removes a thread from the runq if there + * is one (and returns it), else it sets the processor idle and + * returns its idle thread. + */ + +thread_t choose_pset_thread( + register processor_t myprocessor, + processor_set_t pset) +{ + register run_queue_t runq; + register thread_t th; + register queue_t q; + register int i; + + runq = &pset->runq; + + if (runq->count > 0) { + q = runq->runq + runq->low; + for (i = runq->low; i < NRQS ; i++, q++) { + if (!queue_empty(q)) { + th = (thread_t) dequeue_head(q); + th->runq = RUN_QUEUE_NULL; + runq->count--; + /* + * For POLICY_FIXEDPRI, runq->low must be + * accurate! + */ +#if MACH_FIXPRI + if ((runq->count > 0) && + (pset->policies & POLICY_FIXEDPRI)) { + while (queue_empty(q)) { + q++; + i++; + } + } +#endif /* MACH_FIXPRI */ + runq->low = i; +#if DEBUG + checkrq(runq, "choose_pset_thread"); +#endif /* DEBUG */ + simple_unlock(&runq->lock); + return th; + } + } + panic("choose_pset_thread"); + /*NOTREACHED*/ + } + simple_unlock(&runq->lock); + + /* + * Nothing is runnable, so set this processor idle if it + * was running. If it was in an assignment or shutdown, + * leave it alone. Return its idle thread. + */ + simple_lock(&pset->idle_lock); + if (myprocessor->state == PROCESSOR_RUNNING) { + myprocessor->state = PROCESSOR_IDLE; + /* + * XXX Until it goes away, put master on end of queue, others + * XXX on front so master gets used last. + */ + if (myprocessor == master_processor) { + queue_enter(&(pset->idle_queue), myprocessor, + processor_t, processor_queue); + } + else { + queue_enter_first(&(pset->idle_queue), myprocessor, + processor_t, processor_queue); + } + + pset->idle_count++; + } + simple_unlock(&pset->idle_lock); + + return myprocessor->idle_thread; +} + +/* + * no_dispatch_count counts number of times processors go non-idle + * without being dispatched. This should be very rare. + */ +int no_dispatch_count = 0; + +/* + * This is the idle thread, which just looks for other threads + * to execute. + */ + +void idle_thread_continue(void) +{ + register processor_t myprocessor; + register volatile thread_t *threadp; + register volatile int *gcount; + register volatile int *lcount; + register thread_t new_thread; + register int state; + int mycpu; + spl_t s; + + mycpu = cpu_number(); + myprocessor = current_processor(); + threadp = (volatile thread_t *) &myprocessor->next_thread; + lcount = (volatile int *) &myprocessor->runq.count; + + while (TRUE) { +#ifdef MARK_CPU_IDLE + MARK_CPU_IDLE(mycpu); +#endif /* MARK_CPU_IDLE */ + +#if MACH_HOST + gcount = (volatile int *) + &myprocessor->processor_set->runq.count; +#else /* MACH_HOST */ + gcount = (volatile int *) &default_pset.runq.count; +#endif /* MACH_HOST */ + +/* + * This cpu will be dispatched (by thread_setrun) by setting next_thread + * to the value of the thread to run next. Also check runq counts. + */ + while ((*threadp == (volatile thread_t)THREAD_NULL) && + (*gcount == 0) && (*lcount == 0)) { + + /* check for ASTs while we wait */ + + if (need_ast[mycpu] &~ AST_SCHEDULING) { + (void) splsched(); + /* don't allow scheduling ASTs */ + need_ast[mycpu] &= ~AST_SCHEDULING; + ast_taken(); + /* back at spl0 */ + } + + /* + * machine_idle is a machine dependent function, + * to conserve power. 
+ */ +#if POWER_SAVE + machine_idle(mycpu); +#endif /* POWER_SAVE */ + } + +#ifdef MARK_CPU_ACTIVE + MARK_CPU_ACTIVE(mycpu); +#endif /* MARK_CPU_ACTIVE */ + + s = splsched(); + + /* + * This is not a switch statement to avoid the + * bounds checking code in the common case. + */ +retry: + state = myprocessor->state; + if (state == PROCESSOR_DISPATCHING) { + /* + * Commmon case -- cpu dispatched. + */ + new_thread = (thread_t) *threadp; + *threadp = (volatile thread_t) THREAD_NULL; + myprocessor->state = PROCESSOR_RUNNING; + /* + * set up quantum for new thread. + */ +#if MACH_FIXPRI + if (new_thread->policy == POLICY_TIMESHARE) { +#endif /* MACH_FIXPRI */ + /* + * Just use set quantum. No point in + * checking for shorter local runq quantum; + * csw_needed will handle correctly. + */ +#if MACH_HOST + myprocessor->quantum = new_thread-> + processor_set->set_quantum; +#else /* MACH_HOST */ + myprocessor->quantum = + default_pset.set_quantum; +#endif /* MACH_HOST */ + +#if MACH_FIXPRI + } + else { + /* + * POLICY_FIXEDPRI + */ + myprocessor->quantum = new_thread->sched_data; + } +#endif /* MACH_FIXPRI */ + myprocessor->first_quantum = TRUE; + counter(c_idle_thread_handoff++); + thread_run(idle_thread_continue, new_thread); + } + else if (state == PROCESSOR_IDLE) { + register processor_set_t pset; + + pset = myprocessor->processor_set; + simple_lock(&pset->idle_lock); + if (myprocessor->state != PROCESSOR_IDLE) { + /* + * Something happened, try again. + */ + simple_unlock(&pset->idle_lock); + goto retry; + } + /* + * Processor was not dispatched (Rare). + * Set it running again. + */ + no_dispatch_count++; + pset->idle_count--; + queue_remove(&pset->idle_queue, myprocessor, + processor_t, processor_queue); + myprocessor->state = PROCESSOR_RUNNING; + simple_unlock(&pset->idle_lock); + counter(c_idle_thread_block++); + thread_block(idle_thread_continue); + } + else if ((state == PROCESSOR_ASSIGN) || + (state == PROCESSOR_SHUTDOWN)) { + /* + * Changing processor sets, or going off-line. + * Release next_thread if there is one. Actual + * thread to run is on a runq. + */ + if ((new_thread = (thread_t)*threadp)!= THREAD_NULL) { + *threadp = (volatile thread_t) THREAD_NULL; + thread_setrun(new_thread, FALSE); + } + + counter(c_idle_thread_block++); + thread_block(idle_thread_continue); + } + else { + printf(" Bad processor state %d (Cpu %d)\n", + cpu_state(mycpu), mycpu); + panic("idle_thread"); + } + + (void) splx(s); + } +} + +void idle_thread(void) +{ + register thread_t self = current_thread(); + spl_t s; + + stack_privilege(self); + + s = splsched(); + self->priority = 31; + self->sched_pri = 31; + + /* + * Set the idle flag to indicate that this is an idle thread, + * enter ourselves in the idle array, and thread_block() to get + * out of the run queues (and set the processor idle when we + * run next time). + */ + thread_lock(self); + self->state |= TH_IDLE; + thread_unlock(self); + current_processor()->idle_thread = self; + (void) splx(s); + + counter(c_idle_thread_block++); + thread_block(idle_thread_continue); + idle_thread_continue(); + /*NOTREACHED*/ +} + +/* + * sched_thread: scheduler thread. + * + * This thread handles periodic calculations in the scheduler that + * we don't want to do at interrupt level. This allows us to + * avoid blocking. + */ +void sched_thread_continue(void) +{ + while (TRUE) { + (void) compute_mach_factor(); + + /* + * Check for stuck threads. 
This can't be done off of + * the callout queue because it requires operations that + * can't be used from interrupt level. + */ + if (sched_tick & 1) + do_thread_scan(); + + assert_wait((event_t) 0, FALSE); + counter(c_sched_thread_block++); + thread_block(sched_thread_continue); + } +} + +void sched_thread(void) +{ + sched_thread_id = current_thread(); + + /* + * Sleep on event 0, recompute_priorities() will awaken + * us by calling clear_wait(). + */ + assert_wait((event_t) 0, FALSE); + counter(c_sched_thread_block++); + thread_block(sched_thread_continue); + sched_thread_continue(); + /*NOTREACHED*/ +} + +#define MAX_STUCK_THREADS 16 + +/* + * do_thread_scan: scan for stuck threads. A thread is stuck if + * it is runnable but its priority is so low that it has not + * run for several seconds. Its priority should be higher, but + * won't be until it runs and calls update_priority. The scanner + * finds these threads and does the updates. + * + * Scanner runs in two passes. Pass one squirrels likely + * thread ids away in an array, and removes them from the run queue. + * Pass two does the priority updates. This is necessary because + * the run queue lock is required for the candidate scan, but + * cannot be held during updates [set_pri will deadlock]. + * + * Array length should be enough so that restart isn't necessary, + * but restart logic is included. Does not scan processor runqs. + * + */ + +boolean_t do_thread_scan_debug = FALSE; + +thread_t stuck_threads[MAX_STUCK_THREADS]; +int stuck_count = 0; + +/* + * do_runq_scan is the guts of pass 1. It scans a runq for + * stuck threads. A boolean is returned indicating whether + * it ran out of space. + */ + +boolean_t +do_runq_scan( + run_queue_t runq) +{ + register spl_t s; + register queue_t q; + register thread_t thread; + register int count; + + s = splsched(); + simple_lock(&runq->lock); + if((count = runq->count) > 0) { + q = runq->runq + runq->low; + while (count > 0) { + thread = (thread_t) queue_first(q); + while (!queue_end(q, (queue_entry_t) thread)) { + /* + * Get the next thread now, since we may + * remove this thread from the run queue. + */ + thread_t next = (thread_t) queue_next(&thread->links); + + if ((thread->state & TH_SCHED_STATE) == TH_RUN && + sched_tick - thread->sched_stamp > 1) { + /* + * Stuck, save its id for later. + */ + if (stuck_count == MAX_STUCK_THREADS) { + /* + * !@#$% No more room. + */ + simple_unlock(&runq->lock); + splx(s); + return TRUE; + } + /* + * We can`t take the thread_lock here, + * since we already have the runq lock. + * So we can`t grab a reference to the + * thread. However, a thread that is + * in RUN state cannot be deallocated + * until it stops running. If it isn`t + * on the runq, then thread_halt cannot + * see it. So we remove the thread + * from the runq to make it safe. 
+ */ + remqueue(q, (queue_entry_t) thread); + runq->count--; + thread->runq = RUN_QUEUE_NULL; + + stuck_threads[stuck_count++] = thread; +if (do_thread_scan_debug) + printf("do_runq_scan: adding thread %#x\n", thread); + } + count--; + thread = next; + } + q++; + } + } + simple_unlock(&runq->lock); + splx(s); + + return FALSE; +} + +void do_thread_scan(void) +{ + register spl_t s; + register boolean_t restart_needed = 0; + register thread_t thread; +#if MACH_HOST + register processor_set_t pset; +#endif /* MACH_HOST */ + + do { +#if MACH_HOST + simple_lock(&all_psets_lock); + queue_iterate(&all_psets, pset, processor_set_t, all_psets) { + if (restart_needed = do_runq_scan(&pset->runq)) + break; + } + simple_unlock(&all_psets_lock); +#else /* MACH_HOST */ + restart_needed = do_runq_scan(&default_pset.runq); +#endif /* MACH_HOST */ + if (!restart_needed) + restart_needed = do_runq_scan(&master_processor->runq); + + /* + * Ok, we now have a collection of candidates -- fix them. + */ + + while (stuck_count > 0) { + thread = stuck_threads[--stuck_count]; + stuck_threads[stuck_count] = THREAD_NULL; + s = splsched(); + thread_lock(thread); + if ((thread->state & TH_SCHED_STATE) == TH_RUN) { + /* + * Do the priority update. Call + * thread_setrun because thread is + * off the run queues. + */ + update_priority(thread); + thread_setrun(thread, TRUE); + } + thread_unlock(thread); + splx(s); + } + } while (restart_needed); +} + +#if DEBUG +void checkrq( + run_queue_t rq, + char *msg) +{ + register queue_t q1; + register int i, j; + register queue_entry_t e; + register int low; + + low = -1; + j = 0; + q1 = rq->runq; + for (i = 0; i < NRQS; i++) { + if (q1->next == q1) { + if (q1->prev != q1) + panic("checkrq: empty at %s", msg); + } + else { + if (low == -1) + low = i; + + for (e = q1->next; e != q1; e = e->next) { + j++; + if (e->next->prev != e) + panic("checkrq-2 at %s", msg); + if (e->prev->next != e) + panic("checkrq-3 at %s", msg); + } + } + q1++; + } + if (j != rq->count) + panic("checkrq: count wrong at %s", msg); + if (rq->count != 0 && low < rq->low) + panic("checkrq: low wrong at %s", msg); +} + +void thread_check( + register thread_t th, + register run_queue_t rq) +{ + register unsigned int whichq; + + whichq = th->sched_pri; + if (whichq >= NRQS) { + printf("thread_check: priority too high\n"); + whichq = NRQS-1; + } + if ((th->links.next == &rq->runq[whichq]) && + (rq->runq[whichq].prev != (queue_entry_t)th)) + panic("thread_check"); +} +#endif /* DEBUG */ diff --git a/kern/sched_prim.h b/kern/sched_prim.h new file mode 100644 index 0000000..ef89514 --- /dev/null +++ b/kern/sched_prim.h @@ -0,0 +1,163 @@ +/* + * Mach Operating System + * Copyright (c) 1992,1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: sched_prim.h + * Author: David Golub + * + * Scheduling primitive definitions file + * + */ + +#ifndef _KERN_SCHED_PRIM_H_ +#define _KERN_SCHED_PRIM_H_ + +#include <mach/boolean.h> +#include <mach/message.h> /* for mach_msg_timeout_t */ +#include <kern/lock.h> +#include <kern/kern_types.h> /* for thread_t */ + +/* + * Possible results of assert_wait - returned in + * current_thread()->wait_result. + */ +#define THREAD_AWAKENED 0 /* normal wakeup */ +#define THREAD_TIMED_OUT 1 /* timeout expired */ +#define THREAD_INTERRUPTED 2 /* interrupted by clear_wait */ +#define THREAD_RESTART 3 /* restart operation entirely */ + +typedef void *event_t; /* wait event */ + +typedef void (*continuation_t)(void); /* continuation */ + +/* + * Exported interface to sched_prim.c. + */ + +extern void sched_init(void); + +extern void assert_wait( + event_t event, + boolean_t interruptible); +extern void clear_wait( + thread_t thread, + int result, + boolean_t interrupt_only); +extern void thread_sleep( + event_t event, + simple_lock_t lock, + boolean_t interruptible); +extern void thread_wakeup(); /* for function pointers */ +extern void thread_wakeup_prim( + event_t event, + boolean_t one_thread, + int result); +extern boolean_t thread_invoke( + thread_t old_thread, + continuation_t continuation, + thread_t new_thread); +extern void thread_block( + continuation_t continuation); +extern void thread_run( + continuation_t continuation, + thread_t new_thread); +extern void thread_set_timeout( + int t); +extern void thread_setrun( + thread_t thread, + boolean_t may_preempt); +extern void thread_dispatch( + thread_t thread); +extern void thread_continue( + thread_t old_thread); +extern void thread_go( + thread_t thread); +extern void thread_will_wait( + thread_t thread); +extern void thread_will_wait_with_timeout( + thread_t thread, + mach_msg_timeout_t msecs); +extern boolean_t thread_handoff( + thread_t old_thread, + continuation_t continuation, + thread_t new_thread); +extern void recompute_priorities(); + +/* + * Routines defined as macros + */ + +#define thread_wakeup(x) \ + thread_wakeup_prim((x), FALSE, THREAD_AWAKENED) +#define thread_wakeup_with_result(x, z) \ + thread_wakeup_prim((x), FALSE, (z)) +#define thread_wakeup_one(x) \ + thread_wakeup_prim((x), TRUE, THREAD_AWAKENED) + +/* + * Machine-dependent code must define these functions. + */ + +extern void thread_bootstrap_return(void); +extern void thread_exception_return(void); +#ifdef __GNUC__ +extern void __volatile__ thread_syscall_return(kern_return_t); +#else +extern void thread_syscall_return(kern_return_t); +#endif +extern thread_t switch_context( + thread_t old_thread, + continuation_t continuation, + thread_t new_thread); +extern void stack_handoff( + thread_t old_thread, + thread_t new_thread); + +/* + * These functions are either defined in kern/thread.c + * via machine-dependent stack_attach and stack_detach functions, + * or are defined directly by machine-dependent code. 
+ */ + +extern void stack_alloc( + thread_t thread, + void (*resume)(thread_t)); +extern boolean_t stack_alloc_try( + thread_t thread, + void (*resume)(thread_t)); +extern void stack_free( + thread_t thread); + +/* + * Convert a timeout in milliseconds (mach_msg_timeout_t) + * to a timeout in ticks (for use by set_timeout). + * This conversion rounds UP so that small timeouts + * at least wait for one tick instead of not waiting at all. + */ + +#define convert_ipc_timeout_to_ticks(millis) \ + (((millis) * hz + 999) / 1000) + +#endif /* _KERN_SCHED_PRIM_H_ */ diff --git a/kern/server_loop.ch b/kern/server_loop.ch new file mode 100644 index 0000000..5a0c69c --- /dev/null +++ b/kern/server_loop.ch @@ -0,0 +1,102 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: kern/server_loop.c + * + * A common server loop for builtin tasks. + */ + +/* + * Must define symbols for: + * SERVER_NAME String name of this module + * SERVER_LOOP Routine name for the loop + * SERVER_DISPATCH MiG function(s) to handle message + * + * Must redefine symbols for pager_server functions. + */ + +#include <mach/port.h> +#include <mach/message.h> +#include <vm/vm_kern.h> /* for kernel_map */ + +void SERVER_LOOP(rcv_set, max_size) +{ + register mach_msg_header_t *in_msg; + register mach_msg_header_t *out_msg; + register mach_msg_header_t *tmp_msg; + vm_offset_t messages; + mach_msg_return_t r; + + /* + * Allocate our message buffers. + */ + + messages = kalloc(2 * max_size); + if (messages == 0) + panic(SERVER_NAME); + in_msg = (mach_msg_header_t *) messages; + out_msg = (mach_msg_header_t *) (messages + max_size); + + /* + * Service loop... receive messages and process them. 
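+	 *
+	 * The first loop retries until an initial request arrives.
+	 * Thereafter each request is dispatched and, when it names a
+	 * reply port, the reply is sent and the next request received
+	 * with a single combined MACH_SEND_MSG|MACH_RCV_MSG call;
+	 * requests without a reply port fall back to a plain receive.
+	 * The next request lands in the reply buffer, so the two
+	 * buffers are swapped at the bottom of the loop.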
+ */ + + for (;;) { + /* receive first message */ + + receive_msg: + r = mach_msg(in_msg, MACH_RCV_MSG, 0, max_size, rcv_set, + MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); + if (r == MACH_MSG_SUCCESS) + break; + + printf("%s: receive failed, 0x%x.\n", SERVER_NAME, r); + } + + for (;;) { + /* process request message */ + + (void) SERVER_DISPATCH(in_msg, out_msg); + + /* send reply and receive next request */ + + if (out_msg->msgh_remote_port == MACH_PORT_NULL) + goto receive_msg; + + r = mach_msg(out_msg, MACH_SEND_MSG|MACH_RCV_MSG, + out_msg->msgh_size, max_size, rcv_set, + MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); + if (r != MACH_MSG_SUCCESS) { + printf("%s: send/receive failed, 0x%x.\n", + SERVER_NAME, r); + goto receive_msg; + } + + /* swap message buffers */ + + tmp_msg = in_msg; in_msg = out_msg; out_msg = tmp_msg; + } +} diff --git a/kern/shuttle.h b/kern/shuttle.h new file mode 100644 index 0000000..e8e574b --- /dev/null +++ b/kern/shuttle.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS + * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF + * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * CSL requests users of this software to return to csl-dist@cs.utah.edu any + * improvements that they make and grant CSL redistribution rights. + * + * Author: Bryan Ford, University of Utah CSL + */ +/* + * File: shuttle.h + * Author: Bryan Ford + * + * This file contains definitions for shuttles, + * which handle microscheduling for individual threads. + * + */ + +#ifndef _KERN_SHUTTLE_H_ +#define _KERN_SHUTTLE_H_ + +#include <kern/lock.h> + + + +struct Shuttle { + /* XXX must be first in thread */ +/* + * NOTE: The runq field in the thread structure has an unusual + * locking protocol. If its value is RUN_QUEUE_NULL, then it is + * locked by the thread_lock, but if its value is something else + * (i.e. a run_queue) then it is locked by that run_queue's lock. + */ + queue_chain_t links; /* current run queue links */ + run_queue_t runq; /* run queue p is on SEE BELOW */ + + /* Next pointer when on a queue */ + struct Shuttle *next; + + /* Micropriority level */ + int priority; + + /* General-purpose pointer field whose use depends on what the + thread happens to be doing */ + void *message; + + int foobar[1]; +}; +typedef struct Shuttle Shuttle; + + + +/* Exported functions */ + + + +/* Machine-dependent code must define the following functions */ + + + +#endif _KERN_SHUTTLE_H_ diff --git a/kern/startup.c b/kern/startup.c new file mode 100644 index 0000000..dc0d5a0 --- /dev/null +++ b/kern/startup.c @@ -0,0 +1,305 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University + * All Rights Reserved. 
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * Mach kernel startup. + */ + + +#include <xpr_debug.h> +#include <cpus.h> +#include <mach_host.h> +#include <norma_ipc.h> +#include <norma_vm.h> + +#include <mach/boolean.h> +#include <mach/machine.h> +#include <mach/task_special_ports.h> +#include <mach/vm_param.h> +#include <ipc/ipc_init.h> +#include <kern/cpu_number.h> +#include <kern/processor.h> +#include <kern/sched_prim.h> +#include <kern/task.h> +#include <kern/thread.h> +#include <kern/thread_swap.h> +#include <kern/time_out.h> +#include <kern/timer.h> +#include <kern/zalloc.h> +#include <vm/vm_kern.h> +#include <vm/vm_map.h> +#include <vm/vm_object.h> +#include <vm/vm_page.h> +#include <machine/machspl.h> +#include <machine/pmap.h> +#include <mach/version.h> + + + +extern void vm_mem_init(); +extern void vm_mem_bootstrap(); +extern void init_timeout(); +extern void machine_init(); + +extern void idle_thread(); +extern void vm_pageout(); +extern void reaper_thread(); +extern void swapin_thread(); +extern void sched_thread(); + +extern void bootstrap_create(); +extern void device_service_create(); + +void cpu_launch_first_thread(); /* forward */ +void start_kernel_threads(); /* forward */ + +#if NCPUS > 1 +extern void start_other_cpus(); +extern void action_thread(); +#endif NCPUS > 1 + +/* XX */ +extern vm_offset_t phys_first_addr, phys_last_addr; + +/* + * Running in virtual memory, on the interrupt stack. + * Does not return. Dispatches initial thread. + * + * Assumes that master_cpu is set. + */ +void setup_main() +{ + thread_t startup_thread; + + panic_init(); + printf_init(); + + sched_init(); + vm_mem_bootstrap(); + ipc_bootstrap(); + vm_mem_init(); + ipc_init(); + + /* + * As soon as the virtual memory system is up, we record + * that this CPU is using the kernel pmap. + */ + PMAP_ACTIVATE_KERNEL(master_cpu); + + init_timers(); + init_timeout(); + +#if XPR_DEBUG + xprbootstrap(); +#endif XPR_DEBUG + + timestamp_init(); + + mapable_time_init(); + + machine_init(); + + machine_info.max_cpus = NCPUS; + machine_info.memory_size = phys_last_addr - phys_first_addr; /* XXX mem_size */ + machine_info.avail_cpus = 0; + machine_info.major_version = KERNEL_MAJOR_VERSION; + machine_info.minor_version = KERNEL_MINOR_VERSION; + + /* + * Initialize the IPC, task, and thread subsystems. + */ + task_init(); + thread_init(); + swapper_init(); +#if MACH_HOST + pset_sys_init(); +#endif MACH_HOST + + /* + * Kick off the time-out driven routines by calling + * them the first time. + */ + recompute_priorities(); + compute_mach_factor(); + + /* + * Create a kernel thread to start the other kernel + * threads. 
Thread_resume (from kernel_thread) calls + * thread_setrun, which may look at current thread; + * we must avoid this, since there is no current thread. + */ + + /* + * Create the thread, and point it at the routine. + */ + (void) thread_create(kernel_task, &startup_thread); + thread_start(startup_thread, start_kernel_threads); + + /* + * Give it a kernel stack. + */ + thread_doswapin(startup_thread); + + /* + * Pretend it is already running, and resume it. + * Since it looks as if it is running, thread_resume + * will not try to put it on the run queues. + * + * We can do all of this without locking, because nothing + * else is running yet. + */ + startup_thread->state |= TH_RUN; + (void) thread_resume(startup_thread); + + /* + * Start the thread. + */ + cpu_launch_first_thread(startup_thread); + /*NOTREACHED*/ +} + +/* + * Now running in a thread. Create the rest of the kernel threads + * and the bootstrap task. + */ +void start_kernel_threads() +{ + register int i; + + /* + * Create the idle threads and the other + * service threads. + */ + for (i = 0; i < NCPUS; i++) { + if (machine_slot[i].is_cpu) { + thread_t th; + + (void) thread_create(kernel_task, &th); + thread_bind(th, cpu_to_processor(i)); + thread_start(th, idle_thread); + thread_doswapin(th); + (void) thread_resume(th); + } + } + + (void) kernel_thread(kernel_task, reaper_thread, (char *) 0); + (void) kernel_thread(kernel_task, swapin_thread, (char *) 0); + (void) kernel_thread(kernel_task, sched_thread, (char *) 0); + +#if NCPUS > 1 + /* + * Create the shutdown thread. + */ + (void) kernel_thread(kernel_task, action_thread, (char *) 0); + + /* + * Allow other CPUs to run. + */ + start_other_cpus(); +#endif NCPUS > 1 + + /* + * Create the device service. + */ + device_service_create(); + + /* + * Initialize NORMA ipc system. + */ +#if NORMA_IPC + norma_ipc_init(); +#endif NORMA_IPC + + /* + * Initialize NORMA vm system. + */ +#if NORMA_VM + norma_vm_init(); +#endif NORMA_VM + + /* + * Start the user bootstrap. + */ + bootstrap_create(); + +#if XPR_DEBUG + xprinit(); /* XXX */ +#endif XPR_DEBUG + + /* + * Become the pageout daemon. + */ + (void) spl0(); + vm_pageout(); + /*NOTREACHED*/ +} + +#if NCPUS > 1 +void slave_main() +{ + cpu_launch_first_thread(THREAD_NULL); +} +#endif NCPUS > 1 + +/* + * Start up the first thread on a CPU. + * First thread is specified for the master CPU. + */ +void cpu_launch_first_thread(th) + register thread_t th; +{ + register int mycpu; + + mycpu = cpu_number(); + + cpu_up(mycpu); + + start_timer(&kernel_timer[mycpu]); + + /* + * Block all interrupts for choose_thread. + */ + (void) splhigh(); + + if (th == THREAD_NULL) + th = choose_thread(cpu_to_processor(mycpu)); + if (th == THREAD_NULL) + panic("cpu_launch_first_thread"); + + startrtclock(); /* needs an active thread */ + PMAP_ACTIVATE_KERNEL(mycpu); + + active_threads[mycpu] = th; + active_stacks[mycpu] = th->kernel_stack; + thread_lock(th); + th->state &= ~TH_UNINT; + thread_unlock(th); + timer_switch(&th->system_timer); + + PMAP_ACTIVATE_USER(vm_map_pmap(th->task->map), th, mycpu); + + load_context(th); + /*NOTREACHED*/ +} diff --git a/kern/strings.c b/kern/strings.c new file mode 100644 index 0000000..89563cd --- /dev/null +++ b/kern/strings.c @@ -0,0 +1,174 @@ +/* + * Mach Operating System + * Copyright (c) 1993 Carnegie Mellon University + * All Rights Reserved. 
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: strings.c + * Author: Robert V. Baron, Carnegie Mellon University + * Date: ??/92 + * + * String functions. + */ + +#include <kern/strings.h> /* make sure we sell the truth */ + +#ifdef strcpy +#undef strcmp +#undef strncmp +#undef strcpy +#undef strncpy +#undef strlen +#endif + +/* + * Abstract: + * strcmp (s1, s2) compares the strings "s1" and "s2". + * It returns 0 if the strings are identical. It returns + * > 0 if the first character that differs in the two strings + * is larger in s1 than in s2 or if s1 is longer than s2 and + * the contents are identical up to the length of s2. + * It returns < 0 if the first differing character is smaller + * in s1 than in s2 or if s1 is shorter than s2 and the + * contents are identical upto the length of s1. + */ + +int +strcmp( + register const char *s1, + register const char *s2) +{ + register unsigned int a, b; + + do { + a = *s1++; + b = *s2++; + if (a != b) + return a-b; /* includes case when + 'a' is zero and 'b' is not zero + or vice versa */ + } while (a != '\0'); + + return 0; /* both are zero */ +} + + +/* + * Abstract: + * strncmp (s1, s2, n) compares the strings "s1" and "s2" + * in exactly the same way as strcmp does. Except the + * comparison runs for at most "n" characters. + */ + +int +strncmp( + register const char *s1, + register const char *s2, + unsigned long n) +{ + register unsigned int a, b; + + while (n != 0) { + a = *s1++; + b = *s2++; + if (a != b) + return a-b; /* includes case when + 'a' is zero and 'b' is not zero + or vice versa */ + if (a == '\0') + return 0; /* both are zero */ + n--; + } + + return 0; +} + + +/* + * Abstract: + * strcpy copies the contents of the string "from" including + * the null terminator to the string "to". A pointer to "to" + * is returned. + */ + +char * +strcpy( + register char *to, + register const char *from) +{ + register char *ret = to; + + while ((*to++ = *from++) != '\0') + continue; + + return ret; +} + +/* + * Abstract: + * strncpy copies "count" characters from the "from" string to + * the "to" string. If "from" contains less than "count" characters + * "to" will be padded with null characters until exactly "count" + * characters have been written. The return value is a pointer + * to the "to" string. 
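+ *
+ *	For example, strncpy(buf, "ab", 4) stores 'a', 'b', '\0', '\0'.
+ *	Note that when "from" holds "count" or more characters the
+ *	result is not null-terminated: strncpy(buf, "abcdef", 4)
+ *	stores only 'a', 'b', 'c', 'd'.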
+ */ + +char * +strncpy( + register char *to, + register const char *from, + register unsigned long count) +{ + register char *ret = to; + + while (count != 0) { + count--; + if ((*to++ = *from++) == '\0') + break; + } + + while (count != 0) { + *to++ = '\0'; + count--; + } + + return ret; +} + +/* + * Abstract: + * strlen returns the number of characters in "string" preceeding + * the terminating null character. + */ + +unsigned long +strlen( + register const char *string) +{ + register const char *ret = string; + + while (*string++ != '\0') + continue; + + return string - 1 - ret; +} diff --git a/kern/strings.h b/kern/strings.h new file mode 100644 index 0000000..b71a7b3 --- /dev/null +++ b/kern/strings.h @@ -0,0 +1,53 @@ +/* + * Mach Operating System + * Copyright (c) 1993 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: strings.h + * Author: Alessandro Forin, Carnegie Mellon University + * Date: 3/93 + * + * Prototypes for string functions. The way GCC wants them. + */ + +extern int strcmp( + const char *, + const char * ); + +extern int strncmp( + const char *, + const char *, + unsigned long); + +extern char *strcpy( + char *, + const char *); + +extern char *strncpy( + char *, + const char *, + unsigned long); + +extern unsigned long strlen( + const char *); diff --git a/kern/syscall_emulation.c b/kern/syscall_emulation.c new file mode 100644 index 0000000..5443a33 --- /dev/null +++ b/kern/syscall_emulation.c @@ -0,0 +1,518 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. 
+ */ + +#include <mach/error.h> +#include <mach/vm_param.h> +#include <kern/syscall_emulation.h> +#include <kern/task.h> +#include <kern/kalloc.h> +#include <vm/vm_kern.h> + +/* XXX */ +#define syscall_emulation_sync(task) + + + +/* + * WARNING: + * This code knows that kalloc() allocates memory most efficiently + * in sizes that are powers of 2, and asks for those sizes. + */ + +/* + * Go from number of entries to size of struct eml_dispatch and back. + */ +#define base_size (sizeof(struct eml_dispatch) - sizeof(eml_routine_t)) +#define count_to_size(count) \ + (base_size + sizeof(vm_offset_t) * (count)) + +#define size_to_count(size) \ + ( ((size) - base_size) / sizeof(vm_offset_t) ) + +/* + * eml_init: initialize user space emulation code + */ +void eml_init() +{ +} + +/* + * eml_task_reference() [Exported] + * + * Bumps the reference count on the common emulation + * vector. + */ + +void eml_task_reference(task, parent) + task_t task, parent; +{ + register eml_dispatch_t eml; + + if (parent == TASK_NULL) + eml = EML_DISPATCH_NULL; + else + eml = parent->eml_dispatch; + + if (eml != EML_DISPATCH_NULL) { + simple_lock(&eml->lock); + eml->ref_count++; + simple_unlock(&eml->lock); + } + task->eml_dispatch = eml; +} + + +/* + * eml_task_deallocate() [Exported] + * + * Cleans up after the emulation code when a process exits. + */ + +void eml_task_deallocate(task) + task_t task; +{ + register eml_dispatch_t eml; + + eml = task->eml_dispatch; + if (eml != EML_DISPATCH_NULL) { + int count; + + simple_lock(&eml->lock); + count = --eml->ref_count; + simple_unlock(&eml->lock); + + if (count == 0) + kfree((vm_offset_t)eml, count_to_size(eml->disp_count)); + } +} + +/* + * task_set_emulation_vector: [Server Entry] + * set a list of emulated system calls for this task. + */ +kern_return_t +task_set_emulation_vector_internal(task, vector_start, emulation_vector, + emulation_vector_count) + task_t task; + int vector_start; + emulation_vector_t emulation_vector; + unsigned int emulation_vector_count; +{ + eml_dispatch_t cur_eml, new_eml, old_eml; + vm_size_t new_size; + int cur_start, cur_end; + int new_start = 0, new_end = 0; + int vector_end; + + if (task == TASK_NULL) + return EML_BAD_TASK; + + vector_end = vector_start + emulation_vector_count; + + /* + * We try to re-use the existing emulation vector + * if possible. We can reuse the vector if it + * is not shared with another task and if it is + * large enough to contain the entries we are + * supplying. + * + * We must grab the lock on the task to check whether + * there is an emulation vector. + * If the vector is shared or not large enough, we + * need to drop the lock and allocate a new emulation + * vector. + * + * While the lock is dropped, the emulation vector + * may be released by all other tasks (giving us + * exclusive use), or may be enlarged by another + * task_set_emulation_vector call. Therefore, + * after allocating the new emulation vector, we + * must grab the lock again to check whether we + * really need the new vector we just allocated. + * + * Since an emulation vector cannot be altered + * if it is in use by more than one task, the + * task lock is sufficient to protect the vector`s + * start, count, and contents. The lock in the + * vector protects only the reference count. + */ + + old_eml = EML_DISPATCH_NULL; /* vector to discard */ + new_eml = EML_DISPATCH_NULL; /* new vector */ + + for (;;) { + /* + * Find the current emulation vector. + * See whether we can overwrite it. 
+ */ + task_lock(task); + cur_eml = task->eml_dispatch; + if (cur_eml != EML_DISPATCH_NULL) { + cur_start = cur_eml->disp_min; + cur_end = cur_eml->disp_count + cur_start; + + simple_lock(&cur_eml->lock); + if (cur_eml->ref_count == 1 && + cur_start <= vector_start && + cur_end >= vector_end) + { + /* + * Can use the existing emulation vector. + * Discard any new one we allocated. + */ + simple_unlock(&cur_eml->lock); + old_eml = new_eml; + break; + } + + if (new_eml != EML_DISPATCH_NULL && + new_start <= cur_start && + new_end >= cur_end) + { + /* + * A new vector was allocated, and it is large enough + * to hold all the entries from the current vector. + * Copy the entries to the new emulation vector, + * deallocate the current one, and use the new one. + */ + bcopy((char *)&cur_eml->disp_vector[0], + (char *)&new_eml->disp_vector[cur_start-new_start], + cur_eml->disp_count * sizeof(vm_offset_t)); + + if (--cur_eml->ref_count == 0) + old_eml = cur_eml; /* discard old vector */ + simple_unlock(&cur_eml->lock); + + task->eml_dispatch = new_eml; + syscall_emulation_sync(task); + cur_eml = new_eml; + break; + } + simple_unlock(&cur_eml->lock); + + /* + * Need a new emulation vector. + * Ensure it will hold all the entries from + * both the old and new emulation vectors. + */ + new_start = vector_start; + if (new_start > cur_start) + new_start = cur_start; + new_end = vector_end; + if (new_end < cur_end) + new_end = cur_end; + } + else { + /* + * There is no current emulation vector. + * If a new one was allocated, use it. + */ + if (new_eml != EML_DISPATCH_NULL) { + task->eml_dispatch = new_eml; + cur_eml = new_eml; + break; + } + + /* + * Compute the size needed for the new vector. + */ + new_start = vector_start; + new_end = vector_end; + } + + /* + * Have no vector (or one that is no longer large enough). + * Drop all the locks and allocate a new vector. + * Repeat the loop to check whether the old vector was + * changed while we didn`t hold the locks. + */ + + task_unlock(task); + + if (new_eml != EML_DISPATCH_NULL) + kfree((vm_offset_t)new_eml, count_to_size(new_eml->disp_count)); + + new_size = count_to_size(new_end - new_start); + new_eml = (eml_dispatch_t) kalloc(new_size); + + bzero((char *)new_eml, new_size); + simple_lock_init(&new_eml->lock); + new_eml->ref_count = 1; + new_eml->disp_min = new_start; + new_eml->disp_count = new_end - new_start; + + continue; + } + + /* + * We have the emulation vector. + * Install the new emulation entries. + */ + bcopy((char *)&emulation_vector[0], + (char *)&cur_eml->disp_vector[vector_start - cur_eml->disp_min], + emulation_vector_count * sizeof(vm_offset_t)); + + task_unlock(task); + + /* + * Discard any old emulation vector we don`t need. + */ + if (old_eml) + kfree((vm_offset_t) old_eml, count_to_size(old_eml->disp_count)); + + return KERN_SUCCESS; +} + +/* + * task_set_emulation_vector: [Server Entry] + * + * Set the list of emulated system calls for this task. + * The list is out-of-line. + */ +kern_return_t +task_set_emulation_vector(task, vector_start, emulation_vector, + emulation_vector_count) + task_t task; + int vector_start; + emulation_vector_t emulation_vector; + unsigned int emulation_vector_count; +{ + kern_return_t kr; + vm_offset_t emul_vector_addr; + + if (task == TASK_NULL) + return EML_BAD_TASK; /* XXX sb KERN_INVALID_ARGUMENT */ + + /* + * The emulation vector is really a vm_map_copy_t. 
+ */ + kr = vm_map_copyout(ipc_kernel_map, &emul_vector_addr, + (vm_map_copy_t) emulation_vector); + if (kr != KERN_SUCCESS) + return kr; + + /* + * Do the work. + */ + kr = task_set_emulation_vector_internal( + task, + vector_start, + (emulation_vector_t) emul_vector_addr, + emulation_vector_count); + + /* + * Discard the memory + */ + (void) kmem_free(ipc_kernel_map, + emul_vector_addr, + emulation_vector_count * sizeof(eml_dispatch_t)); + + return kr; +} + +/* + * Compatibility entry. Vector is passed inline. + */ +kern_return_t +xxx_task_set_emulation_vector(task, vector_start, emulation_vector, + emulation_vector_count) + task_t task; + int vector_start; + emulation_vector_t emulation_vector; + unsigned int emulation_vector_count; +{ + return task_set_emulation_vector_internal( + task, + vector_start, + emulation_vector, + emulation_vector_count); +} + +/* + * task_get_emulation_vector: [Server Entry] + * + * Get the list of emulated system calls for this task. + * List is returned out-of-line. + */ +kern_return_t +task_get_emulation_vector(task, vector_start, emulation_vector, + emulation_vector_count) + task_t task; + int *vector_start; /* out */ + emulation_vector_t *emulation_vector; /* out */ + unsigned int *emulation_vector_count; /* out */ +{ + eml_dispatch_t eml; + vm_size_t vector_size, size; + vm_offset_t addr; + + if (task == TASK_NULL) + return EML_BAD_TASK; + + addr = 0; + size = 0; + + for(;;) { + vm_size_t size_needed; + + task_lock(task); + eml = task->eml_dispatch; + if (eml == EML_DISPATCH_NULL) { + task_unlock(task); + if (addr) + (void) kmem_free(ipc_kernel_map, addr, size); + *vector_start = 0; + *emulation_vector = 0; + *emulation_vector_count = 0; + return KERN_SUCCESS; + } + + /* + * Do we have the memory we need? + */ + vector_size = eml->disp_count * sizeof(vm_offset_t); + + size_needed = round_page(vector_size); + if (size_needed <= size) + break; + + /* + * If not, unlock the task and allocate more memory. + */ + task_unlock(task); + + if (size != 0) + kmem_free(ipc_kernel_map, addr, size); + + size = size_needed; + if (kmem_alloc(ipc_kernel_map, &addr, size) != KERN_SUCCESS) + return KERN_RESOURCE_SHORTAGE; + } + + /* + * Copy out the dispatch addresses + */ + *vector_start = eml->disp_min; + *emulation_vector_count = eml->disp_count; + bcopy((char *)eml->disp_vector, + (char *)addr, + vector_size); + + /* + * Unlock the task and free any memory we did not need + */ + task_unlock(task); + + { + vm_size_t size_used, size_left; + vm_map_copy_t memory; + + /* + * Free any unused memory beyond the end of the last page used + */ + size_used = round_page(vector_size); + if (size_used != size) + (void) kmem_free(ipc_kernel_map, + addr + size_used, + size - size_used); + + /* + * Zero the remainder of the page being returned. + */ + size_left = size_used - vector_size; + if (size_left > 0) + bzero((char *)addr + vector_size, size_left); + + /* + * Make memory into copyin form - this unwires it. + */ + (void) vm_map_copyin(ipc_kernel_map, addr, vector_size, TRUE, &memory); + + *emulation_vector = (emulation_vector_t) memory; + } + + return KERN_SUCCESS; +} + +/* + * xxx_task_get_emulation: [Server Entry] + * get the list of emulated system calls for this task. + * Compatibility code: return list in-line. 
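The out-of-line reply built by task_get_emulation_vector above follows a common pattern: allocate page-rounded wired memory, copy the payload, zero the slack at the end of the last page so stale kernel data is not handed to the receiver, then convert the pages with vm_map_copyin. A host-side sketch of just the size arithmetic, with a stand-in 4096-byte page and malloc in place of kmem_alloc:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE	4096UL	/* stand-in; the kernel uses the real page size */
#define round_page(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long count = 37;			/* e.g. 37 dispatch entries */
	unsigned long vector_size = count * sizeof(unsigned long);
	unsigned long size = round_page(vector_size);	/* what gets allocated */
	char *addr = malloc(size);			/* stand-in for kmem_alloc */

	if (addr == NULL)
		return 1;

	memset(addr, 0xAA, vector_size);		   /* pretend: copy the vector */
	memset(addr + vector_size, 0, size - vector_size); /* zero the slack */

	printf("payload %lu bytes, allocation %lu bytes, slack %lu bytes\n",
	       vector_size, size, size - vector_size);
	free(addr);
	return 0;
}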
+ */ +kern_return_t +xxx_task_get_emulation_vector(task, vector_start, emulation_vector, + emulation_vector_count) + task_t task; + int *vector_start; + emulation_vector_t emulation_vector; /* pointer to OUT array */ + unsigned int *emulation_vector_count; /*IN/OUT*/ +{ + register eml_dispatch_t eml; + + if (task == TASK_NULL) + return( EML_BAD_TASK ); + + task_lock(task); + + eml = task->eml_dispatch; + if (eml == EML_DISPATCH_NULL) { + task_unlock(task); + *vector_start = 0; + *emulation_vector_count = 0; + return( KERN_SUCCESS ); + } + + simple_lock(&eml->lock); + + if (*emulation_vector_count < eml->disp_count) { + simple_unlock(&eml->lock); + task_unlock(task); + return( EML_BAD_CNT ); + } + + *vector_start = eml->disp_min; + *emulation_vector_count = eml->disp_count; + bcopy((char *)eml->disp_vector, (char *)emulation_vector, + *emulation_vector_count * sizeof(vm_offset_t)); + simple_unlock(&eml->lock); + + task_unlock(task); + + return( KERN_SUCCESS ); +} + +/* + * task_set_emulation: [Server Entry] + * set up for user space emulation of syscalls within this task. + */ +kern_return_t task_set_emulation(task, routine_entry_pt, routine_number) + task_t task; + vm_offset_t routine_entry_pt; + int routine_number; +{ + return task_set_emulation_vector_internal(task, routine_number, + &routine_entry_pt, 1); +} diff --git a/kern/syscall_emulation.h b/kern/syscall_emulation.h new file mode 100644 index 0000000..91b3ed7 --- /dev/null +++ b/kern/syscall_emulation.h @@ -0,0 +1,61 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. 
+ */ + +#ifndef _KERN_SYSCALL_EMULATION_H_ +#define _KERN_SYSCALL_EMULATION_H_ + +#ifndef ASSEMBLER +#include <mach/machine/vm_types.h> +#include <kern/lock.h> + +typedef vm_offset_t eml_routine_t; + +typedef struct eml_dispatch { + decl_simple_lock_data(, lock) /* lock for reference count */ + int ref_count; /* reference count */ + int disp_count; /* count of entries in vector */ + int disp_min; /* index of lowest entry in vector */ + eml_routine_t disp_vector[1]; /* first entry in array of dispatch */ + /* routines (array has disp_count */ + /* elements) */ +} *eml_dispatch_t; + +typedef vm_offset_t *emulation_vector_t; /* Variable-length array */ + +#define EML_ROUTINE_NULL (eml_routine_t)0 +#define EML_DISPATCH_NULL (eml_dispatch_t)0 + +#define EML_SUCCESS (0) + +#define EML_MOD (err_kern|err_sub(2)) +#define EML_BAD_TASK (EML_MOD|0x0001) +#define EML_BAD_CNT (EML_MOD|0x0002) +#endif ASSEMBLER + +#endif _KERN_SYSCALL_EMULATION_H_ diff --git a/kern/syscall_subr.c b/kern/syscall_subr.c new file mode 100644 index 0000000..a040d71 --- /dev/null +++ b/kern/syscall_subr.c @@ -0,0 +1,399 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#include <mach_fixpri.h> +#include <cpus.h> + +#include <mach/boolean.h> +#include <mach/thread_switch.h> +#include <ipc/ipc_port.h> +#include <ipc/ipc_space.h> +#include <kern/counters.h> +#include <kern/ipc_kobject.h> +#include <kern/processor.h> +#include <kern/sched.h> +#include <kern/sched_prim.h> +#include <kern/ipc_sched.h> +#include <kern/task.h> +#include <kern/thread.h> +#include <kern/time_out.h> +#include <machine/machspl.h> /* for splsched */ + +#if MACH_FIXPRI +#include <mach/policy.h> +#endif MACH_FIXPRI + + + +/* + * swtch and swtch_pri both attempt to context switch (logic in + * thread_block no-ops the context switch if nothing would happen). + * A boolean is returned that indicates whether there is anything + * else runnable. + * + * This boolean can be used by a thread waiting on a + * lock or condition: If FALSE is returned, the thread is justified + * in becoming a resource hog by continuing to spin because there's + * nothing else useful that the processor could do. If TRUE is + * returned, the thread should make one more check on the + * lock and then be a good citizen and really suspend. 
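The comment above describes the intended use of the returned boolean: keep spinning while swtch() reports nothing else runnable, otherwise stop hogging the processor and really block. A hedged user-level sketch of that policy; try_lock() and lock_wait() are hypothetical primitives introduced only for illustration, and swtch() is the trap installed as number 60 in the table below:

#include <mach/boolean.h>

extern boolean_t swtch(void);		/* Mach trap, see mach_trap_table */

/* Hypothetical lock primitives, for illustration only. */
extern boolean_t try_lock(void *lock);
extern void lock_wait(void *lock);	/* really suspend until the lock is free */

void lock_acquire(void *lock)
{
	while (!try_lock(lock)) {
		if (swtch()) {
			/* Something else is runnable: be a good citizen,
			   sleep until the lock is available. */
			lock_wait(lock);	/* assume it acquires the lock */
			return;
		}
		/* Nothing else to run; spinning is the best use of the CPU. */
	}
}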
+ */ + +extern void thread_depress_priority(); +extern kern_return_t thread_depress_abort(); + +#ifdef CONTINUATIONS +void swtch_continue() +{ + register processor_t myprocessor; + + myprocessor = current_processor(); + thread_syscall_return(myprocessor->runq.count > 0 || + myprocessor->processor_set->runq.count > 0); + /*NOTREACHED*/ +} +#else /* not CONTINUATIONS */ +#define swtch_continue 0 +#endif /* not CONTINUATIONS */ + +boolean_t swtch() +{ + register processor_t myprocessor; + +#if NCPUS > 1 + myprocessor = current_processor(); + if (myprocessor->runq.count == 0 && + myprocessor->processor_set->runq.count == 0) + return(FALSE); +#endif NCPUS > 1 + + counter(c_swtch_block++); + thread_block(swtch_continue); + myprocessor = current_processor(); + return(myprocessor->runq.count > 0 || + myprocessor->processor_set->runq.count > 0); +} + +#ifdef CONTINUATIONS +void swtch_pri_continue() +{ + register thread_t thread = current_thread(); + register processor_t myprocessor; + + if (thread->depress_priority >= 0) + (void) thread_depress_abort(thread); + myprocessor = current_processor(); + thread_syscall_return(myprocessor->runq.count > 0 || + myprocessor->processor_set->runq.count > 0); + /*NOTREACHED*/ +} +#else /* not CONTINUATIONS */ +#define swtch_pri_continue 0 +#endif /* not CONTINUATIONS */ + +boolean_t swtch_pri(pri) + int pri; +{ + register thread_t thread = current_thread(); + register processor_t myprocessor; + +#ifdef lint + pri++; +#endif lint + +#if NCPUS > 1 + myprocessor = current_processor(); + if (myprocessor->runq.count == 0 && + myprocessor->processor_set->runq.count == 0) + return(FALSE); +#endif NCPUS > 1 + + /* + * XXX need to think about depression duration. + * XXX currently using min quantum. + */ + thread_depress_priority(thread, min_quantum); + + counter(c_swtch_pri_block++); + thread_block(swtch_pri_continue); + + if (thread->depress_priority >= 0) + (void) thread_depress_abort(thread); + myprocessor = current_processor(); + return(myprocessor->runq.count > 0 || + myprocessor->processor_set->runq.count > 0); +} + +extern int hz; + +#ifdef CONTINUATIONS +void thread_switch_continue() +{ + register thread_t cur_thread = current_thread(); + + /* + * Restore depressed priority + */ + if (cur_thread->depress_priority >= 0) + (void) thread_depress_abort(cur_thread); + thread_syscall_return(KERN_SUCCESS); + /*NOTREACHED*/ +} +#else /* not CONTINUATIONS */ +#define thread_switch_continue 0 +#endif /* not CONTINUATIONS */ + +/* + * thread_switch: + * + * Context switch. User may supply thread hint. + * + * Fixed priority threads that call this get what they asked for + * even if that violates priority order. + */ +kern_return_t thread_switch(thread_name, option, option_time) +mach_port_t thread_name; +int option; +mach_msg_timeout_t option_time; +{ + register thread_t cur_thread = current_thread(); + register processor_t myprocessor; + ipc_port_t port; + + /* + * Process option. + */ + switch (option) { + case SWITCH_OPTION_NONE: + /* + * Nothing to do. + */ + break; + + case SWITCH_OPTION_DEPRESS: + /* + * Depress priority for given time. + */ + thread_depress_priority(cur_thread, option_time); + break; + + case SWITCH_OPTION_WAIT: + thread_will_wait_with_timeout(cur_thread, option_time); + break; + + default: + return(KERN_INVALID_ARGUMENT); + } + +#ifndef MIGRATING_THREADS /* XXX thread_run defunct */ + /* + * Check and act on thread hint if appropriate. 
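thread_switch takes a thread hint port, one of the SWITCH_OPTION_* values, and an option time in milliseconds. A hedged caller-side sketch (the port name, the 10 ms figure, and the local prototype are illustrative; error handling is omitted):

#include <mach/kern_return.h>
#include <mach/port.h>
#include <mach/message.h>
#include <mach/thread_switch.h>

extern kern_return_t thread_switch(mach_port_t hint, int option,
				   mach_msg_timeout_t option_time);

void yield_politely(mach_port_t lock_holder)
{
	/* Hand off to the thread we believe holds the lock, depressing our
	   own priority for up to 10 ms while we are switched out. */
	(void) thread_switch(lock_holder, SWITCH_OPTION_DEPRESS, 10);

	/* Plain yield with no hint and no depression. */
	(void) thread_switch(MACH_PORT_NULL, SWITCH_OPTION_NONE, 0);
}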
+ */ + if ((thread_name != 0) && + (ipc_port_translate_send(cur_thread->task->itk_space, + thread_name, &port) == KERN_SUCCESS)) { + /* port is locked, but it might not be active */ + + /* + * Get corresponding thread. + */ + if (ip_active(port) && (ip_kotype(port) == IKOT_THREAD)) { + register thread_t thread; + register spl_t s; + + thread = (thread_t) port->ip_kobject; + /* + * Check if the thread is in the right pset. Then + * pull it off its run queue. If it + * doesn't come, then it's not eligible. + */ + s = splsched(); + thread_lock(thread); + if ((thread->processor_set == cur_thread->processor_set) + && (rem_runq(thread) != RUN_QUEUE_NULL)) { + /* + * Hah, got it!! + */ + thread_unlock(thread); + (void) splx(s); + ip_unlock(port); + /* XXX thread might disappear on us now? */ +#if MACH_FIXPRI + if (thread->policy == POLICY_FIXEDPRI) { + myprocessor = current_processor(); + myprocessor->quantum = thread->sched_data; + myprocessor->first_quantum = TRUE; + } +#endif MACH_FIXPRI + counter(c_thread_switch_handoff++); + thread_run(thread_switch_continue, thread); + /* + * Restore depressed priority + */ + if (cur_thread->depress_priority >= 0) + (void) thread_depress_abort(cur_thread); + + return(KERN_SUCCESS); + } + thread_unlock(thread); + (void) splx(s); + } + ip_unlock(port); + } +#endif /* not MIGRATING_THREADS */ + + /* + * No handoff hint supplied, or hint was wrong. Call thread_block() in + * hopes of running something else. If nothing else is runnable, + * thread_block will detect this. WARNING: thread_switch with no + * option will not do anything useful if the thread calling it is the + * highest priority thread (can easily happen with a collection + * of timesharing threads). + */ +#if NCPUS > 1 + myprocessor = current_processor(); + if (myprocessor->processor_set->runq.count > 0 || + myprocessor->runq.count > 0) +#endif NCPUS > 1 + { + counter(c_thread_switch_block++); + thread_block(thread_switch_continue); + } + + /* + * Restore depressed priority + */ + if (cur_thread->depress_priority >= 0) + (void) thread_depress_abort(cur_thread); + return(KERN_SUCCESS); +} + +/* + * thread_depress_priority + * + * Depress thread's priority to lowest possible for specified period. + * Intended for use when thread wants a lock but doesn't know which + * other thread is holding it. As with thread_switch, fixed + * priority threads get exactly what they asked for. Users access + * this by the SWITCH_OPTION_DEPRESS option to thread_switch. A Time + * of zero will result in no timeout being scheduled. + */ +void +thread_depress_priority(thread, depress_time) +register thread_t thread; +mach_msg_timeout_t depress_time; +{ + unsigned int ticks; + spl_t s; + + /* convert from milliseconds to ticks */ + ticks = convert_ipc_timeout_to_ticks(depress_time); + + s = splsched(); + thread_lock(thread); + + /* + * If thread is already depressed, override previous depression. + */ + reset_timeout_check(&thread->depress_timer); + + /* + * Save current priority, then set priority and + * sched_pri to their lowest possible values. + */ + thread->depress_priority = thread->priority; + thread->priority = 31; + thread->sched_pri = 31; + if (ticks != 0) + set_timeout(&thread->depress_timer, ticks); + + thread_unlock(thread); + (void) splx(s); +} + +/* + * thread_depress_timeout: + * + * Timeout routine for priority depression. 
+ */ +void +thread_depress_timeout(thread) +register thread_t thread; +{ + spl_t s; + + s = splsched(); + thread_lock(thread); + + /* + * If we lose a race with thread_depress_abort, + * then depress_priority might be -1. + */ + + if (thread->depress_priority >= 0) { + thread->priority = thread->depress_priority; + thread->depress_priority = -1; + compute_priority(thread, FALSE); + } + + thread_unlock(thread); + (void) splx(s); +} + +/* + * thread_depress_abort: + * + * Prematurely abort priority depression if there is one. + */ +kern_return_t +thread_depress_abort(thread) +register thread_t thread; +{ + spl_t s; + + if (thread == THREAD_NULL) + return(KERN_INVALID_ARGUMENT); + + s = splsched(); + thread_lock(thread); + + /* + * Only restore priority if thread is depressed. + */ + if (thread->depress_priority >= 0) { + reset_timeout_check(&thread->depress_timer); + thread->priority = thread->depress_priority; + thread->depress_priority = -1; + compute_priority(thread, FALSE); + } + + thread_unlock(thread); + (void) splx(s); + return(KERN_SUCCESS); +} diff --git a/kern/syscall_subr.h b/kern/syscall_subr.h new file mode 100644 index 0000000..921c2b2 --- /dev/null +++ b/kern/syscall_subr.h @@ -0,0 +1,35 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#ifndef _KERN_SYSCALL_SUBR_H_ +#define _KERN_SYSCALL_SUBR_H_ + +extern int swtch(); +extern int swtch_pri(); +extern int thread_switch(); +extern void thread_depress_timeout(); + +#endif _KERN_SYSCALL_SUBR_H_ diff --git a/kern/syscall_sw.c b/kern/syscall_sw.c new file mode 100644 index 0000000..41c8b2f --- /dev/null +++ b/kern/syscall_sw.c @@ -0,0 +1,289 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#include <mach_ipc_compat.h> +#include <net_atm.h> + +#include <mach/port.h> +#include <mach/kern_return.h> +#include <kern/syscall_sw.h> + +/* Include declarations of the trap functions. */ +#include <mach/mach_traps.h> +#include <mach/message.h> +#include <kern/syscall_subr.h> +#include <chips/nw_mk.h> + + +/* + * To add a new entry: + * Add an "MACH_TRAP(routine, arg count)" to the table below. + * + * Add trap definition to mach/syscall_sw.h and + * recompile user library. + * + * WARNING: If you add a trap which requires more than 7 + * parameters, mach/ca/syscall_sw.h and ca/trap.c both need + * to be modified for it to work successfully on an + * RT. Similarly, mach/mips/syscall_sw.h and mips/locore.s + * need to be modified before it will work on Pmaxen. + * + * WARNING: Don't use numbers 0 through -9. They (along with + * the positive numbers) are reserved for Unix. + */ + +int kern_invalid_debug = 0; + +mach_port_t null_port() +{ + if (kern_invalid_debug) Debugger("null_port mach trap"); + return(MACH_PORT_NULL); +} + +kern_return_t kern_invalid() +{ + if (kern_invalid_debug) Debugger("kern_invalid mach trap"); + return(KERN_INVALID_ARGUMENT); +} + +extern kern_return_t syscall_vm_map(); +extern kern_return_t syscall_vm_allocate(); +extern kern_return_t syscall_vm_deallocate(); + +extern kern_return_t syscall_task_create(); +extern kern_return_t syscall_task_terminate(); +extern kern_return_t syscall_task_suspend(); +extern kern_return_t syscall_task_set_special_port(); + +extern kern_return_t syscall_mach_port_allocate(); +extern kern_return_t syscall_mach_port_deallocate(); +extern kern_return_t syscall_mach_port_insert_right(); +extern kern_return_t syscall_mach_port_allocate_name(); + +extern kern_return_t syscall_thread_depress_abort(); +extern kern_return_t evc_wait(); +extern kern_return_t evc_wait_clear(); + +extern kern_return_t syscall_device_write_request(); +extern kern_return_t syscall_device_writev_request(); + +#ifdef FIPC +extern kern_return_t syscall_fipc_send(); +extern kern_return_t syscall_fipc_recv(); +#endif FIPC + +mach_trap_t mach_trap_table[] = { + MACH_TRAP(kern_invalid, 0), /* 0 */ /* Unix */ + MACH_TRAP(kern_invalid, 0), /* 1 */ /* Unix */ + MACH_TRAP(kern_invalid, 0), /* 2 */ /* Unix */ + MACH_TRAP(kern_invalid, 0), /* 3 */ /* Unix */ + MACH_TRAP(kern_invalid, 0), /* 4 */ /* Unix */ + MACH_TRAP(kern_invalid, 0), /* 5 */ /* Unix */ + MACH_TRAP(kern_invalid, 0), /* 6 */ /* Unix */ + MACH_TRAP(kern_invalid, 0), /* 7 */ /* Unix */ + MACH_TRAP(kern_invalid, 0), /* 8 */ /* Unix */ + MACH_TRAP(kern_invalid, 0), /* 9 */ /* Unix */ + +#if MACH_IPC_COMPAT + MACH_TRAP(task_self, 0), /* 10 */ /* obsolete */ + MACH_TRAP(thread_reply, 0), /* 11 */ /* obsolete */ + MACH_TRAP(task_notify, 0), /* 12 */ /* obsolete */ + MACH_TRAP(thread_self, 0), /* 13 */ /* obsolete */ +#else /* MACH_IPC_COMPAT */ + MACH_TRAP(null_port, 0), /* 10 */ + MACH_TRAP(null_port, 0), /* 11 */ + MACH_TRAP(null_port, 0), /* 12 */ + MACH_TRAP(null_port, 0), /* 13 */ +#endif /* MACH_IPC_COMPAT */ + MACH_TRAP(kern_invalid, 0), /* 14 */ + MACH_TRAP(kern_invalid, 0), /* 15 */ + MACH_TRAP(kern_invalid, 0), /* 16 */ + 
MACH_TRAP_STACK(evc_wait, 1), /* 17 */ + MACH_TRAP_STACK(evc_wait_clear, 1), /* 18 */ + MACH_TRAP(kern_invalid, 0), /* 19 */ + +#if MACH_IPC_COMPAT + MACH_TRAP(msg_send_trap, 4), /* 20 */ /* obsolete */ + MACH_TRAP_STACK(msg_receive_trap, 5), /* 21 */ /* obsolete */ + MACH_TRAP_STACK(msg_rpc_trap, 6), /* 22 */ /* obsolete */ +#else /* MACH_IPC_COMPAT */ + MACH_TRAP(kern_invalid, 0), /* 20 */ + MACH_TRAP(kern_invalid, 0), /* 21 */ + MACH_TRAP(kern_invalid, 0), /* 22 */ +#endif /* MACH_IPC_COMPAT */ + MACH_TRAP(kern_invalid, 0), /* 23 */ + MACH_TRAP(kern_invalid, 0), /* 24 */ + MACH_TRAP_STACK(mach_msg_trap, 7), /* 25 */ + MACH_TRAP(mach_reply_port, 0), /* 26 */ + MACH_TRAP(mach_thread_self, 0), /* 27 */ + MACH_TRAP(mach_task_self, 0), /* 28 */ + MACH_TRAP(mach_host_self, 0), /* 29 */ + + MACH_TRAP(kern_invalid, 0), /* 30 */ + MACH_TRAP(kern_invalid, 0), /* 31 */ + MACH_TRAP(kern_invalid, 0), /* 32 */ + MACH_TRAP(kern_invalid, 0), /* 33 emul: task_by_pid */ + MACH_TRAP(kern_invalid, 0), /* 34 emul: pid_by_task */ + MACH_TRAP(kern_invalid, 0), /* 35 */ + MACH_TRAP(kern_invalid, 0), /* 36 */ + MACH_TRAP(kern_invalid, 0), /* 37 */ + MACH_TRAP(kern_invalid, 0), /* 38 */ + + MACH_TRAP(syscall_device_writev_request, 6), /* 39 */ + MACH_TRAP(syscall_device_write_request, 6), /* 40 */ + + MACH_TRAP(kern_invalid, 0), /* 41 emul: init_process */ + MACH_TRAP(kern_invalid, 0), /* 42 */ + MACH_TRAP(kern_invalid, 0), /* 43 emul: map_fd */ + MACH_TRAP(kern_invalid, 0), /* 44 emul: rfs_make_symlink */ + MACH_TRAP(kern_invalid, 0), /* 45 */ + MACH_TRAP(kern_invalid, 0), /* 46 */ + MACH_TRAP(kern_invalid, 0), /* 47 */ + MACH_TRAP(kern_invalid, 0), /* 48 */ + MACH_TRAP(kern_invalid, 0), /* 49 */ + + MACH_TRAP(kern_invalid, 0), /* 50 */ + MACH_TRAP(kern_invalid, 0), /* 51 */ + MACH_TRAP(kern_invalid, 0), /* 52 emul: htg_syscall */ + MACH_TRAP(kern_invalid, 0), /* 53 emul: set_ras_address */ + MACH_TRAP(kern_invalid, 0), /* 54 */ +#if MACH_IPC_COMPAT + MACH_TRAP(host_self, 0), /* 55 */ +#else /* MACH_IPC_COMPAT */ + MACH_TRAP(null_port, 0), /* 55 */ +#endif /* MACH_IPC_COMPAT */ + MACH_TRAP(null_port, 0), /* 56 */ + MACH_TRAP(kern_invalid, 0), /* 57 */ + MACH_TRAP(kern_invalid, 0), /* 58 */ + MACH_TRAP_STACK(swtch_pri, 1), /* 59 */ + + MACH_TRAP_STACK(swtch, 0), /* 60 */ + MACH_TRAP_STACK(thread_switch, 3), /* 61 */ + MACH_TRAP(kern_invalid, 0), /* 62 */ + MACH_TRAP(kern_invalid, 0), /* 63 */ + MACH_TRAP(syscall_vm_map, 11), /* 64 */ + MACH_TRAP(syscall_vm_allocate, 4), /* 65 */ + MACH_TRAP(syscall_vm_deallocate, 3), /* 66 */ + MACH_TRAP(kern_invalid, 0), /* 67 */ + MACH_TRAP(syscall_task_create, 3), /* 68 */ + MACH_TRAP(syscall_task_terminate, 1), /* 69 */ + + MACH_TRAP(syscall_task_suspend, 1), /* 70 */ + MACH_TRAP(syscall_task_set_special_port, 3), /* 71 */ + MACH_TRAP(syscall_mach_port_allocate, 3), /* 72 */ + MACH_TRAP(syscall_mach_port_deallocate, 2), /* 73 */ + MACH_TRAP(syscall_mach_port_insert_right, 4), /* 74 */ + MACH_TRAP(syscall_mach_port_allocate_name, 3), /* 75 */ + MACH_TRAP(syscall_thread_depress_abort, 1), /* 76 */ + MACH_TRAP(kern_invalid, 0), /* 77 */ + MACH_TRAP(kern_invalid, 0), /* 78 */ + MACH_TRAP(kern_invalid, 0), /* 79 */ + +#if NET_ATM + MACH_TRAP(mk_update,3), /* 80 */ + MACH_TRAP(mk_lookup,2), /* 81 */ + MACH_TRAP_STACK(mk_endpoint_allocate,4), /* 82 */ + MACH_TRAP_STACK(mk_endpoint_deallocate,1), /* 83 */ + MACH_TRAP(mk_buffer_allocate,2), /* 84 */ + MACH_TRAP(mk_buffer_deallocate,2), /* 85 */ + MACH_TRAP_STACK(mk_connection_open,4), /* 86 */ + 
MACH_TRAP_STACK(mk_connection_accept,3), /* 87 */ + MACH_TRAP_STACK(mk_connection_close,1), /* 88 */ + MACH_TRAP_STACK(mk_multicast_add,4), /* 89 */ + MACH_TRAP_STACK(mk_multicast_drop,4), /* 90 */ + MACH_TRAP(mk_endpoint_status,3), /* 91 */ + MACH_TRAP_STACK(mk_send,3), /* 92 */ + MACH_TRAP_STACK(mk_receive,2), /* 93 */ + MACH_TRAP_STACK(mk_rpc,4), /* 94 */ + MACH_TRAP_STACK(mk_select,3), /* 95 */ +#else /* NET_ATM */ + MACH_TRAP(kern_invalid, 0), /* 80 */ + MACH_TRAP(kern_invalid, 0), /* 81 */ + MACH_TRAP(kern_invalid, 0), /* 82 */ + MACH_TRAP(kern_invalid, 0), /* 83 */ + MACH_TRAP(kern_invalid, 0), /* 84 */ + MACH_TRAP(kern_invalid, 0), /* 85 */ + MACH_TRAP(kern_invalid, 0), /* 86 */ + MACH_TRAP(kern_invalid, 0), /* 87 */ + MACH_TRAP(kern_invalid, 0), /* 88 */ + MACH_TRAP(kern_invalid, 0), /* 89 */ + MACH_TRAP(kern_invalid, 0), /* 90 */ + MACH_TRAP(kern_invalid, 0), /* 91 */ + MACH_TRAP(kern_invalid, 0), /* 92 */ + MACH_TRAP(kern_invalid, 0), /* 93 */ + MACH_TRAP(kern_invalid, 0), /* 94 */ + MACH_TRAP(kern_invalid, 0), /* 95 */ +#endif /* NET_ATM */ + +#ifdef FIPC + MACH_TRAP(syscall_fipc_send, 4), /* 96 */ + MACH_TRAP(syscall_fipc_recv, 5), /* 97 */ +#else + MACH_TRAP(kern_invalid, 0), /* 96 */ + MACH_TRAP(kern_invalid, 0), /* 97 */ +#endif FIPC + + MACH_TRAP(kern_invalid, 0), /* 98 */ + MACH_TRAP(kern_invalid, 0), /* 99 */ + + MACH_TRAP(kern_invalid, 0), /* 100 */ + MACH_TRAP(kern_invalid, 0), /* 101 */ + MACH_TRAP(kern_invalid, 0), /* 102 */ + MACH_TRAP(kern_invalid, 0), /* 103 */ + MACH_TRAP(kern_invalid, 0), /* 104 */ + MACH_TRAP(kern_invalid, 0), /* 105 */ + MACH_TRAP(kern_invalid, 0), /* 106 */ + MACH_TRAP(kern_invalid, 0), /* 107 */ + MACH_TRAP(kern_invalid, 0), /* 108 */ + MACH_TRAP(kern_invalid, 0), /* 109 */ + + MACH_TRAP(kern_invalid, 0), /* 110 */ + MACH_TRAP(kern_invalid, 0), /* 111 */ + MACH_TRAP(kern_invalid, 0), /* 112 */ + MACH_TRAP(kern_invalid, 0), /* 113 */ + MACH_TRAP(kern_invalid, 0), /* 114 */ + MACH_TRAP(kern_invalid, 0), /* 115 */ + MACH_TRAP(kern_invalid, 0), /* 116 */ + MACH_TRAP(kern_invalid, 0), /* 117 */ + MACH_TRAP(kern_invalid, 0), /* 118 */ + MACH_TRAP(kern_invalid, 0), /* 119 */ + + MACH_TRAP(kern_invalid, 0), /* 120 */ + MACH_TRAP(kern_invalid, 0), /* 121 */ + MACH_TRAP(kern_invalid, 0), /* 122 */ + MACH_TRAP(kern_invalid, 0), /* 123 */ + MACH_TRAP(kern_invalid, 0), /* 124 */ + MACH_TRAP(kern_invalid, 0), /* 125 */ + MACH_TRAP(kern_invalid, 0), /* 126 */ + MACH_TRAP(kern_invalid, 0), /* 127 */ + MACH_TRAP(kern_invalid, 0), /* 128 */ + MACH_TRAP(kern_invalid, 0), /* 129 */ +}; + +int mach_trap_count = (sizeof(mach_trap_table) / sizeof(mach_trap_table[0])); diff --git a/kern/syscall_sw.h b/kern/syscall_sw.h new file mode 100644 index 0000000..6a21ff5 --- /dev/null +++ b/kern/syscall_sw.h @@ -0,0 +1,51 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#ifndef _KERN_SYSCALL_SW_H_ +#define _KERN_SYSCALL_SW_H_ + +/* + * mach_trap_stack indicates the trap may discard + * its kernel stack. Some architectures may need + * to save more state in the pcb for these traps. + */ + +typedef struct { + int mach_trap_arg_count; + int (*mach_trap_function)(); + boolean_t mach_trap_stack; + int mach_trap_unused; +} mach_trap_t; + +extern mach_trap_t mach_trap_table[]; +extern int mach_trap_count; + +#define MACH_TRAP(name, arg_count) \ + { (arg_count), (int (*)()) (name), FALSE, 0 } +#define MACH_TRAP_STACK(name, arg_count) \ + { (arg_count), (int (*)()) (name), TRUE, 0 } + +#endif _KERN_SYSCALL_SW_H_ diff --git a/kern/task.c b/kern/task.c new file mode 100644 index 0000000..f72bb0f --- /dev/null +++ b/kern/task.c @@ -0,0 +1,1238 @@ +/* + * Mach Operating System + * Copyright (c) 1993-1988 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: kern/task.c + * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub, + * David Black + * + * Task management primitives implementation. 
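Given the table layout above, the machine-dependent trap handler conceptually checks the trap number against mach_trap_count and calls through mach_trap_function with mach_trap_arg_count arguments taken from the trap frame. A simplified, architecture-neutral sketch of that dispatch; the args[] marshalling is illustrative, since the real work happens in per-architecture locore code:

#include <mach/boolean.h>
#include <mach/kern_return.h>
#include <kern/syscall_sw.h>

extern kern_return_t kern_invalid();

/* args[] stands in for whatever the machine-dependent code copied
   out of the trap frame. */
int mach_syscall_dispatch(int trap_number, int args[])
{
	mach_trap_t *entry;

	if (trap_number < 0 || trap_number >= mach_trap_count)
		return (int) kern_invalid();

	entry = &mach_trap_table[trap_number];

	switch (entry->mach_trap_arg_count) {
	case 0:	return (*entry->mach_trap_function)();
	case 1:	return (*entry->mach_trap_function)(args[0]);
	case 2:	return (*entry->mach_trap_function)(args[0], args[1]);
	case 3:	return (*entry->mach_trap_function)(args[0], args[1], args[2]);
	default:
		/* Larger counts are handled the same way, up to the
		   per-architecture limit noted in syscall_sw.c. */
		return (int) kern_invalid();
	}
}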
+ */ + +#include <mach_host.h> +#include <mach_pcsample.h> +#include <norma_task.h> +#include <fast_tas.h> +#include <net_atm.h> + +#include <mach/machine/vm_types.h> +#include <mach/vm_param.h> +#include <mach/task_info.h> +#include <mach/task_special_ports.h> +#include <ipc/ipc_space.h> +#include <ipc/ipc_types.h> +#include <kern/mach_param.h> +#include <kern/task.h> +#include <kern/thread.h> +#include <kern/zalloc.h> +#include <kern/kalloc.h> +#include <kern/processor.h> +#include <kern/sched_prim.h> /* for thread_wakeup */ +#include <kern/ipc_tt.h> +#include <vm/vm_kern.h> /* for kernel_map, ipc_kernel_map */ +#include <machine/machspl.h> /* for splsched */ + +#if NET_ATM +#include <chips/nw_mk.h> +#endif + +#if NORMA_TASK +#define task_create task_create_local +#endif /* NORMA_TASK */ + +task_t kernel_task = TASK_NULL; +zone_t task_zone; + +extern void eml_init(void); +extern void eml_task_reference(task_t, task_t); +extern void eml_task_deallocate(task_t); + +void task_init(void) +{ + task_zone = zinit( + sizeof(struct task), + TASK_MAX * sizeof(struct task), + TASK_CHUNK * sizeof(struct task), + 0, "tasks"); + + eml_init(); + + /* + * Create the kernel task as the first task. + * Task_create must assign to kernel_task as a side effect, + * for other initialization. (:-() + */ + (void) task_create(TASK_NULL, FALSE, &kernel_task); +} + +/* + * Create a task running in the kernel address space. It may + * have its own map of size mem_size (if 0, it uses the kernel map), + * and may have ipc privileges. + */ +task_t kernel_task_create( + task_t parent_task, + vm_size_t map_size) +{ + task_t new_task; + vm_offset_t min, max; + + /* + * Create the task. + */ + (void) task_create(parent_task, FALSE, &new_task); + + /* + * Task_create creates the task with a user-space map. + * Remove the map and replace it with the kernel map + * or a submap of the kernel map. 
+ */ + vm_map_deallocate(new_task->map); + if (map_size == 0) + new_task->map = kernel_map; + else + new_task->map = kmem_suballoc(kernel_map, &min, &max, + map_size, FALSE); + + return new_task; +} + +kern_return_t task_create( + task_t parent_task, + boolean_t inherit_memory, + task_t *child_task) /* OUT */ +{ + register task_t new_task; + register processor_set_t pset; + int i; + + new_task = (task_t) zalloc(task_zone); + if (new_task == TASK_NULL) { + panic("task_create: no memory for task structure"); + } + + /* one ref for just being alive; one for our caller */ + new_task->ref_count = 2; + + if (child_task == &kernel_task) { + new_task->map = kernel_map; + } else if (inherit_memory) { + new_task->map = vm_map_fork(parent_task->map); + } else { + new_task->map = vm_map_create(pmap_create(0), + round_page(VM_MIN_ADDRESS), + trunc_page(VM_MAX_ADDRESS), TRUE); + } + + simple_lock_init(&new_task->lock); + queue_init(&new_task->thread_list); + new_task->suspend_count = 0; + new_task->active = TRUE; + new_task->user_stop_count = 0; + new_task->thread_count = 0; + + eml_task_reference(new_task, parent_task); + + ipc_task_init(new_task, parent_task); + +#if NET_ATM + new_task->nw_ep_owned = 0; +#endif + + new_task->total_user_time.seconds = 0; + new_task->total_user_time.microseconds = 0; + new_task->total_system_time.seconds = 0; + new_task->total_system_time.microseconds = 0; + + if (parent_task != TASK_NULL) { + task_lock(parent_task); + pset = parent_task->processor_set; + if (!pset->active) + pset = &default_pset; + pset_reference(pset); + new_task->priority = parent_task->priority; + task_unlock(parent_task); + } + else { + pset = &default_pset; + pset_reference(pset); + new_task->priority = BASEPRI_USER; + } + pset_lock(pset); + pset_add_task(pset, new_task); + pset_unlock(pset); + + new_task->may_assign = TRUE; + new_task->assign_active = FALSE; + +#if MACH_PCSAMPLE + new_task->pc_sample.buffer = 0; + new_task->pc_sample.seqno = 0; + new_task->pc_sample.sampletypes = 0; +#endif /* MACH_PCSAMPLE */ + +#if FAST_TAS + for (i = 0; i < TASK_FAST_TAS_NRAS; i++) { + if (inherit_memory) { + new_task->fast_tas_base[i] = parent_task->fast_tas_base[i]; + new_task->fast_tas_end[i] = parent_task->fast_tas_end[i]; + } else { + new_task->fast_tas_base[i] = (vm_offset_t)0; + new_task->fast_tas_end[i] = (vm_offset_t)0; + } + } +#endif /* FAST_TAS */ + + ipc_task_enable(new_task); + +#if NORMA_TASK + new_task->child_node = -1; +#endif /* NORMA_TASK */ + + *child_task = new_task; + return KERN_SUCCESS; +} + +/* + * task_deallocate: + * + * Give up a reference to the specified task and destroy it if there + * are no other references left. It is assumed that the current thread + * is never in this task. 
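task_create above returns with ref_count set to 2: one reference for the task being alive and one for the caller, so every in-kernel creation path is expected to pair its use of the task with a task_deallocate. A hedged sketch of that convention against the kernel's internal interfaces (the parent task and the work done with the child are placeholders, and this is not buildable outside the kernel tree):

#include <mach/boolean.h>
#include <mach/kern_return.h>
#include <kern/task.h>

kern_return_t make_and_release_example(task_t parent)
{
	task_t child;
	kern_return_t kr;

	kr = task_create(parent, FALSE, &child);	/* ref_count == 2 */
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... use the task: set it up, hand its port out, etc. ... */

	task_deallocate(child);		/* drop the caller's reference; the
					   "alive" reference remains until the
					   task is terminated */
	return KERN_SUCCESS;
}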
+ */ +void task_deallocate( + register task_t task) +{ + register int c; + register processor_set_t pset; + + if (task == TASK_NULL) + return; + + task_lock(task); + c = --(task->ref_count); + task_unlock(task); + if (c != 0) + return; + +#if NORMA_TASK + if (task->map == VM_MAP_NULL) { + /* norma placeholder task */ + zfree(task_zone, (vm_offset_t) task); + return; + } +#endif /* NORMA_TASK */ + + eml_task_deallocate(task); + + pset = task->processor_set; + pset_lock(pset); + pset_remove_task(pset,task); + pset_unlock(pset); + pset_deallocate(pset); + vm_map_deallocate(task->map); + is_release(task->itk_space); + zfree(task_zone, (vm_offset_t) task); +} + +void task_reference( + register task_t task) +{ + if (task == TASK_NULL) + return; + + task_lock(task); + task->ref_count++; + task_unlock(task); +} + +/* + * task_terminate: + * + * Terminate the specified task. See comments on thread_terminate + * (kern/thread.c) about problems with terminating the "current task." + */ +kern_return_t task_terminate( + register task_t task) +{ + register thread_t thread, cur_thread; + register queue_head_t *list; + register task_t cur_task; + spl_t s; + + if (task == TASK_NULL) + return KERN_INVALID_ARGUMENT; + + list = &task->thread_list; + cur_task = current_task(); + cur_thread = current_thread(); + +#if NET_ATM + /* + * Shut down networking. + */ + mk_endpoint_collect(task); +#endif + + /* + * Deactivate task so that it can't be terminated again, + * and so lengthy operations in progress will abort. + * + * If the current thread is in this task, remove it from + * the task's thread list to keep the thread-termination + * loop simple. + */ + if (task == cur_task) { + task_lock(task); + if (!task->active) { + /* + * Task is already being terminated. + */ + task_unlock(task); + return KERN_FAILURE; + } + /* + * Make sure current thread is not being terminated. + */ + s = splsched(); + thread_lock(cur_thread); + if (!cur_thread->active) { + thread_unlock(cur_thread); + (void) splx(s); + task_unlock(task); + thread_terminate(cur_thread); + return KERN_FAILURE; + } + task->active = FALSE; + queue_remove(list, cur_thread, thread_t, thread_list); + thread_unlock(cur_thread); + (void) splx(s); + task_unlock(task); + + /* + * Shut down this thread's ipc now because it must + * be left alone to terminate the task. + */ + ipc_thread_disable(cur_thread); + ipc_thread_terminate(cur_thread); + } + else { + /* + * Lock both current and victim task to check for + * potential deadlock. + */ + if ((vm_offset_t)task < (vm_offset_t)cur_task) { + task_lock(task); + task_lock(cur_task); + } + else { + task_lock(cur_task); + task_lock(task); + } + /* + * Check if current thread or task is being terminated. + */ + s = splsched(); + thread_lock(cur_thread); + if ((!cur_task->active) ||(!cur_thread->active)) { + /* + * Current task or thread is being terminated. + */ + thread_unlock(cur_thread); + (void) splx(s); + task_unlock(task); + task_unlock(cur_task); + thread_terminate(cur_thread); + return KERN_FAILURE; + } + thread_unlock(cur_thread); + (void) splx(s); + task_unlock(cur_task); + + if (!task->active) { + /* + * Task is already being terminated. + */ + task_unlock(task); + return KERN_FAILURE; + } + task->active = FALSE; + task_unlock(task); + } + + /* + * Prevent further execution of the task. ipc_task_disable + * prevents further task operations via the task port. + * If this is the current task, the current thread will + * be left running. 
+ */ + ipc_task_disable(task); + (void) task_hold(task); + (void) task_dowait(task,TRUE); /* may block */ + + /* + * Terminate each thread in the task. + * + * The task_port is closed down, so no more thread_create + * operations can be done. Thread_force_terminate closes the + * thread port for each thread; when that is done, the + * thread will eventually disappear. Thus the loop will + * terminate. Call thread_force_terminate instead of + * thread_terminate to avoid deadlock checks. Need + * to call thread_block() inside loop because some other + * thread (e.g., the reaper) may have to run to get rid + * of all references to the thread; it won't vanish from + * the task's thread list until the last one is gone. + */ + task_lock(task); + while (!queue_empty(list)) { + thread = (thread_t) queue_first(list); + thread_reference(thread); + task_unlock(task); + thread_force_terminate(thread); + thread_deallocate(thread); + thread_block((void (*)()) 0); + task_lock(task); + } + task_unlock(task); + + /* + * Shut down IPC. + */ + ipc_task_terminate(task); + + + /* + * Deallocate the task's reference to itself. + */ + task_deallocate(task); + + /* + * If the current thread is in this task, it has not yet + * been terminated (since it was removed from the task's + * thread-list). Put it back in the thread list (for + * completeness), and terminate it. Since it holds the + * last reference to the task, terminating it will deallocate + * the task. + */ + if (cur_thread->task == task) { + task_lock(task); + s = splsched(); + queue_enter(list, cur_thread, thread_t, thread_list); + (void) splx(s); + task_unlock(task); + (void) thread_terminate(cur_thread); + } + + return KERN_SUCCESS; +} + +/* + * task_hold: + * + * Suspend execution of the specified task. + * This is a recursive-style suspension of the task, a count of + * suspends is maintained. + */ +kern_return_t task_hold( + register task_t task) +{ + register queue_head_t *list; + register thread_t thread, cur_thread; + + cur_thread = current_thread(); + + task_lock(task); + if (!task->active) { + task_unlock(task); + return KERN_FAILURE; + } + + task->suspend_count++; + + /* + * Iterate through all the threads and hold them. + * Do not hold the current thread if it is within the + * task. + */ + list = &task->thread_list; + queue_iterate(list, thread, thread_t, thread_list) { + if (thread != cur_thread) + thread_hold(thread); + } + task_unlock(task); + return KERN_SUCCESS; +} + +/* + * task_dowait: + * + * Wait until the task has really been suspended (all of the threads + * are stopped). Skip the current thread if it is within the task. + * + * If task is deactivated while waiting, return a failure code unless + * must_wait is true. + */ +kern_return_t task_dowait( + register task_t task, + boolean_t must_wait) +{ + register queue_head_t *list; + register thread_t thread, cur_thread, prev_thread; + register kern_return_t ret = KERN_SUCCESS; + + /* + * Iterate through all the threads. + * While waiting for each thread, we gain a reference to it + * to prevent it from going away on us. This guarantees + * that the "next" thread in the list will be a valid thread. + * + * We depend on the fact that if threads are created while + * we are looping through the threads, they will be held + * automatically. We don't care about threads that get + * deallocated along the way (the reference prevents it + * from happening to the thread we are working with). + * + * If the current thread is in the affected task, it is skipped. 
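task_hold, task_dowait and task_release combine into the usual stop-the-task idiom; task_suspend further below is essentially this plus the user_stop_count bookkeeping. A hedged sketch of the pattern for a kernel operation that needs all of a task's threads quiescent (again kernel-internal, not standalone):

#include <mach/boolean.h>
#include <mach/kern_return.h>
#include <kern/task.h>

kern_return_t with_task_stopped(task_t task)
{
	if (task_hold(task) != KERN_SUCCESS)
		return KERN_FAILURE;

	/* Wait until every thread (except the caller, if it is in this
	   task) has actually stopped; may block. */
	if (task_dowait(task, FALSE) != KERN_SUCCESS) {
		(void) task_release(task);
		return KERN_FAILURE;
	}

	/* ... inspect or modify per-thread state safely here ... */

	return task_release(task);
}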
+ * + * If the task is deactivated before we're done, and we don't + * have to wait for it (must_wait is FALSE), just bail out. + */ + cur_thread = current_thread(); + + list = &task->thread_list; + prev_thread = THREAD_NULL; + task_lock(task); + queue_iterate(list, thread, thread_t, thread_list) { + if (!(task->active) && !(must_wait)) { + ret = KERN_FAILURE; + break; + } + if (thread != cur_thread) { + thread_reference(thread); + task_unlock(task); + if (prev_thread != THREAD_NULL) + thread_deallocate(prev_thread); + /* may block */ + (void) thread_dowait(thread, TRUE); /* may block */ + prev_thread = thread; + task_lock(task); + } + } + task_unlock(task); + if (prev_thread != THREAD_NULL) + thread_deallocate(prev_thread); /* may block */ + return ret; +} + +kern_return_t task_release( + register task_t task) +{ + register queue_head_t *list; + register thread_t thread, next; + + task_lock(task); + if (!task->active) { + task_unlock(task); + return KERN_FAILURE; + } + + task->suspend_count--; + + /* + * Iterate through all the threads and release them + */ + list = &task->thread_list; + thread = (thread_t) queue_first(list); + while (!queue_end(list, (queue_entry_t) thread)) { + next = (thread_t) queue_next(&thread->thread_list); + thread_release(thread); + thread = next; + } + task_unlock(task); + return KERN_SUCCESS; +} + +kern_return_t task_threads( + task_t task, + thread_array_t *thread_list, + natural_t *count) +{ + unsigned int actual; /* this many threads */ + thread_t thread; + thread_t *threads; + int i; + + vm_size_t size, size_needed; + vm_offset_t addr; + + if (task == TASK_NULL) + return KERN_INVALID_ARGUMENT; + + size = 0; addr = 0; + + for (;;) { + task_lock(task); + if (!task->active) { + task_unlock(task); + return KERN_FAILURE; + } + + actual = task->thread_count; + + /* do we have the memory we need? 
*/ + + size_needed = actual * sizeof(mach_port_t); + if (size_needed <= size) + break; + + /* unlock the task and allocate more memory */ + task_unlock(task); + + if (size != 0) + kfree(addr, size); + + assert(size_needed > 0); + size = size_needed; + + addr = kalloc(size); + if (addr == 0) + return KERN_RESOURCE_SHORTAGE; + } + + /* OK, have memory and the task is locked & active */ + + threads = (thread_t *) addr; + + for (i = 0, thread = (thread_t) queue_first(&task->thread_list); + i < actual; + i++, thread = (thread_t) queue_next(&thread->thread_list)) { + /* take ref for convert_thread_to_port */ + thread_reference(thread); + threads[i] = thread; + } + assert(queue_end(&task->thread_list, (queue_entry_t) thread)); + + /* can unlock task now that we've got the thread refs */ + task_unlock(task); + + if (actual == 0) { + /* no threads, so return null pointer and deallocate memory */ + + *thread_list = 0; + *count = 0; + + if (size != 0) + kfree(addr, size); + } else { + /* if we allocated too much, must copy */ + + if (size_needed < size) { + vm_offset_t newaddr; + + newaddr = kalloc(size_needed); + if (newaddr == 0) { + for (i = 0; i < actual; i++) + thread_deallocate(threads[i]); + kfree(addr, size); + return KERN_RESOURCE_SHORTAGE; + } + + bcopy((char *) addr, (char *) newaddr, size_needed); + kfree(addr, size); + threads = (thread_t *) newaddr; + } + + *thread_list = (mach_port_t *) threads; + *count = actual; + + /* do the conversion that Mig should handle */ + + for (i = 0; i < actual; i++) + ((ipc_port_t *) threads)[i] = + convert_thread_to_port(threads[i]); + } + + return KERN_SUCCESS; +} + +kern_return_t task_suspend( + register task_t task) +{ + register boolean_t hold; + + if (task == TASK_NULL) + return KERN_INVALID_ARGUMENT; + + hold = FALSE; + task_lock(task); + if ((task->user_stop_count)++ == 0) + hold = TRUE; + task_unlock(task); + + /* + * If the stop count was positive, the task is + * already stopped and we can exit. + */ + if (!hold) { + return KERN_SUCCESS; + } + + /* + * Hold all of the threads in the task, and wait for + * them to stop. If the current thread is within + * this task, hold it separately so that all of the + * other threads can stop first. + */ + + if (task_hold(task) != KERN_SUCCESS) + return KERN_FAILURE; + + if (task_dowait(task, FALSE) != KERN_SUCCESS) + return KERN_FAILURE; + + if (current_task() == task) { + spl_t s; + + thread_hold(current_thread()); + /* + * We want to call thread_block on our way out, + * to stop running. + */ + s = splsched(); + ast_on(cpu_number(), AST_BLOCK); + (void) splx(s); + } + + return KERN_SUCCESS; +} + +kern_return_t task_resume( + register task_t task) +{ + register boolean_t release; + + if (task == TASK_NULL) + return KERN_INVALID_ARGUMENT; + + release = FALSE; + task_lock(task); + if (task->user_stop_count > 0) { + if (--(task->user_stop_count) == 0) + release = TRUE; + } + else { + task_unlock(task); + return KERN_FAILURE; + } + task_unlock(task); + + /* + * Release the task if necessary. 
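task_threads above uses the standard lock / size-check / unlock-and-grow retry loop: compute how much memory the snapshot needs while holding the lock, and if the current buffer is too small, drop the lock, reallocate, and check again, because the thread count may change while the lock is not held. A self-contained host-side sketch of the same pattern, with pthreads and malloc standing in for the task lock and kalloc:

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static int item_count;			/* protected by list_lock */
static long items[1024];		/* protected by list_lock */

/* Snapshot the list into a freshly allocated buffer. */
long *snapshot(int *count_out)
{
	long *buf = NULL;
	size_t size = 0;

	for (;;) {
		size_t size_needed;

		pthread_mutex_lock(&list_lock);
		size_needed = (size_t) item_count * sizeof(long);
		if (buf != NULL && size_needed <= size)
			break;		/* big enough: stay locked and copy */

		/* Too small: drop the lock, grow, and re-check, since
		   item_count may change while we are unlocked. */
		pthread_mutex_unlock(&list_lock);
		free(buf);
		size = size_needed ? size_needed : sizeof(long);
		buf = malloc(size);
		if (buf == NULL) {
			*count_out = 0;
			return NULL;
		}
	}

	memcpy(buf, items, (size_t) item_count * sizeof(long));
	*count_out = item_count;
	pthread_mutex_unlock(&list_lock);
	return buf;
}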
+ */ + if (release) + return task_release(task); + + return KERN_SUCCESS; +} + +kern_return_t task_info( + task_t task, + int flavor, + task_info_t task_info_out, /* pointer to OUT array */ + natural_t *task_info_count) /* IN/OUT */ +{ + vm_map_t map; + + if (task == TASK_NULL) + return KERN_INVALID_ARGUMENT; + + switch (flavor) { + case TASK_BASIC_INFO: + { + register task_basic_info_t basic_info; + + if (*task_info_count < TASK_BASIC_INFO_COUNT) { + return KERN_INVALID_ARGUMENT; + } + + basic_info = (task_basic_info_t) task_info_out; + + map = (task == kernel_task) ? kernel_map : task->map; + + basic_info->virtual_size = map->size; + basic_info->resident_size = pmap_resident_count(map->pmap) + * PAGE_SIZE; + + task_lock(task); + basic_info->base_priority = task->priority; + basic_info->suspend_count = task->user_stop_count; + basic_info->user_time.seconds + = task->total_user_time.seconds; + basic_info->user_time.microseconds + = task->total_user_time.microseconds; + basic_info->system_time.seconds + = task->total_system_time.seconds; + basic_info->system_time.microseconds + = task->total_system_time.microseconds; + task_unlock(task); + + *task_info_count = TASK_BASIC_INFO_COUNT; + break; + } + + case TASK_THREAD_TIMES_INFO: + { + register task_thread_times_info_t times_info; + register thread_t thread; + + if (*task_info_count < TASK_THREAD_TIMES_INFO_COUNT) { + return KERN_INVALID_ARGUMENT; + } + + times_info = (task_thread_times_info_t) task_info_out; + times_info->user_time.seconds = 0; + times_info->user_time.microseconds = 0; + times_info->system_time.seconds = 0; + times_info->system_time.microseconds = 0; + + task_lock(task); + queue_iterate(&task->thread_list, thread, + thread_t, thread_list) + { + time_value_t user_time, system_time; + spl_t s; + + s = splsched(); + thread_lock(thread); + + thread_read_times(thread, &user_time, &system_time); + + thread_unlock(thread); + splx(s); + + time_value_add(×_info->user_time, &user_time); + time_value_add(×_info->system_time, &system_time); + } + task_unlock(task); + + *task_info_count = TASK_THREAD_TIMES_INFO_COUNT; + break; + } + + default: + return KERN_INVALID_ARGUMENT; + } + + return KERN_SUCCESS; +} + +#if MACH_HOST +/* + * task_assign: + * + * Change the assigned processor set for the task + */ +kern_return_t +task_assign( + task_t task, + processor_set_t new_pset, + boolean_t assign_threads) +{ + kern_return_t ret = KERN_SUCCESS; + register thread_t thread, prev_thread; + register queue_head_t *list; + register processor_set_t pset; + + if (task == TASK_NULL || new_pset == PROCESSOR_SET_NULL) { + return KERN_INVALID_ARGUMENT; + } + + /* + * Freeze task`s assignment. Prelude to assigning + * task. Only one freeze may be held per task. + */ + + task_lock(task); + while (task->may_assign == FALSE) { + task->assign_active = TRUE; + assert_wait((event_t)&task->assign_active, TRUE); + task_unlock(task); + thread_block((void (*)()) 0); + task_lock(task); + } + + /* + * Avoid work if task already in this processor set. + */ + if (task->processor_set == new_pset) { + /* + * No need for task->assign_active wakeup: + * task->may_assign is still TRUE. + */ + task_unlock(task); + return KERN_SUCCESS; + } + + task->may_assign = FALSE; + task_unlock(task); + + /* + * Safe to get the task`s pset: it cannot change while + * task is frozen. + */ + pset = task->processor_set; + + /* + * Lock both psets now. Use ordering to avoid deadlock. 
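The pset locking that follows avoids deadlock by always taking the two locks in address order, retrying from Restart: if the target pset turned out to be inactive. The same discipline is easy to demonstrate stand-alone with two pthread mutexes; the helper names here are illustrative:

#include <stdint.h>
#include <pthread.h>

/* Lock two distinct mutexes in a globally consistent (address) order so
   that two threads locking the same pair in opposite roles cannot
   deadlock against each other. */
void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	if ((uintptr_t) a < (uintptr_t) b) {
		pthread_mutex_lock(a);
		pthread_mutex_lock(b);
	} else {
		pthread_mutex_lock(b);
		pthread_mutex_lock(a);
	}
}

void unlock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	pthread_mutex_unlock(a);
	pthread_mutex_unlock(b);
}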
+ */ + Restart: + if ((vm_offset_t) pset < (vm_offset_t) new_pset) { + pset_lock(pset); + pset_lock(new_pset); + } + else { + pset_lock(new_pset); + pset_lock(pset); + } + + /* + * Check if new_pset is ok to assign to. If not, + * reassign to default_pset. + */ + if (!new_pset->active) { + pset_unlock(pset); + pset_unlock(new_pset); + new_pset = &default_pset; + goto Restart; + } + + pset_reference(new_pset); + + /* + * Now grab the task lock and move the task. + */ + + task_lock(task); + pset_remove_task(pset, task); + pset_add_task(new_pset, task); + + pset_unlock(pset); + pset_unlock(new_pset); + + if (assign_threads == FALSE) { + /* + * We leave existing threads at their + * old assignments. Unfreeze task`s + * assignment. + */ + task->may_assign = TRUE; + if (task->assign_active) { + task->assign_active = FALSE; + thread_wakeup((event_t) &task->assign_active); + } + task_unlock(task); + pset_deallocate(pset); + return KERN_SUCCESS; + } + + /* + * If current thread is in task, freeze its assignment. + */ + if (current_thread()->task == task) { + task_unlock(task); + thread_freeze(current_thread()); + task_lock(task); + } + + /* + * Iterate down the thread list reassigning all the threads. + * New threads pick up task's new processor set automatically. + * Do current thread last because new pset may be empty. + */ + list = &task->thread_list; + prev_thread = THREAD_NULL; + queue_iterate(list, thread, thread_t, thread_list) { + if (!(task->active)) { + ret = KERN_FAILURE; + break; + } + if (thread != current_thread()) { + thread_reference(thread); + task_unlock(task); + if (prev_thread != THREAD_NULL) + thread_deallocate(prev_thread); /* may block */ + thread_assign(thread,new_pset); /* may block */ + prev_thread = thread; + task_lock(task); + } + } + + /* + * Done, wakeup anyone waiting for us. + */ + task->may_assign = TRUE; + if (task->assign_active) { + task->assign_active = FALSE; + thread_wakeup((event_t)&task->assign_active); + } + task_unlock(task); + if (prev_thread != THREAD_NULL) + thread_deallocate(prev_thread); /* may block */ + + /* + * Finish assignment of current thread. + */ + if (current_thread()->task == task) + thread_doassign(current_thread(), new_pset, TRUE); + + pset_deallocate(pset); + + return ret; +} +#else /* MACH_HOST */ +/* + * task_assign: + * + * Change the assigned processor set for the task + */ +kern_return_t +task_assign( + task_t task, + processor_set_t new_pset, + boolean_t assign_threads) +{ + return KERN_FAILURE; +} +#endif /* MACH_HOST */ + + +/* + * task_assign_default: + * + * Version of task_assign to assign to default processor set. + */ +kern_return_t +task_assign_default( + task_t task, + boolean_t assign_threads) +{ + return task_assign(task, &default_pset, assign_threads); +} + +/* + * task_get_assignment + * + * Return name of processor set that task is assigned to. + */ +kern_return_t task_get_assignment( + task_t task, + processor_set_t *pset) +{ + if (!task->active) + return KERN_FAILURE; + + *pset = task->processor_set; + pset_reference(*pset); + return KERN_SUCCESS; +} + +/* + * task_priority + * + * Set priority of task; used only for newly created threads. + * Optionally change priorities of threads. 
+ */ +kern_return_t +task_priority( + task_t task, + int priority, + boolean_t change_threads) +{ + kern_return_t ret = KERN_SUCCESS; + + if (task == TASK_NULL || invalid_pri(priority)) + return KERN_INVALID_ARGUMENT; + + task_lock(task); + task->priority = priority; + + if (change_threads) { + register thread_t thread; + register queue_head_t *list; + + list = &task->thread_list; + queue_iterate(list, thread, thread_t, thread_list) { + if (thread_priority(thread, priority, FALSE) + != KERN_SUCCESS) + ret = KERN_FAILURE; + } + } + + task_unlock(task); + return ret; +} + +/* + * task_collect_scan: + * + * Attempt to free resources owned by tasks. + */ + +void task_collect_scan(void) +{ + register task_t task, prev_task; + processor_set_t pset, prev_pset; + + prev_task = TASK_NULL; + prev_pset = PROCESSOR_SET_NULL; + + simple_lock(&all_psets_lock); + queue_iterate(&all_psets, pset, processor_set_t, all_psets) { + pset_lock(pset); + queue_iterate(&pset->tasks, task, task_t, pset_tasks) { + task_reference(task); + pset_reference(pset); + pset_unlock(pset); + simple_unlock(&all_psets_lock); + + pmap_collect(task->map->pmap); + + if (prev_task != TASK_NULL) + task_deallocate(prev_task); + prev_task = task; + + if (prev_pset != PROCESSOR_SET_NULL) + pset_deallocate(prev_pset); + prev_pset = pset; + + simple_lock(&all_psets_lock); + pset_lock(pset); + } + pset_unlock(pset); + } + simple_unlock(&all_psets_lock); + + if (prev_task != TASK_NULL) + task_deallocate(prev_task); + if (prev_pset != PROCESSOR_SET_NULL) + pset_deallocate(prev_pset); +} + +boolean_t task_collect_allowed = TRUE; +unsigned task_collect_last_tick = 0; +unsigned task_collect_max_rate = 0; /* in ticks */ + +/* + * consider_task_collect: + * + * Called by the pageout daemon when the system needs more free pages. + */ + +void consider_task_collect(void) +{ + /* + * By default, don't attempt task collection more frequently + * than once a second. 
+ */ + + if (task_collect_max_rate == 0) + task_collect_max_rate = hz; + + if (task_collect_allowed && + (sched_tick > (task_collect_last_tick + task_collect_max_rate))) { + task_collect_last_tick = sched_tick; + task_collect_scan(); + } +} + +kern_return_t +task_ras_control( + task_t task, + vm_offset_t pc, + vm_offset_t endpc, + int flavor) +{ + kern_return_t ret = KERN_FAILURE; + +#if FAST_TAS + int i; + + ret = KERN_SUCCESS; + task_lock(task); + switch (flavor) { + case TASK_RAS_CONTROL_PURGE_ALL: /* remove all RAS */ + for (i = 0; i < TASK_FAST_TAS_NRAS; i++) { + task->fast_tas_base[i] = task->fast_tas_end[i] = 0; + } + break; + case TASK_RAS_CONTROL_PURGE_ONE: /* remove this RAS, collapse remaining */ + for (i = 0; i < TASK_FAST_TAS_NRAS; i++) { + if ( (task->fast_tas_base[i] == pc) + && (task->fast_tas_end[i] == endpc)) { + while (i < TASK_FAST_TAS_NRAS-1) { + task->fast_tas_base[i] = task->fast_tas_base[i+1]; + task->fast_tas_end[i] = task->fast_tas_end[i+1]; + i++; + } + task->fast_tas_base[TASK_FAST_TAS_NRAS-1] = 0; + task->fast_tas_end[TASK_FAST_TAS_NRAS-1] = 0; + break; + } + } + if (i == TASK_FAST_TAS_NRAS) { + ret = KERN_INVALID_ADDRESS; + } + break; + case TASK_RAS_CONTROL_PURGE_ALL_AND_INSTALL_ONE: + /* remove all RAS an install this RAS */ + for (i = 0; i < TASK_FAST_TAS_NRAS; i++) { + task->fast_tas_base[i] = task->fast_tas_end[i] = 0; + } + /* FALL THROUGH */ + case TASK_RAS_CONTROL_INSTALL_ONE: /* install this RAS */ + for (i = 0; i < TASK_FAST_TAS_NRAS; i++) { + if ( (task->fast_tas_base[i] == pc) + && (task->fast_tas_end[i] == endpc)) { + /* already installed */ + break; + } + if ((task->fast_tas_base[i] == 0) && (task->fast_tas_end[i] == 0)){ + task->fast_tas_base[i] = pc; + task->fast_tas_end[i] = endpc; + break; + } + } + if (i == TASK_FAST_TAS_NRAS) { + ret = KERN_RESOURCE_SHORTAGE; + } + break; + default: ret = KERN_INVALID_VALUE; + break; + } + task_unlock(task); +#endif + return ret; +} diff --git a/kern/task.h b/kern/task.h new file mode 100644 index 0000000..52733b9 --- /dev/null +++ b/kern/task.h @@ -0,0 +1,183 @@ +/* + * Mach Operating System + * Copyright (c) 1993-1988 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: task.h + * Author: Avadis Tevanian, Jr. + * + * This file contains the structure definitions for tasks. 
+ * + */ + +#ifndef _KERN_TASK_H_ +#define _KERN_TASK_H_ + +#include <norma_task.h> +#include <fast_tas.h> +#include <net_atm.h> + +#include <mach/boolean.h> +#include <mach/port.h> +#include <mach/time_value.h> +#include <mach/mach_param.h> +#include <mach/task_info.h> +#include <kern/kern_types.h> +#include <kern/lock.h> +#include <kern/queue.h> +#include <kern/pc_sample.h> +#include <kern/processor.h> +#include <kern/syscall_emulation.h> +#include <vm/vm_map.h> + +#if NET_ATM +typedef struct nw_ep_owned { + unsigned int ep; + struct nw_ep_owned *next; +} nw_ep_owned_s, *nw_ep_owned_t; +#endif + +struct task { + /* Synchronization/destruction information */ + decl_simple_lock_data(,lock) /* Task's lock */ + int ref_count; /* Number of references to me */ + boolean_t active; /* Task has not been terminated */ + + /* Miscellaneous */ + vm_map_t map; /* Address space description */ + queue_chain_t pset_tasks; /* list of tasks assigned to pset */ + int suspend_count; /* Internal scheduling only */ + + /* Thread information */ + queue_head_t thread_list; /* list of threads */ + int thread_count; /* number of threads */ + processor_set_t processor_set; /* processor set for new threads */ + boolean_t may_assign; /* can assigned pset be changed? */ + boolean_t assign_active; /* waiting for may_assign */ + + /* User-visible scheduling information */ + int user_stop_count; /* outstanding stops */ + int priority; /* for new threads */ + + /* Statistics */ + time_value_t total_user_time; + /* total user time for dead threads */ + time_value_t total_system_time; + /* total system time for dead threads */ + + /* IPC structures */ + decl_simple_lock_data(, itk_lock_data) + struct ipc_port *itk_self; /* not a right, doesn't hold ref */ + struct ipc_port *itk_sself; /* a send right */ + struct ipc_port *itk_exception; /* a send right */ + struct ipc_port *itk_bootstrap; /* a send right */ + struct ipc_port *itk_registered[TASK_PORT_REGISTER_MAX]; + /* all send rights */ + + struct ipc_space *itk_space; + + /* User space system call emulation support */ + struct eml_dispatch *eml_dispatch; + + sample_control_t pc_sample; + +#if NORMA_TASK + long child_node; /* if != -1, node for new children */ +#endif /* NORMA_TASK */ + +#if FAST_TAS +#define TASK_FAST_TAS_NRAS 8 + vm_offset_t fast_tas_base[TASK_FAST_TAS_NRAS]; + vm_offset_t fast_tas_end[TASK_FAST_TAS_NRAS]; +#endif /* FAST_TAS */ + +#if NET_ATM + nw_ep_owned_t nw_ep_owned; +#endif /* NET_ATM */ +}; + +#define task_lock(task) simple_lock(&(task)->lock) +#define task_unlock(task) simple_unlock(&(task)->lock) + +#define itk_lock_init(task) simple_lock_init(&(task)->itk_lock_data) +#define itk_lock(task) simple_lock(&(task)->itk_lock_data) +#define itk_unlock(task) simple_unlock(&(task)->itk_lock_data) + +/* + * Exported routines/macros + */ + +extern kern_return_t task_create( + task_t parent_task, + boolean_t inherit_memory, + task_t *child_task); +extern kern_return_t task_terminate( + task_t task); +extern kern_return_t task_suspend( + task_t task); +extern kern_return_t task_resume( + task_t task); +extern kern_return_t task_threads( + task_t task, + thread_array_t *thread_list, + natural_t *count); +extern kern_return_t task_info( + task_t task, + int flavor, + task_info_t task_info_out, + natural_t *task_info_count); +extern kern_return_t task_get_special_port( + task_t task, + int which, + struct ipc_port **portp); +extern kern_return_t task_set_special_port( + task_t task, + int which, + struct ipc_port *port); +extern kern_return_t task_assign( + 
task_t task, + processor_set_t new_pset, + boolean_t assign_threads); +extern kern_return_t task_assign_default( + task_t task, + boolean_t assign_threads); + +/* + * Internal only routines + */ + +extern void task_init(); +extern void task_reference(); +extern void task_deallocate(); +extern kern_return_t task_hold(); +extern kern_return_t task_dowait(); +extern kern_return_t task_release(); +extern kern_return_t task_halt(); + +extern kern_return_t task_suspend_nowait(); +extern task_t kernel_task_create(); + +extern task_t kernel_task; + +#endif _KERN_TASK_H_ diff --git a/kern/thread.c b/kern/thread.c new file mode 100644 index 0000000..02969f3 --- /dev/null +++ b/kern/thread.c @@ -0,0 +1,2575 @@ +/* + * Mach Operating System + * Copyright (c) 1994-1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: kern/thread.c + * Author: Avadis Tevanian, Jr., Michael Wayne Young, David Golub + * Date: 1986 + * + * Thread management primitives implementation. 
+ */ + +#include <cpus.h> +#include <hw_footprint.h> +#include <mach_host.h> +#include <mach_fixpri.h> +#include <mach_pcsample.h> +#include <simple_clock.h> +#include <mach_debug.h> +#include <net_atm.h> + +#include <mach/std_types.h> +#include <mach/policy.h> +#include <mach/thread_info.h> +#include <mach/thread_special_ports.h> +#include <mach/thread_status.h> +#include <mach/time_value.h> +#include "vm_param.h" +#include <kern/ast.h> +#include <kern/counters.h> +#include <kern/ipc_tt.h> +#include <kern/mach_param.h> +#include <kern/processor.h> +#include <kern/queue.h> +#include <kern/sched.h> +#include <kern/sched_prim.h> +#include <kern/thread.h> +#include <kern/thread_swap.h> +#include <kern/host.h> +#include <kern/zalloc.h> +#include <vm/vm_kern.h> +#include <ipc/ipc_kmsg.h> +#include <ipc/ipc_port.h> +#include <ipc/mach_msg.h> +#include <machine/machspl.h> /* for splsched */ +#include <machine/thread.h> /* for MACHINE_STACK */ + +#if NET_ATM +#include <chips/nw_mk.h> +#endif + +thread_t active_threads[NCPUS]; +vm_offset_t active_stacks[NCPUS]; + +struct zone *thread_zone; + +queue_head_t reaper_queue; +decl_simple_lock_data(, reaper_lock) + +extern int tick; + +extern void pcb_module_init(void); + +/* private */ +struct thread thread_template; + +#if MACH_DEBUG +void stack_init(vm_offset_t stack); /* forward */ +void stack_finalize(vm_offset_t stack); /* forward */ + +#define STACK_MARKER 0xdeadbeefU +boolean_t stack_check_usage = FALSE; +decl_simple_lock_data(, stack_usage_lock) +vm_size_t stack_max_usage = 0; +#endif /* MACH_DEBUG */ + +/* + * Machine-dependent code must define: + * pcb_init + * pcb_terminate + * pcb_collect + * + * The thread->pcb field is reserved for machine-dependent code. + */ + +#ifdef MACHINE_STACK +/* + * Machine-dependent code must define: + * stack_alloc_try + * stack_alloc + * stack_free + * stack_handoff + * stack_collect + * and if MACH_DEBUG: + * stack_statistics + */ +#else /* MACHINE_STACK */ +/* + * We allocate stacks from generic kernel VM. + * Machine-dependent code must define: + * stack_attach + * stack_detach + * stack_handoff + * + * The stack_free_list can only be accessed at splsched, + * because stack_alloc_try/thread_invoke operate at splsched. + */ + +decl_simple_lock_data(, stack_lock_data)/* splsched only */ +#define stack_lock() simple_lock(&stack_lock_data) +#define stack_unlock() simple_unlock(&stack_lock_data) + +vm_offset_t stack_free_list; /* splsched only */ +unsigned int stack_free_count = 0; /* splsched only */ +unsigned int stack_free_limit = 1; /* patchable */ + +unsigned int stack_alloc_hits = 0; /* debugging */ +unsigned int stack_alloc_misses = 0; /* debugging */ +unsigned int stack_alloc_max = 0; /* debugging */ + +/* + * The next field is at the base of the stack, + * so the low end is left unsullied. + */ + +#define stack_next(stack) (*((vm_offset_t *)((stack) + KERNEL_STACK_SIZE) - 1)) + +/* + * stack_alloc_try: + * + * Non-blocking attempt to allocate a kernel stack. + * Called at splsched with the thread locked. 
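+ *	The free list is threaded through the free stacks themselves: for a
+ *	free stack at address stack, the link to the next free stack is kept
+ *	in the last vm_offset_t of the stack area, at
+ *	stack + KERNEL_STACK_SIZE - sizeof(vm_offset_t), which is exactly
+ *	what the stack_next() macro above computes.  Popping the head of the
+ *	list is then just (sketch of the code below):
+ *
+ *		stack = stack_free_list;
+ *		stack_free_list = stack_next(stack);
+ *		stack_free_count--;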
+ */ + +boolean_t stack_alloc_try( + thread_t thread, + void (*resume)(thread_t)) +{ + register vm_offset_t stack; + + stack_lock(); + stack = stack_free_list; + if (stack != 0) { + stack_free_list = stack_next(stack); + stack_free_count--; + } else { + stack = thread->stack_privilege; + } + stack_unlock(); + + if (stack != 0) { + stack_attach(thread, stack, resume); + stack_alloc_hits++; + return TRUE; + } else { + stack_alloc_misses++; + return FALSE; + } +} + +/* + * stack_alloc: + * + * Allocate a kernel stack for a thread. + * May block. + */ + +void stack_alloc( + thread_t thread, + void (*resume)(thread_t)) +{ + vm_offset_t stack; + spl_t s; + + /* + * We first try the free list. It is probably empty, + * or stack_alloc_try would have succeeded, but possibly + * a stack was freed before the swapin thread got to us. + */ + + s = splsched(); + stack_lock(); + stack = stack_free_list; + if (stack != 0) { + stack_free_list = stack_next(stack); + stack_free_count--; + } + stack_unlock(); + (void) splx(s); + + if (stack == 0) { + /* + * Kernel stacks should be naturally aligned, + * so that it is easy to find the starting/ending + * addresses of a stack given an address in the middle. + */ + + if (kmem_alloc_aligned(kernel_map, &stack, KERNEL_STACK_SIZE) + != KERN_SUCCESS) + panic("stack_alloc"); + +#if MACH_DEBUG + stack_init(stack); +#endif /* MACH_DEBUG */ + } + + stack_attach(thread, stack, resume); +} + +/* + * stack_free: + * + * Free a thread's kernel stack. + * Called at splsched with the thread locked. + */ + +void stack_free( + thread_t thread) +{ + register vm_offset_t stack; + + stack = stack_detach(thread); + + if (stack != thread->stack_privilege) { + stack_lock(); + stack_next(stack) = stack_free_list; + stack_free_list = stack; + if (++stack_free_count > stack_alloc_max) + stack_alloc_max = stack_free_count; + stack_unlock(); + } +} + +/* + * stack_collect: + * + * Free excess kernel stacks. + * May block. + */ + +void stack_collect(void) +{ + register vm_offset_t stack; + spl_t s; + + s = splsched(); + stack_lock(); + while (stack_free_count > stack_free_limit) { + stack = stack_free_list; + stack_free_list = stack_next(stack); + stack_free_count--; + stack_unlock(); + (void) splx(s); + +#if MACH_DEBUG + stack_finalize(stack); +#endif /* MACH_DEBUG */ + kmem_free(kernel_map, stack, KERNEL_STACK_SIZE); + + s = splsched(); + stack_lock(); + } + stack_unlock(); + (void) splx(s); +} +#endif /* MACHINE_STACK */ + +/* + * stack_privilege: + * + * stack_alloc_try on this thread must always succeed. + */ + +void stack_privilege( + register thread_t thread) +{ + /* + * This implementation only works for the current thread. + */ + + if (thread != current_thread()) + panic("stack_privilege"); + + if (thread->stack_privilege == 0) + thread->stack_privilege = current_stack(); +} + +void thread_init(void) +{ + thread_zone = zinit( + sizeof(struct thread), + THREAD_MAX * sizeof(struct thread), + THREAD_CHUNK * sizeof(struct thread), + 0, "threads"); + + /* + * Fill in a template thread for fast initialization. + * [Fields that must be (or are typically) reset at + * time of creation are so noted.] 
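+	 *	thread_create() below then initializes a new thread with one
+	 *	structure assignment instead of dozens of field stores
+	 *	(sketch of the idea):
+	 *
+	 *		new_thread = (thread_t) zalloc(thread_zone);
+	 *		*new_thread = thread_template;
+	 *
+	 *	after which only the runtime-dependent fields (task, lock,
+	 *	sched_stamp, and so on) remain to be filled in.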
+ */ + + /* thread_template.links (none) */ + thread_template.runq = RUN_QUEUE_NULL; + + /* thread_template.task (later) */ + /* thread_template.thread_list (later) */ + /* thread_template.pset_threads (later) */ + + /* thread_template.lock (later) */ + /* one ref for being alive; one for the guy who creates the thread */ + thread_template.ref_count = 2; + + thread_template.pcb = (pcb_t) 0; /* (reset) */ + thread_template.kernel_stack = (vm_offset_t) 0; + thread_template.stack_privilege = (vm_offset_t) 0; + + thread_template.wait_event = 0; + /* thread_template.suspend_count (later) */ + thread_template.wait_result = KERN_SUCCESS; + thread_template.wake_active = FALSE; + thread_template.state = TH_SUSP | TH_SWAPPED; + thread_template.swap_func = thread_bootstrap_return; + +/* thread_template.priority (later) */ + thread_template.max_priority = BASEPRI_USER; +/* thread_template.sched_pri (later - compute_priority) */ +#if MACH_FIXPRI + thread_template.sched_data = 0; + thread_template.policy = POLICY_TIMESHARE; +#endif /* MACH_FIXPRI */ + thread_template.depress_priority = -1; + thread_template.cpu_usage = 0; + thread_template.sched_usage = 0; + /* thread_template.sched_stamp (later) */ + + thread_template.recover = (vm_offset_t) 0; + thread_template.vm_privilege = FALSE; + + thread_template.user_stop_count = 1; + + /* thread_template.<IPC structures> (later) */ + + timer_init(&(thread_template.user_timer)); + timer_init(&(thread_template.system_timer)); + thread_template.user_timer_save.low = 0; + thread_template.user_timer_save.high = 0; + thread_template.system_timer_save.low = 0; + thread_template.system_timer_save.high = 0; + thread_template.cpu_delta = 0; + thread_template.sched_delta = 0; + + thread_template.active = FALSE; /* reset */ + thread_template.ast = AST_ZILCH; + + /* thread_template.processor_set (later) */ + thread_template.bound_processor = PROCESSOR_NULL; +#if MACH_HOST + thread_template.may_assign = TRUE; + thread_template.assign_active = FALSE; +#endif /* MACH_HOST */ + +#if NCPUS > 1 + /* thread_template.last_processor (later) */ +#endif /* NCPUS > 1 */ + + /* + * Initialize other data structures used in + * this module. + */ + + queue_init(&reaper_queue); + simple_lock_init(&reaper_lock); + +#ifndef MACHINE_STACK + simple_lock_init(&stack_lock_data); +#endif /* MACHINE_STACK */ + +#if MACH_DEBUG + simple_lock_init(&stack_usage_lock); +#endif /* MACH_DEBUG */ + + /* + * Initialize any machine-dependent + * per-thread structures necessary. + */ + + pcb_module_init(); +} + +kern_return_t thread_create( + register task_t parent_task, + thread_t *child_thread) /* OUT */ +{ + register thread_t new_thread; + register processor_set_t pset; + + if (parent_task == TASK_NULL) + return KERN_INVALID_ARGUMENT; + + /* + * Allocate a thread and initialize static fields + */ + + new_thread = (thread_t) zalloc(thread_zone); + + if (new_thread == THREAD_NULL) + return KERN_RESOURCE_SHORTAGE; + + *new_thread = thread_template; + + /* + * Initialize runtime-dependent fields + */ + + new_thread->task = parent_task; + simple_lock_init(&new_thread->lock); + new_thread->sched_stamp = sched_tick; + thread_timeout_setup(new_thread); + + /* + * Create a pcb. The kernel stack is created later, + * when the thread is swapped-in. + */ + pcb_init(new_thread); + + ipc_thread_init(new_thread); + +#if NET_ATM + new_thread->nw_ep_waited = 0; +#endif + + /* + * Find the processor set for the parent task. 
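+	 *	The pset reference is taken while the task lock is still held,
+	 *	and only then is the task unlocked; this keeps the pset from
+	 *	disappearing while this thread later blocks waiting for
+	 *	pset_lock().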
+ */ + task_lock(parent_task); + pset = parent_task->processor_set; + pset_reference(pset); + task_unlock(parent_task); + + /* + * Lock both the processor set and the task, + * so that the thread can be added to both + * simultaneously. Processor set must be + * locked first. + */ + + Restart: + pset_lock(pset); + task_lock(parent_task); + + /* + * If the task has changed processor sets, + * catch up (involves lots of lock juggling). + */ + { + processor_set_t cur_pset; + + cur_pset = parent_task->processor_set; + if (!cur_pset->active) + cur_pset = &default_pset; + + if (cur_pset != pset) { + pset_reference(cur_pset); + task_unlock(parent_task); + pset_unlock(pset); + pset_deallocate(pset); + pset = cur_pset; + goto Restart; + } + } + + /* + * Set the thread`s priority from the pset and task. + */ + + new_thread->priority = parent_task->priority; + if (pset->max_priority > new_thread->max_priority) + new_thread->max_priority = pset->max_priority; + if (new_thread->max_priority > new_thread->priority) + new_thread->priority = new_thread->max_priority; + /* + * Don't need to lock thread here because it can't + * possibly execute and no one else knows about it. + */ + compute_priority(new_thread, TRUE); + + /* + * Thread is suspended if the task is. Add 1 to + * suspend count since thread is created in suspended + * state. + */ + new_thread->suspend_count = parent_task->suspend_count + 1; + + /* + * Add the thread to the processor set. + * If the pset is empty, suspend the thread again. + */ + + pset_add_thread(pset, new_thread); + if (pset->empty) + new_thread->suspend_count++; + +#if HW_FOOTPRINT + /* + * Need to set last_processor, idle processor would be best, but + * that requires extra locking nonsense. Go for tail of + * processors queue to avoid master. + */ + if (!pset->empty) { + new_thread->last_processor = + (processor_t)queue_first(&pset->processors); + } + else { + /* + * Thread created in empty processor set. Pick + * master processor as an acceptable legal value. + */ + new_thread->last_processor = master_processor; + } +#else /* HW_FOOTPRINT */ + /* + * Don't need to initialize because the context switch + * code will set it before it can be used. + */ +#endif /* HW_FOOTPRINT */ + +#if MACH_PCSAMPLE + new_thread->pc_sample.buffer = 0; + new_thread->pc_sample.seqno = 0; + new_thread->pc_sample.sampletypes = 0; +#endif /* MACH_PCSAMPLE */ + + new_thread->pc_sample.buffer = 0; + /* + * Add the thread to the task`s list of threads. + * The new thread holds another reference to the task. + */ + + parent_task->ref_count++; + + parent_task->thread_count++; + queue_enter(&parent_task->thread_list, new_thread, thread_t, + thread_list); + + /* + * Finally, mark the thread active. + */ + + new_thread->active = TRUE; + + if (!parent_task->active) { + task_unlock(parent_task); + pset_unlock(pset); + (void) thread_terminate(new_thread); + /* release ref we would have given our caller */ + thread_deallocate(new_thread); + return KERN_FAILURE; + } + task_unlock(parent_task); + pset_unlock(pset); + + ipc_thread_enable(new_thread); + + *child_thread = new_thread; + return KERN_SUCCESS; +} + +unsigned int thread_deallocate_stack = 0; + +void thread_deallocate( + register thread_t thread) +{ + spl_t s; + register task_t task; + register processor_set_t pset; + + time_value_t user_time, system_time; + + if (thread == THREAD_NULL) + return; + + /* + * First, check for new count > 0 (the common case). + * Only the thread needs to be locked. 
+ */ + s = splsched(); + thread_lock(thread); + if (--thread->ref_count > 0) { + thread_unlock(thread); + (void) splx(s); + return; + } + + /* + * Count is zero. However, the task's and processor set's + * thread lists have implicit references to + * the thread, and may make new ones. Their locks also + * dominate the thread lock. To check for this, we + * temporarily restore the one thread reference, unlock + * the thread, and then lock the other structures in + * the proper order. + */ + thread->ref_count = 1; + thread_unlock(thread); + (void) splx(s); + + pset = thread->processor_set; + pset_lock(pset); + +#if MACH_HOST + /* + * The thread might have moved. + */ + while (pset != thread->processor_set) { + pset_unlock(pset); + pset = thread->processor_set; + pset_lock(pset); + } +#endif /* MACH_HOST */ + + task = thread->task; + task_lock(task); + + s = splsched(); + thread_lock(thread); + + if (--thread->ref_count > 0) { + /* + * Task or processor_set made extra reference. + */ + thread_unlock(thread); + (void) splx(s); + task_unlock(task); + pset_unlock(pset); + return; + } + + /* + * Thread has no references - we can remove it. + */ + + /* + * Remove pending timeouts. + */ + reset_timeout_check(&thread->timer); + + reset_timeout_check(&thread->depress_timer); + thread->depress_priority = -1; + + /* + * Accumulate times for dead threads in task. + */ + thread_read_times(thread, &user_time, &system_time); + time_value_add(&task->total_user_time, &user_time); + time_value_add(&task->total_system_time, &system_time); + + /* + * Remove thread from task list and processor_set threads list. + */ + task->thread_count--; + queue_remove(&task->thread_list, thread, thread_t, thread_list); + + pset_remove_thread(pset, thread); + + thread_unlock(thread); /* no more references - safe */ + (void) splx(s); + task_unlock(task); + pset_unlock(pset); + pset_deallocate(pset); + + /* + * A couple of quick sanity checks + */ + + if (thread == current_thread()) { + panic("thread deallocating itself"); + } + if ((thread->state & ~(TH_RUN | TH_HALTED | TH_SWAPPED)) != TH_SUSP) + panic("unstopped thread destroyed!"); + + /* + * Deallocate the task reference, since we know the thread + * is not running. + */ + task_deallocate(thread->task); /* may block */ + + /* + * Clean up any machine-dependent resources. + */ + if ((thread->state & TH_SWAPPED) == 0) { + spl_t _s_ = splsched(); + stack_free(thread); + (void) splx(s); + thread_deallocate_stack++; + } + /* + * Rattle the event count machinery (gag) + */ + evc_notify_abort(thread); + + pcb_terminate(thread); + zfree(thread_zone, (vm_offset_t) thread); +} + +void thread_reference( + register thread_t thread) +{ + spl_t s; + + if (thread == THREAD_NULL) + return; + + s = splsched(); + thread_lock(thread); + thread->ref_count++; + thread_unlock(thread); + (void) splx(s); +} + +/* + * thread_terminate: + * + * Permanently stop execution of the specified thread. + * + * A thread to be terminated must be allowed to clean up any state + * that it has before it exits. The thread is broken out of any + * wait condition that it is in, and signalled to exit. It then + * cleans up its state and calls thread_halt_self on its way out of + * the kernel. The caller waits for the thread to halt, terminates + * its IPC state, and then deallocates it. + * + * If the caller is the current thread, it must still exit the kernel + * to clean up any state (thread and port references, messages, etc). 
+ * When it exits the kernel, it then terminates its IPC state and + * queues itself for the reaper thread, which will wait for the thread + * to stop and then deallocate it. (A thread cannot deallocate itself, + * since it needs a kernel stack to execute.) + */ +kern_return_t thread_terminate( + register thread_t thread) +{ + register thread_t cur_thread = current_thread(); + register task_t cur_task; + spl_t s; + + if (thread == THREAD_NULL) + return KERN_INVALID_ARGUMENT; + + /* + * Break IPC control over the thread. + */ + ipc_thread_disable(thread); + + if (thread == cur_thread) { + + /* + * Current thread will queue itself for reaper when + * exiting kernel. + */ + s = splsched(); + thread_lock(thread); + if (thread->active) { + thread->active = FALSE; + thread_ast_set(thread, AST_TERMINATE); + } + thread_unlock(thread); + ast_on(cpu_number(), AST_TERMINATE); + splx(s); + return KERN_SUCCESS; + } + + /* + * Lock both threads and the current task + * to check termination races and prevent deadlocks. + */ + cur_task = current_task(); + task_lock(cur_task); + s = splsched(); + if ((vm_offset_t)thread < (vm_offset_t)cur_thread) { + thread_lock(thread); + thread_lock(cur_thread); + } + else { + thread_lock(cur_thread); + thread_lock(thread); + } + + /* + * If the current thread is being terminated, help out. + */ + if ((!cur_task->active) || (!cur_thread->active)) { + thread_unlock(cur_thread); + thread_unlock(thread); + (void) splx(s); + task_unlock(cur_task); + thread_terminate(cur_thread); + return KERN_FAILURE; + } + + thread_unlock(cur_thread); + task_unlock(cur_task); + + /* + * Terminate victim thread. + */ + if (!thread->active) { + /* + * Someone else got there first. + */ + thread_unlock(thread); + (void) splx(s); + return KERN_FAILURE; + } + + thread->active = FALSE; + + thread_unlock(thread); + (void) splx(s); + +#if MACH_HOST + /* + * Reassign thread to default pset if needed. + */ + thread_freeze(thread); + if (thread->processor_set != &default_pset) { + thread_doassign(thread, &default_pset, FALSE); + } +#endif /* MACH_HOST */ + + /* + * Halt the victim at the clean point. + */ + (void) thread_halt(thread, TRUE); +#if MACH_HOST + thread_unfreeze(thread); +#endif /* MACH_HOST */ + /* + * Shut down the victims IPC and deallocate its + * reference to itself. + */ + ipc_thread_terminate(thread); +#if NET_ATM + mk_waited_collect(thread); +#endif + thread_deallocate(thread); + return KERN_SUCCESS; +} + +/* + * thread_force_terminate: + * + * Version of thread_terminate called by task_terminate. thread is + * not the current thread. task_terminate is the dominant operation, + * so we can force this thread to stop. + */ +void +thread_force_terminate( + register thread_t thread) +{ + boolean_t deallocate_here = FALSE; + spl_t s; + + ipc_thread_disable(thread); + +#if MACH_HOST + /* + * Reassign thread to default pset if needed. + */ + thread_freeze(thread); + if (thread->processor_set != &default_pset) + thread_doassign(thread, &default_pset, FALSE); +#endif /* MACH_HOST */ + + s = splsched(); + thread_lock(thread); + deallocate_here = thread->active; + thread->active = FALSE; + thread_unlock(thread); + (void) splx(s); + + (void) thread_halt(thread, TRUE); + ipc_thread_terminate(thread); +#if NET_ATM + mk_waited_collect(thread); +#endif + +#if MACH_HOST + thread_unfreeze(thread); +#endif /* MACH_HOST */ + + if (deallocate_here) + thread_deallocate(thread); +} + + +/* + * Halt a thread at a clean point, leaving it suspended. + * + * must_halt indicates whether thread must halt. 
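+ *	must_halt is TRUE on the termination paths (thread_terminate and
+ *	thread_force_terminate), where the operation may block but may not
+ *	fail.  It is FALSE when called from thread_abort, where a halt cycle
+ *	is possible; in that case the routine may return KERN_FAILURE and
+ *	the caller reports the interruption, e.g. (as thread_abort does
+ *	below):
+ *
+ *		if (thread_halt(thread, FALSE) != KERN_SUCCESS)
+ *			return KERN_ABORTED;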
+ * + */ +kern_return_t thread_halt( + register thread_t thread, + boolean_t must_halt) +{ + register thread_t cur_thread = current_thread(); + register kern_return_t ret; + spl_t s; + + if (thread == cur_thread) + panic("thread_halt: trying to halt current thread."); + /* + * If must_halt is FALSE, then a check must be made for + * a cycle of halt operations. + */ + if (!must_halt) { + /* + * Grab both thread locks. + */ + s = splsched(); + if ((vm_offset_t)thread < (vm_offset_t)cur_thread) { + thread_lock(thread); + thread_lock(cur_thread); + } + else { + thread_lock(cur_thread); + thread_lock(thread); + } + + /* + * If target thread is already halted, grab a hold + * on it and return. + */ + if (thread->state & TH_HALTED) { + thread->suspend_count++; + thread_unlock(cur_thread); + thread_unlock(thread); + (void) splx(s); + return KERN_SUCCESS; + } + + /* + * If someone is trying to halt us, we have a potential + * halt cycle. Break the cycle by interrupting anyone + * who is trying to halt us, and causing this operation + * to fail; retry logic will only retry operations + * that cannot deadlock. (If must_halt is TRUE, this + * operation can never cause a deadlock.) + */ + if (cur_thread->ast & AST_HALT) { + thread_wakeup_with_result((event_t)&cur_thread->wake_active, + THREAD_INTERRUPTED); + thread_unlock(thread); + thread_unlock(cur_thread); + (void) splx(s); + return KERN_FAILURE; + } + + thread_unlock(cur_thread); + + } + else { + /* + * Lock thread and check whether it is already halted. + */ + s = splsched(); + thread_lock(thread); + if (thread->state & TH_HALTED) { + thread->suspend_count++; + thread_unlock(thread); + (void) splx(s); + return KERN_SUCCESS; + } + } + + /* + * Suspend thread - inline version of thread_hold() because + * thread is already locked. + */ + thread->suspend_count++; + thread->state |= TH_SUSP; + + /* + * If someone else is halting it, wait for that to complete. + * Fail if wait interrupted and must_halt is false. + */ + while ((thread->ast & AST_HALT) && (!(thread->state & TH_HALTED))) { + thread->wake_active = TRUE; + thread_sleep((event_t) &thread->wake_active, + simple_lock_addr(thread->lock), TRUE); + + if (thread->state & TH_HALTED) { + (void) splx(s); + return KERN_SUCCESS; + } + if ((current_thread()->wait_result != THREAD_AWAKENED) + && !(must_halt)) { + (void) splx(s); + thread_release(thread); + return KERN_FAILURE; + } + thread_lock(thread); + } + + /* + * Otherwise, have to do it ourselves. + */ + + thread_ast_set(thread, AST_HALT); + + while (TRUE) { + /* + * Wait for thread to stop. + */ + thread_unlock(thread); + (void) splx(s); + + ret = thread_dowait(thread, must_halt); + + /* + * If the dowait failed, so do we. Drop AST_HALT, and + * wake up anyone else who might be waiting for it. + */ + if (ret != KERN_SUCCESS) { + s = splsched(); + thread_lock(thread); + thread_ast_clear(thread, AST_HALT); + thread_wakeup_with_result((event_t)&thread->wake_active, + THREAD_INTERRUPTED); + thread_unlock(thread); + (void) splx(s); + + thread_release(thread); + return ret; + } + + /* + * Clear any interruptible wait. + */ + clear_wait(thread, THREAD_INTERRUPTED, TRUE); + + /* + * If the thread's at a clean point, we're done. + * Don't need a lock because it really is stopped. + */ + if (thread->state & TH_HALTED) { + return KERN_SUCCESS; + } + + /* + * If the thread is at a nice continuation, + * or a continuation with a cleanup routine, + * call the cleanup routine. 
+ */ + if ((((thread->swap_func == mach_msg_continue) || + (thread->swap_func == mach_msg_receive_continue)) && + mach_msg_interrupt(thread)) || + (thread->swap_func == thread_exception_return) || + (thread->swap_func == thread_bootstrap_return)) { + s = splsched(); + thread_lock(thread); + thread->state |= TH_HALTED; + thread_ast_clear(thread, AST_HALT); + thread_unlock(thread); + splx(s); + + return KERN_SUCCESS; + } + + /* + * Force the thread to stop at a clean + * point, and arrange to wait for it. + * + * Set it running, so it can notice. Override + * the suspend count. We know that the thread + * is suspended and not waiting. + * + * Since the thread may hit an interruptible wait + * before it reaches a clean point, we must force it + * to wake us up when it does so. This involves some + * trickery: + * We mark the thread SUSPENDED so that thread_block + * will suspend it and wake us up. + * We mark the thread RUNNING so that it will run. + * We mark the thread UN-INTERRUPTIBLE (!) so that + * some other thread trying to halt or suspend it won't + * take it off the run queue before it runs. Since + * dispatching a thread (the tail of thread_invoke) marks + * the thread interruptible, it will stop at the next + * context switch or interruptible wait. + */ + + s = splsched(); + thread_lock(thread); + if ((thread->state & TH_SCHED_STATE) != TH_SUSP) + panic("thread_halt"); + thread->state |= TH_RUN | TH_UNINT; + thread_setrun(thread, FALSE); + + /* + * Continue loop and wait for thread to stop. + */ + } +} + +void walking_zombie(void) +{ + panic("the zombie walks!"); +} + +/* + * Thread calls this routine on exit from the kernel when it + * notices a halt request. + */ +void thread_halt_self(void) +{ + register thread_t thread = current_thread(); + spl_t s; + + if (thread->ast & AST_TERMINATE) { + /* + * Thread is terminating itself. Shut + * down IPC, then queue it up for the + * reaper thread. + */ + ipc_thread_terminate(thread); +#if NET_ATM + mk_waited_collect(thread); +#endif + + thread_hold(thread); + + s = splsched(); + simple_lock(&reaper_lock); + enqueue_tail(&reaper_queue, (queue_entry_t) thread); + simple_unlock(&reaper_lock); + + thread_lock(thread); + thread->state |= TH_HALTED; + thread_unlock(thread); + (void) splx(s); + + thread_wakeup((event_t)&reaper_queue); + counter(c_thread_halt_self_block++); + thread_block(walking_zombie); + /*NOTREACHED*/ + } else { + /* + * Thread was asked to halt - show that it + * has done so. + */ + s = splsched(); + thread_lock(thread); + thread->state |= TH_HALTED; + thread_ast_clear(thread, AST_HALT); + thread_unlock(thread); + splx(s); + counter(c_thread_halt_self_block++); + thread_block(thread_exception_return); + /* + * thread_release resets TH_HALTED. + */ + } +} + +/* + * thread_hold: + * + * Suspend execution of the specified thread. + * This is a recursive-style suspension of the thread, a count of + * suspends is maintained. + */ +void thread_hold( + register thread_t thread) +{ + spl_t s; + + s = splsched(); + thread_lock(thread); + thread->suspend_count++; + thread->state |= TH_SUSP; + thread_unlock(thread); + (void) splx(s); +} + +/* + * thread_dowait: + * + * Wait for a thread to actually enter stopped state. + * + * must_halt argument indicates if this may fail on interruption. + * This is FALSE only if called from thread_abort via thread_halt. 
+ */ +kern_return_t +thread_dowait( + register thread_t thread, + boolean_t must_halt) +{ + register boolean_t need_wakeup; + register kern_return_t ret = KERN_SUCCESS; + spl_t s; + + if (thread == current_thread()) + panic("thread_dowait"); + + /* + * If a thread is not interruptible, it may not be suspended + * until it becomes interruptible. In this case, we wait for + * the thread to stop itself, and indicate that we are waiting + * for it to stop so that it can wake us up when it does stop. + * + * If the thread is interruptible, we may be able to suspend + * it immediately. There are several cases: + * + * 1) The thread is already stopped (trivial) + * 2) The thread is runnable (marked RUN and on a run queue). + * We pull it off the run queue and mark it stopped. + * 3) The thread is running. We wait for it to stop. + */ + + need_wakeup = FALSE; + s = splsched(); + thread_lock(thread); + + for (;;) { + switch (thread->state & TH_SCHED_STATE) { + case TH_SUSP: + case TH_WAIT | TH_SUSP: + /* + * Thread is already suspended, or sleeping in an + * interruptible wait. We win! + */ + break; + + case TH_RUN | TH_SUSP: + /* + * The thread is interruptible. If we can pull + * it off a runq, stop it here. + */ + if (rem_runq(thread) != RUN_QUEUE_NULL) { + thread->state &= ~TH_RUN; + need_wakeup = thread->wake_active; + thread->wake_active = FALSE; + break; + } +#if NCPUS > 1 + /* + * The thread must be running, so make its + * processor execute ast_check(). This + * should cause the thread to take an ast and + * context switch to suspend for us. + */ + cause_ast_check(thread->last_processor); +#endif /* NCPUS > 1 */ + + /* + * Fall through to wait for thread to stop. + */ + + case TH_RUN | TH_SUSP | TH_UNINT: + case TH_RUN | TH_WAIT | TH_SUSP: + case TH_RUN | TH_WAIT | TH_SUSP | TH_UNINT: + case TH_WAIT | TH_SUSP | TH_UNINT: + /* + * Wait for the thread to stop, or sleep interruptibly + * (thread_block will stop it in the latter case). + * Check for failure if interrupted. + */ + thread->wake_active = TRUE; + thread_sleep((event_t) &thread->wake_active, + simple_lock_addr(thread->lock), TRUE); + thread_lock(thread); + if ((current_thread()->wait_result != THREAD_AWAKENED) && + !must_halt) { + ret = KERN_FAILURE; + break; + } + + /* + * Repeat loop to check thread`s state. + */ + continue; + } + /* + * Thread is stopped at this point. + */ + break; + } + + thread_unlock(thread); + (void) splx(s); + + if (need_wakeup) + thread_wakeup((event_t) &thread->wake_active); + + return ret; +} + +void thread_release( + register thread_t thread) +{ + spl_t s; + + s = splsched(); + thread_lock(thread); + if (--thread->suspend_count == 0) { + thread->state &= ~(TH_SUSP | TH_HALTED); + if ((thread->state & (TH_WAIT | TH_RUN)) == 0) { + /* was only suspended */ + thread->state |= TH_RUN; + thread_setrun(thread, TRUE); + } + } + thread_unlock(thread); + (void) splx(s); +} + +kern_return_t thread_suspend( + register thread_t thread) +{ + register boolean_t hold; + spl_t spl; + + if (thread == THREAD_NULL) + return KERN_INVALID_ARGUMENT; + + hold = FALSE; + spl = splsched(); + thread_lock(thread); + if (thread->user_stop_count++ == 0) { + hold = TRUE; + thread->suspend_count++; + thread->state |= TH_SUSP; + } + thread_unlock(thread); + (void) splx(spl); + + /* + * Now wait for the thread if necessary. + */ + if (hold) { + if (thread == current_thread()) { + /* + * We want to call thread_block on our way out, + * to stop running. 
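+		 *	The current thread cannot wait for itself to stop, so
+		 *	AST_BLOCK is posted instead; the thread then blocks in
+		 *	the AST handling code on its way back to user mode,
+		 *	and stays stopped because TH_SUSP is already set.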
+ */ + spl = splsched(); + ast_on(cpu_number(), AST_BLOCK); + (void) splx(spl); + } else + (void) thread_dowait(thread, TRUE); + } + return KERN_SUCCESS; +} + + +kern_return_t thread_resume( + register thread_t thread) +{ + register kern_return_t ret; + spl_t s; + + if (thread == THREAD_NULL) + return KERN_INVALID_ARGUMENT; + + ret = KERN_SUCCESS; + + s = splsched(); + thread_lock(thread); + if (thread->user_stop_count > 0) { + if (--thread->user_stop_count == 0) { + if (--thread->suspend_count == 0) { + thread->state &= ~(TH_SUSP | TH_HALTED); + if ((thread->state & (TH_WAIT | TH_RUN)) == 0) { + /* was only suspended */ + thread->state |= TH_RUN; + thread_setrun(thread, TRUE); + } + } + } + } + else { + ret = KERN_FAILURE; + } + + thread_unlock(thread); + (void) splx(s); + + return ret; +} + +/* + * Return thread's machine-dependent state. + */ +kern_return_t thread_get_state( + register thread_t thread, + int flavor, + thread_state_t old_state, /* pointer to OUT array */ + natural_t *old_state_count) /*IN/OUT*/ +{ + kern_return_t ret; + + if (thread == THREAD_NULL || thread == current_thread()) { + return KERN_INVALID_ARGUMENT; + } + + thread_hold(thread); + (void) thread_dowait(thread, TRUE); + + ret = thread_getstatus(thread, flavor, old_state, old_state_count); + + thread_release(thread); + return ret; +} + +/* + * Change thread's machine-dependent state. + */ +kern_return_t thread_set_state( + register thread_t thread, + int flavor, + thread_state_t new_state, + natural_t new_state_count) +{ + kern_return_t ret; + + if (thread == THREAD_NULL || thread == current_thread()) { + return KERN_INVALID_ARGUMENT; + } + + thread_hold(thread); + (void) thread_dowait(thread, TRUE); + + ret = thread_setstatus(thread, flavor, new_state, new_state_count); + + thread_release(thread); + return ret; +} + +kern_return_t thread_info( + register thread_t thread, + int flavor, + thread_info_t thread_info_out, /* pointer to OUT array */ + natural_t *thread_info_count) /*IN/OUT*/ +{ + int state, flags; + spl_t s; + + if (thread == THREAD_NULL) + return KERN_INVALID_ARGUMENT; + + if (flavor == THREAD_BASIC_INFO) { + register thread_basic_info_t basic_info; + + if (*thread_info_count < THREAD_BASIC_INFO_COUNT) { + return KERN_INVALID_ARGUMENT; + } + + basic_info = (thread_basic_info_t) thread_info_out; + + s = splsched(); + thread_lock(thread); + + /* + * Update lazy-evaluated scheduler info because someone wants it. + */ + if ((thread->state & TH_RUN) == 0 && + thread->sched_stamp != sched_tick) + update_priority(thread); + + /* fill in info */ + + thread_read_times(thread, + &basic_info->user_time, + &basic_info->system_time); + basic_info->base_priority = thread->priority; + basic_info->cur_priority = thread->sched_pri; + + /* + * To calculate cpu_usage, first correct for timer rate, + * then for 5/8 ageing. The correction factor [3/5] is + * (1/(5/8) - 1). + */ + basic_info->cpu_usage = thread->cpu_usage / + (TIMER_RATE/TH_USAGE_SCALE); + basic_info->cpu_usage = (basic_info->cpu_usage * 3) / 5; +#if SIMPLE_CLOCK + /* + * Clock drift compensation. 
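+		 *	Worked example, with illustrative figures only (the
+		 *	actual TIMER_RATE and TH_USAGE_SCALE values are
+		 *	defined elsewhere): with TIMER_RATE 1000000 and
+		 *	TH_USAGE_SCALE 1000, a raw cpu_usage of 500000 becomes
+		 *	500000 / (1000000/1000) = 500, and the ageing
+		 *	correction then gives (500 * 3) / 5 = 300.  Under
+		 *	SIMPLE_CLOCK the result is further rescaled by
+		 *	1000000/sched_usec to compensate for measured clock
+		 *	drift.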
+ */ + basic_info->cpu_usage = + (basic_info->cpu_usage * 1000000)/sched_usec; +#endif /* SIMPLE_CLOCK */ + + if (thread->state & TH_SWAPPED) + flags = TH_FLAGS_SWAPPED; + else if (thread->state & TH_IDLE) + flags = TH_FLAGS_IDLE; + else + flags = 0; + + if (thread->state & TH_HALTED) + state = TH_STATE_HALTED; + else + if (thread->state & TH_RUN) + state = TH_STATE_RUNNING; + else + if (thread->state & TH_UNINT) + state = TH_STATE_UNINTERRUPTIBLE; + else + if (thread->state & TH_SUSP) + state = TH_STATE_STOPPED; + else + if (thread->state & TH_WAIT) + state = TH_STATE_WAITING; + else + state = 0; /* ? */ + + basic_info->run_state = state; + basic_info->flags = flags; + basic_info->suspend_count = thread->user_stop_count; + if (state == TH_STATE_RUNNING) + basic_info->sleep_time = 0; + else + basic_info->sleep_time = sched_tick - thread->sched_stamp; + + thread_unlock(thread); + splx(s); + + *thread_info_count = THREAD_BASIC_INFO_COUNT; + return KERN_SUCCESS; + } + else if (flavor == THREAD_SCHED_INFO) { + register thread_sched_info_t sched_info; + + if (*thread_info_count < THREAD_SCHED_INFO_COUNT) { + return KERN_INVALID_ARGUMENT; + } + + sched_info = (thread_sched_info_t) thread_info_out; + + s = splsched(); + thread_lock(thread); + +#if MACH_FIXPRI + sched_info->policy = thread->policy; + if (thread->policy == POLICY_FIXEDPRI) { + sched_info->data = (thread->sched_data * tick)/1000; + } + else { + sched_info->data = 0; + } +#else /* MACH_FIXPRI */ + sched_info->policy = POLICY_TIMESHARE; + sched_info->data = 0; +#endif /* MACH_FIXPRI */ + + sched_info->base_priority = thread->priority; + sched_info->max_priority = thread->max_priority; + sched_info->cur_priority = thread->sched_pri; + + sched_info->depressed = (thread->depress_priority >= 0); + sched_info->depress_priority = thread->depress_priority; + + thread_unlock(thread); + splx(s); + + *thread_info_count = THREAD_SCHED_INFO_COUNT; + return KERN_SUCCESS; + } + + return KERN_INVALID_ARGUMENT; +} + +kern_return_t thread_abort( + register thread_t thread) +{ + if (thread == THREAD_NULL || thread == current_thread()) { + return KERN_INVALID_ARGUMENT; + } + + /* + * + * clear it of an event wait + */ + evc_notify_abort(thread); + + /* + * Try to force the thread to a clean point + * If the halt operation fails return KERN_ABORTED. + * ipc code will convert this to an ipc interrupted error code. + */ + if (thread_halt(thread, FALSE) != KERN_SUCCESS) + return KERN_ABORTED; + + /* + * If the thread was in an exception, abort that too. + */ + mach_msg_abort_rpc(thread); + + /* + * Then set it going again. + */ + thread_release(thread); + + /* + * Also abort any depression. + */ + if (thread->depress_priority != -1) + thread_depress_abort(thread); + + return KERN_SUCCESS; +} + +/* + * thread_start: + * + * Start a thread at the specified routine. + * The thread must be in a swapped state. + */ + +void +thread_start( + thread_t thread, + continuation_t start) +{ + thread->swap_func = start; +} + +/* + * kernel_thread: + * + * Start up a kernel thread in the specified task. + */ + +thread_t kernel_thread( + task_t task, + continuation_t start, + void * arg) +{ + thread_t thread; + + (void) thread_create(task, &thread); + /* release "extra" ref that thread_create gave us */ + thread_deallocate(thread); + thread_start(thread, start); + thread->ith_other = arg; + + /* + * We ensure that the kernel thread starts with a stack. + * The swapin mechanism might not be operational yet. 
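+	 *	A typical use elsewhere in the kernel looks like (illustrative
+	 *	sketch, not a call made in this file):
+	 *
+	 *		(void) kernel_thread(kernel_task, reaper_thread, (void *) 0);
+	 *
+	 *	The returned thread is already resumed and runs `start', with
+	 *	its argument available in ith_other.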
+ */ + thread_doswapin(thread); + thread->max_priority = BASEPRI_SYSTEM; + thread->priority = BASEPRI_SYSTEM; + thread->sched_pri = BASEPRI_SYSTEM; + (void) thread_resume(thread); + return thread; +} + +/* + * reaper_thread: + * + * This kernel thread runs forever looking for threads to destroy + * (when they request that they be destroyed, of course). + */ +void reaper_thread_continue(void) +{ + for (;;) { + register thread_t thread; + spl_t s; + + s = splsched(); + simple_lock(&reaper_lock); + + while ((thread = (thread_t) dequeue_head(&reaper_queue)) + != THREAD_NULL) { + simple_unlock(&reaper_lock); + (void) splx(s); + + (void) thread_dowait(thread, TRUE); /* may block */ + thread_deallocate(thread); /* may block */ + + s = splsched(); + simple_lock(&reaper_lock); + } + + assert_wait((event_t) &reaper_queue, FALSE); + simple_unlock(&reaper_lock); + (void) splx(s); + counter(c_reaper_thread_block++); + thread_block(reaper_thread_continue); + } +} + +void reaper_thread(void) +{ + reaper_thread_continue(); + /*NOTREACHED*/ +} + +#if MACH_HOST +/* + * thread_assign: + * + * Change processor set assignment. + * Caller must hold an extra reference to the thread (if this is + * called directly from the ipc interface, this is an operation + * in progress reference). Caller must hold no locks -- this may block. + */ + +kern_return_t +thread_assign( + thread_t thread, + processor_set_t new_pset) +{ + if (thread == THREAD_NULL || new_pset == PROCESSOR_SET_NULL) { + return KERN_INVALID_ARGUMENT; + } + + thread_freeze(thread); + thread_doassign(thread, new_pset, TRUE); + + return KERN_SUCCESS; +} + +/* + * thread_freeze: + * + * Freeze thread's assignment. Prelude to assigning thread. + * Only one freeze may be held per thread. + */ +void +thread_freeze( + thread_t thread) +{ + spl_t s; + /* + * Freeze the assignment, deferring to a prior freeze. + */ + s = splsched(); + thread_lock(thread); + while (thread->may_assign == FALSE) { + thread->assign_active = TRUE; + thread_sleep((event_t) &thread->assign_active, + simple_lock_addr(thread->lock), FALSE); + thread_lock(thread); + } + thread->may_assign = FALSE; + thread_unlock(thread); + (void) splx(s); + +} + +/* + * thread_unfreeze: release freeze on thread's assignment. + */ +void +thread_unfreeze( + thread_t thread) +{ + spl_t s; + + s = splsched(); + thread_lock(thread); + thread->may_assign = TRUE; + if (thread->assign_active) { + thread->assign_active = FALSE; + thread_wakeup((event_t)&thread->assign_active); + } + thread_unlock(thread); + splx(s); +} + +/* + * thread_doassign: + * + * Actually do thread assignment. thread_will_assign must have been + * called on the thread. release_freeze argument indicates whether + * to release freeze on thread. + */ + +void +thread_doassign( + register thread_t thread, + register processor_set_t new_pset, + boolean_t release_freeze) +{ + register processor_set_t pset; + register boolean_t old_empty, new_empty; + boolean_t recompute_pri = FALSE; + spl_t s; + + /* + * Check for silly no-op. + */ + pset = thread->processor_set; + if (pset == new_pset) { + if (release_freeze) + thread_unfreeze(thread); + return; + } + /* + * Suspend the thread and stop it if it's not the current thread. + */ + thread_hold(thread); + if (thread != current_thread()) + (void) thread_dowait(thread, TRUE); + + /* + * Lock both psets now, use ordering to avoid deadlocks. 
+ */ +Restart: + if ((vm_offset_t)pset < (vm_offset_t)new_pset) { + pset_lock(pset); + pset_lock(new_pset); + } + else { + pset_lock(new_pset); + pset_lock(pset); + } + + /* + * Check if new_pset is ok to assign to. If not, reassign + * to default_pset. + */ + if (!new_pset->active) { + pset_unlock(pset); + pset_unlock(new_pset); + new_pset = &default_pset; + goto Restart; + } + + pset_reference(new_pset); + + /* + * Grab the thread lock and move the thread. + * Then drop the lock on the old pset and the thread's + * reference to it. + */ + s = splsched(); + thread_lock(thread); + + thread_change_psets(thread, pset, new_pset); + + old_empty = pset->empty; + new_empty = new_pset->empty; + + pset_unlock(pset); + + /* + * Reset policy and priorities if needed. + */ +#if MACH_FIXPRI + if (thread->policy & new_pset->policies == 0) { + thread->policy = POLICY_TIMESHARE; + recompute_pri = TRUE; + } +#endif /* MACH_FIXPRI */ + + if (thread->max_priority < new_pset->max_priority) { + thread->max_priority = new_pset->max_priority; + if (thread->priority < thread->max_priority) { + thread->priority = thread->max_priority; + recompute_pri = TRUE; + } + else { + if ((thread->depress_priority >= 0) && + (thread->depress_priority < thread->max_priority)) { + thread->depress_priority = thread->max_priority; + } + } + } + + pset_unlock(new_pset); + + if (recompute_pri) + compute_priority(thread, TRUE); + + if (release_freeze) { + thread->may_assign = TRUE; + if (thread->assign_active) { + thread->assign_active = FALSE; + thread_wakeup((event_t)&thread->assign_active); + } + } + + thread_unlock(thread); + splx(s); + + pset_deallocate(pset); + + /* + * Figure out hold status of thread. Threads assigned to empty + * psets must be held. Therefore: + * If old pset was empty release its hold. + * Release our hold from above unless new pset is empty. + */ + + if (old_empty) + thread_release(thread); + if (!new_empty) + thread_release(thread); + + /* + * If current_thread is assigned, context switch to force + * assignment to happen. This also causes hold to take + * effect if the new pset is empty. + */ + if (thread == current_thread()) { + s = splsched(); + ast_on(cpu_number(), AST_BLOCK); + (void) splx(s); + } +} +#else /* MACH_HOST */ +kern_return_t +thread_assign( + thread_t thread, + processor_set_t new_pset) +{ + return KERN_FAILURE; +} +#endif /* MACH_HOST */ + +/* + * thread_assign_default: + * + * Special version of thread_assign for assigning threads to default + * processor set. + */ +kern_return_t +thread_assign_default( + thread_t thread) +{ + return thread_assign(thread, &default_pset); +} + +/* + * thread_get_assignment + * + * Return current assignment for this thread. + */ +kern_return_t thread_get_assignment( + thread_t thread, + processor_set_t *pset) +{ + *pset = thread->processor_set; + pset_reference(*pset); + return KERN_SUCCESS; +} + +/* + * thread_priority: + * + * Set priority (and possibly max priority) for thread. + */ +kern_return_t +thread_priority( + thread_t thread, + int priority, + boolean_t set_max) +{ + spl_t s; + kern_return_t ret = KERN_SUCCESS; + + if ((thread == THREAD_NULL) || invalid_pri(priority)) + return KERN_INVALID_ARGUMENT; + + s = splsched(); + thread_lock(thread); + + /* + * Check for violation of max priority + */ + if (priority < thread->max_priority) { + ret = KERN_FAILURE; + } + else { + /* + * Set priorities. If a depression is in progress, + * change the priority to restore. 
+ */ + if (thread->depress_priority >= 0) { + thread->depress_priority = priority; + } + else { + thread->priority = priority; + compute_priority(thread, TRUE); + } + + if (set_max) + thread->max_priority = priority; + } + thread_unlock(thread); + (void) splx(s); + + return ret; +} + +/* + * thread_set_own_priority: + * + * Internal use only; sets the priority of the calling thread. + * Will adjust max_priority if necessary. + */ +void +thread_set_own_priority( + int priority) +{ + spl_t s; + thread_t thread = current_thread(); + + s = splsched(); + thread_lock(thread); + + if (priority < thread->max_priority) + thread->max_priority = priority; + thread->priority = priority; + compute_priority(thread, TRUE); + + thread_unlock(thread); + (void) splx(s); +} + +/* + * thread_max_priority: + * + * Reset the max priority for a thread. + */ +kern_return_t +thread_max_priority( + thread_t thread, + processor_set_t pset, + int max_priority) +{ + spl_t s; + kern_return_t ret = KERN_SUCCESS; + + if ((thread == THREAD_NULL) || (pset == PROCESSOR_SET_NULL) || + invalid_pri(max_priority)) + return KERN_INVALID_ARGUMENT; + + s = splsched(); + thread_lock(thread); + +#if MACH_HOST + /* + * Check for wrong processor set. + */ + if (pset != thread->processor_set) { + ret = KERN_FAILURE; + } + else { +#endif /* MACH_HOST */ + thread->max_priority = max_priority; + + /* + * Reset priority if it violates new max priority + */ + if (max_priority > thread->priority) { + thread->priority = max_priority; + + compute_priority(thread, TRUE); + } + else { + if (thread->depress_priority >= 0 && + max_priority > thread->depress_priority) + thread->depress_priority = max_priority; + } +#if MACH_HOST + } +#endif /* MACH_HOST */ + + thread_unlock(thread); + (void) splx(s); + + return ret; +} + +/* + * thread_policy: + * + * Set scheduling policy for thread. + */ +kern_return_t +thread_policy( + thread_t thread, + int policy, + int data) +{ +#if MACH_FIXPRI + register kern_return_t ret = KERN_SUCCESS; + register int temp; + spl_t s; +#endif /* MACH_FIXPRI */ + + if ((thread == THREAD_NULL) || invalid_policy(policy)) + return KERN_INVALID_ARGUMENT; + +#if MACH_FIXPRI + s = splsched(); + thread_lock(thread); + + /* + * Check if changing policy. + */ + if (policy == thread->policy) { + /* + * Just changing data. This is meaningless for + * timesharing, quantum for fixed priority (but + * has no effect until current quantum runs out). + */ + if (policy == POLICY_FIXEDPRI) { + temp = data * 1000; + if (temp % tick) + temp += tick; + thread->sched_data = temp/tick; + } + } + else { + /* + * Changing policy. Check if new policy is allowed. + */ + if ((thread->processor_set->policies & policy) == 0) { + ret = KERN_FAILURE; + } + else { + /* + * Changing policy. Save data and calculate new + * priority. + */ + thread->policy = policy; + if (policy == POLICY_FIXEDPRI) { + temp = data * 1000; + if (temp % tick) + temp += tick; + thread->sched_data = temp/tick; + } + compute_priority(thread, TRUE); + } + } + thread_unlock(thread); + (void) splx(s); + + return ret; +#else /* MACH_FIXPRI */ + if (policy == POLICY_TIMESHARE) + return KERN_SUCCESS; + else + return KERN_FAILURE; +#endif /* MACH_FIXPRI */ +} + +/* + * thread_wire: + * + * Specify that the target thread must always be able + * to run and to allocate memory. 
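+ *	Only the calling thread can currently be wired (see the check against
+ *	current_thread() below), so the expected call is (illustrative
+ *	sketch):
+ *
+ *		kr = thread_wire(host, current_thread(), TRUE);
+ *
+ *	which sets vm_privilege and pins a private kernel stack via
+ *	stack_privilege(); passing FALSE for wired undoes both.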
+ */ +kern_return_t +thread_wire( + host_t host, + thread_t thread, + boolean_t wired) +{ + spl_t s; + + if (host == HOST_NULL) + return KERN_INVALID_ARGUMENT; + + if (thread == THREAD_NULL) + return KERN_INVALID_ARGUMENT; + + /* + * This implementation only works for the current thread. + * See stack_privilege. + */ + if (thread != current_thread()) + return KERN_INVALID_ARGUMENT; + + s = splsched(); + thread_lock(thread); + + if (wired) { + thread->vm_privilege = TRUE; + stack_privilege(thread); + } + else { + thread->vm_privilege = FALSE; +/*XXX stack_unprivilege(thread); */ + thread->stack_privilege = 0; + } + + thread_unlock(thread); + splx(s); + + return KERN_SUCCESS; +} + +/* + * thread_collect_scan: + * + * Attempt to free resources owned by threads. + * pcb_collect doesn't do anything yet. + */ + +void thread_collect_scan(void) +{ +#if 0 + register thread_t thread, prev_thread; + processor_set_t pset, prev_pset; + + prev_thread = THREAD_NULL; + prev_pset = PROCESSOR_SET_NULL; + + simple_lock(&all_psets_lock); + queue_iterate(&all_psets, pset, processor_set_t, all_psets) { + pset_lock(pset); + queue_iterate(&pset->threads, thread, thread_t, pset_threads) { + spl_t s = splsched(); + thread_lock(thread); + + /* + * Only collect threads which are + * not runnable and are swapped. + */ + + if ((thread->state & (TH_RUN|TH_SWAPPED)) + == TH_SWAPPED) { + thread->ref_count++; + thread_unlock(thread); + (void) splx(s); + pset->ref_count++; + pset_unlock(pset); + simple_unlock(&all_psets_lock); + + pcb_collect(thread); + + if (prev_thread != THREAD_NULL) + thread_deallocate(prev_thread); + prev_thread = thread; + + if (prev_pset != PROCESSOR_SET_NULL) + pset_deallocate(prev_pset); + prev_pset = pset; + + simple_lock(&all_psets_lock); + pset_lock(pset); + } else { + thread_unlock(thread); + (void) splx(s); + } + } + pset_unlock(pset); + } + simple_unlock(&all_psets_lock); + + if (prev_thread != THREAD_NULL) + thread_deallocate(prev_thread); + if (prev_pset != PROCESSOR_SET_NULL) + pset_deallocate(prev_pset); +#endif /* 0 */ +} + +boolean_t thread_collect_allowed = TRUE; +unsigned thread_collect_last_tick = 0; +unsigned thread_collect_max_rate = 0; /* in ticks */ + +/* + * consider_thread_collect: + * + * Called by the pageout daemon when the system needs more free pages. + */ + +void consider_thread_collect(void) +{ + /* + * By default, don't attempt thread collection more frequently + * than once a second. + */ + + if (thread_collect_max_rate == 0) + thread_collect_max_rate = hz; + + if (thread_collect_allowed && + (sched_tick > + (thread_collect_last_tick + thread_collect_max_rate))) { + thread_collect_last_tick = sched_tick; + thread_collect_scan(); + } +} + +#if MACH_DEBUG + +vm_size_t stack_usage( + register vm_offset_t stack) +{ + int i; + + for (i = 0; i < KERNEL_STACK_SIZE/sizeof(unsigned int); i++) + if (((unsigned int *)stack)[i] != STACK_MARKER) + break; + + return KERNEL_STACK_SIZE - i * sizeof(unsigned int); +} + +/* + * Machine-dependent code should call stack_init + * before doing its own initialization of the stack. + */ + +void stack_init( + register vm_offset_t stack) +{ + if (stack_check_usage) { + int i; + + for (i = 0; i < KERNEL_STACK_SIZE/sizeof(unsigned int); i++) + ((unsigned int *)stack)[i] = STACK_MARKER; + } +} + +/* + * Machine-dependent code should call stack_finalize + * before releasing the stack memory. 
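[Editorial note] stack_init above paints a fresh kernel stack with STACK_MARKER, and stack_usage later scans from the low end for the first word that no longer holds the marker; since kernel stacks grow downward, that index gives the deepest extent the stack ever reached. The following is a small user-space model of the same high-water-mark trick; STACK_WORDS and MARKER are illustrative constants, not the kernel's.

#include <stdio.h>
#include <string.h>

#define STACK_WORDS 1024
#define MARKER      0xdeadbeefU

static unsigned stack[STACK_WORDS];

/* Fill a simulated stack with a marker pattern. */
static void stack_fill(void)
{
    int i;
    for (i = 0; i < STACK_WORDS; i++)
        stack[i] = MARKER;
}

/* Measure the high-water mark the way stack_usage does: scan from the
 * bottom for the first word that no longer holds the marker. */
static size_t stack_high_water(void)
{
    int i;
    for (i = 0; i < STACK_WORDS; i++)
        if (stack[i] != MARKER)
            break;
    return (STACK_WORDS - i) * sizeof(unsigned);
}

int main(void)
{
    stack_fill();
    /* Pretend the deepest call chain touched the top 200 words. */
    memset(&stack[STACK_WORDS - 200], 0, 200 * sizeof(unsigned));
    printf("high water: %zu bytes\n", stack_high_water());
    return 0;
}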
+ */ + +void stack_finalize( + register vm_offset_t stack) +{ + if (stack_check_usage) { + vm_size_t used = stack_usage(stack); + + simple_lock(&stack_usage_lock); + if (used > stack_max_usage) + stack_max_usage = used; + simple_unlock(&stack_usage_lock); + } +} + +#ifndef MACHINE_STACK +/* + * stack_statistics: + * + * Return statistics on cached kernel stacks. + * *maxusagep must be initialized by the caller. + */ + +void stack_statistics( + natural_t *totalp, + vm_size_t *maxusagep) +{ + spl_t s; + + s = splsched(); + stack_lock(); + if (stack_check_usage) { + vm_offset_t stack; + + /* + * This is pretty expensive to do at splsched, + * but it only happens when someone makes + * a debugging call, so it should be OK. + */ + + for (stack = stack_free_list; stack != 0; + stack = stack_next(stack)) { + vm_size_t usage = stack_usage(stack); + + if (usage > *maxusagep) + *maxusagep = usage; + } + } + + *totalp = stack_free_count; + stack_unlock(); + (void) splx(s); +} +#endif /* MACHINE_STACK */ + +kern_return_t host_stack_usage( + host_t host, + vm_size_t *reservedp, + unsigned int *totalp, + vm_size_t *spacep, + vm_size_t *residentp, + vm_size_t *maxusagep, + vm_offset_t *maxstackp) +{ + unsigned int total; + vm_size_t maxusage; + + if (host == HOST_NULL) + return KERN_INVALID_HOST; + + simple_lock(&stack_usage_lock); + maxusage = stack_max_usage; + simple_unlock(&stack_usage_lock); + + stack_statistics(&total, &maxusage); + + *reservedp = 0; + *totalp = total; + *spacep = *residentp = total * round_page(KERNEL_STACK_SIZE); + *maxusagep = maxusage; + *maxstackp = 0; + return KERN_SUCCESS; +} + +kern_return_t processor_set_stack_usage( + processor_set_t pset, + unsigned int *totalp, + vm_size_t *spacep, + vm_size_t *residentp, + vm_size_t *maxusagep, + vm_offset_t *maxstackp) +{ + unsigned int total; + vm_size_t maxusage; + vm_offset_t maxstack; + + register thread_t *threads; + register thread_t tmp_thread; + + unsigned int actual; /* this many things */ + unsigned int i; + + vm_size_t size, size_needed; + vm_offset_t addr; + + if (pset == PROCESSOR_SET_NULL) + return KERN_INVALID_ARGUMENT; + + size = 0; addr = 0; + + for (;;) { + pset_lock(pset); + if (!pset->active) { + pset_unlock(pset); + return KERN_INVALID_ARGUMENT; + } + + actual = pset->thread_count; + + /* do we have the memory we need? */ + + size_needed = actual * sizeof(thread_t); + if (size_needed <= size) + break; + + /* unlock the pset and allocate more memory */ + pset_unlock(pset); + + if (size != 0) + kfree(addr, size); + + assert(size_needed > 0); + size = size_needed; + + addr = kalloc(size); + if (addr == 0) + return KERN_RESOURCE_SHORTAGE; + } + + /* OK, have memory and the processor_set is locked & active */ + + threads = (thread_t *) addr; + for (i = 0, tmp_thread = (thread_t) queue_first(&pset->threads); + i < actual; + i++, + tmp_thread = (thread_t) queue_next(&tmp_thread->pset_threads)) { + thread_reference(tmp_thread); + threads[i] = tmp_thread; + } + assert(queue_end(&pset->threads, (queue_entry_t) tmp_thread)); + + /* can unlock processor set now that we have the thread refs */ + pset_unlock(pset); + + /* calculate maxusage and free thread references */ + + total = 0; + maxusage = 0; + maxstack = 0; + for (i = 0; i < actual; i++) { + thread_t thread = threads[i]; + vm_offset_t stack = 0; + + /* + * thread->kernel_stack is only accurate if the + * thread isn't swapped and is not executing. + * + * Of course, we don't have the appropriate locks + * for these shenanigans. 
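[Editorial note] processor_set_stack_usage above cannot allocate memory while holding the pset lock, so it loops: lock, read the thread count, and if the buffer is too small, unlock, free, allocate a larger one, and try again, only proceeding once the count still fits. The sketch below models that negotiation in user space with malloc and a pthread mutex; the names are illustrative.

#include <pthread.h>
#include <stdlib.h>

/* The item count can only be trusted while the lock is held, but allocation
 * must happen with the lock dropped, hence the retry loop. */
struct set {
    pthread_mutex_t lock;
    int count;                  /* may change whenever the lock is not held */
};

static int *snapshot(struct set *s, int *n_out)
{
    int *buf = NULL;
    size_t size = 0;

    for (;;) {
        size_t needed;

        pthread_mutex_lock(&s->lock);
        needed = (size_t)s->count * sizeof(int);
        if (needed <= size)
            break;              /* buffer is large enough; keep the lock */

        pthread_mutex_unlock(&s->lock);
        free(buf);
        size = needed;
        buf = malloc(size);
        if (buf == NULL)
            return NULL;        /* KERN_RESOURCE_SHORTAGE analogue */
    }

    /* ... copy s->count items into buf here, still under the lock ... */
    *n_out = s->count;
    pthread_mutex_unlock(&s->lock);
    return buf;
}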
+ */ + + if ((thread->state & TH_SWAPPED) == 0) { + int cpu; + + stack = thread->kernel_stack; + + for (cpu = 0; cpu < NCPUS; cpu++) + if (active_threads[cpu] == thread) { + stack = active_stacks[cpu]; + break; + } + } + + if (stack != 0) { + total++; + + if (stack_check_usage) { + vm_size_t usage = stack_usage(stack); + + if (usage > maxusage) { + maxusage = usage; + maxstack = (vm_offset_t) thread; + } + } + } + + thread_deallocate(thread); + } + + if (size != 0) + kfree(addr, size); + + *totalp = total; + *residentp = *spacep = total * round_page(KERNEL_STACK_SIZE); + *maxusagep = maxusage; + *maxstackp = maxstack; + return KERN_SUCCESS; +} + +/* + * Useful in the debugger: + */ +void +thread_stats(void) +{ + register thread_t thread; + int total = 0, rpcreply = 0; + + queue_iterate(&default_pset.threads, thread, thread_t, pset_threads) { + total++; + if (thread->ith_rpc_reply != IP_NULL) + rpcreply++; + } + + printf("%d total threads.\n", total); + printf("%d using rpc_reply.\n", rpcreply); +} +#endif /* MACH_DEBUG */ diff --git a/kern/thread.h b/kern/thread.h new file mode 100644 index 0000000..07b7463 --- /dev/null +++ b/kern/thread.h @@ -0,0 +1,371 @@ +/* + * Mach Operating System + * Copyright (c) 1993-1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: thread.h + * Author: Avadis Tevanian, Jr. + * + * This file contains the structure definitions for threads. + * + */ + +#ifndef _KERN_THREAD_H_ +#define _KERN_THREAD_H_ + +#include <mach_ipc_compat.h> +#include <hw_footprint.h> +#include <mach_fixpri.h> +#include <mach_host.h> +#include <net_atm.h> + +#include <mach/boolean.h> +#include <mach/thread_info.h> +#include <mach/thread_status.h> +#include <mach/machine/vm_types.h> +#include <mach/message.h> +#include <mach/port.h> +#include <mach/vm_prot.h> +#include <kern/ast.h> +#include <kern/cpu_number.h> +#include <kern/queue.h> +#include <kern/pc_sample.h> +#include <kern/processor.h> +#include <kern/sched_prim.h> /* event_t, continuation_t */ +#include <kern/time_out.h> +#include <kern/timer.h> +#include <kern/lock.h> +#include <kern/sched.h> +#include <kern/task.h> /* for current_space(), current_map() */ +#include <machine/thread.h> +#include <ipc/ipc_kmsg_queue.h> + +struct thread { + /* Run queues */ + queue_chain_t links; /* current run queue links */ + run_queue_t runq; /* run queue p is on SEE BELOW */ +/* + * NOTE: The runq field in the thread structure has an unusual + * locking protocol. If its value is RUN_QUEUE_NULL, then it is + * locked by the thread_lock, but if its value is something else + * (i.e. 
a run_queue) then it is locked by that run_queue's lock. + */ + + /* Task information */ + task_t task; /* Task to which I belong */ + queue_chain_t thread_list; /* list of threads in task */ + + /* Thread bookkeeping */ + queue_chain_t pset_threads; /* list of all threads in proc set*/ + + /* Self-preservation */ + decl_simple_lock_data(,lock) + int ref_count; /* number of references to me */ + + /* Hardware state */ + pcb_t pcb; /* hardware pcb & machine state */ + vm_offset_t kernel_stack; /* accurate only if the thread is + not swapped and not executing */ + vm_offset_t stack_privilege;/* reserved kernel stack */ + + /* Swapping information */ + void (*swap_func)(); /* start here after swapin */ + + /* Blocking information */ + event_t wait_event; /* event we are waiting on */ + int suspend_count; /* internal use only */ + kern_return_t wait_result; /* outcome of wait - + may be examined by this thread + WITHOUT locking */ + boolean_t wake_active; /* someone is waiting for this + thread to become suspended */ + int state; /* Thread state: */ +/* + * Thread states [bits or'ed] + */ +#define TH_WAIT 0x01 /* thread is queued for waiting */ +#define TH_SUSP 0x02 /* thread has been asked to stop */ +#define TH_RUN 0x04 /* thread is running or on runq */ +#define TH_UNINT 0x08 /* thread is waiting uninteruptibly */ +#define TH_HALTED 0x10 /* thread is halted at clean point ? */ + +#define TH_IDLE 0x80 /* thread is an idle thread */ + +#define TH_SCHED_STATE (TH_WAIT|TH_SUSP|TH_RUN|TH_UNINT) + +#define TH_SWAPPED 0x0100 /* thread has no kernel stack */ +#define TH_SW_COMING_IN 0x0200 /* thread is waiting for kernel stack */ + +#define TH_SWAP_STATE (TH_SWAPPED | TH_SW_COMING_IN) + + /* Scheduling information */ + int priority; /* thread's priority */ + int max_priority; /* maximum priority */ + int sched_pri; /* scheduled (computed) priority */ +#if MACH_FIXPRI + int sched_data; /* for use by policy */ + int policy; /* scheduling policy */ +#endif /* MACH_FIXPRI */ + int depress_priority; /* depressed from this priority */ + unsigned int cpu_usage; /* exp. decaying cpu usage [%cpu] */ + unsigned int sched_usage; /* load-weighted cpu usage [sched] */ + unsigned int sched_stamp; /* last time priority was updated */ + + /* VM global variables */ + + vm_offset_t recover; /* page fault recovery (copyin/out) */ + boolean_t vm_privilege; /* Can use reserved memory? */ + + /* User-visible scheduling state */ + int user_stop_count; /* outstanding stops */ + + /* IPC data structures */ + struct thread *ith_next, *ith_prev; + mach_msg_return_t ith_state; + union { + mach_msg_size_t msize; /* max size for recvd msg */ + struct ipc_kmsg *kmsg; /* received message */ + } data; + mach_port_seqno_t ith_seqno; /* seqno of recvd message */ + + /* This queue is used only when destroying messages: + it prevents nasty recursion problems when destroying one message + causes other messages to be destroyed. + This queue should always be empty under normal circumstances. + See ipc_kmsg_destroy() for more details. 
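[Editorial note] The state field above packs scheduling and swap state into or'ed bits, and code elsewhere in this change tests combinations of them; thread_collect_scan, for instance, selects threads whose state satisfies (state & (TH_RUN|TH_SWAPPED)) == TH_SWAPPED, meaning swapped out and not runnable. A tiny stand-alone illustration, reusing the bit values defined above:

#include <stdio.h>

/* Subset of the state bits from thread.h, reproduced for illustration. */
#define TH_WAIT     0x01
#define TH_SUSP     0x02
#define TH_RUN      0x04
#define TH_SWAPPED  0x0100

/* "Swapped out and not runnable": the predicate thread_collect_scan uses
 * to pick threads whose resources may be reclaimed. */
static int collectable(int state)
{
    return (state & (TH_RUN | TH_SWAPPED)) == TH_SWAPPED;
}

int main(void)
{
    printf("%d\n", collectable(TH_WAIT | TH_SWAPPED));   /* 1: idle and swapped */
    printf("%d\n", collectable(TH_RUN  | TH_SWAPPED));   /* 0: still runnable   */
    printf("%d\n", collectable(TH_WAIT));                /* 0: still has a stack */
    return 0;
}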
*/ + struct ipc_kmsg_queue ith_messages; + + decl_simple_lock_data(, ith_lock_data) + struct ipc_port *ith_self; /* not a right, doesn't hold ref */ + struct ipc_port *ith_sself; /* a send right */ + struct ipc_port *ith_exception; /* a send right */ +#if MACH_IPC_COMPAT + struct ipc_port *ith_reply; /* a send right */ +#endif /* MACH_IPC_COMPAT */ + + mach_port_t ith_mig_reply; /* reply port for mig */ + struct ipc_port *ith_rpc_reply; /* reply port for kernel RPCs */ + + /* State saved when thread's stack is discarded */ + union { + struct { + mach_msg_header_t *msg; + mach_msg_option_t option; + mach_msg_size_t rcv_size; + mach_msg_timeout_t timeout; + mach_port_t notify; + struct ipc_object *object; + struct ipc_mqueue *mqueue; + } receive; + struct { + struct ipc_port *port; + int exc; + int code; + int subcode; + } exception; + void *other; /* catch-all for other state */ + } saved; + + /* Timing data structures */ + timer_data_t user_timer; /* user mode timer */ + timer_data_t system_timer; /* system mode timer */ + timer_save_data_t user_timer_save; /* saved user timer value */ + timer_save_data_t system_timer_save; /* saved sys timer val. */ + unsigned int cpu_delta; /* cpu usage since last update */ + unsigned int sched_delta; /* weighted cpu usage since update */ + + /* Time-outs */ + timer_elt_data_t timer; /* timer for thread */ + timer_elt_data_t depress_timer; /* timer for priority depression */ + + /* Ast/Halt data structures */ + boolean_t active; /* how alive is the thread */ + int ast; /* ast's needed. See ast.h */ + + /* Processor data structures */ + processor_set_t processor_set; /* assigned processor set */ + processor_t bound_processor; /* bound to processor ?*/ + + sample_control_t pc_sample; + +#if MACH_HOST + boolean_t may_assign; /* may assignment change? 
*/ + boolean_t assign_active; /* someone waiting for may_assign */ +#endif /* MACH_HOST */ + +#if NCPUS > 1 + processor_t last_processor; /* processor this last ran on */ +#endif /* NCPUS > 1 */ + +#if NET_ATM + nw_ep_owned_t nw_ep_waited; +#endif /* NET_ATM */ +}; + +/* typedef of thread_t is in kern/kern_types.h */ +typedef struct thread_shuttle *thread_shuttle_t; +#define THREAD_NULL ((thread_t) 0) +#define THREAD_SHUTTLE_NULL ((thread_shuttle_t)0) + +#define ith_msize data.msize +#define ith_kmsg data.kmsg +#define ith_wait_result wait_result + +#define ith_msg saved.receive.msg +#define ith_option saved.receive.option +#define ith_rcv_size saved.receive.rcv_size +#define ith_timeout saved.receive.timeout +#define ith_notify saved.receive.notify +#define ith_object saved.receive.object +#define ith_mqueue saved.receive.mqueue + +#define ith_port saved.exception.port +#define ith_exc saved.exception.exc +#define ith_exc_code saved.exception.code +#define ith_exc_subcode saved.exception.subcode + +#define ith_other saved.other + +#ifndef _KERN_KERN_TYPES_H_ +typedef struct thread *thread_t; + +#define THREAD_NULL ((thread_t) 0) + +typedef mach_port_t *thread_array_t; +#endif /* _KERN_KERN_TYPES_H_ */ + + +extern thread_t active_threads[NCPUS]; /* active threads */ +extern vm_offset_t active_stacks[NCPUS]; /* active kernel stacks */ + +#ifdef KERNEL +/* + * User routines + */ + +extern kern_return_t thread_create( + task_t parent_task, + thread_t *child_thread); +extern kern_return_t thread_terminate( + thread_t thread); +extern kern_return_t thread_suspend( + thread_t thread); +extern kern_return_t thread_resume( + thread_t thread); +extern kern_return_t thread_abort( + thread_t thread); +extern kern_return_t thread_get_state( + thread_t thread, + int flavor, + thread_state_t old_state, + natural_t *old_state_count); +extern kern_return_t thread_set_state( + thread_t thread, + int flavor, + thread_state_t new_state, + natural_t new_state_count); +extern kern_return_t thread_get_special_port( + thread_t thread, + int which, + struct ipc_port **portp); +extern kern_return_t thread_set_special_port( + thread_t thread, + int which, + struct ipc_port *port); +extern kern_return_t thread_info( + thread_t thread, + int flavor, + thread_info_t thread_info_out, + natural_t *thread_info_count); +extern kern_return_t thread_assign( + thread_t thread, + processor_set_t new_pset); +extern kern_return_t thread_assign_default( + thread_t thread); +#endif + +/* + * Kernel-only routines + */ + +extern void thread_init(void); +extern void thread_reference(thread_t); +extern void thread_deallocate(thread_t); +extern void thread_hold(thread_t); +extern kern_return_t thread_dowait( + thread_t thread, + boolean_t must_halt); +extern void thread_release(thread_t); +extern kern_return_t thread_halt( + thread_t thread, + boolean_t must_halt); +extern void thread_halt_self(void); +extern void thread_force_terminate(thread_t); +extern void thread_set_own_priority( + int priority); +extern thread_t kernel_thread( + task_t task, + void (*start)(void), + void * arg); + +extern void reaper_thread(void); + +#if MACH_HOST +extern void thread_freeze( + thread_t thread); +extern void thread_doassign( + thread_t thread, + processor_set_t new_pset, + boolean_t release_freeze); +extern void thread_unfreeze( + thread_t thread); +#endif /* MACH_HOST */ + +/* + * Macro-defined routines + */ + +#define thread_pcb(th) ((th)->pcb) + +#define thread_lock(th) simple_lock(&(th)->lock) +#define thread_unlock(th) 
simple_unlock(&(th)->lock) + +#define thread_should_halt(thread) \ + ((thread)->ast & (AST_HALT|AST_TERMINATE)) + +/* + * Machine specific implementations of the current thread macro + * designate this by defining CURRENT_THREAD. + */ +#ifndef CURRENT_THREAD +#define current_thread() (active_threads[cpu_number()]) +#endif /* CURRENT_THREAD */ + +#define current_stack() (active_stacks[cpu_number()]) + +#define current_task() (current_thread()->task) +#define current_space() (current_task()->itk_space) +#define current_map() (current_task()->map) + +#endif /* _KERN_THREAD_H_ */ diff --git a/kern/thread_swap.c b/kern/thread_swap.c new file mode 100644 index 0000000..173b6ae --- /dev/null +++ b/kern/thread_swap.c @@ -0,0 +1,190 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * + * File: kern/thread_swap.c + * Author: Avadis Tevanian, Jr. + * Date: 1987 + * + * Mach thread swapper: + * Find idle threads to swap, freeing up kernel stack resources + * at the expense of allowing them to execute. + * + * Swap in threads that need to be run. This is done here + * by the swapper thread since it cannot be done (in general) + * when the kernel tries to place a thread on a run queue. + * + * Note: The act of swapping a thread in Mach does not mean that + * its memory gets forcibly swapped to secondary storage. The memory + * for the task corresponding to a swapped thread is paged out + * through the normal paging mechanism. + * + */ + +#include <ipc/ipc_kmsg.h> +#include <kern/counters.h> +#include <kern/thread.h> +#include <kern/lock.h> +#include <vm/vm_map.h> +#include <vm/vm_kern.h> +#include <mach/vm_param.h> +#include <kern/sched_prim.h> +#include <kern/processor.h> +#include <kern/thread_swap.h> +#include <machine/machspl.h> /* for splsched */ + + + +queue_head_t swapin_queue; +decl_simple_lock_data(, swapper_lock_data) + +#define swapper_lock() simple_lock(&swapper_lock_data) +#define swapper_unlock() simple_unlock(&swapper_lock_data) + +/* + * swapper_init: [exported] + * + * Initialize the swapper module. + */ +void swapper_init() +{ + queue_init(&swapin_queue); + simple_lock_init(&swapper_lock_data); +} + +/* + * thread_swapin: [exported] + * + * Place the specified thread in the list of threads to swapin. It + * is assumed that the thread is locked, therefore we are at splsched. + * + * We don't bother with stack_alloc_try to optimize swapin; + * our callers have already tried that route. 
+ */ + +void thread_swapin(thread) + thread_t thread; +{ + switch (thread->state & TH_SWAP_STATE) { + case TH_SWAPPED: + /* + * Swapped out - queue for swapin thread. + */ + thread->state = (thread->state & ~TH_SWAP_STATE) + | TH_SW_COMING_IN; + swapper_lock(); + enqueue_tail(&swapin_queue, (queue_entry_t) thread); + swapper_unlock(); + thread_wakeup((event_t) &swapin_queue); + break; + + case TH_SW_COMING_IN: + /* + * Already queued for swapin thread, or being + * swapped in. + */ + break; + + default: + /* + * Already swapped in. + */ + panic("thread_swapin"); + } +} + +/* + * thread_doswapin: + * + * Swapin the specified thread, if it should be runnable, then put + * it on a run queue. No locks should be held on entry, as it is + * likely that this routine will sleep (waiting for stack allocation). + */ +void thread_doswapin(thread) + register thread_t thread; +{ + spl_t s; + + /* + * Allocate the kernel stack. + */ + + stack_alloc(thread, thread_continue); + + /* + * Place on run queue. + */ + + s = splsched(); + thread_lock(thread); + thread->state &= ~(TH_SWAPPED | TH_SW_COMING_IN); + if (thread->state & TH_RUN) + thread_setrun(thread, TRUE); + thread_unlock(thread); + (void) splx(s); +} + +/* + * swapin_thread: [exported] + * + * This procedure executes as a kernel thread. Threads that need to + * be swapped in are swapped in by this thread. + */ +void swapin_thread_continue() +{ + for (;;) { + register thread_t thread; + spl_t s; + + s = splsched(); + swapper_lock(); + + while ((thread = (thread_t) dequeue_head(&swapin_queue)) + != THREAD_NULL) { + swapper_unlock(); + (void) splx(s); + + thread_doswapin(thread); /* may block */ + + s = splsched(); + swapper_lock(); + } + + assert_wait((event_t) &swapin_queue, FALSE); + swapper_unlock(); + (void) splx(s); + counter(c_swapin_thread_block++); + thread_block(swapin_thread_continue); + } +} + +void swapin_thread() +{ + stack_privilege(current_thread()); + + swapin_thread_continue(); + /*NOTREACHED*/ +} diff --git a/kern/thread_swap.h b/kern/thread_swap.h new file mode 100644 index 0000000..e390ac4 --- /dev/null +++ b/kern/thread_swap.h @@ -0,0 +1,44 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: kern/thread_swap.h + * + * Declarations of thread swapping routines. 
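[Editorial note] thread_swapin above queues the thread and wakes the swapper; swapin_thread_continue drains the queue, dropping the lock around the part that may block (stack allocation). The fragment below models that producer/worker handoff in user space with a condition variable; it uses a LIFO push where the kernel uses enqueue_tail, and the names are illustrative.

#include <pthread.h>

struct work { struct work *next; };

static struct work *queue_head;
static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  queue_wake = PTHREAD_COND_INITIALIZER;

/* Plays the role of thread_swapin: enqueue under the lock, wake the worker. */
void swapin_request(struct work *w)
{
    pthread_mutex_lock(&queue_lock);
    w->next = queue_head;
    queue_head = w;
    pthread_mutex_unlock(&queue_lock);
    pthread_cond_signal(&queue_wake);
}

/* Plays the role of swapin_thread_continue: drain the queue, dropping the
 * lock around the work item since it may block. */
void *swapin_worker(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&queue_lock);
    for (;;) {
        while (queue_head != NULL) {
            struct work *w = queue_head;
            queue_head = w->next;
            pthread_mutex_unlock(&queue_lock);
            /* ... do the blocking part (thread_doswapin) with the lock dropped ... */
            (void)w;
            pthread_mutex_lock(&queue_lock);
        }
        pthread_cond_wait(&queue_wake, &queue_lock);
    }
}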
+ */ + +#ifndef _KERN_THREAD_SWAP_H_ +#define _KERN_THREAD_SWAP_H_ + +/* + * exported routines + */ +extern void swapper_init(); +extern void thread_swapin( /* thread_t thread */ ); +extern void thread_doswapin( /* thread_t thread */ ); +extern void swapin_thread(); +extern void thread_swapout( /* thread_t thread */ ); + +#endif _KERN_THREAD_SWAP_H_ diff --git a/kern/time_out.h b/kern/time_out.h new file mode 100644 index 0000000..4dff7df --- /dev/null +++ b/kern/time_out.h @@ -0,0 +1,83 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#ifndef _KERN_TIME_OUT_H_ +#define _KERN_TIME_OUT_H_ + +/* + * Mach time-out and time-of-day facility. + */ + +#include <mach/boolean.h> +#include <kern/lock.h> +#include <kern/queue.h> +#include <kern/zalloc.h> + +/* + * Timers in kernel: + */ +extern unsigned long elapsed_ticks; /* number of ticks elapsed since bootup */ +extern int hz; /* number of ticks per second */ +extern int tick; /* number of usec per tick */ + +/* + * Time-out element. + */ +struct timer_elt { + queue_chain_t chain; /* chain in order of expiration */ + int (*fcn)(); /* function to call */ + char * param; /* with this parameter */ + unsigned long ticks; /* expiration time, in ticks */ + int set; /* unset | set | allocated */ +}; +#define TELT_UNSET 0 /* timer not set */ +#define TELT_SET 1 /* timer set */ +#define TELT_ALLOC 2 /* timer allocated from pool */ + +typedef struct timer_elt timer_elt_data_t; +typedef struct timer_elt *timer_elt_t; + +/* for 'private' timer elements */ +extern void set_timeout(); +extern boolean_t reset_timeout(); + +/* for public timer elements */ +extern void timeout(); +extern boolean_t untimeout(); + +#define set_timeout_setup(telt,fcn,param,interval) \ + ((telt)->fcn = (fcn), \ + (telt)->param = (param), \ + (telt)->private = TRUE, \ + set_timeout((telt), (interval))) + +#define reset_timeout_check(t) \ + MACRO_BEGIN \ + if ((t)->set) \ + reset_timeout((t)); \ + MACRO_END + +#endif _KERN_TIME_OUT_H_ diff --git a/kern/time_stamp.c b/kern/time_stamp.c new file mode 100644 index 0000000..6e22155 --- /dev/null +++ b/kern/time_stamp.c @@ -0,0 +1,74 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. 
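[Editorial note] time_out.h above declares timer_elt with a chain kept "in order of expiration" and an absolute expiration time in ticks, so the clock interrupt only has to look at the head of the list. A minimal user-space sketch of such a callout list follows; callout_set and callout_tick are illustrative names standing in for set_timeout and the clock-tick service.

#include <stdlib.h>

struct callout {
    struct callout *next;
    unsigned long ticks;        /* absolute expiration time, in ticks */
    void (*fcn)(void *);
    void *param;
};

static struct callout *callouts;
static unsigned long now;       /* elapsed_ticks analogue */

/* Insert in expiration order so the list head is always the next to fire. */
void callout_set(struct callout *c, unsigned long interval,
                 void (*fcn)(void *), void *param)
{
    struct callout **pp;

    c->ticks = now + interval;
    c->fcn = fcn;
    c->param = param;
    for (pp = &callouts; *pp != NULL && (*pp)->ticks <= c->ticks;
         pp = &(*pp)->next)
        ;
    c->next = *pp;
    *pp = c;
}

/* Called once per clock tick: fire everything whose time has come. */
void callout_tick(void)
{
    now++;
    while (callouts != NULL && callouts->ticks <= now) {
        struct callout *c = callouts;
        callouts = c->next;
        c->fcn(c->param);
    }
}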
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#include <mach/std_types.h> +#include <sys/time.h> +#include <kern/time_stamp.h> + +/* + * ts.c - kern_timestamp system call. + */ +#ifdef multimax +#include <mmax/timer.h> +#endif multimax + + + +kern_return_t +kern_timestamp(tsp) +struct tsval *tsp; +{ +#ifdef multimax + struct tsval temp; + temp.low_val = FRcounter; + temp.high_val = 0; +#else multimax +/* + temp.low_val = 0; + temp.high_val = ts_tick_count; +*/ + time_value_t temp; + temp = time; +#endif multimax + + if (copyout((char *)&temp, + (char *)tsp, + sizeof(struct tsval)) != KERN_SUCCESS) + return(KERN_INVALID_ADDRESS); + return(KERN_SUCCESS); +} + +/* + * Initialization procedure. + */ + +void timestamp_init() +{ +#ifdef multimax +#else multimax + ts_tick_count = 0; +#endif multimax +} diff --git a/kern/time_stamp.h b/kern/time_stamp.h new file mode 100644 index 0000000..81711f6 --- /dev/null +++ b/kern/time_stamp.h @@ -0,0 +1,65 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#ifndef _KERN_TIME_STAMP_H_ +#define _KERN_TIME_STAMP_H_ + +#include <machine/time_stamp.h> +/* + * time_stamp.h -- definitions for low-overhead timestamps. + */ + +struct tsval { + unsigned low_val; /* least significant word */ + unsigned high_val; /* most significant word */ +}; + +/* + * Format definitions. + */ + +#ifndef TS_FORMAT +/* + * Default case - Just return a tick count for machines that + * don't support or haven't implemented this. Assume 100Hz ticks. + * + * low_val - Always 0. + * high_val - tick count. 
+ */ +#define TS_FORMAT 1 + +#if KERNEL +unsigned ts_tick_count; +#endif KERNEL +#endif TS_FORMAT + +/* + * List of all format definitions for convert_ts_to_tv. + */ + +#define TS_FORMAT_DEFAULT 1 +#define TS_FORMAT_MMAX 2 +#endif _KERN_TIME_STAMP_H_ diff --git a/kern/timer.c b/kern/timer.c new file mode 100644 index 0000000..57772ee --- /dev/null +++ b/kern/timer.c @@ -0,0 +1,525 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#include <cpus.h> +#include <stat_time.h> + +#include <mach/kern_return.h> +#include <mach/port.h> +#include <kern/queue.h> +#include <kern/thread.h> +#include <mach/time_value.h> +#include <kern/timer.h> +#include <kern/cpu_number.h> + +#include <kern/assert.h> +#include <kern/macro_help.h> + + + +timer_t current_timer[NCPUS]; +timer_data_t kernel_timer[NCPUS]; + +void timer_init(); /* forward */ + +/* + * init_timers initializes all non-thread timers and puts the + * service routine on the callout queue. All timers must be + * serviced by the callout routine once an hour. + */ +void init_timers() +{ + register int i; + register timer_t this_timer; + + /* + * Initialize all the kernel timers and start the one + * for this cpu (master) slaves start theirs later. + */ + this_timer = &kernel_timer[0]; + for ( i=0 ; i<NCPUS ; i++, this_timer++) { + timer_init(this_timer); + current_timer[i] = (timer_t) 0; + } + + start_timer(&kernel_timer[cpu_number()]); +} + +/* + * timer_init initializes a single timer. + */ +void timer_init(this_timer) +register +timer_t this_timer; +{ + this_timer->low_bits = 0; + this_timer->high_bits = 0; + this_timer->tstamp = 0; + this_timer->high_bits_check = 0; +} + +#if STAT_TIME +#else STAT_TIME + +#ifdef MACHINE_TIMER_ROUTINES + +/* + * Machine-dependent code implements the timer routines. + */ + +#else /* MACHINE_TIMER_ROUTINES */ + +/* + * start_timer starts the given timer for this cpu. It is called + * exactly once for each cpu during the boot sequence. + */ +void +start_timer(timer) +timer_t timer; +{ + timer->tstamp = get_timestamp(); + current_timer[cpu_number()] = timer; +} + +/* + * time_trap_uentry does trap entry timing. Caller must lock out + * interrupts and take a timestamp. ts is a timestamp taken after + * interrupts were locked out. Must only be called if trap was + * from user mode. + */ +void +time_trap_uentry(ts) +unsigned ts; +{ + int elapsed; + int mycpu; + timer_t mytimer; + + /* + * Calculate elapsed time. 
+ */ + mycpu = cpu_number(); + mytimer = current_timer[mycpu]; + elapsed = ts - mytimer->tstamp; +#ifdef TIMER_MAX + if (elapsed < 0) elapsed += TIMER_MAX; +#endif TIMER_MAX + + /* + * Update current timer. + */ + mytimer->low_bits += elapsed; + mytimer->tstamp = 0; + + if (mytimer->low_bits & TIMER_LOW_FULL) { + timer_normalize(mytimer); + } + + /* + * Record new timer. + */ + mytimer = &(active_threads[mycpu]->system_timer); + current_timer[mycpu] = mytimer; + mytimer->tstamp = ts; +} + +/* + * time_trap_uexit does trap exit timing. Caller must lock out + * interrupts and take a timestamp. ts is a timestamp taken after + * interrupts were locked out. Must only be called if returning to + * user mode. + */ +void +time_trap_uexit(ts) +{ + int elapsed; + int mycpu; + timer_t mytimer; + + /* + * Calculate elapsed time. + */ + mycpu = cpu_number(); + mytimer = current_timer[mycpu]; + elapsed = ts - mytimer->tstamp; +#ifdef TIMER_MAX + if (elapsed < 0) elapsed += TIMER_MAX; +#endif TIMER_MAX + + /* + * Update current timer. + */ + mytimer->low_bits += elapsed; + mytimer->tstamp = 0; + + if (mytimer->low_bits & TIMER_LOW_FULL) { + timer_normalize(mytimer); /* SYSTEMMODE */ + } + + mytimer = &(active_threads[mycpu]->user_timer); + + /* + * Record new timer. + */ + current_timer[mycpu] = mytimer; + mytimer->tstamp = ts; +} + +/* + * time_int_entry does interrupt entry timing. Caller must lock out + * interrupts and take a timestamp. ts is a timestamp taken after + * interrupts were locked out. new_timer is the new timer to + * switch to. This routine returns the currently running timer, + * which MUST be pushed onto the stack by the caller, or otherwise + * saved for time_int_exit. + */ +timer_t +time_int_entry(ts,new_timer) +unsigned ts; +timer_t new_timer; +{ + int elapsed; + int mycpu; + timer_t mytimer; + + /* + * Calculate elapsed time. + */ + mycpu = cpu_number(); + mytimer = current_timer[mycpu]; + + elapsed = ts - mytimer->tstamp; +#ifdef TIMER_MAX + if (elapsed < 0) elapsed += TIMER_MAX; +#endif TIMER_MAX + + /* + * Update current timer. + */ + mytimer->low_bits += elapsed; + mytimer->tstamp = 0; + + /* + * Switch to new timer, and save old one on stack. + */ + new_timer->tstamp = ts; + current_timer[mycpu] = new_timer; + return(mytimer); +} + +/* + * time_int_exit does interrupt exit timing. Caller must lock out + * interrupts and take a timestamp. ts is a timestamp taken after + * interrupts were locked out. old_timer is the timer value pushed + * onto the stack or otherwise saved after time_int_entry returned + * it. + */ +void +time_int_exit(ts, old_timer) +unsigned ts; +timer_t old_timer; +{ + int elapsed; + int mycpu; + timer_t mytimer; + + /* + * Calculate elapsed time. + */ + mycpu = cpu_number(); + mytimer = current_timer[mycpu]; + elapsed = ts - mytimer->tstamp; +#ifdef TIMER_MAX + if (elapsed < 0) elapsed += TIMER_MAX; +#endif TIMER_MAX + + /* + * Update current timer. + */ + mytimer->low_bits += elapsed; + mytimer->tstamp = 0; + + /* + * If normalization requested, do it. + */ + if (mytimer->low_bits & TIMER_LOW_FULL) { + timer_normalize(mytimer); + } + if (old_timer->low_bits & TIMER_LOW_FULL) { + timer_normalize(old_timer); + } + + /* + * Start timer that was running before interrupt. + */ + old_timer->tstamp = ts; + current_timer[mycpu] = old_timer; +} + +/* + * timer_switch switches to a new timer. The machine + * dependent routine/macro get_timestamp must return a timestamp. + * Caller must lock out interrupts. 
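[Editorial note] The trap and interrupt timing routines above all follow one pattern: charge the currently running timer for the elapsed time since its timestamp, then start the next timer from the very same timestamp so no interval is lost or counted twice. A user-space model of that switch, with clock() standing in for the machine timestamp source and illustrative names throughout:

#include <time.h>

struct cpu_timer { unsigned accum; unsigned tstamp; };

static struct cpu_timer boot_timer;
static struct cpu_timer *cur = &boot_timer;

static unsigned get_stamp(void)
{
    return (unsigned)clock();           /* stand-in for get_timestamp() */
}

/* Analogue of start_timer: stamp a timer and make it current. */
void timer_start(struct cpu_timer *t)
{
    t->tstamp = get_stamp();
    cur = t;
}

/* Analogue of timer_switch / time_trap_uentry: charge the old timer,
 * hand the same timestamp to the new one. */
void timer_switch_to(struct cpu_timer *new_timer)
{
    unsigned ts = get_stamp();

    cur->accum += ts - cur->tstamp;     /* charge the timer that was running */
    cur->tstamp = 0;
    new_timer->tstamp = ts;             /* new timer starts at the same instant */
    cur = new_timer;
}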
+ */ +void +timer_switch(new_timer) +timer_t new_timer; +{ + int elapsed; + int mycpu; + timer_t mytimer; + unsigned ts; + + /* + * Calculate elapsed time. + */ + mycpu = cpu_number(); + mytimer = current_timer[mycpu]; + ts = get_timestamp(); + elapsed = ts - mytimer->tstamp; +#ifdef TIMER_MAX + if (elapsed < 0) elapsed += TIMER_MAX; +#endif TIMER_MAX + + /* + * Update current timer. + */ + mytimer->low_bits += elapsed; + mytimer->tstamp = 0; + + /* + * Normalization check + */ + if (mytimer->low_bits & TIMER_LOW_FULL) { + timer_normalize(mytimer); + } + + /* + * Record new timer. + */ + current_timer[mycpu] = new_timer; + new_timer->tstamp = ts; +} + +#endif /* MACHINE_TIMER_ROUTINES */ +#endif STAT_TIME + +/* + * timer_normalize normalizes the value of a timer. It is + * called only rarely, to make sure low_bits never overflows. + */ +void timer_normalize(timer) +register +timer_t timer; +{ + unsigned int high_increment; + + /* + * Calculate high_increment, then write high check field first + * followed by low and high. timer_grab() reads these fields in + * reverse order so if high and high check match, we know + * that the values read are ok. + */ + + high_increment = timer->low_bits/TIMER_HIGH_UNIT; + timer->high_bits_check += high_increment; + timer->low_bits %= TIMER_HIGH_UNIT; + timer->high_bits += high_increment; +} + +/* + * timer_grab() retrieves the value of a timer. + * + * Critical scheduling code uses TIMER_DELTA macro in timer.h + * (called from thread_timer_delta in sched.h). + * + * Keep coherent with db_time_grab below. + */ + +static void timer_grab(timer, save) +timer_t timer; +timer_save_t save; +{ +#if MACH_ASSERT + unsigned int passes=0; +#endif + do { + (save)->high = (timer)->high_bits; + (save)->low = (timer)->low_bits; + /* + * If the timer was normalized while we were doing this, + * the high_bits value read above and the high_bits check + * value will not match because high_bits_check is the first + * field touched by the normalization procedure, and + * high_bits is the last. + * + * Additions to timer only touch low bits and + * are therefore atomic with respect to this. + */ +#if MACH_ASSERT + passes++; + assert((passes < 10000) ? (1) : ((timer->high_bits_check = save->high), 0)); +#endif + } while ( (save)->high != (timer)->high_bits_check); +} + +/* + * + * Db_timer_grab(): used by db_thread_read_times. An nonblocking + * version of db_thread_get_times. Keep coherent with timer_grab + * above. + * + */ +void db_timer_grab(timer, save) +timer_t timer; +timer_save_t save; +{ + /* Don't worry about coherency */ + + (save)->high = (timer)->high_bits; + (save)->low = (timer)->low_bits; +} + + +/* + * timer_read reads the value of a timer into a time_value_t. If the + * timer was modified during the read, retry. The value returned + * is accurate to the last update; time accumulated by a running + * timer since its last timestamp is not included. + */ + +void +timer_read(timer, tv) +timer_t timer; +register +time_value_t *tv; +{ + timer_save_data_t temp; + + timer_grab(timer,&temp); + /* + * Normalize the result + */ +#ifdef TIMER_ADJUST + TIMER_ADJUST(&temp); +#endif TIMER_ADJUST + tv->seconds = temp.high + temp.low/1000000; + tv->microseconds = temp.low%1000000; + +} + +/* + * thread_read_times reads the user and system times from a thread. + * Time accumulated since last timestamp is not included. Should + * be called at splsched() to avoid having user and system times + * be out of step. Doesn't care if caller locked thread. 
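[Editorial note] timer_normalize above writes high_bits_check first, low_bits next, and high_bits last; timer_grab reads high_bits, then low_bits, and retries until high_bits matches high_bits_check, so a reader can never hand back a value captured in the middle of a carry. The sketch below models both sides of that protocol in user space. On the in-order machines this code targeted the plain ordered stores suffice; a modern port would need explicit memory barriers, which are omitted here.

struct acc_timer {
    volatile unsigned low_bits;
    volatile unsigned high_bits;
    volatile unsigned high_bits_check;
};

#define TIMER_HIGH_UNIT 1000000         /* microseconds per high-bits unit */

/* Writer side (timer_normalize): carry low bits into the high word. */
void timer_carry(struct acc_timer *t)
{
    unsigned inc = t->low_bits / TIMER_HIGH_UNIT;

    t->high_bits_check += inc;          /* 1: announce the carry           */
    t->low_bits %= TIMER_HIGH_UNIT;     /* 2: shrink the low word          */
    t->high_bits += inc;                /* 3: finish; now check == high    */
}

/* Reader side (timer_grab): retry until the two high fields agree. */
void timer_snapshot(const struct acc_timer *t, unsigned *hi, unsigned *lo)
{
    do {
        *hi = t->high_bits;
        *lo = t->low_bits;
    } while (*hi != t->high_bits_check);
}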
+ * + * Needs to be kept coherent with thread_read_times ahead. + */ +void thread_read_times(thread, user_time_p, system_time_p) + thread_t thread; + time_value_t *user_time_p; + time_value_t *system_time_p; +{ + timer_save_data_t temp; + register timer_t timer; + + timer = &thread->user_timer; + timer_grab(timer, &temp); + +#ifdef TIMER_ADJUST + TIMER_ADJUST(&temp); +#endif TIMER_ADJUST + user_time_p->seconds = temp.high + temp.low/1000000; + user_time_p->microseconds = temp.low % 1000000; + + timer = &thread->system_timer; + timer_grab(timer, &temp); + +#ifdef TIMER_ADJUST + TIMER_ADJUST(&temp); +#endif TIMER_ADJUST + system_time_p->seconds = temp.high + temp.low/1000000; + system_time_p->microseconds = temp.low % 1000000; +} + +/* + * Db_thread_read_times: A version of thread_read_times that + * can be called by the debugger. This version does not call + * timer_grab, which can block. Please keep it up to date with + * thread_read_times above. + * + */ +void db_thread_read_times(thread, user_time_p, system_time_p) + thread_t thread; + time_value_t *user_time_p; + time_value_t *system_time_p; +{ + timer_save_data_t temp; + register timer_t timer; + + timer = &thread->user_timer; + db_timer_grab(timer, &temp); + +#ifdef TIMER_ADJUST + TIMER_ADJUST(&temp); +#endif TIMER_ADJUST + user_time_p->seconds = temp.high + temp.low/1000000; + user_time_p->microseconds = temp.low % 1000000; + + timer = &thread->system_timer; + timer_grab(timer, &temp); + +#ifdef TIMER_ADJUST + TIMER_ADJUST(&temp); +#endif TIMER_ADJUST + system_time_p->seconds = temp.high + temp.low/1000000; + system_time_p->microseconds = temp.low % 1000000; +} + +/* + * timer_delta takes the difference of a saved timer value + * and the current one, and updates the saved value to current. + * The difference is returned as a function value. See + * TIMER_DELTA macro (timer.h) for optimization to this. + */ + +unsigned +timer_delta(timer, save) +register +timer_t timer; +timer_save_t save; +{ + timer_save_data_t new_save; + register unsigned result; + + timer_grab(timer,&new_save); + result = (new_save.high - save->high) * TIMER_HIGH_UNIT + + new_save.low - save->low; + save->high = new_save.high; + save->low = new_save.low; + return(result); +} diff --git a/kern/timer.h b/kern/timer.h new file mode 100644 index 0000000..fe60e26 --- /dev/null +++ b/kern/timer.h @@ -0,0 +1,157 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. 
+ */ + +#ifndef _KERN_TIMER_H_ +#define _KERN_TIMER_H_ + +#include <cpus.h> +#include <stat_time.h> + +#include <kern/macro_help.h> + +#if STAT_TIME +/* + * Statistical timer definitions - use microseconds in timer, seconds + * in high unit field. No adjustment needed to convert to time_value_t + * as a result. Service timers once an hour. + */ + +#define TIMER_RATE 1000000 +#define TIMER_HIGH_UNIT TIMER_RATE +#undef TIMER_ADJUST + +#else STAT_TIME +/* + * Machine dependent definitions based on hardware support. + */ + +#include <machine/timer.h> + +#endif STAT_TIME + +/* + * Definitions for accurate timers. high_bits_check is a copy of + * high_bits that allows reader to verify that values read are ok. + */ + +struct timer { + unsigned low_bits; + unsigned high_bits; + unsigned high_bits_check; + unsigned tstamp; +}; + +typedef struct timer timer_data_t; +typedef struct timer *timer_t; + +/* + * Mask to check if low_bits is in danger of overflowing + */ + +#define TIMER_LOW_FULL 0x80000000U + +/* + * Kernel timers and current timer array. [Exported] + */ + +extern timer_t current_timer[NCPUS]; +extern timer_data_t kernel_timer[NCPUS]; + +/* + * save structure for timer readings. This is used to save timer + * readings for elapsed time computations. + */ + +struct timer_save { + unsigned low; + unsigned high; +}; + +typedef struct timer_save timer_save_data_t, *timer_save_t; + +/* + * Exported kernel interface to timers + */ + +#if STAT_TIME +#define start_timer(timer) +#define timer_switch(timer) +#else STAT_TIME +extern void start_timer(); +extern void timer_switch(); +#endif STAT_TIME + +extern void timer_read(); +extern void thread_read_times(); +extern unsigned timer_delta(); + +#if STAT_TIME +/* + * Macro to bump timer values. + */ +#define timer_bump(timer, usec) \ +MACRO_BEGIN \ + (timer)->low_bits += usec; \ + if ((timer)->low_bits & TIMER_LOW_FULL) { \ + timer_normalize(timer); \ + } \ +MACRO_END + +#else STAT_TIME +/* + * Exported hardware interface to timers + */ +extern void time_trap_uentry(); +extern void time_trap_uexit(); +extern timer_t time_int_entry(); +extern void time_int_exit(); +#endif STAT_TIME + +/* + * TIMER_DELTA finds the difference between a timer and a saved value, + * and updates the saved value. Look at high_bits check field after + * reading low because that's the first written by a normalize + * operation; this isn't necessary for current usage because + * this macro is only used when the timer can't be normalized: + * thread is not running, or running thread calls it on itself at + * splsched(). + */ + +#define TIMER_DELTA(timer, save, result) \ +MACRO_BEGIN \ + register unsigned temp; \ + \ + temp = (timer).low_bits; \ + if ((save).high != (timer).high_bits_check) { \ + result += timer_delta(&(timer), &(save)); \ + } \ + else { \ + result += temp - (save).low; \ + (save).low = temp; \ + } \ +MACRO_END + +#endif _KERN_TIMER_H_ diff --git a/kern/xpr.c b/kern/xpr.c new file mode 100644 index 0000000..eb8d6be --- /dev/null +++ b/kern/xpr.c @@ -0,0 +1,192 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. 
+ * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#include <mach_kdb.h> +/* + * xpr silent tracing circular buffer. + */ +#include <kern/xpr.h> +#include <kern/lock.h> +#include "cpu_number.h" +#include <machine/machspl.h> +#include <vm/vm_kern.h> + + +/* + * After a spontaneous reboot, it is desirable to look + * at the old xpr buffer. Assuming xprbootstrap allocates + * the buffer in the same place in physical memory and + * the reboot doesn't clear memory, this should work. + * xprptr will be reset, but the saved value should be OK. + * Just set xprenable false so the buffer isn't overwritten. + */ + +decl_simple_lock_data(, xprlock) + +boolean_t xprenable = TRUE; /* Enable xpr tracing */ +int nxprbufs = 0; /* Number of contiguous xprbufs allocated */ +int xprflags = 0; /* Bit mask of xpr flags enabled */ +struct xprbuf *xprbase; /* Pointer to circular buffer nxprbufs*sizeof(xprbuf)*/ +struct xprbuf *xprptr; /* Currently allocated xprbuf */ +struct xprbuf *xprlast; /* Pointer to end of circular buffer */ + +/*VARARGS1*/ +void xpr(msg, arg1, arg2, arg3, arg4, arg5) +char *msg; +int arg1, arg2, arg3, arg4, arg5; +{ + register spl_t s; + register struct xprbuf *x; + + /* If we aren't initialized, ignore trace request */ + if (!xprenable || (xprptr == 0)) + return; + /* Guard against all interrupts and allocate next buffer. */ + s = splhigh(); + simple_lock(&xprlock); + x = xprptr++; + if (xprptr >= xprlast) { + /* wrap around */ + xprptr = xprbase; + } + /* Save xprptr in allocated memory. */ + *(struct xprbuf **)xprlast = xprptr; + simple_unlock(&xprlock); + splx(s); + x->msg = msg; + x->arg1 = arg1; + x->arg2 = arg2; + x->arg3 = arg3; + x->arg4 = arg4; + x->arg5 = arg5; + x->timestamp = XPR_TIMESTAMP; + x->cpuinfo = cpu_number(); +} + +void xprbootstrap() +{ + vm_offset_t addr; + vm_size_t size; + kern_return_t kr; + + simple_lock_init(&xprlock); + if (nxprbufs == 0) + return; /* assume XPR support not desired */ + + /* leave room at the end for a saved copy of xprptr */ + size = nxprbufs * sizeof(struct xprbuf) + sizeof xprptr; + + kr = kmem_alloc_wired(kernel_map, &addr, size); + if (kr != KERN_SUCCESS) + panic("xprbootstrap"); + + if (xprenable) { + /* + * If xprenable is set (the default) then we zero + * the buffer so xpr_dump doesn't encounter bad pointers. + * If xprenable isn't set, then we preserve + * the original contents of the buffer. This is useful + * if memory survives reboots, so xpr_dump can show + * the previous buffer contents. + */ + + bzero((char *) addr, size); + } + + xprbase = (struct xprbuf *) addr; + xprlast = &xprbase[nxprbufs]; + xprptr = xprbase; /* setting xprptr enables tracing */ +} + +int xprinitial = 0; + +void xprinit() +{ + xprflags |= xprinitial; +} + +#if MACH_KDB +#include <machine/setjmp.h> + + +extern void db_printf(); +extern jmp_buf_t *db_recover; + +/* + * Print current content of xpr buffers (KDB's sake) + * Use stack order to make it understandable. + * + * Called as "!xpr_dump" this dumps the kernel's xpr buffer. 
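[Editorial note] xpr() above grabs the next slot of a circular buffer under a simple lock, stores only the format-string pointer and the raw arguments, and leaves all printf work to the later dump; that is what makes the trace cheap enough to leave compiled in. A user-space model of the ring (without the timestamp and cpu fields, and with illustrative names):

#include <pthread.h>

#define NBUFS 256

struct trace_rec {
    const char *msg;            /* printf-style format, formatted only at dump time */
    long arg1, arg2, arg3;
};

static struct trace_rec ring[NBUFS];
static int ring_next;
static pthread_mutex_t ring_lock = PTHREAD_MUTEX_INITIALIZER;

/* Record an event: claim a slot under the lock, fill it in outside the
 * lock, overwriting the oldest entry once the ring wraps. */
void trace(const char *msg, long a1, long a2, long a3)
{
    struct trace_rec *r;

    pthread_mutex_lock(&ring_lock);
    r = &ring[ring_next];
    if (++ring_next >= NBUFS)
        ring_next = 0;          /* wrap around */
    pthread_mutex_unlock(&ring_lock);

    r->msg = msg;               /* store the pointer and raw args; no formatting here */
    r->arg1 = a1;
    r->arg2 = a2;
    r->arg3 = a3;
}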
+ * Called with arguments, it can dump xpr buffers in user tasks, + * assuming they use the same format as the kernel. + */ +void xpr_dump(base, nbufs) + struct xprbuf *base; + int nbufs; +{ + jmp_buf_t db_jmpbuf; + jmp_buf_t *prev; + struct xprbuf *last, *ptr; + register struct xprbuf *x; + int i; + spl_t s; + + if (base == 0) { + base = xprbase; + nbufs = nxprbufs; + } + + if (nbufs == 0) + return; + + if (base == xprbase) { + s = splhigh(); + simple_lock(&xprlock); + } + + last = base + nbufs; + ptr = * (struct xprbuf **) last; + + prev = db_recover; + if (_setjmp(db_recover = &db_jmpbuf) == 0) + for (x = ptr, i = 0; i < nbufs; i++) { + if (--x < base) + x = last - 1; + + if (x->msg == 0) + break; + + db_printf("<%d:%x:%x> ", x - base, x->cpuinfo, x->timestamp); + db_printf(x->msg, x->arg1,x->arg2,x->arg3,x->arg4,x->arg5); + } + db_recover = prev; + + if (base == xprbase) { + simple_unlock(&xprlock); + (void) splx(s); + } +} +#endif MACH_KDB diff --git a/kern/xpr.h b/kern/xpr.h new file mode 100644 index 0000000..5a95555 --- /dev/null +++ b/kern/xpr.h @@ -0,0 +1,101 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * Include file for xpr circular buffer silent tracing. + * + */ +/* + * If the kernel flag XPRDEBUG is set, the XPR macro is enabled. The + * macro should be invoked something like the following: + * XPR(XPR_SYSCALLS, ("syscall: %d, 0x%x\n", syscallno, arg1); + * which will expand into the following code: + * if (xprflags & XPR_SYSCALLS) + * xpr("syscall: %d, 0x%x\n", syscallno, arg1); + * Xpr will log the pointer to the printf string and up to 6 arguements, + * along with a timestamp and cpuinfo (for multi-processor systems), into + * a circular buffer. The actual printf processing is delayed until after + * the buffer has been collected. It is assumed that the text/data segments + * of the kernel can easily be reconstructed in a post-processor which + * performs the printf processing. + * + * If the XPRDEBUG compilation switch is not set, the XPR macro expands + * to nothing. + */ + +#ifndef _KERN_XPR_H_ +#define _KERN_XPR_H_ + +#ifdef KERNEL +#include <xpr_debug.h> +#else KERNEL +#include <sys/features.h> +#endif KERNEL + +#include <machine/xpr.h> + +#if XPR_DEBUG + +#define XPR(flags,xprargs) if(xprflags&flags) xpr xprargs + +extern int xprflags; +/* + * flags for message types. 
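[Editorial note] The XPR macro above expects its arguments wrapped in an extra set of parentheses, so the whole variable-length argument list travels through a single macro parameter; that is how it works without variadic macros. A tiny stand-alone demonstration of the same trick; TRACE, TRACE_SCHED and trace_flags are illustrative names, not from the Mach sources.

#include <stdio.h>

#define TRACE_SCHED 0x04
static int trace_flags = TRACE_SCHED;

/* The argument list, parentheses included, is one macro parameter. */
#define TRACE(flag, args) do { if (trace_flags & (flag)) printf args; } while (0)

int main(void)
{
    TRACE(TRACE_SCHED, ("picked thread %d at pri %d\n", 7, 12));
    return 0;
}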
+ */ +#define XPR_SYSCALLS 0x00000001 +#define XPR_TRAPS 0x00000002 +#define XPR_SCHED 0x00000004 +#define XPR_NPTCP 0x00000008 +#define XPR_NP 0x00000010 +#define XPR_TCP 0x00000020 + +#define XPR_VM_OBJECT (1 << 8) +#define XPR_VM_OBJECT_CACHE (1 << 9) +#define XPR_VM_PAGE (1 << 10) +#define XPR_VM_PAGEOUT (1 << 11) +#define XPR_MEMORY_OBJECT (1 << 12) +#define XPR_VM_FAULT (1 << 13) +#define XPR_INODE_PAGER (1 << 14) +#define XPR_INODE_PAGER_DATA (1 << 15) + +#else XPR_DEBUG +#define XPR(flags,xprargs) +#endif XPR_DEBUG + +struct xprbuf { + char *msg; + int arg1,arg2,arg3,arg4,arg5; + int timestamp; + int cpuinfo; +}; + +#ifndef WANT_PROTOTYPES +extern void xpr(); +#endif +extern void xpr_dump(); +extern void xprinit(); +extern void xprbootstrap(); + +#endif _KERN_XPR_H_ diff --git a/kern/zalloc.c b/kern/zalloc.c new file mode 100644 index 0000000..a6421cd --- /dev/null +++ b/kern/zalloc.c @@ -0,0 +1,971 @@ +/* + * Mach Operating System + * Copyright (c) 1993-1987 Carnegie Mellon University. + * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: kern/zalloc.c + * Author: Avadis Tevanian, Jr. + * + * Zone-based memory allocator. A zone is a collection of fixed size + * data blocks for which quick allocation/deallocation is possible. 
+ */ + +#include <kern/macro_help.h> +#include <kern/sched.h> +#include <kern/time_out.h> +#include <kern/zalloc.h> +#include <mach/vm_param.h> +#include <vm/vm_kern.h> +#include <machine/machspl.h> + +#include <mach_debug.h> +#if MACH_DEBUG +#include <mach/kern_return.h> +#include <mach/machine/vm_types.h> +#include <mach_debug/zone_info.h> +#include <kern/host.h> +#include <vm/vm_map.h> +#include <vm/vm_user.h> +#include <vm/vm_kern.h> +#endif + +#define ADD_TO_ZONE(zone, element) \ +MACRO_BEGIN \ + *((vm_offset_t *)(element)) = (zone)->free_elements; \ + (zone)->free_elements = (vm_offset_t) (element); \ + zone_count_down(zone); \ +MACRO_END + +#define REMOVE_FROM_ZONE(zone, ret, type) \ +MACRO_BEGIN \ + (ret) = (type) (zone)->free_elements; \ + if ((ret) != (type) 0) { \ + zone_count_up(zone); \ + (zone)->free_elements = *((vm_offset_t *)(ret)); \ + } \ +MACRO_END + +/* + * Support for garbage collection of unused zone pages: + */ + +struct zone_page_table_entry { + struct zone_page_table_entry *next; + short in_free_list; + short alloc_count; +}; + +extern struct zone_page_table_entry * zone_page_table; +extern vm_offset_t zone_map_min_address; + +#define lock_zone_page_table() simple_lock(&zone_page_table_lock) +#define unlock_zone_page_table() simple_unlock(&zone_page_table_lock) + +#define zone_page(addr) \ + (&(zone_page_table[(atop(((vm_offset_t)addr) - zone_map_min_address))])) + + +extern void zone_page_alloc(); +extern void zone_page_dealloc(); +extern void zone_page_in_use(); +extern void zone_page_free(); + +zone_t zone_zone; /* this is the zone containing other zones */ + +boolean_t zone_ignore_overflow = TRUE; + +vm_map_t zone_map = VM_MAP_NULL; +vm_size_t zone_map_size = 12 * 1024 * 1024; + +/* + * The VM system gives us an initial chunk of memory. + * It has to be big enough to allocate the zone_zone + * and some initial kernel data structures, like kernel maps. + * It is advantageous to make it bigger than really necessary, + * because this memory is more efficient than normal kernel + * virtual memory. (It doesn't have vm_page structures backing it + * and it may have other machine-dependent advantages.) + * So for best performance, zdata_size should approximate + * the amount of memory you expect the zone system to consume. + */ + +vm_offset_t zdata; +vm_size_t zdata_size = 420 * 1024; + +#define zone_lock(zone) \ +MACRO_BEGIN \ + if (zone->type & ZONE_PAGEABLE) { \ + lock_write(&zone->complex_lock); \ + } else { \ + simple_lock(&zone->lock); \ + } \ +MACRO_END + +#define zone_unlock(zone) \ +MACRO_BEGIN \ + if (zone->type & ZONE_PAGEABLE) { \ + lock_done(&zone->complex_lock); \ + } else { \ + simple_unlock(&zone->lock); \ + } \ +MACRO_END + +#define zone_lock_init(zone) \ +MACRO_BEGIN \ + if (zone->type & ZONE_PAGEABLE) { \ + lock_init(&zone->complex_lock, TRUE); \ + } else { \ + simple_lock_init(&zone->lock); \ + } \ +MACRO_END + +static vm_offset_t zget_space(); + +decl_simple_lock_data(,zget_space_lock) +vm_offset_t zalloc_next_space; +vm_offset_t zalloc_end_of_space; +vm_size_t zalloc_wasted_space; + +/* + * Garbage collection map information + */ +decl_simple_lock_data(,zone_page_table_lock) +struct zone_page_table_entry * zone_page_table; +vm_offset_t zone_map_min_address; +vm_offset_t zone_map_max_address; +int zone_pages; + +extern void zone_page_init(); + +#define ZONE_PAGE_USED 0 +#define ZONE_PAGE_UNUSED -1 + + +/* + * Protects first_zone, last_zone, num_zones, + * and the next_zone field of zones. 
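[Editor's sketch] ADD_TO_ZONE and REMOVE_FROM_ZONE above keep the free list inside the free elements themselves: the first word of each free element holds the address of the next free element, so no separate list nodes are needed. A small standalone illustration of that technique (the function names are illustrative, not the kernel macros):

#include <stdio.h>

typedef unsigned long vm_offset_t;          /* stand-in for the Mach type */

static vm_offset_t free_elements;           /* head of the intrusive free list */

static void add_to_zone(void *element)      /* mirrors ADD_TO_ZONE */
{
    *(vm_offset_t *) element = free_elements;
    free_elements = (vm_offset_t) element;
}

static void *remove_from_zone(void)         /* mirrors REMOVE_FROM_ZONE */
{
    vm_offset_t ret = free_elements;
    if (ret != 0)
        free_elements = *(vm_offset_t *) ret;
    return (void *) ret;
}

int main(void)
{
    static char elem[3][64];                /* three fake 64-byte zone elements */

    add_to_zone(elem[0]);
    add_to_zone(elem[1]);
    add_to_zone(elem[2]);

    /* LIFO order: the most recently freed element is handed out first. */
    printf("%p (expect %p)\n", remove_from_zone(), (void *) elem[2]);
    printf("%p (expect %p)\n", remove_from_zone(), (void *) elem[1]);
    return 0;
}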
+ */ +decl_simple_lock_data(,all_zones_lock) +zone_t first_zone; +zone_t *last_zone; +int num_zones; + +/* + * zinit initializes a new zone. The zone data structures themselves + * are stored in a zone, which is initially a static structure that + * is initialized by zone_init. + */ +zone_t zinit(size, max, alloc, memtype, name) + vm_size_t size; /* the size of an element */ + vm_size_t max; /* maximum memory to use */ + vm_size_t alloc; /* allocation size */ + unsigned int memtype; /* flags specifying type of memory */ + char *name; /* a name for the zone */ +{ + register zone_t z; + + if (zone_zone == ZONE_NULL) + z = (zone_t) zget_space(sizeof(struct zone)); + else + z = (zone_t) zalloc(zone_zone); + if (z == ZONE_NULL) + panic("zinit"); + + if (alloc == 0) + alloc = PAGE_SIZE; + + if (size == 0) + size = sizeof(z->free_elements); + /* + * Round off all the parameters appropriately. + */ + + if ((max = round_page(max)) < (alloc = round_page(alloc))) + max = alloc; + + z->free_elements = 0; + z->cur_size = 0; + z->max_size = max; + z->elem_size = ((size-1) + sizeof(z->free_elements)) - + ((size-1) % sizeof(z->free_elements)); + + z->alloc_size = alloc; + z->type = memtype; + z->zone_name = name; +#ifdef ZONE_COUNT + z->count = 0; +#endif + z->doing_alloc = FALSE; + zone_lock_init(z); + + /* + * Add the zone to the all-zones list. + */ + + z->next_zone = ZONE_NULL; + simple_lock(&all_zones_lock); + *last_zone = z; + last_zone = &z->next_zone; + num_zones++; + simple_unlock(&all_zones_lock); + + return(z); +} + +/* + * Cram the given memory into the specified zone. + */ +void zcram(zone_t zone, vm_offset_t newmem, vm_size_t size) +{ + register vm_size_t elem_size; + + if (newmem == (vm_offset_t) 0) { + panic("zcram - memory at zero"); + } + elem_size = zone->elem_size; + + zone_lock(zone); + while (size >= elem_size) { + ADD_TO_ZONE(zone, newmem); + zone_page_alloc(newmem, elem_size); + zone_count_up(zone); /* compensate for ADD_TO_ZONE */ + size -= elem_size; + newmem += elem_size; + zone->cur_size += elem_size; + } + zone_unlock(zone); +} + +/* + * Contiguous space allocator for non-paged zones. Allocates "size" amount + * of memory from zone_map. + */ + +static vm_offset_t zget_space(vm_offset_t size) +{ + vm_offset_t new_space = 0; + vm_offset_t result; + vm_size_t space_to_add = 0; /*'=0' to quiet gcc warnings */ + + simple_lock(&zget_space_lock); + while ((zalloc_next_space + size) > zalloc_end_of_space) { + /* + * Add at least one page to allocation area. + */ + + space_to_add = round_page(size); + + if (new_space == 0) { + /* + * Memory cannot be wired down while holding + * any locks that the pageout daemon might + * need to free up pages. [Making the zget_space + * lock a complex lock does not help in this + * regard.] + * + * Unlock and allocate memory. Because several + * threads might try to do this at once, don't + * use the memory before checking for available + * space again. + */ + + simple_unlock(&zget_space_lock); + + if (kmem_alloc_wired(zone_map, + &new_space, space_to_add) + != KERN_SUCCESS) + return(0); + zone_page_init(new_space, space_to_add, + ZONE_PAGE_USED); + simple_lock(&zget_space_lock); + continue; + } + + + /* + * Memory was allocated in a previous iteration. + * + * Check whether the new region is contiguous + * with the old one. + */ + + if (new_space != zalloc_end_of_space) { + /* + * Throw away the remainder of the + * old space, and start a new one. 
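[Editor's sketch] The elem_size expression in zinit() above rounds the requested size up to a multiple of sizeof(z->free_elements), so every element is large enough to hold the free-list link word. A standalone check of that arithmetic (assuming only that vm_offset_t is the link word type):

#include <stdio.h>

typedef unsigned long vm_offset_t;          /* link word type, as in struct zone */

static unsigned long round_elem_size(unsigned long size)
{
    unsigned long w = sizeof(vm_offset_t);
    /* same expression as zinit(): ((size-1) + w) - ((size-1) % w) */
    return ((size - 1) + w) - ((size - 1) % w);
}

int main(void)
{
    unsigned long sizes[] = { 1, 5, 8, 9, 100 };
    int i;

    for (i = 0; i < 5; i++)
        printf("size %3lu -> elem_size %3lu\n", sizes[i], round_elem_size(sizes[i]));
    /* With a 4-byte link word: 1->4, 5->8, 8->8, 9->12, 100->100. */
    return 0;
}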
+ */ + zalloc_wasted_space += + zalloc_end_of_space - zalloc_next_space; + zalloc_next_space = new_space; + } + + zalloc_end_of_space = new_space + space_to_add; + + new_space = 0; + } + result = zalloc_next_space; + zalloc_next_space += size; + simple_unlock(&zget_space_lock); + + if (new_space != 0) + kmem_free(zone_map, new_space, space_to_add); + + return(result); +} + + +/* + * Initialize the "zone of zones" which uses fixed memory allocated + * earlier in memory initialization. zone_bootstrap is called + * before zone_init. + */ +void zone_bootstrap() +{ + simple_lock_init(&all_zones_lock); + first_zone = ZONE_NULL; + last_zone = &first_zone; + num_zones = 0; + + simple_lock_init(&zget_space_lock); + zalloc_next_space = zdata; + zalloc_end_of_space = zdata + zdata_size; + zalloc_wasted_space = 0; + + zone_zone = ZONE_NULL; + zone_zone = zinit(sizeof(struct zone), 128 * sizeof(struct zone), + sizeof(struct zone), 0, "zones"); +} + +void zone_init() +{ + vm_offset_t zone_min; + vm_offset_t zone_max; + + vm_size_t zone_table_size; + + zone_map = kmem_suballoc(kernel_map, &zone_min, &zone_max, + zone_map_size, FALSE); + + /* + * Setup garbage collection information: + */ + + zone_table_size = atop(zone_max - zone_min) * + sizeof(struct zone_page_table_entry); + if (kmem_alloc_wired(zone_map, (vm_offset_t *) &zone_page_table, + zone_table_size) != KERN_SUCCESS) + panic("zone_init"); + zone_min = (vm_offset_t)zone_page_table + round_page(zone_table_size); + zone_pages = atop(zone_max - zone_min); + zone_map_min_address = zone_min; + zone_map_max_address = zone_max; + simple_lock_init(&zone_page_table_lock); + zone_page_init(zone_min, zone_max - zone_min, ZONE_PAGE_UNUSED); +} + + +/* + * zalloc returns an element from the specified zone. + */ +vm_offset_t zalloc(zone_t zone) +{ + vm_offset_t addr; + + if (zone == ZONE_NULL) + panic ("zalloc: null zone"); + + check_simple_locks(); + + zone_lock(zone); + REMOVE_FROM_ZONE(zone, addr, vm_offset_t); + while (addr == 0) { + /* + * If nothing was there, try to get more + */ + if (zone->doing_alloc) { + /* + * Someone is allocating memory for this zone. + * Wait for it to show up, then try again. + */ + assert_wait((event_t)&zone->doing_alloc, TRUE); + /* XXX say wakeup needed */ + zone_unlock(zone); + thread_block((void (*)()) 0); + zone_lock(zone); + } + else { + if ((zone->cur_size + (zone->type & ZONE_PAGEABLE ? + zone->alloc_size : zone->elem_size)) > + zone->max_size) { + if (zone->type & ZONE_EXHAUSTIBLE) + break; + /* + * Printf calls logwakeup, which calls + * select_wakeup which will do a zfree + * (which tries to take the select_zone + * lock... Hang. Release the lock now + * so it can be taken again later. + * NOTE: this used to be specific to + * the select_zone, but for + * cleanliness, we just unlock all + * zones before this. + */ + if (!(zone->type & ZONE_FIXED)) { + /* + * We're willing to overflow certain + * zones, but not without complaining. + * + * This is best used in conjunction + * with the collecatable flag. What we + * want is an assurance we can get the + * memory back, assuming there's no + * leak. 
+ */ + zone->max_size += (zone->max_size >> 1); + } else if (!zone_ignore_overflow) { + zone_unlock(zone); + printf("zone \"%s\" empty.\n", + zone->zone_name); + panic("zalloc"); + } + } + + if (zone->type & ZONE_PAGEABLE) + zone->doing_alloc = TRUE; + zone_unlock(zone); + + if (zone->type & ZONE_PAGEABLE) { + if (kmem_alloc_pageable(zone_map, &addr, + zone->alloc_size) + != KERN_SUCCESS) + panic("zalloc"); + zcram(zone, addr, zone->alloc_size); + zone_lock(zone); + zone->doing_alloc = FALSE; + /* XXX check before doing this */ + thread_wakeup((event_t)&zone->doing_alloc); + + REMOVE_FROM_ZONE(zone, addr, vm_offset_t); + } else if (zone->type & ZONE_COLLECTABLE) { + if (kmem_alloc_wired(zone_map, + &addr, zone->alloc_size) + != KERN_SUCCESS) + panic("zalloc"); + zone_page_init(addr, zone->alloc_size, + ZONE_PAGE_USED); + zcram(zone, addr, zone->alloc_size); + zone_lock(zone); + REMOVE_FROM_ZONE(zone, addr, vm_offset_t); + } else { + addr = zget_space(zone->elem_size); + if (addr == 0) + panic("zalloc"); + + zone_lock(zone); + zone_count_up(zone); + zone->cur_size += zone->elem_size; + zone_unlock(zone); + zone_page_alloc(addr, zone->elem_size); + return(addr); + } + } + } + + zone_unlock(zone); + return(addr); +} + + +/* + * zget returns an element from the specified zone + * and immediately returns nothing if there is nothing there. + * + * This form should be used when you can not block (like when + * processing an interrupt). + */ +vm_offset_t zget(zone_t zone) +{ + register vm_offset_t addr; + + if (zone == ZONE_NULL) + panic ("zalloc: null zone"); + + zone_lock(zone); + REMOVE_FROM_ZONE(zone, addr, vm_offset_t); + zone_unlock(zone); + + return(addr); +} + +boolean_t zone_check = FALSE; + +void zfree(zone_t zone, vm_offset_t elem) +{ + zone_lock(zone); + if (zone_check) { + vm_offset_t this; + + /* check the zone's consistency */ + + for (this = zone->free_elements; + this != 0; + this = * (vm_offset_t *) this) + if (this == elem) + panic("zfree"); + } + ADD_TO_ZONE(zone, elem); + zone_unlock(zone); +} + +/* + * Zone garbage collection subroutines + * + * These routines have in common the modification of entries in the + * zone_page_table. The latter contains one entry for every page + * in the zone_map. + * + * For each page table entry in the given range: + * + * zone_page_in_use - decrements in_free_list + * zone_page_free - increments in_free_list + * zone_page_init - initializes in_free_list and alloc_count + * zone_page_alloc - increments alloc_count + * zone_page_dealloc - decrements alloc_count + * zone_add_free_page_list - adds the page to the free list + * + * Two counts are maintained for each page, the in_free_list count and + * alloc_count. The alloc_count is how many zone elements have been + * allocated from a page. (Note that the page could contain elements + * that span page boundaries. The count includes these elements so + * one element may be counted in two pages.) In_free_list is a count + * of how many zone elements are currently free. If in_free_list is + * equal to alloc_count then the page is eligible for garbage + * collection. + * + * Alloc_count and in_free_list are initialized to the correct values + * for a particular zone when a page is zcram'ed into a zone. Subsequent + * gets and frees of zone elements will call zone_page_in_use and + * zone_page_free which modify the in_free_list count. 
When the zones + * garbage collector runs it will walk through a zones free element list, + * remove the elements that reside on collectable pages, and use + * zone_add_free_page_list to create a list of pages to be collected. + */ + +void zone_page_in_use(addr, size) +vm_offset_t addr; +vm_size_t size; +{ + int i, j; + if ((addr < zone_map_min_address) || + (addr+size > zone_map_max_address)) return; + i = atop(addr-zone_map_min_address); + j = atop((addr+size-1) - zone_map_min_address); + lock_zone_page_table(); + for (; i <= j; i++) { + zone_page_table[i].in_free_list--; + } + unlock_zone_page_table(); +} + +void zone_page_free(addr, size) +vm_offset_t addr; +vm_size_t size; +{ + int i, j; + if ((addr < zone_map_min_address) || + (addr+size > zone_map_max_address)) return; + i = atop(addr-zone_map_min_address); + j = atop((addr+size-1) - zone_map_min_address); + lock_zone_page_table(); + for (; i <= j; i++) { + /* Set in_free_list to (ZONE_PAGE_USED + 1) if + * it was previously set to ZONE_PAGE_UNUSED. + */ + if (zone_page_table[i].in_free_list == ZONE_PAGE_UNUSED) { + zone_page_table[i].in_free_list = 1; + } else { + zone_page_table[i].in_free_list++; + } + } + unlock_zone_page_table(); +} + +void zone_page_init(addr, size, value) + +vm_offset_t addr; +vm_size_t size; +int value; +{ + int i, j; + if ((addr < zone_map_min_address) || + (addr+size > zone_map_max_address)) return; + i = atop(addr-zone_map_min_address); + j = atop((addr+size-1) - zone_map_min_address); + lock_zone_page_table(); + for (; i <= j; i++) { + zone_page_table[i].alloc_count = value; + zone_page_table[i].in_free_list = 0; + } + unlock_zone_page_table(); +} + +void zone_page_alloc(addr, size) +vm_offset_t addr; +vm_size_t size; +{ + int i, j; + if ((addr < zone_map_min_address) || + (addr+size > zone_map_max_address)) return; + i = atop(addr-zone_map_min_address); + j = atop((addr+size-1) - zone_map_min_address); + lock_zone_page_table(); + for (; i <= j; i++) { + /* Set alloc_count to (ZONE_PAGE_USED + 1) if + * it was previously set to ZONE_PAGE_UNUSED. + */ + if (zone_page_table[i].alloc_count == ZONE_PAGE_UNUSED) { + zone_page_table[i].alloc_count = 1; + } else { + zone_page_table[i].alloc_count++; + } + } + unlock_zone_page_table(); +} + +void zone_page_dealloc(addr, size) +vm_offset_t addr; +vm_size_t size; +{ + int i, j; + if ((addr < zone_map_min_address) || + (addr+size > zone_map_max_address)) return; + i = atop(addr-zone_map_min_address); + j = atop((addr+size-1) - zone_map_min_address); + lock_zone_page_table(); + for (; i <= j; i++) { + zone_page_table[i].alloc_count--; + } + unlock_zone_page_table(); +} + +void +zone_add_free_page_list(free_list, addr, size) + struct zone_page_table_entry **free_list; + vm_offset_t addr; + vm_size_t size; +{ + int i, j; + if ((addr < zone_map_min_address) || + (addr+size > zone_map_max_address)) return; + i = atop(addr-zone_map_min_address); + j = atop((addr+size-1) - zone_map_min_address); + lock_zone_page_table(); + for (; i <= j; i++) { + if (zone_page_table[i].alloc_count == 0) { + zone_page_table[i].next = *free_list; + *free_list = &zone_page_table[i]; + zone_page_table[i].alloc_count = ZONE_PAGE_UNUSED; + zone_page_table[i].in_free_list = 0; + } + } + unlock_zone_page_table(); +} + + +/* This is used for walking through a zone's free element list. 
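[Editor's sketch] Each of the zone_page_* routines above converts an element's address range into a range of zone_page_table indices with atop(); because the last byte (addr + size - 1) determines the upper index, an element that straddles a page boundary is accounted on both pages, as the comment before them notes. A standalone illustration (the 4K page size and the addresses are assumed for the demo only):

#include <stdio.h>

#define PAGE_SHIFT 12                       /* assumed 4K pages for the demo */
#define atop(x)    ((unsigned long) (x) >> PAGE_SHIFT)

int main(void)
{
    unsigned long zone_map_min_address = 0x100000;
    unsigned long addr = 0x100fc0;          /* 64 bytes below a page boundary */
    unsigned long size = 0x80;              /* 128-byte element */

    unsigned long i = atop(addr - zone_map_min_address);
    unsigned long j = atop((addr + size - 1) - zone_map_min_address);

    printf("element touches page entries %lu..%lu\n", i, j);   /* prints 0..1 */
    return 0;
}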
+ */ +struct zone_free_entry { + struct zone_free_entry * next; +}; + + +/* Zone garbage collection + * + * zone_gc will walk through all the free elements in all the + * zones that are marked collectable looking for reclaimable + * pages. zone_gc is called by consider_zone_gc when the system + * begins to run out of memory. + */ +static void zone_gc() +{ + int max_zones; + zone_t z; + int i; + register spl_t s; + struct zone_page_table_entry *freep; + struct zone_page_table_entry *zone_free_page_list; + + simple_lock(&all_zones_lock); + max_zones = num_zones; + z = first_zone; + simple_unlock(&all_zones_lock); + + zone_free_page_list = (struct zone_page_table_entry *) 0; + + for (i = 0; i < max_zones; i++) { + struct zone_free_entry * last; + struct zone_free_entry * elt; + assert(z != ZONE_NULL); + /* run this at splhigh so that interupt routines that use zones + can not interupt while their zone is locked */ + s=splhigh(); + zone_lock(z); + + if ((z->type & (ZONE_PAGEABLE|ZONE_COLLECTABLE)) == ZONE_COLLECTABLE) { + + /* Count the free elements in each page. This loop + * requires that all in_free_list entries are zero. + */ + elt = (struct zone_free_entry *)(z->free_elements); + while ((elt != (struct zone_free_entry *)0)) { + zone_page_free((vm_offset_t)elt, z->elem_size); + elt = elt->next; + } + + /* Now determine which elements should be removed + * from the free list and, after all the elements + * on a page have been removed, add the element's + * page to a list of pages to be freed. + */ + elt = (struct zone_free_entry *)(z->free_elements); + last = elt; + while ((elt != (struct zone_free_entry *)0)) { + if (((vm_offset_t)elt>=zone_map_min_address)&& + ((vm_offset_t)elt<=zone_map_max_address)&& + (zone_page(elt)->in_free_list == + zone_page(elt)->alloc_count)) { + + z->cur_size -= z->elem_size; + zone_page_in_use((vm_offset_t)elt, z->elem_size); + zone_page_dealloc((vm_offset_t)elt, z->elem_size); + if (zone_page(elt)->alloc_count == 0 || + zone_page(elt+(z->elem_size-1))->alloc_count==0) { + zone_add_free_page_list( + &zone_free_page_list, + (vm_offset_t)elt, z->elem_size); + } + + + if (elt == last) { + elt = elt->next; + z->free_elements =(vm_offset_t)elt; + last = elt; + } else { + last->next = elt->next; + elt = elt->next; + } + } else { + /* This element is not eligible for collection + * so clear in_free_list in preparation for a + * subsequent garbage collection pass. + */ + if (((vm_offset_t)elt>=zone_map_min_address)&& + ((vm_offset_t)elt<=zone_map_max_address)) { + zone_page(elt)->in_free_list = 0; + } + last = elt; + elt = elt->next; + } + } + } + zone_unlock(z); + splx(s); + simple_lock(&all_zones_lock); + z = z->next_zone; + simple_unlock(&all_zones_lock); + } + + for (freep = zone_free_page_list; freep != 0; freep = freep->next) { + vm_offset_t free_addr; + + free_addr = zone_map_min_address + + PAGE_SIZE * (freep - zone_page_table); + kmem_free(zone_map, free_addr, PAGE_SIZE); + } +} + +boolean_t zone_gc_allowed = TRUE; +unsigned zone_gc_last_tick = 0; +unsigned zone_gc_max_rate = 0; /* in ticks */ + +/* + * consider_zone_gc: + * + * Called by the pageout daemon when the system needs more free pages. + */ + +void +consider_zone_gc() +{ + /* + * By default, don't attempt zone GC more frequently + * than once a second. 
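[Editor's sketch] A standalone sketch of the rate limiting described here, under the assumption that sched_tick advances hz times per second (the tick values are illustrative):

#include <stdio.h>

static unsigned hz = 100;                 /* assumed ticks per second */
static unsigned zone_gc_last_tick = 0;
static unsigned zone_gc_max_rate = 0;     /* in ticks; 0 means "use the default" */

static int gc_allowed_at(unsigned sched_tick)
{
    if (zone_gc_max_rate == 0)
        zone_gc_max_rate = hz;            /* default: at most once a second */
    if (sched_tick > zone_gc_last_tick + zone_gc_max_rate) {
        zone_gc_last_tick = sched_tick;
        return 1;                         /* consider_zone_gc() would call zone_gc() here */
    }
    return 0;
}

int main(void)
{
    printf("tick  50: %s\n", gc_allowed_at(50)  ? "collect" : "skip");
    printf("tick 101: %s\n", gc_allowed_at(101) ? "collect" : "skip");
    printf("tick 150: %s\n", gc_allowed_at(150) ? "collect" : "skip");
    printf("tick 250: %s\n", gc_allowed_at(250) ? "collect" : "skip");
    return 0;
}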
+ */ + + if (zone_gc_max_rate == 0) + zone_gc_max_rate = hz; + + if (zone_gc_allowed && + (sched_tick > (zone_gc_last_tick + zone_gc_max_rate))) { + zone_gc_last_tick = sched_tick; + zone_gc(); + } +} + +#if MACH_DEBUG +kern_return_t host_zone_info(host, namesp, namesCntp, infop, infoCntp) + host_t host; + zone_name_array_t *namesp; + unsigned int *namesCntp; + zone_info_array_t *infop; + unsigned int *infoCntp; +{ + zone_name_t *names; + vm_offset_t names_addr; + vm_size_t names_size = 0; /*'=0' to quiet gcc warnings */ + zone_info_t *info; + vm_offset_t info_addr; + vm_size_t info_size = 0; /*'=0' to quiet gcc warnings */ + unsigned int max_zones, i; + zone_t z; + kern_return_t kr; + + if (host == HOST_NULL) + return KERN_INVALID_HOST; + + /* + * We assume that zones aren't freed once allocated. + * We won't pick up any zones that are allocated later. + */ + + simple_lock(&all_zones_lock); + max_zones = num_zones; + z = first_zone; + simple_unlock(&all_zones_lock); + + if (max_zones <= *namesCntp) { + /* use in-line memory */ + + names = *namesp; + } else { + names_size = round_page(max_zones * sizeof *names); + kr = kmem_alloc_pageable(ipc_kernel_map, + &names_addr, names_size); + if (kr != KERN_SUCCESS) + return kr; + + names = (zone_name_t *) names_addr; + } + + if (max_zones <= *infoCntp) { + /* use in-line memory */ + + info = *infop; + } else { + info_size = round_page(max_zones * sizeof *info); + kr = kmem_alloc_pageable(ipc_kernel_map, + &info_addr, info_size); + if (kr != KERN_SUCCESS) { + if (names != *namesp) + kmem_free(ipc_kernel_map, + names_addr, names_size); + return kr; + } + + info = (zone_info_t *) info_addr; + } + + for (i = 0; i < max_zones; i++) { + zone_name_t *zn = &names[i]; + zone_info_t *zi = &info[i]; + struct zone zcopy; + + assert(z != ZONE_NULL); + + zone_lock(z); + zcopy = *z; + zone_unlock(z); + + simple_lock(&all_zones_lock); + z = z->next_zone; + simple_unlock(&all_zones_lock); + + /* assuming here the name data is static */ + (void) strncpy(zn->zn_name, zcopy.zone_name, + sizeof zn->zn_name); + +#ifdef ZONE_COUNT + zi->zi_count = zcopy.count; +#else + zi->zi_count = 0; +#endif + zi->zi_cur_size = zcopy.cur_size; + zi->zi_max_size = zcopy.max_size; + zi->zi_elem_size = zcopy.elem_size; + zi->zi_alloc_size = zcopy.alloc_size; + zi->zi_pageable = (zcopy.type & ZONE_PAGEABLE) != 0; + zi->zi_exhaustible = (zcopy.type & ZONE_EXHAUSTIBLE) != 0; + zi->zi_collectable = (zcopy.type & ZONE_COLLECTABLE) != 0; + } + + if (names != *namesp) { + vm_size_t used; + vm_map_copy_t copy; + + used = max_zones * sizeof *names; + + if (used != names_size) + bzero((char *) (names_addr + used), names_size - used); + + kr = vm_map_copyin(ipc_kernel_map, names_addr, names_size, + TRUE, ©); + assert(kr == KERN_SUCCESS); + + *namesp = (zone_name_t *) copy; + } + *namesCntp = max_zones; + + if (info != *infop) { + vm_size_t used; + vm_map_copy_t copy; + + used = max_zones * sizeof *info; + + if (used != info_size) + bzero((char *) (info_addr + used), info_size - used); + + kr = vm_map_copyin(ipc_kernel_map, info_addr, info_size, + TRUE, ©); + assert(kr == KERN_SUCCESS); + + *infop = (zone_info_t *) copy; + } + *infoCntp = max_zones; + + return KERN_SUCCESS; +} +#endif MACH_DEBUG diff --git a/kern/zalloc.h b/kern/zalloc.h new file mode 100644 index 0000000..2e9b4b3 --- /dev/null +++ b/kern/zalloc.h @@ -0,0 +1,135 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University. 
+ * Copyright (c) 1993,1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * File: zalloc.h + * Author: Avadis Tevanian, Jr. + * Date: 1985 + * + */ + +#ifndef _KERN_ZALLOC_H_ +#define _KERN_ZALLOC_H_ + +#include <mach/machine/vm_types.h> +#include <kern/macro_help.h> +#include <kern/lock.h> +#include <kern/queue.h> +#include <machine/zalloc.h> + +/* + * A zone is a collection of fixed size blocks for which there + * is fast allocation/deallocation access. Kernel routines can + * use zones to manage data structures dynamically, creating a zone + * for each type of data structure to be managed. + * + */ + +struct zone { + decl_simple_lock_data(,lock) /* generic lock */ +#ifdef ZONE_COUNT + int count; /* Number of elements used now */ +#endif + vm_offset_t free_elements; + vm_size_t cur_size; /* current memory utilization */ + vm_size_t max_size; /* how large can this zone grow */ + vm_size_t elem_size; /* size of an element */ + vm_size_t alloc_size; /* size used for more memory */ + boolean_t doing_alloc; /* is zone expanding now? */ + char *zone_name; /* a name for the zone */ + unsigned int type; /* type of memory */ + lock_data_t complex_lock; /* Lock for pageable zones */ + struct zone *next_zone; /* Link for all-zones list */ +}; +typedef struct zone *zone_t; + +#define ZONE_NULL ((zone_t) 0) + +/* Exported to everyone */ +zone_t zinit(vm_size_t size, vm_size_t max, vm_size_t alloc, + unsigned int memtype, char *name); +vm_offset_t zalloc(zone_t zone); +vm_offset_t zget(zone_t zone); +void zfree(zone_t zone, vm_offset_t elem); +void zcram(zone_t zone, vm_offset_t newmem, vm_size_t size); + +/* Exported only to vm/vm_init.c */ +void zone_bootstrap(); +void zone_init(); + +/* Exported only to vm/vm_pageout.c */ +void consider_zone_gc(); + + +/* Memory type bits for zones */ +#define ZONE_PAGEABLE 0x00000001 +#define ZONE_COLLECTABLE 0x00000002 /* Garbage-collect this zone when memory runs low */ +#define ZONE_EXHAUSTIBLE 0x00000004 /* zalloc() on this zone is allowed to fail */ +#define ZONE_FIXED 0x00000008 /* Panic if zone is exhausted (XXX) */ + +/* Machine-dependent code can provide additional memory types. */ +#define ZONE_MACHINE_TYPES 0xffff0000 + + +#ifdef ZONE_COUNT +#define zone_count_up(zone) ((zone)->count++) +#define zone_count_down(zone) ((zone)->count--) +#else +#define zone_count_up(zone) +#define zone_count_down(zone) +#endif + + + +/* These quick inline versions only work for small, nonpageable zones (currently). 
*/ + +static __inline vm_offset_t ZALLOC(zone_t zone) +{ + simple_lock(&zone->lock); + if (zone->free_elements == 0) { + simple_unlock(&zone->lock); + return zalloc(zone); + } else { + vm_offset_t element = zone->free_elements; + zone->free_elements = *((vm_offset_t *)(element)); + zone_count_up(zone); + simple_unlock(&zone->lock); + return element; + } +} + +static __inline void ZFREE(zone_t zone, vm_offset_t element) +{ + *((vm_offset_t *)(element)) = zone->free_elements; + zone->free_elements = (vm_offset_t) (element); + zone_count_down(zone); +} + + + +#endif _KERN_ZALLOC_H_ |
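[Editor's sketch] As a usage sketch only (pkt_hdr and the function names are hypothetical, not part of this source): ZALLOC gives a hot allocation path a single lock round-trip when the zone's free list is non-empty, falling back to the full zalloc() only when the zone has to grow. ZFREE performs no locking itself, so the plain zfree() is the safe choice unless the caller already serializes access to the zone.

#include <kern/zalloc.h>

struct pkt_hdr {
    int len;
};

extern zone_t pkt_hdr_zone;          /* assumed to be created with zinit() at boot */

struct pkt_hdr *pkt_hdr_get(void)
{
    /* Inline fast path; drops into zalloc() if the free list is empty. */
    return (struct pkt_hdr *) ZALLOC(pkt_hdr_zone);
}

void pkt_hdr_put(struct pkt_hdr *p)
{
    /* zfree() takes the zone lock; ZFREE would need external serialization. */
    zfree(pkt_hdr_zone, (vm_offset_t) p);
}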