summaryrefslogtreecommitdiff
path: root/serverboot
diff options
context:
space:
mode:
authorThomas Bushnell <thomas@gnu.org>1997-04-03 23:28:32 +0000
committerThomas Bushnell <thomas@gnu.org>1997-04-03 23:28:32 +0000
commit3dd031e1f1571f5b60409c17efe90cc51d51a25c (patch)
tree3491c55317bb9976413ca314a11d35d2808753f2 /serverboot
parent6812102eb13b3c58c79f42f6a6f5c5081f8dfcad (diff)
Initial Revision
Diffstat (limited to 'serverboot')
-rw-r--r--serverboot/ChangeLog53
-rw-r--r--serverboot/Makefile43
-rw-r--r--serverboot/assert.h50
-rw-r--r--serverboot/bootstrap.c408
-rw-r--r--serverboot/def_pager_setup.c136
-rw-r--r--serverboot/default_pager.c3536
-rw-r--r--serverboot/defs.h95
-rw-r--r--serverboot/dir.h142
-rw-r--r--serverboot/disk_inode.h101
-rw-r--r--serverboot/disk_inode_ffs.h99
-rw-r--r--serverboot/elf-load.c88
-rw-r--r--serverboot/exec.c88
-rw-r--r--serverboot/ext2_file_io.c1099
-rw-r--r--serverboot/ext2_fs.h451
-rw-r--r--serverboot/ffs_compat.c63
-rw-r--r--serverboot/ffs_compat.h54
-rw-r--r--serverboot/ffs_file_io.c1085
-rw-r--r--serverboot/file_io.c225
-rw-r--r--serverboot/file_io.h174
-rw-r--r--serverboot/fs.h455
-rw-r--r--serverboot/gets.c90
-rw-r--r--serverboot/kalloc.c274
-rw-r--r--serverboot/load.c406
-rw-r--r--serverboot/minix_ffs_compat.c62
-rw-r--r--serverboot/minix_ffs_compat.h43
-rw-r--r--serverboot/minix_file_io.c966
-rw-r--r--serverboot/minix_fs.h107
-rw-r--r--serverboot/minix_super.h49
-rw-r--r--serverboot/panic.c59
-rw-r--r--serverboot/queue.h316
-rw-r--r--serverboot/strfcns.c117
-rw-r--r--serverboot/translate_root.c124
-rw-r--r--serverboot/translate_root.h41
-rw-r--r--serverboot/wiring.c140
-rw-r--r--serverboot/wiring.h35
35 files changed, 11274 insertions, 0 deletions
diff --git a/serverboot/ChangeLog b/serverboot/ChangeLog
new file mode 100644
index 00000000..52f60f35
--- /dev/null
+++ b/serverboot/ChangeLog
@@ -0,0 +1,53 @@
+Wed Mar 19 14:45:27 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu>
+
+ * panic.c (panic): Clear possible errors on stdout before printing
+ panic string.
+
+Mon Mar 17 13:13:50 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu>
+
+ * wiring.c (wire_all_memory): Don't attempt wire if PROTECTION is
+ VM_PROT_NONE.
+
+ * panic.c (panic): Be more informative about where the error is
+ coming from.
+
+ * default_pager.c (create_paging_partition): Don't print
+ gratuitous output noise.
+ * load.c (boot_script_exec_cmd): Likewise.
+
+Wed Mar 12 10:53:00 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu>
+
+ * ext2_file_io.c (ext2_open_file): Clear FP before beginning
+ work.
+ * ffs_file_io.c (ffs_open_file): Likewise.
+ * minix_file_io.c (minix_open_file): Likewise.
+
+ * bootstrap.c (printf_init, safe_gets): New functions.
+ * console.c: Deleted file.
+ * Makefile (SRCS): Omit console.c and gets.c.
+
+ * load.c (read_symtab_from_file): Comment out body of function.
+ We don't want this.
+
+ * defs.h: Comment out redefinitions of common types.
+
+ * default_pager.c: Include <cthreads.h> instead of
+ <mach/cthreads.h>.
+ * file_io.h: Likewise.
+ * kalloc.c: Likewise.
+
+ * panic.c: Include <varargs.h> instead of <sys/varargs.h>.
+
+ * default_pager.c (pager_read_offset): Cast return of NO_BLOCK
+ properly.
+
+Mon Mar 10 17:07:50 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu>
+
+ * load.c: Find boot_script.h in ../boot.
+ * bootstrap.c: Likewise.
+
+ * bootstrap.c (boot_panic): Repair syntax.
+
+ * strfcns.c: Include <varargs.h> instead of <sys/varargs.h>.
+ * load.c: Likewise.
+
diff --git a/serverboot/Makefile b/serverboot/Makefile
new file mode 100644
index 00000000..05a07d58
--- /dev/null
+++ b/serverboot/Makefile
@@ -0,0 +1,43 @@
+# Copyright (C) 1997 Free Software Foundation, Inc.
+# This file is part of the GNU Hurd.
+#
+# The GNU Hurd is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# The GNU Hurd is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with the GNU Hurd; see the file COPYING. If not, write to
+# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+# Directory name and build mode.  Presumably consumed by ../Makeconf,
+# which is included at the bottom -- confirm there.
+dir := serverboot
+makemode := utility
+
+# C sources compiled from this directory.
+SRCS = bootstrap.c ffs_compat.c load.c wiring.c def_pager_setup.c \
+ ffs_file_io.c minix_ffs_compat.c default_pager.c file_io.c\
+ minix_file_io.c ext2_file_io.c kalloc.c strfcns.c exec.c \
+ translate_root.c panic.c elf-load.c
+# One object per source, plus boot_script.o (its source is found through
+# the vpath below) and the *Server.o objects (presumably MIG-generated
+# server stubs -- confirm against ../Makeconf).
+OBJS = $(subst .c,.o,$(SRCS)) boot_script.o memory_objectServer.o \
+ default_pagerServer.o excServer.o bootstrapServer.o \
+ memory_object_defaultServer.o
+# Headers that live in this directory.
+LCLHDRS = assert.h disk_inode_ffs.h fs.h queue.h defs.h ext2_fs.h \
+ minix_ffs_compat.h wiring.h dir.h ffs_compat.h minix_fs.h \
+ disk_inode.h file_io.h minix_super.h translate_root.h
+target = serverboot
+HURDLIBS = threads
+
+# boot_script.c is not in this directory; look for it in ../boot.
+vpath boot_script.c $(srcdir)/../boot
+
+# Define SEQNOS for the MIG runs.
+MIGSFLAGS = -DSEQNOS
+
+# Link statically.
+LDFLAGS += -static
+
+include ../Makeconf
+
+# Don't even bother.
+CFLAGS := $(filter-out -Wall,$(CFLAGS))
\ No newline at end of file
diff --git a/serverboot/assert.h b/serverboot/assert.h
new file mode 100644
index 00000000..9bcab69e
--- /dev/null
+++ b/serverboot/assert.h
@@ -0,0 +1,50 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+
+#ifndef _ASSERT_H_
+#define _ASSERT_H_
+
+/*
+ * assert(ex) calls Assert(__FILE__, __LINE__) when EX is false, but
+ * only in files compiled with ASSERTIONS defined.  Without ASSERTIONS
+ * both assert() and assert_static() expand to nothing, so EX is not
+ * evaluated at all.
+ */
+#ifdef ASSERTIONS
+/* Failure handler; defined outside this header. */
+extern void Assert();
+
+#define assert(ex) \
+    do { \
+        if (!(ex)) \
+            Assert(__FILE__, __LINE__); \
+    } while (0)
+
+/* assert_static() is dropped entirely when running under lint. */
+#ifdef lint
+#define assert_static(x)
+#else /* lint */
+#define assert_static(x) assert(x)
+#endif /* lint */
+
+#else /* ASSERTIONS */
+#define assert(ex)
+#define assert_static(ex)
+#endif /* ASSERTIONS */
+
+#endif /* _ASSERT_H_ */
diff --git a/serverboot/bootstrap.c b/serverboot/bootstrap.c
new file mode 100644
index 00000000..b603a55f
--- /dev/null
+++ b/serverboot/bootstrap.c
@@ -0,0 +1,408 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1992,1991,1990,1989 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+/*
+ * Bootstrap the various built-in servers.
+ */
+
+#include <mach.h>
+#include <mach/message.h>
+
+#include <file_io.h>
+
+#include <stdio.h>
+
+#include "../boot/boot_script.h"
+#include "translate_root.h"
+
+/*
+ * Use 8 Kbyte stacks instead of the default 64K
+ * (16 Kbyte when compiled with `alpha' defined).
+ * Use 4 Kbyte waiting stacks instead of the default 8K.
+ */
+#if defined(alpha)
+vm_size_t cthread_stack_size = 16 * 1024;
+#else
+vm_size_t cthread_stack_size = 8 * 1024;
+#endif
+
+/* Assigned at the start of main(). */
+extern
+vm_size_t cthread_wait_stack_size;
+
+/* Set in main() from argv[3] and argv[2] respectively. */
+mach_port_t bootstrap_master_device_port; /* local name */
+mach_port_t bootstrap_master_host_port; /* local name */
+
+int boot_load_program();
+
+/* Root device name: argv[4] or DEFAULT_ROOT, then the result of
+   translate_root(); see main(). */
+char *root_name;
+/* Path of the server boot script; main() builds the default as
+   "/dev/" root_name "/boot/servers.boot". */
+char boot_script_name[MAXPATHLEN];
+
+extern void default_pager();
+extern void default_pager_initialize();
+extern void default_pager_setup();
+
+/* initialized in default_pager_initialize */
+extern mach_port_t default_pager_exception_port;
+extern mach_port_t default_pager_bootstrap_port;
+
+/*
+ * Minimal decimal string-to-int conversion.
+ *
+ * Leading blanks (' ' only) are skipped, a single '-' is honoured, and
+ * conversion stops at the first character that is not a decimal digit.
+ * A leading '+' is not accepted, and overflow is not detected.
+ */
+int atoi(str)
+    register const char *str;
+{
+    register int value = 0;
+    int negate;
+
+    for (; *str == ' '; str++)
+        continue;
+
+    negate = (*str == '-');
+    if (negate)
+        str++;
+
+    for (; *str >= '0' && *str <= '9'; str++)
+        value = value * 10 + (*str - '0');
+
+    return negate ? -value : value;
+}
+
+/*
+ * Empty stub.  NOTE(review): presumably here to satisfy a
+ * compiler-generated call to __main -- confirm with the toolchain in use.
+ */
+__main ()
+{
+}
+
+/*
+ * Panic with the boot-script error message for ERR, prefixed with
+ * "bootstrap: ".
+ *
+ * The message text is handed to panic() as an argument rather than as
+ * the format string itself, so a '%' in the error text cannot be
+ * misread as a conversion.  This also removes the variable-length
+ * stack buffer that was used to paste the two strings together.
+ */
+static void
+boot_panic (kern_return_t err)
+{
+    panic ("bootstrap: %s", boot_script_error_string (err));
+}
+
+/*
+ * Read one line from stdin into STR (capacity MAXLEN bytes) and strip
+ * the trailing newline, if there is one.
+ *
+ * On end-of-file or a read error STR is set to the empty string.  When
+ * the line does not fit, or MAXLEN leaves no room for input at all,
+ * there is no newline to strip and STR is left as fgets produced it.
+ */
+void
+safe_gets (char *str, int maxlen)
+{
+    char *nl;
+
+    if (maxlen <= 0)
+        return;
+
+    if (fgets (str, maxlen, stdin) == 0)
+      {
+        str[0] = '\0';
+        return;
+      }
+
+    nl = strchr (str, '\n');
+    if (nl)
+        *nl = '\0';
+}
+
+/*
+ * Point stdin, stdout and stderr at the "console" device opened through
+ * MASTER, and make stdout unbuffered.  The result of device_open is not
+ * checked.
+ */
+printf_init (device_t master)
+{
+    mach_port_t console_port;
+
+    device_open (master, D_READ|D_WRITE, "console", &console_port);
+
+    stdin = mach_open_devstream (console_port, "r");
+    stderr = mach_open_devstream (console_port, "w");
+    stdout = stderr;
+
+    /* Our port reference is dropped once the streams have been opened. */
+    mach_port_deallocate (mach_task_self (), console_port);
+    setbuf (stdout, 0);
+}
+
+/*
+ * Bootstrap task.
+ * Runs in user space.
+ *
+ * Called as 'boot -switches host_port device_port root_name'
+ *
+ * Switches examined here: 'a' asks for the root device, 'q' asks for
+ * the server boot script name, 'd' asks for a line of input before the
+ * script is executed.  Once the script has been executed the task
+ * terminates itself unless the script invoked "default-pager", in which
+ * case it goes on to call default_pager().
+ */
+main(argc, argv)
+ int argc;
+ char **argv;
+{
+ int doing_default_pager = 0;
+ /* Boot script function "add-paging-file" (registered below): add
+    cmd->path as a paging file.  Defined as a function nested in main. */
+ int script_paging_file (const struct cmd *cmd, int *val)
+ {
+ if (add_paging_file (bootstrap_master_device_port, cmd->path))
+ {
+ printf ("(bootstrap): %s: Cannot add paging file\n", cmd->path);
+ return BOOT_SCRIPT_MACH_ERROR;
+ }
+ return 0;
+ }
+ /* Boot script function "default-pager" (registered below): initialize
+    the pager and record, in main's local doing_default_pager, that this
+    task must become the default pager after the script has run. */
+ int script_default_pager (const struct cmd *cmd, int *val)
+ {
+ default_pager_initialize(bootstrap_master_host_port);
+ doing_default_pager = 1;
+ return 0;
+ }
+
+ register kern_return_t result;
+ struct file scriptf;
+
+ task_t my_task = mach_task_self();
+
+ char *flag_string;
+
+ boolean_t ask_boot_script = 0;
+
+ /* static, hence initially empty; it stays empty unless -a is given */
+ static char new_root[16];
+
+ /*
+ * Use 4Kbyte cthread wait stacks.
+ */
+ cthread_wait_stack_size = 4 * 1024;
+
+ /*
+ * Parse the arguments.
+ */
+ /* NOTE(review): these panics happen before printf_init() has set up
+    the console streams -- confirm panic() can report at this point. */
+ if (argc < 5)
+ panic("bootstrap: not enough arguments");
+
+ /*
+ * Arg 0 is program name
+ */
+
+ /*
+ * Arg 1 is flags
+ */
+ if (argv[1][0] != '-')
+ panic("bootstrap: no flags");
+
+ flag_string = argv[1];
+
+ /*
+ * Arg 2 is host port number
+ */
+ bootstrap_master_host_port = atoi(argv[2]);
+
+ /*
+ * Arg 3 is device port number
+ */
+ bootstrap_master_device_port = atoi(argv[3]);
+
+ /*
+ * Arg 4 is root name
+ */
+ root_name = argv[4];
+
+ printf_init(bootstrap_master_device_port);
+#ifdef pleasenoXXX
+ panic_init(bootstrap_master_host_port);
+#endif
+
+ if (root_name[0] == '\0')
+ root_name = DEFAULT_ROOT;
+
+ /*
+ * If the '-a' (ask) switch was specified, ask for
+ * the root device.
+ */
+
+ if (index(flag_string, 'a')) {
+ printf("root device? [%s] ", root_name);
+ safe_gets(new_root, sizeof(new_root));
+ }
+
+ /* NOTE(review): this copy is not bounded by sizeof(new_root) (16);
+    root_name comes from argv[4] or DEFAULT_ROOT. */
+ if (new_root[0] == '\0')
+ strcpy(new_root, root_name);
+
+ root_name = translate_root(new_root);
+
+ (void) strbuild(boot_script_name,
+ "/dev/",
+ root_name,
+ "/boot/servers.boot",
+ (char *)0);
+
+ /*
+ * If the '-q' (query) switch was specified, ask for the
+ * server boot script.
+ */
+
+ if (index(flag_string, 'q'))
+ ask_boot_script = TRUE;
+
+ /* Keep prompting until a boot script can be opened. */
+ while (TRUE) {
+ if (ask_boot_script) {
+ char new_boot_script[MAXPATHLEN];
+
+ printf("Server boot script? [%s] ", boot_script_name);
+ safe_gets(new_boot_script, sizeof(new_boot_script));
+ if (new_boot_script[0] != '\0')
+ strcpy(boot_script_name, new_boot_script);
+ }
+
+ result = open_file(bootstrap_master_device_port,
+ boot_script_name,
+ &scriptf);
+ if (result != 0) {
+ printf("Can't open server boot script %s: %d\n",
+ boot_script_name,
+ result);
+ ask_boot_script = TRUE;
+ continue;
+ }
+ break;
+ }
+
+ /*
+ * If the server boot script name was changed,
+ * then use the new device name as the root device.
+ */
+ {
+ char *dev, *end;
+ int len;
+
+ dev = boot_script_name;
+ if (strncmp(dev, "/dev/", 5) == 0)
+ dev += 5;
+ end = strchr(dev, '/');
+ len = end ? end-dev : strlen(dev);
+ /* NOTE(review): this writes into the buffer returned by
+    translate_root(); its capacity is not visible here. */
+ memcpy(root_name, dev, len);
+ root_name[len] = 0;
+ }
+
+ /*
+ * Set up the default pager.
+ */
+ partition_init();
+
+ {
+ /* Initialize boot script variables. */
+ /* NOTE(review): string values are passed as (int) casts of char
+    pointers, which assumes a pointer fits in an int. */
+ if (boot_script_set_variable ("host-port", VAL_PORT,
+ (int) bootstrap_master_host_port)
+ || boot_script_set_variable ("device-port", VAL_PORT,
+ (int) bootstrap_master_device_port)
+ || boot_script_set_variable ("root-device", VAL_STR,
+ (int) root_name)
+ || boot_script_set_variable ("boot-args", VAL_STR,
+ (int) flag_string)
+ || boot_script_define_function ("add-paging-file", VAL_NONE,
+ &script_paging_file)
+ || boot_script_define_function ("default-pager", VAL_NONE,
+ &script_default_pager)
+ )
+ panic ("bootstrap: error setting boot script variables");
+
+ parse_script (&scriptf);
+ close_file (&scriptf);
+ }
+
+ if (index (flag_string, 'd'))
+ {
+ char c;
+ printf ("Hit return to boot...");
+ /* NOTE(review): fgets stores at most maxlen-1 characters, so a
+    1-byte buffer leaves no room for any input -- confirm that this
+    call really waits for the user. */
+ safe_gets (&c, 1);
+ }
+
+ /*
+ * task_set_exception_port and task_set_bootstrap_port
+ * both require a send right.
+ */
+ (void) mach_port_insert_right(my_task, default_pager_bootstrap_port,
+ default_pager_bootstrap_port,
+ MACH_MSG_TYPE_MAKE_SEND);
+ (void) mach_port_insert_right(my_task, default_pager_exception_port,
+ default_pager_exception_port,
+ MACH_MSG_TYPE_MAKE_SEND);
+
+ /*
+ * Change our exception port.
+ */
+ (void) task_set_exception_port(my_task, default_pager_exception_port);
+
+ result = boot_script_exec ();
+
+ if (result)
+ boot_panic (result);
+
+#if 0
+ {
+ /*
+ * Delete the old stack (containing only the arguments).
+ */
+ vm_offset_t addr = (vm_offset_t) argv;
+
+ vm_offset_t r_addr;
+ vm_size_t r_size;
+ vm_prot_t r_protection, r_max_protection;
+ vm_inherit_t r_inheritance;
+ boolean_t r_is_shared;
+ memory_object_name_t r_object_name;
+ vm_offset_t r_offset;
+ kern_return_t kr;
+
+ r_addr = addr;
+
+ kr = vm_region(my_task,
+ &r_addr,
+ &r_size,
+ &r_protection,
+ &r_max_protection,
+ &r_inheritance,
+ &r_is_shared,
+ &r_object_name,
+ &r_offset);
+ if ((kr == KERN_SUCCESS) && MACH_PORT_VALID(r_object_name))
+ (void) mach_port_deallocate(my_task, r_object_name);
+ if ((kr == KERN_SUCCESS) &&
+ (r_addr <= addr) &&
+ ((r_protection & (VM_PROT_READ|VM_PROT_WRITE)) ==
+ (VM_PROT_READ|VM_PROT_WRITE)))
+ (void) vm_deallocate(my_task, r_addr, r_size);
+ }
+#endif
+
+ /* The script did not ask for a default pager: nothing left to do. */
+ if (! doing_default_pager)
+ task_terminate (mach_task_self ());
+
+ /*
+ * Become the default pager
+ */
+ default_pager();
+ /*NOTREACHED*/
+}
+
+/*
+ * Parse the boot script.
+ *
+ * Reads all of F into memory, splits it into lines at '\n' and passes
+ * each line, NUL-terminated in place, to boot_script_parse_line.  Any
+ * parse error is fatal (boot_panic).
+ *
+ * The buffer is one byte larger than the file, because the final line
+ * is terminated by storing a NUL at buf[f->f_size].  The buffer is
+ * deliberately not freed: NOTE(review) the parser may keep pointers
+ * into it -- confirm against boot_script.c before adding a free().
+ */
+parse_script (struct file *f)
+{
+    char *p, *line, *buf, *end;
+    int err;
+
+    buf = malloc (f->f_size + 1);
+    if (buf == 0)
+        panic ("bootstrap: no memory for boot script file");
+    if (read_file (f, 0, buf, f->f_size, 0))
+        panic ("bootstrap: error reading boot script file");
+
+    end = buf + f->f_size;
+    line = p = buf;
+    while (1)
+      {
+        /* Find the end of the current line (or of the file). */
+        while (p < end && *p != '\n')
+            p++;
+        *p = '\0';
+
+        err = boot_script_parse_line (line);
+        if (err)
+            boot_panic (err);
+
+        if (p == end)
+            break;
+        line = ++p;
+      }
+}
diff --git a/serverboot/def_pager_setup.c b/serverboot/def_pager_setup.c
new file mode 100644
index 00000000..fe6b33ab
--- /dev/null
+++ b/serverboot/def_pager_setup.c
@@ -0,0 +1,136 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1992-1989 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+#include <mach.h>
+
+#include <file_io.h>
+
+extern void *kalloc();
+
+/*
+ * Create a paging partition given a file name
+ */
+extern void create_paging_partition();
+
+/*
+ * Add FILE_NAME as a paging area.
+ *
+ * The file is opened through MASTER_DEVICE_PORT, a `struct file_direct'
+ * describing it is built, and that descriptor is handed to
+ * create_paging_partition.
+ *
+ * Returns the open_file error if the file cannot be opened,
+ * KERN_RESOURCE_SHORTAGE if the descriptor cannot be allocated, and
+ * otherwise the (zero) result of add_file_direct.  Failures of
+ * open_file_direct and add_file_direct are fatal (panic).
+ */
+kern_return_t
+add_paging_file(master_device_port, file_name)
+    mach_port_t master_device_port;
+    char *file_name;
+{
+    register struct file_direct *fdp;
+    register kern_return_t result;
+    struct file pfile;
+    boolean_t isa_file;
+
+    bzero((char *) &pfile, sizeof(struct file));
+
+    result = open_file(master_device_port, file_name, &pfile);
+    if (result != KERN_SUCCESS)
+        return result;
+
+    fdp = (struct file_direct *) kalloc(sizeof *fdp);
+    if (fdp == 0) {
+        /* Do not bzero a null pointer; give the file back and fail. */
+        close_file(&pfile);
+        return KERN_RESOURCE_SHORTAGE;
+    }
+    bzero((char *) fdp, sizeof *fdp);
+
+    isa_file = file_is_structured(&pfile);
+
+    result = open_file_direct(pfile.f_dev, fdp, isa_file);
+    if (result)
+        panic("Can't open paging file %s\n", file_name);
+
+    result = add_file_direct(fdp, &pfile);
+    if (result)
+        panic("Can't read disk addresses: %d\n", result);
+
+    close_file(&pfile);
+
+    /*
+     * Set up the default paging partition
+     */
+    create_paging_partition(file_name, fdp, isa_file);
+
+    return result;
+}
+
+/*
+ * Tear down the paging partition registered under FILE_NAME.
+ *
+ * If destroy_paging_partition succeeds, the file_direct descriptor it
+ * hands back is released as well.  Returns the status of
+ * destroy_paging_partition.
+ */
+kern_return_t
+remove_paging_file(file_name)
+    char *file_name;
+{
+    struct file_direct *direct = 0;
+    kern_return_t status;
+
+    status = destroy_paging_partition(file_name, &direct);
+    if (status != KERN_SUCCESS)
+        return status;
+
+    remove_file_direct(direct);
+    kfree(direct, sizeof(*direct));
+    return status;
+}
+
+/*
+ * Set up default pager
+ */
+extern char *strbuild();
+
+/*
+ * Add SERVER_DIR_NAME "/paging_file" as the paging file.  Each time
+ * that fails, report the error and prompt for another file name; an
+ * empty answer gives up.
+ *
+ * Returns TRUE once a paging file has been added (the caller becomes
+ * the default pager later), FALSE if the user gave up.
+ */
+boolean_t
+default_pager_setup(master_device_port, server_dir_name)
+    mach_port_t master_device_port;
+    char *server_dir_name;
+{
+    char paging_file_name[MAXPATHLEN+1];
+    register kern_return_t status;
+
+    (void) strbuild(paging_file_name,
+                    server_dir_name,
+                    "/paging_file",
+                    (char *)0);
+
+    for (;;) {
+        status = add_paging_file(master_device_port, paging_file_name);
+        if (status == KERN_SUCCESS)
+            return TRUE;
+
+        printf("Can't open paging file %s: %d\n",
+               paging_file_name,
+               status);
+
+        /* Start from an empty buffer before asking for a new name. */
+        bzero(paging_file_name, sizeof(paging_file_name));
+        printf("Paging file name ? ");
+        safe_gets(paging_file_name, sizeof(paging_file_name));
+
+        if (paging_file_name[0] == 0) {
+            printf("*** WARNING: running without paging area!\n");
+            return FALSE;
+        }
+    }
+}
diff --git a/serverboot/default_pager.c b/serverboot/default_pager.c
new file mode 100644
index 00000000..6c44bd0d
--- /dev/null
+++ b/serverboot/default_pager.c
@@ -0,0 +1,3536 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1993-1989 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * Default pager. Pages to paging partition.
+ *
+ * MUST BE ABLE TO ALLOCATE WIRED-DOWN MEMORY!!!
+ */
+
+#include <mach.h>
+#include <mach/message.h>
+#include <mach/notify.h>
+#include <mach/mig_errors.h>
+#include <mach/thread_switch.h>
+#include <mach/task_info.h>
+#include <mach/default_pager_types.h>
+
+#include <cthreads.h>
+
+#include <device/device_types.h>
+#include <device/device.h>
+
+#include <queue.h>
+#include <wiring.h>
+
+#include <assert.h>
+#include <stdio.h>
+
+#include "file_io.h"
+
+#define debug 1
+
+extern void *kalloc();
+
+static char my_name[] = "(default pager):";
+
+/*
+ * parallel vs serial switch
+ */
+#define PARALLEL 1
+
+#if 0
+#define CHECKSUM 1
+#endif
+
+#define USE_PRECIOUS 1
+
+/* Convert a page count to bytes (ptoa) and bytes to a page count (atop);
+   atop truncates. */
+#define ptoa(p) ((p)*vm_page_size)
+#define atop(a) ((a)/vm_page_size)
+
+/*
+
+ */
+/*
+ * Bitmap allocation.
+ */
+typedef unsigned int bm_entry_t;
+/* Bits per bitmap word, and a word with every bit set.
+   NOTE(review): both assume a 32-bit unsigned int -- confirm per target. */
+#define NB_BM 32
+#define BM_MASK 0xffffffff
+
+/* Number of B-sized units needed to hold A (rounds up). */
+#define howmany(a,b) (((a) + (b) - 1)/(b))
+
+/*
+ * Value to indicate no block assigned
+ */
+#define NO_BLOCK ((vm_offset_t)-1)
+
+/*
+ * 'Partition' structure for each paging area.
+ * Controls allocation of blocks within paging area.
+ */
+struct part {
+ struct mutex p_lock; /* for bitmap/free */
+ vm_size_t total_size; /* total number of blocks */
+ vm_size_t free; /* number of blocks free */
+ /* part_id() of the name the partition was created with */
+ unsigned int id; /* named lookup */
+ /* one bit per block; pager_alloc_page sets a block's bit and
+    pager_dealloc_page clears it */
+ bm_entry_t *bitmap; /* allocation map */
+ boolean_t going_away; /* destroy attempt in progress */
+ struct file_direct *file; /* file paged to */
+};
+typedef struct part *partition_t;
+
+/*
+ * Table of all partitions.  n_partitions is the number of slots in
+ * partition_list, not the number in use: an unused slot holds 0.
+ */
+struct {
+ struct mutex lock;
+ int n_partitions;
+ partition_t *partition_list;/* array, for quick mapping */
+} all_partitions; /* list of all such */
+
+/* Index of a slot in all_partitions.partition_list. */
+typedef unsigned char p_index_t;
+
+/* The all-ones index value is reserved to mean "no partition". */
+#define P_INDEX_INVALID ((p_index_t)-1)
+
+#define no_partition(x) ((x) == P_INDEX_INVALID)
+
+/*
+ * Return the partition stored in slot X of the global table (0 if the
+ * slot is unused).  An out-of-range X is fatal.
+ */
+partition_t partition_of(x)
+    int x;
+{
+    if (x < 0 || x >= all_partitions.n_partitions)
+        panic("partition_of x%x", x);
+
+    return all_partitions.partition_list[x];
+}
+
+/*
+ * Store P in slot X of the global partition table.
+ * An out-of-range X is fatal.
+ */
+void set_partition_of(x, p)
+    int x;
+    partition_t p;
+{
+    if (x < 0 || x >= all_partitions.n_partitions)
+        panic("set_partition_of x%x", x);
+
+    all_partitions.partition_list[x] = p;
+}
+
+/*
+ * Reduce a (file) NAME to a small integer id, so that the possibly
+ * long name itself need not be kept.
+ *
+ * The id is the sum of the name's bytes shifted left by 8, OR-ed with
+ * the XOR of the same bytes.
+ */
+unsigned int
+part_id(name)
+    unsigned char *name;
+{
+    register unsigned int sum = 0, parity = 0;
+    register unsigned char *cp;
+
+    for (cp = name; *cp != 0; cp++) {
+        sum += *cp;
+        parity ^= *cp;
+    }
+    return (sum << 8) | parity;
+}
+
+/*
+ * Reset the global partition table: no slots yet, lock initialized.
+ */
+partition_init()
+{
+    all_partitions.n_partitions = 0;
+    mutex_init(&all_partitions.lock);
+}
+
+/*
+ * Build a partition descriptor for the paging area described by FDP.
+ *
+ * The size in pages is computed from fdp->fd_size * fdp->fd_bsize; the
+ * allocation bitmap gets one bit per page and starts out all clear,
+ * i.e. every page free.  NAME is recorded only as its part_id().
+ * The kalloc results are not checked.
+ */
+static partition_t
+new_partition (const char *name, struct file_direct *fdp)
+{
+    register vm_size_t npages, map_bytes;
+    register partition_t p;
+
+    npages = atop(fdp->fd_size * fdp->fd_bsize);
+    map_bytes = howmany(npages, NB_BM) * sizeof(bm_entry_t);
+
+    p = (partition_t) kalloc(sizeof(struct part));
+    mutex_init(&p->p_lock);
+    p->id = part_id(name);
+    p->file = fdp;
+    p->going_away = FALSE;
+    p->total_size = npages;
+    p->free = npages;
+
+    p->bitmap = (bm_entry_t *)kalloc(map_bytes);
+    bzero((char *)p->bitmap, map_bytes);
+
+    return p;
+}
+
+/*
+ * Create a partition descriptor,
+ * add it to the list of all such.
+ * The partition size is derived from FDP (see new_partition).
+ * ISA_FILE is only used by the disabled diagnostic below.
+ */
+void
+create_paging_partition(const char *name,
+ struct file_direct *fdp, int isa_file)
+{
+ register partition_t part;
+
+ part = new_partition (name, fdp);
+
+ mutex_lock(&all_partitions.lock);
+ {
+ register int i;
+
+ /* Look for an unused slot in the table. */
+ for (i = 0; i < all_partitions.n_partitions; i++)
+ if (partition_of(i) == 0) break;
+
+ /* None free: grow the table to 2 slots, or to twice its size. */
+ if (i == all_partitions.n_partitions) {
+ register partition_t *new_list, *old_list;
+ register int n;
+
+ n = i ? (i<<1) : 2;
+ new_list = (partition_t *)
+ kalloc( n * sizeof(partition_t) );
+ /* NOTE(review): new_list is used right after this call, so
+    no_paging_space(TRUE) presumably does not return -- confirm. */
+ if (new_list == 0) no_paging_space(TRUE);
+ bzero(new_list, n*sizeof(partition_t));
+ if (i) {
+ old_list = all_partitions.partition_list;
+ bcopy(old_list, new_list, i*sizeof(partition_t));
+ }
+ all_partitions.partition_list = new_list;
+ all_partitions.n_partitions = n;
+ /* old_list is only set, and only freed, when there was a table */
+ if (i) kfree(old_list, i*sizeof(partition_t));
+ }
+ set_partition_of(i, part);
+ }
+ mutex_unlock(&all_partitions.lock);
+
+#if 0
+ printf("%s Added paging %s %s\n", my_name,
+ (isa_file) ? "file" : "device", name);
+#endif
+ overcommitted(TRUE, part->free);
+}
+
+/*
+ * Choose the most appropriate default partition
+ * for an object of SIZE bytes.
+ * Return the partition locked, unless
+ * the object has no CUR_PARTition.
+ *
+ * Scans the table in index order, skipping CUR_PART itself, unused
+ * slots and partitions that are going away, and takes the first one
+ * whose free space is at least SIZE.  Returns its index, or
+ * P_INDEX_INVALID if none qualifies.
+ *
+ * Locking: when CUR_PART is a valid index, the chosen partition's
+ * p_lock is still held on return; when CUR_PART is P_INDEX_INVALID it
+ * has been released.  all_partitions.lock is always released.
+ */
+p_index_t
+choose_partition(size, cur_part)
+ unsigned int size;
+ register p_index_t cur_part;
+{
+ register partition_t part;
+ register boolean_t found = FALSE;
+ register int i;
+
+ mutex_lock(&all_partitions.lock);
+ for (i = 0; i < all_partitions.n_partitions; i++) {
+
+ /* the undesirable one ? */
+ if (i == cur_part)
+ continue;
+
+ /* one that was removed ? */
+ if ((part = partition_of(i)) == 0)
+ continue;
+
+ /* one that is being removed ? */
+ if (part->going_away)
+ continue;
+
+ /* is it big enough ? */
+ mutex_lock(&part->p_lock);
+ if (ptoa(part->free) >= size) {
+ if (cur_part != P_INDEX_INVALID) {
+ /* return with part->p_lock still held */
+ mutex_unlock(&all_partitions.lock);
+ return (p_index_t)i;
+ } else
+ found = TRUE;
+ }
+ mutex_unlock(&part->p_lock);
+
+ if (found) break;
+ }
+ mutex_unlock(&all_partitions.lock);
+ return (found) ? (p_index_t)i : P_INDEX_INVALID;
+}
+
+/*
+ * Allocate a page in a paging partition
+ * The partition is returned unlocked.
+ *
+ * PINDEX selects the partition.  If LOCK_IT is nonzero the partition
+ * lock is taken here; in either case it is released before returning
+ * once the partition has been looked up, so a caller passing
+ * LOCK_IT == 0 is expected to already hold it.
+ *
+ * Returns the partition-relative page number of the first free block,
+ * or NO_BLOCK if PINDEX is invalid, the partition is gone or going
+ * away, or it has no free blocks.
+ */
+vm_offset_t
+pager_alloc_page(pindex, lock_it)
+    p_index_t pindex;
+    int lock_it;
+{
+    register int bm_e;
+    register int bit;
+    register int limit;
+    register bm_entry_t *bm;
+    partition_t part;
+    static char here[] = "%spager_alloc_page";
+
+    if (no_partition(pindex))
+        return (NO_BLOCK);
+    part = partition_of(pindex);
+
+    /* unlikely, but possible deadlock against destroy_partition */
+    if (!part || part->going_away)
+        return (NO_BLOCK);
+
+    if (lock_it)
+        mutex_lock(&part->p_lock);
+
+    if (part->free == 0) {
+        /* out of paging space */
+        mutex_unlock(&part->p_lock);
+        return (NO_BLOCK);
+    }
+
+    /* Find the first bitmap word that still has a clear bit. */
+    limit = howmany(part->total_size, NB_BM);
+    bm = part->bitmap;
+    for (bm_e = 0; bm_e < limit; bm_e++, bm++)
+        if (*bm != BM_MASK)
+            break;
+
+    /* free != 0 but every word is full: the bookkeeping is corrupt */
+    if (bm_e == limit)
+        panic(here,my_name);
+
+    /*
+     * Find and set the proper bit
+     */
+    {
+        register bm_entry_t b = *bm;
+
+        /* Shift an unsigned one: 1<<31 on a signed int is undefined. */
+        for (bit = 0; bit < NB_BM; bit++)
+            if ((b & ((bm_entry_t)1 << bit)) == 0)
+                break;
+        if (bit == NB_BM)
+            panic(here,my_name);
+
+        *bm = b | ((bm_entry_t)1 << bit);
+        part->free--;
+    }
+
+    mutex_unlock(&part->p_lock);
+
+    return (bm_e*NB_BM+bit);
+}
+
+/*
+ * Deallocate a page in a paging partition
+ *
+ * Clears the bitmap bit for partition-relative page PAGE of partition
+ * PINDEX and counts the page as free again.  The partition lock is
+ * taken around the update only if LOCK_IT is nonzero.
+ *
+ * An invalid PINDEX, an empty partition slot or an out-of-range PAGE
+ * is fatal.
+ */
+void
+pager_dealloc_page(pindex, page, lock_it)
+    p_index_t pindex;
+    register vm_offset_t page;
+    int lock_it;
+{
+    register partition_t part;
+    register int bit, bm_e;
+
+    /* be paranoid */
+    if (no_partition(pindex))
+        panic("%sdealloc_page",my_name);
+    part = partition_of(pindex);
+
+    /* partition_of yields 0 for an unused slot; do not dereference it */
+    if (part == 0 || page >= part->total_size)
+        panic("%sdealloc_page",my_name);
+
+    bm_e = page / NB_BM;
+    bit = page % NB_BM;
+
+    if (lock_it)
+        mutex_lock(&part->p_lock);
+
+    /* Shift an unsigned one: 1<<31 on a signed int is undefined. */
+    part->bitmap[bm_e] &= ~((bm_entry_t)1 << bit);
+    part->free++;
+
+    if (lock_it)
+        mutex_unlock(&part->p_lock);
+}
+
+/*
+
+ */
+/*
+ * Allocation info for each paging object.
+ *
+ * Most operations, even pager_write_offset and pager_put_checksum,
+ * just need a read lock. Higher-level considerations prevent
+ * conflicting operations on a single page. The lock really protects
+ * the underlying size and block map memory, so pager_extend needs a
+ * write lock.
+ *
+ * An object can now span multiple paging partitions. The allocation
+ * info we keep is a pair (offset,p_index) where the index is in the
+ * array of all partition ptrs, and the offset is partition-relative.
+ * Size wise we are doing ok fitting the pair into a single integer:
+ * the offset really is in pages so we have log2(vm_page_size) bits
+ * available for the partition index.
+ */
+#define DEBUG_READER_CONFLICTS 0
+
+#if DEBUG_READER_CONFLICTS
+int default_pager_read_conflicts = 0;
+#endif
+
+/*
+ * One block-map entry.  In a direct (or second-level) map the `block'
+ * view is used: a partition-relative page offset plus the index of the
+ * partition holding it.  In the top level of a two-level map the
+ * `indirect' view points at a second-level map.
+ */
+union dp_map {
+
+ struct {
+ unsigned int p_offset : 24,
+ p_index : 8;
+ } block;
+
+ union dp_map *indirect;
+};
+typedef union dp_map *dp_map_t;
+
+/* quick check for part==block==invalid */
+/* An unassigned entry is marked by storing NO_BLOCK through the
+   `indirect' member. */
+#define no_block(e) ((e).indirect == (dp_map_t)NO_BLOCK)
+#define invalidate_block(e) ((e).indirect = (dp_map_t)NO_BLOCK)
+
+/*
+ * Per-object paging state: the block map, its size in pages, and the
+ * partition the object was attached to by pager_alloc.
+ */
+struct dpager {
+    struct mutex lock; /* lock for extending block map */
+                       /* XXX should be read-write lock */
+#if DEBUG_READER_CONFLICTS
+    int readers;
+    boolean_t writer;
+#endif
+    dp_map_t map; /* block map */
+    vm_size_t size; /* size of paging object, in pages */
+    p_index_t cur_partition;
+#ifdef CHECKSUM
+    vm_offset_t *checksum; /* checksum - parallel to block map */
+#define NO_CHECKSUM ((vm_offset_t)-1)
+#endif /* CHECKSUM */
+};
+typedef struct dpager *dpager_t;
+
+/*
+ * A paging object uses either a one- or a two-level map of offsets
+ * into a paging partition.
+ *
+ * An object of more than PAGEMAP_ENTRIES pages gets a two-level map:
+ * a top-level array with one pointer per group of PAGEMAP_ENTRIES
+ * pages.
+ */
+#define PAGEMAP_ENTRIES 64
+ /* number of pages in a second-level map */
+/* Bytes needed for a one-level map of NPGS pages. */
+#define PAGEMAP_SIZE(npgs) ((npgs)*sizeof(vm_offset_t))
+
+/* Entries, and bytes, in the top level of a two-level map of NPGS pages. */
+#define INDIRECT_PAGEMAP_ENTRIES(npgs) \
+    ((((npgs)-1)/PAGEMAP_ENTRIES) + 1)
+#define INDIRECT_PAGEMAP_SIZE(npgs) \
+    (INDIRECT_PAGEMAP_ENTRIES(npgs) * sizeof(vm_offset_t *))
+/* True if an object of SIZE pages needs a two-level map.  The argument
+   is parenthesized so that an expression can be passed safely. */
+#define INDIRECT_PAGEMAP(size) \
+    ((size) > PAGEMAP_ENTRIES)
+
+/* Round NPGS up to a multiple of PAGEMAP_ENTRIES (a power of two). */
+#define ROUNDUP_TO_PAGEMAP(npgs) \
+    (((npgs) + PAGEMAP_ENTRIES - 1) & ~(PAGEMAP_ENTRIES - 1))
+
+/*
+ * Object sizes are rounded up to the next power of 2,
+ * unless they are bigger than a given maximum size.
+ */
+vm_size_t max_doubled_size = 4 * 1024 * 1024; /* 4 meg */
+
+/*
+ * Attach a new paging object to a paging partition
+ *
+ * Initializes PAGER for an object of SIZE bytes whose current
+ * partition is PART.  The size is converted to pages and rounded up:
+ * to a power of two while it does not exceed max_doubled_size, and to
+ * a multiple of PAGEMAP_ENTRIES beyond that.  A block map of that size
+ * is allocated with every entry unassigned: NO_BLOCK in a one-level
+ * map, null second-level pointers in a two-level map.
+ *
+ * The kalloc result is not checked.
+ */
+void
+pager_alloc(pager, part, size)
+    register dpager_t pager;
+    p_index_t part;
+    register vm_size_t size; /* in BYTES */
+{
+    register int i;
+    register dp_map_t mapptr, emapptr;
+
+    mutex_init(&pager->lock);
+#if DEBUG_READER_CONFLICTS
+    pager->readers = 0;
+    pager->writer = FALSE;
+#endif
+    pager->cur_partition = part;
+
+    /*
+     * Convert byte size to number of pages, then increase to the nearest
+     * power of 2.
+     */
+    size = atop(size);
+    if (size <= atop(max_doubled_size)) {
+        i = 1;
+        while (i < size)
+            i <<= 1;
+        size = i;
+    } else
+        size = ROUNDUP_TO_PAGEMAP(size);
+
+    /*
+     * Allocate and initialize the block map
+     */
+    {
+        register vm_size_t alloc_size;
+        dp_map_t init_value;
+
+        if (INDIRECT_PAGEMAP(size)) {
+            alloc_size = INDIRECT_PAGEMAP_SIZE(size);
+            init_value = (dp_map_t)0;
+        } else {
+            alloc_size = PAGEMAP_SIZE(size);
+            init_value = (dp_map_t)NO_BLOCK;
+        }
+
+        /* Fill the map from its last entry down to the first. */
+        mapptr = (dp_map_t) kalloc(alloc_size);
+        for (emapptr = &mapptr[(alloc_size-1) / sizeof(vm_offset_t)];
+             emapptr >= mapptr;
+             emapptr--)
+            emapptr->indirect = init_value;
+
+    }
+    pager->map = mapptr;
+    pager->size = size;
+
+#ifdef CHECKSUM
+    if (INDIRECT_PAGEMAP(size)) {
+        mapptr = (vm_offset_t *)
+                 kalloc(INDIRECT_PAGEMAP_SIZE(size));
+        for (i = INDIRECT_PAGEMAP_ENTRIES(size); --i >= 0;)
+            mapptr[i] = 0;
+    } else {
+        mapptr = (vm_offset_t *) kalloc(PAGEMAP_SIZE(size));
+        for (i = 0; i < size; i++)
+            mapptr[i] = NO_CHECKSUM;
+    }
+    pager->checksum = mapptr;
+#endif /* CHECKSUM */
+}
+
+/*
+ * Return size (in bytes) of space actually allocated to this pager.
+ * The pager is read-locked.
+ *
+ * Counts the block-map entries that have a block assigned, looking
+ * through the second-level maps when the object uses a two-level map.
+ */
+
+vm_size_t
+pager_allocated(pager)
+    register dpager_t pager;
+{
+    register dp_map_t top = pager->map;
+    vm_size_t npages = pager->size;    /* in pages */
+    vm_size_t in_use = 0;              /* allocated, in pages */
+    register unsigned int i, j;
+
+    if (!INDIRECT_PAGEMAP(npages)) {
+        for (i = 0; i < npages; i++)
+            if (!no_block(top[i]))
+                in_use++;
+        return ptoa(in_use);
+    }
+
+    for (i = 0; i < INDIRECT_PAGEMAP_ENTRIES(npages); i++) {
+        register dp_map_t leaf = top[i].indirect;
+
+        /* second-level map never allocated: nothing to count */
+        if (leaf == 0)
+            continue;
+
+        for (j = 0; j < PAGEMAP_ENTRIES; j++)
+            if (!no_block(leaf[j]))
+                in_use++;
+    }
+
+    return ptoa(in_use);
+}
+
+/*
+ * Find offsets (in the object) of pages actually allocated to this pager.
+ * Returns the number of allocated pages, whether or not they all fit.
+ * The pager is read-locked.
+ *
+ * At most `numpages` entries of `pages` are filled in; each receives the
+ * byte offset of one allocated page in its dpp_offset field.
+ *
+ * The running offset must advance by one page for every map entry that
+ * is examined, so the per-page loops are braced to keep the increment
+ * inside them.
+ */
+
+unsigned int
+pager_pages(pager, pages, numpages)
+	dpager_t			pager;
+	register default_pager_page_t	*pages;
+	unsigned int			numpages;
+{
+	vm_size_t	size;
+	dp_map_t	map, emap;
+	unsigned int	actual;
+	vm_offset_t	offset;
+
+	size = pager->size;	/* in pages */
+	map = pager->map;
+	actual = 0;
+	offset = 0;
+
+	if (INDIRECT_PAGEMAP(size)) {
+	    for (emap = &map[INDIRECT_PAGEMAP_ENTRIES(size)];
+		 map < emap; map++) {
+
+		register dp_map_t	map2, emap2;
+
+		if ((map2 = map->indirect) == 0) {
+		    /* Whole second-level block absent: skip its range. */
+		    offset += vm_page_size * PAGEMAP_ENTRIES;
+		    continue;
+		}
+		for (emap2 = &map2[PAGEMAP_ENTRIES];
+		     map2 < emap2; map2++) {
+		    if ( ! no_block(*map2) ) {
+			/* Keep counting even when the array is full. */
+			if (actual++ < numpages)
+			    pages++->dpp_offset = offset;
+		    }
+		    offset += vm_page_size;
+		}
+	    }
+	} else {
+	    for (emap = &map[size]; map < emap; map++) {
+		if ( ! no_block(*map) ) {
+		    if (actual++ < numpages)
+			pages++->dpp_offset = offset;
+		}
+		offset += vm_page_size;
+	    }
+	}
+	return actual;
+}
+
+/*
+ * Extend the map for a paging object.
+ *
+ * XXX This implementation can allocate an arbitrary large amount
+ * of wired memory when extending a big block map. Because vm-privileged
+ * threads call pager_extend, this can crash the system by exhausting
+ * system memory.
+ *
+ * `new_size` is the requested size in pages.  The pager lock is taken
+ * here and held for the whole operation.  The requested size is first
+ * grown: up to atop(max_doubled_size) the current size is doubled until
+ * it covers the request, otherwise the request is rounded up to a
+ * multiple of PAGEMAP_ENTRIES.  One of three paths then runs:
+ *   1. the map is already indirect: a larger first-level block is
+ *      allocated and the old pointers are copied into it;
+ *   2. the map is direct and the new size needs an indirect map: the
+ *      old entries are copied into a full-size second-level block, which
+ *      becomes entry 0 of a new first-level block;
+ *   3. the map stays direct: a larger direct block is allocated.
+ * In each path the old block is kfree'd and pager->map / pager->size
+ * are updated before the lock is dropped.
+ *
+ * NOTE(review): kalloc() results are not checked on any path.
+ * NOTE(review): the doubling loop starts from old_size, so it presumably
+ * relies on pager->size never being 0 - confirm.
+ */
+void
+pager_extend(pager, new_size)
+ register dpager_t pager;
+ register vm_size_t new_size; /* in pages */
+{
+ register dp_map_t new_mapptr;
+ register dp_map_t old_mapptr;
+ register int i;
+ register vm_size_t old_size;
+
+ mutex_lock(&pager->lock); /* XXX lock_write */
+#if DEBUG_READER_CONFLICTS
+ pager->writer = TRUE;
+#endif
+ /*
+ * Double current size until we cover new size.
+ * If object is 'too big' just use new size.
+ */
+ old_size = pager->size;
+
+ if (new_size <= atop(max_doubled_size)) {
+ i = old_size;
+ while (i < new_size)
+ i <<= 1;
+ new_size = i;
+ } else
+ new_size = ROUNDUP_TO_PAGEMAP(new_size);
+
+ if (INDIRECT_PAGEMAP(old_size)) {
+ /*
+ * Pager already uses two levels. Allocate
+ * a larger indirect block.
+ */
+ new_mapptr = (dp_map_t)
+ kalloc(INDIRECT_PAGEMAP_SIZE(new_size));
+ old_mapptr = pager->map;
+ for (i = 0; i < INDIRECT_PAGEMAP_ENTRIES(old_size); i++)
+ new_mapptr[i] = old_mapptr[i]; /* second-level blocks are shared, not copied */
+ for (; i < INDIRECT_PAGEMAP_ENTRIES(new_size); i++)
+ new_mapptr[i].indirect = (dp_map_t)0;
+ kfree((char *)old_mapptr, INDIRECT_PAGEMAP_SIZE(old_size));
+ pager->map = new_mapptr;
+ pager->size = new_size;
+#ifdef CHECKSUM
+ new_mapptr = (vm_offset_t *)
+ kalloc(INDIRECT_PAGEMAP_SIZE(new_size));
+ old_mapptr = pager->checksum;
+ for (i = 0; i < INDIRECT_PAGEMAP_ENTRIES(old_size); i++)
+ new_mapptr[i] = old_mapptr[i];
+ for (; i < INDIRECT_PAGEMAP_ENTRIES(new_size); i++)
+ new_mapptr[i] = 0;
+ kfree((char *)old_mapptr, INDIRECT_PAGEMAP_SIZE(old_size));
+ pager->checksum = new_mapptr;
+#endif CHECKSUM
+#if DEBUG_READER_CONFLICTS
+ pager->writer = FALSE;
+#endif
+ mutex_unlock(&pager->lock);
+ return;
+ }
+
+ if (INDIRECT_PAGEMAP(new_size)) {
+ /*
+ * Changing from direct map to indirect map.
+ * Allocate both indirect and direct map blocks,
+ * since second-level (direct) block must be
+ * full size (PAGEMAP_SIZE(PAGEMAP_ENTRIES)).
+ */
+
+ /*
+ * Allocate new second-level map first.
+ */
+ new_mapptr = (dp_map_t) kalloc(PAGEMAP_SIZE(PAGEMAP_ENTRIES));
+ old_mapptr = pager->map;
+ for (i = 0; i < old_size; i++)
+ new_mapptr[i] = old_mapptr[i];
+ for (; i < PAGEMAP_ENTRIES; i++)
+ invalidate_block(new_mapptr[i]);
+ kfree((char *)old_mapptr, PAGEMAP_SIZE(old_size));
+ old_mapptr = new_mapptr; /* from here on, old_mapptr is the new second-level block */
+
+ /*
+ * Now allocate indirect map.
+ */
+ new_mapptr = (dp_map_t)
+ kalloc(INDIRECT_PAGEMAP_SIZE(new_size));
+ new_mapptr[0].indirect = old_mapptr;
+ for (i = 1; i < INDIRECT_PAGEMAP_ENTRIES(new_size); i++)
+ new_mapptr[i].indirect = 0;
+ pager->map = new_mapptr;
+ pager->size = new_size;
+#ifdef CHECKSUM
+ /*
+ * Allocate new second-level map first.
+ */
+ new_mapptr = (vm_offset_t *)kalloc(PAGEMAP_SIZE(PAGEMAP_ENTRIES));
+ old_mapptr = pager->checksum;
+ for (i = 0; i < old_size; i++)
+ new_mapptr[i] = old_mapptr[i];
+ for (; i < PAGEMAP_ENTRIES; i++)
+ new_mapptr[i] = NO_CHECKSUM;
+ kfree((char *)old_mapptr, PAGEMAP_SIZE(old_size));
+ old_mapptr = new_mapptr;
+
+ /*
+ * Now allocate indirect map.
+ */
+ new_mapptr = (vm_offset_t *)
+ kalloc(INDIRECT_PAGEMAP_SIZE(new_size));
+ new_mapptr[0] = (vm_offset_t) old_mapptr;
+ for (i = 1; i < INDIRECT_PAGEMAP_ENTRIES(new_size); i++)
+ new_mapptr[i] = 0;
+ pager->checksum = new_mapptr;
+#endif CHECKSUM
+#if DEBUG_READER_CONFLICTS
+ pager->writer = FALSE;
+#endif
+ mutex_unlock(&pager->lock);
+ return;
+ }
+ /*
+ * Enlarging a direct block.
+ */
+ new_mapptr = (dp_map_t) kalloc(PAGEMAP_SIZE(new_size));
+ old_mapptr = pager->map;
+ for (i = 0; i < old_size; i++)
+ new_mapptr[i] = old_mapptr[i];
+ for (; i < new_size; i++)
+ invalidate_block(new_mapptr[i]); /* new tail has no disk blocks */
+ kfree((char *)old_mapptr, PAGEMAP_SIZE(old_size));
+ pager->map = new_mapptr;
+ pager->size = new_size;
+#ifdef CHECKSUM
+ new_mapptr = (vm_offset_t *)
+ kalloc(PAGEMAP_SIZE(new_size));
+ old_mapptr = pager->checksum;
+ for (i = 0; i < old_size; i++)
+ new_mapptr[i] = old_mapptr[i];
+ for (; i < new_size; i++)
+ new_mapptr[i] = NO_CHECKSUM;
+ kfree((char *)old_mapptr, PAGEMAP_SIZE(old_size));
+ pager->checksum = new_mapptr;
+#endif CHECKSUM
+#if DEBUG_READER_CONFLICTS
+ pager->writer = FALSE;
+#endif
+ mutex_unlock(&pager->lock);
+}
+
+/*
+ * Given an offset within a paging object, find the
+ * corresponding block within the paging partition.
+ * Return NO_BLOCK if none allocated.
+ *
+ * `offset` is a byte offset into the object.  An offset beyond the
+ * current map is reported with printf and treated as "no block".
+ * The pager lock is taken here and is released on every return path,
+ * including the out-of-range one.
+ */
+union dp_map
+pager_read_offset(pager, offset)
+	register dpager_t	pager;
+	vm_offset_t		offset;
+{
+	register vm_offset_t	f_page;
+	union dp_map		pager_offset;
+
+	f_page = atop(offset);
+
+#if DEBUG_READER_CONFLICTS
+	if (pager->readers > 0)
+	    default_pager_read_conflicts++;	/* would have proceeded with
+						   read/write lock */
+#endif
+	mutex_lock(&pager->lock);	/* XXX lock_read */
+#if DEBUG_READER_CONFLICTS
+	pager->readers++;
+#endif
+	if (f_page >= pager->size)
+	  {
+	    printf ("%spager_read_offset pager %x: bad page %d >= size %d",
+		    my_name, pager, f_page, pager->size);
+#if 0
+	    panic("%spager_read_offset",my_name);
+#endif
+	    /*
+	     * Hand back a properly typed "no block" value and leave
+	     * through the common exit so the lock is not leaked.
+	     */
+	    invalidate_block(pager_offset);
+	    goto out;
+	  }
+
+	if (INDIRECT_PAGEMAP(pager->size)) {
+	    register dp_map_t	mapptr;
+
+	    mapptr = pager->map[f_page/PAGEMAP_ENTRIES].indirect;
+	    if (mapptr == 0)
+		invalidate_block(pager_offset);	/* no second-level block */
+	    else
+		pager_offset = mapptr[f_page%PAGEMAP_ENTRIES];
+	}
+	else {
+	    pager_offset = pager->map[f_page];
+	}
+
+out:
+#if DEBUG_READER_CONFLICTS
+	pager->readers--;
+#endif
+	mutex_unlock(&pager->lock);
+	return (pager_offset);
+}
+
+#if USE_PRECIOUS
+/*
+ * Release a single disk block.
+ *
+ * `offset` is a byte offset into the object; it is converted to a page
+ * index in place.  Under the pager lock the map entry for that page is
+ * copied and then invalidated; after the lock is dropped the copied
+ * block is handed to pager_dealloc_page().
+ *
+ * NOTE(review): the page index is not checked against pager->size, and
+ * in the indirect case the second-level pointer is dereferenced without
+ * a null check - presumably callers only pass pages that were just
+ * found by a successful lookup; confirm.
+ * NOTE(review): no return type is declared and no value is returned.
+ */
+pager_release_offset(pager, offset)
+ register dpager_t pager;
+ vm_offset_t offset;
+{
+ register union dp_map entry;
+
+ offset = atop(offset); /* from here on, a page index */
+
+ mutex_lock(&pager->lock); /* XXX lock_read */
+
+ if (INDIRECT_PAGEMAP(pager->size)) {
+ register dp_map_t mapptr;
+
+ mapptr = pager->map[offset / PAGEMAP_ENTRIES].indirect;
+ entry = mapptr[offset % PAGEMAP_ENTRIES];
+ invalidate_block(mapptr[offset % PAGEMAP_ENTRIES]);
+ } else {
+ entry = pager->map[offset];
+ invalidate_block(pager->map[offset]);
+ }
+
+ mutex_unlock(&pager->lock);
+
+ pager_dealloc_page(entry.block.p_index, entry.block.p_offset, TRUE);
+}
+#endif /*USE_PRECIOUS*/
+
+
+/*
+ * Move a page from one partition to another
+ * New partition is locked, old partition is
+ * locked unless LOCK_OLD says otherwise.
+ *
+ * `block` names the page's current partition index and page offset.
+ * Returns the new location, or an invalidated block when
+ * choose_partition() finds no other partition with room.
+ * Panics if the page allocation in the chosen partition, the read
+ * from the old partition, or the write to the new one fails.
+ *
+ * NOTE(review): the old block is released before the data has been
+ * written to the new partition.
+ */
+union dp_map
+pager_move_page(block)
+ union dp_map block;
+{
+ partition_t old_part, new_part;
+ p_index_t old_pindex, new_pindex;
+ union dp_map ret;
+ vm_size_t size;
+ vm_offset_t raddr, offset, new_offset;
+ kern_return_t rc;
+ static char here[] = "%spager_move_page";
+
+ old_pindex = block.block.p_index;
+ invalidate_block(ret); /* default answer: could not move */
+
+ /* See if we have room to put it anywhere else */
+ new_pindex = choose_partition( ptoa(1), old_pindex);
+ if (no_partition(new_pindex))
+ return ret;
+
+ /* this unlocks the new partition */
+ new_offset = pager_alloc_page(new_pindex, FALSE);
+ if (new_offset == NO_BLOCK)
+ panic(here,my_name);
+
+ /*
+ * Got the resources, now move the data
+ */
+ old_part = partition_of(old_pindex);
+ offset = ptoa(block.block.p_offset);
+ rc = page_read_file_direct (old_part->file,
+ offset,
+ vm_page_size,
+ &raddr,
+ &size);
+ if (rc != 0)
+ panic(here,my_name);
+
+ /* release old */
+ pager_dealloc_page(old_pindex, block.block.p_offset, FALSE);
+
+ new_part = partition_of(new_pindex);
+ offset = ptoa(new_offset);
+ rc = page_write_file_direct (new_part->file,
+ offset,
+ raddr,
+ size,
+ &size);
+ if (rc != 0)
+ panic(here,my_name);
+
+ (void) vm_deallocate( mach_task_self(), raddr, size); /* free the buffer the read returned */
+
+ ret.block.p_offset = new_offset;
+ ret.block.p_index = new_pindex;
+
+ return ret;
+}
+
+#ifdef CHECKSUM
+/*
+ * Look up the checksum recorded for the page containing `offset`.
+ * Panics when the page lies beyond the pager's map.  For an indirect
+ * map whose second-level block is missing, NO_CHECKSUM is returned.
+ */
+int
+pager_get_checksum(pager, offset)
+	register dpager_t	pager;
+	vm_offset_t		offset;
+{
+	vm_offset_t	page = atop(offset);
+	vm_offset_t	*leaf;
+	int		result;
+
+	mutex_lock(&pager->lock);	/* XXX lock_read */
+	if (page >= pager->size)
+	    panic("%spager_get_checksum",my_name);
+
+	if (!INDIRECT_PAGEMAP(pager->size)) {
+	    /* Direct table: one checksum slot per page. */
+	    result = pager->checksum[page];
+	} else {
+	    leaf = (vm_offset_t *)pager->checksum[page/PAGEMAP_ENTRIES];
+	    if (leaf != 0)
+		result = leaf[page%PAGEMAP_ENTRIES];
+	    else
+		result = NO_CHECKSUM;
+	}
+
+	mutex_unlock(&pager->lock);
+	return (result);
+}
+
+/*
+ * Record `checksum` for the page containing `offset`.
+ * Panics when the page lies beyond the pager's map, or when the map is
+ * indirect and the page's second-level checksum block does not exist.
+ */
+int
+pager_put_checksum(pager, offset, checksum)
+	register dpager_t	pager;
+	vm_offset_t		offset;
+	int			checksum;
+{
+	static char	here[] = "%spager_put_checksum";
+	vm_offset_t	page = atop(offset);
+	vm_offset_t	*leaf;
+
+	mutex_lock(&pager->lock);	/* XXX lock_read */
+	if (page >= pager->size)
+	    panic(here,my_name);
+
+	if (!INDIRECT_PAGEMAP(pager->size)) {
+	    /* Direct table: store straight into the page's slot. */
+	    pager->checksum[page] = checksum;
+	} else {
+	    leaf = (vm_offset_t *)pager->checksum[page/PAGEMAP_ENTRIES];
+	    if (leaf == 0)
+		panic(here,my_name);
+
+	    leaf[page%PAGEMAP_ENTRIES] = checksum;
+	}
+	mutex_unlock(&pager->lock);
+}
+
+/*
+ * Fold a buffer into a checksum by XOR-ing it one int at a time,
+ * starting from NO_CHECKSUM.  Bytes past the last whole int are ignored.
+ */
+int
+compute_checksum(addr, size)
+	vm_offset_t	addr;
+	vm_size_t	size;
+{
+	int	*word = (int *)addr;
+	int	nwords = size / sizeof(int);
+	int	sum = NO_CHECKSUM;
+	int	i;
+
+	for (i = 0; i < nwords; i++)
+	    sum ^= word[i];
+
+	return (sum);
+}
+#endif CHECKSUM
+
+/*
+ * Given an offset within a paging object, find the
+ * corresponding block within the paging partition.
+ * Allocate a new block if necessary.
+ *
+ * WARNING: paging objects apparently may be extended
+ * without notice!
+ *
+ * `offset` is a byte offset into the object.  Returns the map entry
+ * for that page; the entry is left invalidated (no block) when no
+ * partition is available, when a second-level block cannot be
+ * allocated, or when every partition is full.
+ *
+ * The pager lock is held except around the calls to pager_extend(),
+ * which takes the lock itself; the size is re-tested after each call.
+ * All exits go through the `out` label, which releases the lock.
+ */
+union dp_map
+pager_write_offset(pager, offset)
+ register dpager_t pager;
+ vm_offset_t offset;
+{
+ register vm_offset_t f_page;
+ register dp_map_t mapptr;
+ register union dp_map block;
+
+ invalidate_block(block); /* value returned on every failure path */
+
+ f_page = atop(offset);
+
+#if DEBUG_READER_CONFLICTS
+ if (pager->readers > 0)
+ default_pager_read_conflicts++; /* would have proceeded with
+ read/write lock */
+#endif
+ mutex_lock(&pager->lock); /* XXX lock_read */
+#if DEBUG_READER_CONFLICTS
+ pager->readers++;
+#endif
+
+ /* Catch the case where we had no initial fit partition
+ for this object, but one was added later on */
+ if (no_partition(pager->cur_partition)) {
+ p_index_t new_part;
+ vm_size_t size;
+
+ size = (f_page > pager->size) ? f_page : pager->size;
+ new_part = choose_partition(ptoa(size), P_INDEX_INVALID);
+ if (no_partition(new_part))
+ new_part = choose_partition(ptoa(1), P_INDEX_INVALID); /* settle for room for one page */
+ if (no_partition(new_part))
+ /* give up right now to avoid confusion */
+ goto out;
+ else
+ pager->cur_partition = new_part;
+ }
+
+ while (f_page >= pager->size) {
+ /*
+ * Paging object must be extended.
+ * Remember that offset is 0-based, but size is 1-based.
+ */
+#if DEBUG_READER_CONFLICTS
+ pager->readers--;
+#endif
+ mutex_unlock(&pager->lock);
+ pager_extend(pager, f_page + 1);
+#if DEBUG_READER_CONFLICTS
+ if (pager->readers > 0)
+ default_pager_read_conflicts++; /* would have proceeded with
+ read/write lock */
+#endif
+ mutex_lock(&pager->lock); /* XXX lock_read */
+#if DEBUG_READER_CONFLICTS
+ pager->readers++;
+#endif
+ }
+
+ if (INDIRECT_PAGEMAP(pager->size)) {
+
+ mapptr = pager->map[f_page/PAGEMAP_ENTRIES].indirect;
+ if (mapptr == 0) {
+ /*
+ * Allocate the indirect block
+ */
+ register int i;
+
+ mapptr = (dp_map_t) kalloc(PAGEMAP_SIZE(PAGEMAP_ENTRIES));
+ if (mapptr == 0) {
+ /* out of space! */
+ no_paging_space(TRUE);
+ goto out;
+ }
+ pager->map[f_page/PAGEMAP_ENTRIES].indirect = mapptr;
+ for (i = 0; i < PAGEMAP_ENTRIES; i++)
+ invalidate_block(mapptr[i]);
+#ifdef CHECKSUM
+ {
+ register vm_offset_t *cksumptr;
+ register int j;
+
+ cksumptr = (vm_offset_t *)
+ kalloc(PAGEMAP_SIZE(PAGEMAP_ENTRIES));
+ if (cksumptr == 0) {
+ /* out of space! */
+ no_paging_space(TRUE);
+ goto out;
+ }
+ pager->checksum[f_page/PAGEMAP_ENTRIES]
+ = (vm_offset_t)cksumptr;
+ for (j = 0; j < PAGEMAP_ENTRIES; j++)
+ cksumptr[j] = NO_CHECKSUM;
+ }
+#endif CHECKSUM
+ }
+ f_page %= PAGEMAP_ENTRIES; /* now an index into the second-level block */
+ }
+ else {
+ mapptr = pager->map;
+ }
+
+ block = mapptr[f_page];
+ if (no_block(block)) {
+ vm_offset_t off;
+
+ /* get room now */
+ off = pager_alloc_page(pager->cur_partition, TRUE);
+ if (off == NO_BLOCK) {
+ /*
+ * Before giving up, try all other partitions.
+ */
+ p_index_t new_part;
+
+ /* returns it locked (if any one is non-full) */
+ new_part = choose_partition( ptoa(1), pager->cur_partition);
+ if ( ! no_partition(new_part) ) {
+
+#if debug
+printf("%s partition %x filled,", my_name, pager->cur_partition);
+printf("extending object %x (size %x) to %x.\n",
+ pager, pager->size, new_part);
+#endif
+
+ /* this one tastes better */
+ pager->cur_partition = new_part;
+
+ /* this unlocks the partition too */
+ off = pager_alloc_page(pager->cur_partition, FALSE);
+
+ }
+
+ if (off == NO_BLOCK) {
+ /*
+ * Oh well.
+ */
+ overcommitted(FALSE, 1);
+ goto out; /* block still holds the NO_BLOCK entry read above */
+ }
+ }
+ block.block.p_offset = off;
+ block.block.p_index = pager->cur_partition;
+ mapptr[f_page] = block; /* remember the new block in the map */
+ }
+
+out:
+
+#if DEBUG_READER_CONFLICTS
+ pager->readers--;
+#endif
+ mutex_unlock(&pager->lock);
+ return (block);
+}
+
+/*
+ * Give back every disk block owned by a paging object and free its
+ * block map (and checksum table, when CHECKSUM is defined).
+ * No locking needed because no other operations can be in progress.
+ */
+void
+pager_dealloc(pager)
+	register dpager_t	pager;
+{
+	register int		slot, k;
+	register dp_map_t	leaf;
+	register union dp_map	entry;
+
+	if (!INDIRECT_PAGEMAP(pager->size)) {
+	    /* Single-level map. */
+	    leaf = pager->map;
+	    for (slot = 0; slot < pager->size; slot++ ) {
+		entry = leaf[slot];
+		if (no_block(entry))
+		    continue;
+		pager_dealloc_page(entry.block.p_index,
+				   entry.block.p_offset, TRUE);
+	    }
+	    kfree((char *)pager->map, PAGEMAP_SIZE(pager->size));
+#ifdef CHECKSUM
+	    kfree((char *)pager->checksum, PAGEMAP_SIZE(pager->size));
+#endif /* CHECKSUM */
+	    return;
+	}
+
+	/* Two-level map: walk the first level from the last slot down. */
+	slot = INDIRECT_PAGEMAP_ENTRIES(pager->size);
+	while (--slot >= 0) {
+	    leaf = pager->map[slot].indirect;
+	    if (leaf == 0)
+		continue;
+
+	    for (k = 0; k < PAGEMAP_ENTRIES; k++) {
+		entry = leaf[k];
+		if (no_block(entry))
+		    continue;
+		pager_dealloc_page(entry.block.p_index,
+				   entry.block.p_offset, TRUE);
+	    }
+	    kfree((char *)leaf, PAGEMAP_SIZE(PAGEMAP_ENTRIES));
+	}
+	kfree((char *)pager->map, INDIRECT_PAGEMAP_SIZE(pager->size));
+#ifdef CHECKSUM
+	slot = INDIRECT_PAGEMAP_ENTRIES(pager->size);
+	while (--slot >= 0) {
+	    leaf = (vm_offset_t *)pager->checksum[slot];
+	    if (leaf) {
+		kfree((char *)leaf, PAGEMAP_SIZE(PAGEMAP_ENTRIES));
+	    }
+	}
+	kfree((char *)pager->checksum,
+	      INDIRECT_PAGEMAP_SIZE(pager->size));
+#endif /* CHECKSUM */
+}
+
+/*
+ * Relocate every page of PAGER that is stored in partition PINDEX
+ * to some other partition.
+ * Pager should be write-locked, partition too.
+ * Returns FALSE as soon as one page cannot be moved; pages moved
+ * before that point stay moved.  On success the pager's current
+ * partition is re-chosen and TRUE is returned.
+ */
+boolean_t
+pager_realloc(pager, pindex)
+	register dpager_t	pager;
+	p_index_t		pindex;
+{
+	vm_size_t	npages = pager->size;	/* in pages */
+	dp_map_t	top = pager->map;
+	dp_map_t	leaf;
+	union dp_map	moved;
+	vm_size_t	i, j;
+
+	if (INDIRECT_PAGEMAP(npages)) {
+	    for (i = 0; i < INDIRECT_PAGEMAP_ENTRIES(npages); i++) {
+		leaf = top[i].indirect;
+		if (leaf == 0)
+		    continue;
+
+		for (j = 0; j < PAGEMAP_ENTRIES; j++) {
+		    if (leaf[j].block.p_index != pindex)
+			continue;
+
+		    moved = pager_move_page(leaf[j]);
+		    if (no_block(moved))
+			return FALSE;
+		    leaf[j] = moved;
+		}
+	    }
+	} else {
+	    /* A small one */
+	    for (i = 0; i < npages; i++) {
+		if (top[i].block.p_index != pindex)
+		    continue;
+
+		moved = pager_move_page(top[i]);
+		if (no_block(moved))
+		    return FALSE;
+		top[i] = moved;
+	    }
+	}
+
+	pager->cur_partition = choose_partition(0, P_INDEX_INVALID);
+	return TRUE;
+}
+
+/*
+
+ */
+
+/*
+ * Read/write routines.
+ */
+#define PAGER_SUCCESS 0
+#define PAGER_ABSENT 1
+#define PAGER_ERROR 2
+
+/*
+ * Read data from a default pager. Addr is the address of a buffer
+ * to fill. Out_addr returns the buffer that contains the data;
+ * if it is different from <addr>, it must be deallocated after use.
+ *
+ * Returns PAGER_ABSENT when the map has no block for `offset`,
+ * PAGER_ERROR when page_read_file_direct() fails, and PAGER_SUCCESS
+ * otherwise.  If the first read returns all `size` bytes, the buffer
+ * it produced is returned in *out_addr as is; otherwise each piece is
+ * copied into `addr` and reading continues until `size` bytes are in.
+ *
+ * With USE_PRECIOUS, a true `deallocate` releases the page's disk
+ * block after the read.  With CHECKSUM, a mismatch between the stored
+ * and the recomputed checksum panics.
+ *
+ * NOTE(review): in the piecewise path the buffers returned by
+ * page_read_file_direct() are not deallocated here - confirm who owns
+ * them.
+ */
+int
+default_read(ds, addr, size, offset, out_addr, deallocate)
+ register dpager_t ds;
+ vm_offset_t addr; /* pointer to block to fill */
+ register vm_size_t size;
+ register vm_offset_t offset;
+ vm_offset_t *out_addr;
+ /* returns pointer to data */
+ boolean_t deallocate;
+{
+ register union dp_map block;
+ vm_offset_t raddr;
+ vm_size_t rsize;
+ register int rc;
+ boolean_t first_time;
+ register partition_t part;
+#ifdef CHECKSUM
+ vm_size_t original_size = size;
+#endif CHECKSUM
+ vm_offset_t original_offset = offset;
+
+ /*
+ * Find the block in the paging partition
+ */
+ block = pager_read_offset(ds, offset);
+ if ( no_block(block) )
+ return (PAGER_ABSENT);
+
+ /*
+ * Read it, trying for the entire page.
+ */
+ offset = ptoa(block.block.p_offset); /* from here on, a byte offset within the partition */
+ part = partition_of(block.block.p_index);
+ first_time = TRUE;
+ *out_addr = addr;
+
+ do {
+ rc = page_read_file_direct(part->file,
+ offset,
+ size,
+ &raddr,
+ &rsize);
+ if (rc != 0)
+ return (PAGER_ERROR);
+
+ /*
+ * If we got the entire page on the first read, return it.
+ */
+ if (first_time && rsize == size) {
+ *out_addr = raddr;
+ break;
+ }
+ /*
+ * Otherwise, copy the data into the
+ * buffer we were passed, and try for
+ * the next piece.
+ */
+ first_time = FALSE;
+ bcopy((char *)raddr, (char *)addr, rsize);
+ addr += rsize;
+ offset += rsize;
+ size -= rsize;
+ } while (size != 0);
+
+#if USE_PRECIOUS
+ if (deallocate)
+ pager_release_offset(ds, original_offset);
+#endif /*USE_PRECIOUS*/
+
+#ifdef CHECKSUM
+ {
+ int write_checksum,
+ read_checksum;
+
+ write_checksum = pager_get_checksum(ds, original_offset);
+ read_checksum = compute_checksum(*out_addr, original_size);
+ if (write_checksum != read_checksum) {
+ panic(
+ "PAGER CHECKSUM ERROR: offset 0x%x, written 0x%x, read 0x%x",
+ original_offset, write_checksum, read_checksum);
+ }
+ }
+#endif CHECKSUM
+ return (PAGER_SUCCESS);
+}
+
+/*
+ * Write `size` bytes starting at `addr` to the disk block backing the
+ * page at `offset`, allocating that block if the page has none yet.
+ * Returns PAGER_ERROR when no block can be obtained or a write fails,
+ * PAGER_SUCCESS once everything has been written.
+ */
+int
+default_write(ds, addr, size, offset)
+	register dpager_t	ds;
+	register vm_offset_t	addr;
+	register vm_size_t	size;
+	register vm_offset_t	offset;
+{
+	register union dp_map	slot;
+	partition_t		dest;
+	vm_size_t		wsize;
+	register int		status;
+
+	/*
+	 * Locate (or create) the page's block in the paging partition.
+	 */
+	slot = pager_write_offset(ds, offset);
+	if ( no_block(slot) )
+	    return (PAGER_ERROR);
+
+#ifdef CHECKSUM
+	/*
+	 * Remember the checksum of what is about to be written.
+	 */
+	{
+	    int	checksum;
+
+	    checksum = compute_checksum(addr, size);
+	    pager_put_checksum(ds, offset, checksum);
+	}
+#endif /* CHECKSUM */
+	offset = ptoa(slot.block.p_offset);
+	dest = partition_of(slot.block.p_index);
+
+	/*
+	 * There are various assumptions made here, we
+	 * will not get into the next disk 'block' by
+	 * accident. It might well be non-contiguous.
+	 */
+	for (;;) {
+	    status = page_write_file_direct(dest->file,
+					    offset,
+					    addr,
+					    size,
+					    &wsize);
+	    if (status != 0) {
+		printf("*** PAGER ERROR: default_write: ");
+		printf("ds=0x%x addr=0x%x size=0x%x offset=0x%x resid=0x%x\n",
+		       ds, addr, size, offset, wsize);
+		return (PAGER_ERROR);
+	    }
+
+	    /* Advance past what was written; stop when nothing is left. */
+	    addr += wsize;
+	    offset += wsize;
+	    size -= wsize;
+	    if (size == 0)
+		break;
+	}
+	return (PAGER_SUCCESS);
+}
+
+/*
+ * Tell whether the page at `offset` currently has a disk block.
+ */
+boolean_t
+default_has_page(ds, offset)
+	dpager_t	ds;
+	vm_offset_t	offset;
+{
+	union dp_map	entry;
+
+	entry = pager_read_offset(ds, offset);
+	return ( ! no_block(entry) );
+}
+
+/*
+
+ */
+
+/*
+ * Mapping between pager port and paging object.
+ *
+ * One of these is allocated per paging object by pager_port_alloc().
+ * `links` chains it on all_pagers.queue, and `dpager` is the embedded
+ * block-map state that pager_alloc() initializes.  The four conditions
+ * are the ones the pager_port_wait_for_* and pager_port_lock routines
+ * sleep on.
+ */
+struct dstruct {
+ queue_chain_t links; /* Link in pager-port list */
+
+ struct mutex lock; /* Lock for the structure */
+ struct condition
+ waiting_seqno, /* someone waiting on seqno */
+ waiting_read, /* someone waiting on readers */
+ waiting_write, /* someone waiting on writers */
+ waiting_refs; /* someone waiting on refs */
+
+ memory_object_t pager; /* Pager port */
+ mach_port_seqno_t seqno; /* Pager port sequence number */
+ mach_port_t pager_request; /* Request port */
+ mach_port_urefs_t request_refs; /* Request port user-refs */
+ mach_port_t pager_name; /* Name port */
+ mach_port_urefs_t name_refs; /* Name port user-refs */
+
+ unsigned int readers; /* Reads in progress */
+ unsigned int writers; /* Writes in progress */
+
+ unsigned int errors; /* Pageout error count */
+ struct dpager dpager; /* Actual pager */
+};
+typedef struct dstruct * default_pager_t;
+#define DEFAULT_PAGER_NULL ((default_pager_t)0) /* "no pager" value */
+
+/*
+ * Locking of a struct dstruct.  The operations are real mutex calls
+ * only in a PARALLEL build; otherwise they expand to nothing.
+ * The argument is parenthesized so that any pointer expression
+ * can be passed.
+ */
+#if PARALLEL
+#define dstruct_lock_init(ds) mutex_init(&(ds)->lock)
+#define dstruct_lock(ds) mutex_lock(&(ds)->lock)
+#define dstruct_unlock(ds) mutex_unlock(&(ds)->lock)
+#else /* PARALLEL */
+#define dstruct_lock_init(ds)
+#define dstruct_lock(ds)
+#define dstruct_unlock(ds)
+#endif /* PARALLEL */
+
+/*
+ * List of all pagers. A specific pager is
+ * found directly via its port, this list is
+ * only used for monitoring purposes by the
+ * default_pager_object* calls
+ */
+struct pager_port {
+ queue_head_t queue;
+ struct mutex lock;
+ int count; /* saves code */
+ queue_head_t leak_queue;
+} all_pagers;
+
+#define pager_port_list_init() \
+{ \
+ mutex_init(&all_pagers.lock); \
+ queue_init(&all_pagers.queue); \
+ queue_init(&all_pagers.leak_queue); \
+ all_pagers.count = 0; \
+}
+
+/*
+ * Append a pager to the global list of pagers and bump the count,
+ * under the list lock.  The `port` argument is not used.
+ */
+void pager_port_list_insert(port, ds)
+	mach_port_t	port;
+	default_pager_t	ds;
+{
+	mutex_lock(&all_pagers.lock);
+
+	queue_enter(&all_pagers.queue, ds, default_pager_t, links);
+	all_pagers.count += 1;
+
+	mutex_unlock(&all_pagers.lock);
+}
+
+/* given a data structure return a good port-name to associate it to */
+#define pnameof(_x_) (((vm_offset_t)(_x_))+1)
+/* reverse, assumes no-odd-pointers */
+#define dnameof(_x_) (((vm_offset_t)(_x_))&~1)
+
+/* The magic typecast */
+#define pager_port_lookup(_port_) \
+ ((! MACH_PORT_VALID(_port_) || \
+ ((default_pager_t)dnameof(_port_))->pager != (_port_)) ? \
+ DEFAULT_PAGER_NULL : (default_pager_t)dnameof(_port_))
+
+/*
+ * Unlink a pager from the global list of pagers and drop the count,
+ * under the list lock.
+ */
+void pager_port_list_delete(ds)
+	default_pager_t	ds;
+{
+	mutex_lock(&all_pagers.lock);
+
+	queue_remove(&all_pagers.queue, ds, default_pager_t, links);
+	all_pagers.count -= 1;
+
+	mutex_unlock(&all_pagers.lock);
+}
+
+/*
+ * Destroy a paging partition.
+ * XXX this is not re-entrant XXX
+ *
+ * `name` identifies the partition (through part_id()).  Every pager on
+ * the all_pagers list is asked to move its pages off the partition.
+ * On success the partition's slot is cleared, its file handle is
+ * returned through `pp_private`, its storage is freed and KERN_SUCCESS
+ * is returned.  KERN_INVALID_ARGUMENT means no partition has that id;
+ * KERN_FAILURE means some page could not be relocated, in which case
+ * the partition is put back in service.
+ */
+kern_return_t
+destroy_paging_partition(name, pp_private)
+	char		*name;
+	void		**pp_private;
+{
+	register unsigned int	id = part_id(name);
+	register partition_t	part;
+	boolean_t		all_ok = TRUE;
+	default_pager_t		entry;
+	int			pindex;
+
+	/*
+	 * Find and take partition out of list
+	 * This prevents choose_partition from
+	 * getting in the way.
+	 */
+	mutex_lock(&all_partitions.lock);
+	for (pindex = 0; pindex < all_partitions.n_partitions; pindex++) {
+		part = partition_of(pindex);
+		if (part && (part->id == id)) break;
+	}
+	if (pindex == all_partitions.n_partitions) {
+		mutex_unlock(&all_partitions.lock);
+		return KERN_INVALID_ARGUMENT;
+	}
+	part->going_away = TRUE;
+	mutex_unlock(&all_partitions.lock);
+
+	/*
+	 * This might take a while..
+	 */
+all_over_again:
+#if debug
+printf("Partition x%x (id x%x) for %s, all_ok %d\n", part, id, name, all_ok);
+#endif
+	all_ok = TRUE;
+	mutex_lock(&part->p_lock);
+
+	mutex_lock(&all_pagers.lock);
+	queue_iterate(&all_pagers.queue, entry, default_pager_t, links) {
+
+		dstruct_lock(entry);
+
+		if (!mutex_try_lock(&entry->dpager.lock)) {
+
+			/*
+			 * Someone is using this pager.  Drop every lock
+			 * held so far and restart the scan after yielding.
+			 */
+			dstruct_unlock(entry);
+			mutex_unlock(&all_pagers.lock);
+			mutex_unlock(&part->p_lock);
+
+			/* yield the processor */
+			(void) thread_switch(MACH_PORT_NULL,
+					     SWITCH_OPTION_NONE, 0);
+
+			goto all_over_again;
+
+		}
+
+		/*
+		 * See if we can relocate all the pages of this object
+		 * currently on this partition on some other partition
+		 */
+		all_ok = pager_realloc(&entry->dpager, pindex);
+
+		mutex_unlock(&entry->dpager.lock);
+		dstruct_unlock(entry);
+
+		if (!all_ok) break;
+
+	}
+	mutex_unlock(&all_pagers.lock);
+
+	if (all_ok) {
+		/* No need to unlock partition, there are no refs left */
+
+		set_partition_of(pindex, 0);
+		*pp_private = part->file;
+		kfree(part->bitmap, howmany(part->total_size, NB_BM) * sizeof(bm_entry_t));
+		kfree(part, sizeof(struct part));
+		printf("%s Removed paging partition %s\n", my_name, name);
+		return KERN_SUCCESS;
+	}
+
+	/*
+	 * Put partition back in.  It stays in use, so the lock taken
+	 * above must be released before returning.
+	 */
+	part->going_away = FALSE;
+	mutex_unlock(&part->p_lock);
+
+	return KERN_FAILURE;
+}
+
+
+/*
+ * We use the sequence numbers on requests to regulate
+ * our parallelism. In general, we allow multiple reads and writes
+ * to proceed in parallel, with the exception that reads must
+ * wait for previous writes to finish. (Because the kernel might
+ * generate a data-request for a page on the heels of a data-write
+ * for the same page, and we must avoid returning stale data.)
+ * Terminate requests wait for preceding reads and writes to finish.
+ */
+
+unsigned int default_pager_total = 0; /* debugging */
+unsigned int default_pager_wait_seqno = 0; /* debugging */
+unsigned int default_pager_wait_read = 0; /* debugging */
+unsigned int default_pager_wait_write = 0; /* debugging */
+unsigned int default_pager_wait_refs = 0; /* debugging */
+
+#if PARALLEL
+/*
+ * Lock the pager and sleep until its sequence number equals `seqno`.
+ * Returns with the pager still locked.
+ */
+void pager_port_lock(ds, seqno)
+	default_pager_t		ds;
+	mach_port_seqno_t	seqno;
+{
+	default_pager_total++;
+	dstruct_lock(ds);
+	for (;;) {
+		if (ds->seqno == seqno)
+			break;
+		default_pager_wait_seqno++;
+		condition_wait(&ds->waiting_seqno, &ds->lock);
+	}
+}
+
+/*
+ * Advance the sequence number, unlock the pager, then wake every
+ * thread waiting for its turn in pager_port_lock().
+ */
+void pager_port_unlock(ds)
+	default_pager_t	ds;
+{
+	ds->seqno += 1;
+	dstruct_unlock(ds);
+
+	condition_broadcast(&ds->waiting_seqno);
+}
+
+/*
+ * Note one more read in progress.  Pager must be locked.
+ */
+void pager_port_start_read(ds)
+	default_pager_t	ds;
+{
+	ds->readers += 1;
+}
+
+/*
+ * Sleep until no reads are in progress.  The pager lock is released
+ * while sleeping and held again on return.
+ */
+void pager_port_wait_for_readers(ds)
+	default_pager_t	ds;
+{
+	for (;;) {
+		if (ds->readers == 0)
+			break;
+		default_pager_wait_read++;
+		condition_wait(&ds->waiting_read, &ds->lock);
+	}
+}
+
+/*
+ * Note that one read has finished; when it was the last one, wake the
+ * threads waiting for readers.  Pager is unlocked and returns unlocked.
+ */
+void pager_port_finish_read(ds)
+	default_pager_t	ds;
+{
+	boolean_t	was_last;
+
+	dstruct_lock(ds);
+	ds->readers -= 1;
+	was_last = (ds->readers == 0);
+	dstruct_unlock(ds);
+
+	/* The broadcast is issued after the lock has been dropped. */
+	if (was_last)
+		condition_broadcast(&ds->waiting_read);
+}
+
+/*
+ * Note one more write in progress.  Pager must be locked.
+ */
+void pager_port_start_write(ds)
+	default_pager_t	ds;
+{
+	ds->writers += 1;
+}
+
+/*
+ * Sleep until no writes are in progress.  The pager lock is released
+ * while sleeping and held again on return.
+ */
+void pager_port_wait_for_writers(ds)
+	default_pager_t	ds;
+{
+	for (;;) {
+		if (ds->writers == 0)
+			break;
+		default_pager_wait_write++;
+		condition_wait(&ds->waiting_write, &ds->lock);
+	}
+}
+
+/*
+ * Note that one write has finished; when it was the last one, wake the
+ * threads waiting for writers.  Pager is unlocked and returns unlocked.
+ */
+void pager_port_finish_write(ds)
+	default_pager_t	ds;
+{
+	boolean_t	was_last;
+
+	dstruct_lock(ds);
+	ds->writers -= 1;
+	was_last = (ds->writers == 0);
+	dstruct_unlock(ds);
+
+	/* The broadcast is issued after the lock has been dropped. */
+	if (was_last)
+		condition_broadcast(&ds->waiting_write);
+}
+
+/*
+ * Wait for concurrent default_pager_objects: sleep for as long as the
+ * pager's name_refs count is zero.  The pager lock is released while
+ * sleeping and held again on return.
+ */
+void pager_port_wait_for_refs(ds)
+	default_pager_t	ds;
+{
+	for (;;) {
+		if (ds->name_refs != 0)
+			break;
+		default_pager_wait_refs++;
+		condition_wait(&ds->waiting_refs, &ds->lock);
+	}
+}
+
+/*
+ * Name refs have been created: wake every thread sleeping on the
+ * pager's waiting_refs condition.
+ */
+void pager_port_finish_refs(ds)
+	default_pager_t	ds;
+{
+	condition_broadcast(&ds->waiting_refs);
+}
+
+#else /* PARALLEL */
+
+#define pager_port_lock(ds,seqno)
+#define pager_port_unlock(ds)
+#define pager_port_start_read(ds)
+#define pager_port_wait_for_readers(ds)
+#define pager_port_finish_read(ds)
+#define pager_port_start_write(ds)
+#define pager_port_wait_for_writers(ds)
+#define pager_port_finish_write(ds)
+#define pager_port_wait_for_refs(ds)
+#define pager_port_finish_refs(ds)
+
+#endif /* PARALLEL */
+
+/*
+ * Default pager.
+ */
+task_t default_pager_self; /* Our task port. */
+
+mach_port_t default_pager_default_port; /* Port for memory_object_create. */
+thread_t default_pager_default_thread; /* Thread for default_port. */
+
+/* We catch exceptions on ourself & startup using this port. */
+mach_port_t default_pager_exception_port;
+/* We receive bootstrap requests on this port. */
+mach_port_t default_pager_bootstrap_port;
+
+mach_port_t default_pager_internal_set; /* Port set for internal objects. */
+mach_port_t default_pager_external_set; /* Port set for external objects. */
+mach_port_t default_pager_default_set; /* Port set for "default" thread. */
+
+typedef struct default_pager_thread {
+ cthread_t dpt_thread; /* Server thread. */
+ vm_offset_t dpt_buffer; /* Read buffer. */
+ boolean_t dpt_internal; /* Do we handle internal objects? */
+} default_pager_thread_t;
+
+#if PARALLEL
+ /* determine number of threads at run time */
+#define DEFAULT_PAGER_INTERNAL_COUNT (0)
+
+#else /* PARALLEL */
+#define DEFAULT_PAGER_INTERNAL_COUNT (1)
+#endif /* PARALLEL */
+
+/* Memory created by default_pager_object_create should mostly be resident. */
+#define DEFAULT_PAGER_EXTERNAL_COUNT (1)
+
+unsigned int default_pager_internal_count = DEFAULT_PAGER_INTERNAL_COUNT;
+ /* Number of "internal" threads. */
+unsigned int default_pager_external_count = DEFAULT_PAGER_EXTERNAL_COUNT;
+ /* Number of "external" threads. */
+
+/*
+ * Allocate and zero a pager structure for an object of `size` bytes,
+ * pick a partition for it and set up its block map.
+ * Panics if the structure cannot be allocated.
+ */
+default_pager_t pager_port_alloc(size)
+	vm_size_t	size;
+{
+	default_pager_t	ds;
+	p_index_t	where;
+
+	ds = (default_pager_t) kalloc(sizeof *ds);
+	if (ds == DEFAULT_PAGER_NULL)
+		panic("%spager_port_alloc",my_name);
+	bzero((char *) ds, sizeof *ds);
+
+	dstruct_lock_init(ds);
+
+	/*
+	 * Get a suitable partition.  If none is big enough,
+	 * just pick one and overcommit.  If there are no
+	 * partitions at all, fake one so that we will kill
+	 * specific objects on pageouts rather than panicking
+	 * the system now.
+	 */
+	where = choose_partition(size, P_INDEX_INVALID);
+	if (no_partition(where)) {
+		overcommitted(FALSE, atop(size));
+		where = choose_partition(0,P_INDEX_INVALID);
+#if debug
+		if (no_partition(where))
+			printf("%s No paging space at all !!\n", my_name);
+#endif
+	}
+
+	pager_alloc(&ds->dpager, where, size);
+	return ds;
+}
+
+mach_port_urefs_t default_pager_max_urefs = 10000;
+
+/*
+ * Count one more user reference on the pager_request port and, once
+ * the count exceeds default_pager_max_urefs, give all but one of them
+ * back to the kernel.
+ * Pager must be locked.
+ * Unlocks and re-locks pager if needs to call kernel.
+ */
+void pager_port_check_request(ds, pager_request)
+	default_pager_t	ds;
+	mach_port_t	pager_request;
+{
+	mach_port_delta_t	excess;
+	kern_return_t		status;
+
+	assert(ds->pager_request == pager_request);
+
+	ds->request_refs += 1;
+	if (ds->request_refs <= default_pager_max_urefs)
+		return;
+
+	/* Negative delta that brings the count back down to one. */
+	excess = 1 - ds->request_refs;
+	ds->request_refs = 1;
+
+	dstruct_unlock(ds);
+
+	/*
+	 * Deallocate excess user references.
+	 */
+	status = mach_port_mod_refs(default_pager_self, pager_request,
+				    MACH_PORT_RIGHT_SEND, excess);
+	if (status != KERN_SUCCESS)
+		panic("%spager_port_check_request",my_name);
+
+	dstruct_lock(ds);
+}
+
+/*
+ * Start serving the memory object port ds->pager: request a
+ * no-senders notification on it (sent to the port itself through
+ * a send-once right) and move the port into the internal or the
+ * external port set, according to `internal'.
+ * Panics if either kernel call fails or if a notification
+ * request was already registered.
+ */
+void default_pager_add(ds, internal)
+ default_pager_t ds;
+ boolean_t internal;
+{
+ mach_port_t pager = ds->pager;
+ mach_port_t pset;
+ mach_port_mscount_t sync;
+ mach_port_t previous;
+ kern_return_t kr;
+ static char here[] = "%sdefault_pager_add";
+
+ /*
+ * The port currently has a make-send count of zero,
+ * because either we just created the port or we just
+ * received the port in a memory_object_create request.
+ */
+
+ if (internal) {
+ /* possibly generate an immediate no-senders notification */
+ sync = 0;
+ pset = default_pager_internal_set;
+ } else {
+ /* delay notification till send right is created */
+ sync = 1;
+ pset = default_pager_external_set;
+ }
+
+ kr = mach_port_request_notification(default_pager_self, pager,
+ MACH_NOTIFY_NO_SENDERS, sync,
+ pager, MACH_MSG_TYPE_MAKE_SEND_ONCE,
+ &previous);
+ if ((kr != KERN_SUCCESS) || (previous != MACH_PORT_NULL))
+ panic(here,my_name);
+
+ /* from now on a thread serving pset can receive on this port */
+ kr = mach_port_move_member(default_pager_self, pager, pset);
+ if (kr != KERN_SUCCESS)
+ panic(here,my_name);
+}
+
+/*
+ *	Routine:	memory_object_create
+ *	Purpose:
+ *		Handle requests for memory objects from the
+ *		kernel.
+ *	Notes:
+ *		Because we only give out the default memory
+ *		manager port to the kernel, we don't have to
+ *		be so paranoid about the contents.
+ *
+ *		The received port new_pager is renamed to
+ *		pnameof(ds).  If that name is already in use,
+ *		the structure is copied to a freshly allocated
+ *		one (hence a different name), the old one is
+ *		parked on all_pagers.leak_queue, and the rename
+ *		is retried.  Panics on any other rename failure
+ *		and when memory for the copy cannot be obtained.
+ *		Always returns KERN_SUCCESS.
+ */
+kern_return_t
+seqnos_memory_object_create(old_pager, seqno, new_pager, new_size,
+			    new_pager_request, new_pager_name, new_page_size)
+	mach_port_t	old_pager;
+	mach_port_seqno_t seqno;
+	mach_port_t	new_pager;
+	vm_size_t	new_size;
+	mach_port_t	new_pager_request;
+	mach_port_t	new_pager_name;
+	vm_size_t	new_page_size;
+{
+	register default_pager_t	ds;
+	kern_return_t			kr;
+
+	assert(old_pager == default_pager_default_port);
+	assert(MACH_PORT_VALID(new_pager_request));
+	assert(MACH_PORT_VALID(new_pager_name));
+	assert(new_page_size == vm_page_size);
+
+	ds = pager_port_alloc(new_size);
+rename_it:
+	kr = mach_port_rename(	default_pager_self,
+				new_pager, (mach_port_t)pnameof(ds));
+	if (kr != KERN_SUCCESS) {
+		default_pager_t	ds1;
+
+		if (kr != KERN_NAME_EXISTS)
+			panic("%s m_o_create", my_name);
+		ds1 = (default_pager_t) kalloc(sizeof *ds1);
+		/* same policy as pager_port_alloc: no memory is fatal */
+		if (ds1 == DEFAULT_PAGER_NULL)
+			panic("%s m_o_create", my_name);
+		*ds1 = *ds;
+		/* the old structure is reclaimed later by no_senders */
+		mutex_lock(&all_pagers.lock);
+		queue_enter(&all_pagers.leak_queue, ds, default_pager_t, links);
+		mutex_unlock(&all_pagers.lock);
+		ds = ds1;
+		goto rename_it;
+	}
+
+	new_pager = (mach_port_t) pnameof(ds);
+
+	/*
+	 *	Set up associations between these ports
+	 *	and this default_pager structure
+	 */
+
+	ds->pager = new_pager;
+	ds->pager_request = new_pager_request;
+	ds->request_refs = 1;
+	ds->pager_name = new_pager_name;
+	ds->name_refs = 1;
+
+	/*
+	 *	After this, other threads might receive requests
+	 *	for this memory object or find it in the port list.
+	 */
+
+	pager_port_list_insert(new_pager, ds);
+	default_pager_add(ds, TRUE);
+
+	return(KERN_SUCCESS);
+}
+
+/* Copy strategy passed to memory_object_set_attributes by memory_object_init. */
+memory_object_copy_strategy_t default_pager_copy_strategy =
+ MEMORY_OBJECT_COPY_DELAY;
+
+/*
+ * Routine: memory_object_init
+ * Purpose:
+ * Attach the request and name ports supplied with the
+ * message to an existing pager structure and set the
+ * object's attributes.
+ * Notes:
+ * Panics if the pager port is unknown, if the structure
+ * already has a request port, or if setting the
+ * attributes fails. Always returns KERN_SUCCESS.
+ */
+kern_return_t
+seqnos_memory_object_init(pager, seqno, pager_request, pager_name,
+ pager_page_size)
+ mach_port_t pager;
+ mach_port_seqno_t seqno;
+ mach_port_t pager_request;
+ mach_port_t pager_name;
+ vm_size_t pager_page_size;
+{
+ register default_pager_t ds;
+ kern_return_t kr;
+ static char here[] = "%sinit";
+
+ assert(MACH_PORT_VALID(pager_request));
+ assert(MACH_PORT_VALID(pager_name));
+ assert(pager_page_size == vm_page_size);
+
+ ds = pager_port_lookup(pager);
+ if (ds == DEFAULT_PAGER_NULL)
+ panic(here, my_name);
+ pager_port_lock(ds, seqno);
+
+ /* a second init without an intervening terminate is fatal */
+ if (ds->pager_request != MACH_PORT_NULL)
+ panic(here, my_name);
+
+ ds->pager_request = pager_request;
+ ds->request_refs = 1;
+ ds->pager_name = pager_name;
+ ds->name_refs = 1;
+
+ /*
+ * Even if the kernel immediately terminates the object,
+ * the pager_request port won't be destroyed until
+ * we process the terminate request, which won't happen
+ * until we unlock the object.
+ */
+
+ kr = memory_object_set_attributes(pager_request,
+ TRUE,
+ FALSE, /* do not cache */
+ default_pager_copy_strategy);
+ if (kr != KERN_SUCCESS)
+ panic(here, my_name);
+
+ pager_port_unlock(ds);
+
+ return(KERN_SUCCESS);
+}
+
+/*
+ * Routine: memory_object_terminate
+ * Purpose:
+ * Detach the request and name ports from a pager
+ * structure once outstanding reads, writes and name
+ * reference updates have finished, then give up the
+ * rights held on those two ports.
+ * Notes:
+ * The pager port and the structure itself are kept.
+ * Panics if the pager port is unknown or if a
+ * mach_port_mod_refs call fails.
+ * Always returns KERN_SUCCESS.
+ */
+kern_return_t
+seqnos_memory_object_terminate(pager, seqno, pager_request, pager_name)
+ mach_port_t pager;
+ mach_port_seqno_t seqno;
+ mach_port_t pager_request;
+ mach_port_t pager_name;
+{
+ register default_pager_t ds;
+ mach_port_urefs_t request_refs, name_refs;
+ kern_return_t kr;
+ static char here[] = "%sterminate";
+
+ /*
+ * pager_request and pager_name are receive rights,
+ * not send rights.
+ */
+
+ ds = pager_port_lookup(pager);
+ if (ds == DEFAULT_PAGER_NULL)
+ panic(here, my_name);
+ pager_port_lock(ds, seqno);
+
+ /*
+ * Wait for read and write requests to terminate.
+ */
+
+ pager_port_wait_for_readers(ds);
+ pager_port_wait_for_writers(ds);
+
+ /*
+ * After memory_object_terminate both memory_object_init
+ * and a no-senders notification are possible, so we need
+ * to clean up the request and name ports but leave
+ * the pager port.
+ *
+ * A concurrent default_pager_objects might be allocating
+ * more references for the name port. In this case,
+ * we must first wait for it to finish.
+ */
+
+ pager_port_wait_for_refs(ds);
+
+ /* take the counts out of the structure while it is still locked */
+ ds->pager_request = MACH_PORT_NULL;
+ request_refs = ds->request_refs;
+ ds->request_refs = 0;
+ assert(ds->pager_name == pager_name);
+ ds->pager_name = MACH_PORT_NULL;
+ name_refs = ds->name_refs;
+ ds->name_refs = 0;
+ pager_port_unlock(ds);
+
+ /*
+ * Now we deallocate our various port rights.
+ */
+
+ kr = mach_port_mod_refs(default_pager_self, pager_request,
+ MACH_PORT_RIGHT_SEND, -request_refs);
+ if (kr != KERN_SUCCESS)
+ panic(here,my_name);
+
+ kr = mach_port_mod_refs(default_pager_self, pager_request,
+ MACH_PORT_RIGHT_RECEIVE, -1);
+ if (kr != KERN_SUCCESS)
+ panic(here,my_name);
+
+ kr = mach_port_mod_refs(default_pager_self, pager_name,
+ MACH_PORT_RIGHT_SEND, -name_refs);
+ if (kr != KERN_SUCCESS)
+ panic(here,my_name);
+
+ kr = mach_port_mod_refs(default_pager_self, pager_name,
+ MACH_PORT_RIGHT_RECEIVE, -1);
+ if (kr != KERN_SUCCESS)
+ panic(here,my_name);
+
+ return (KERN_SUCCESS);
+}
+
+/*
+ * Handle a no-senders notification for a memory object port:
+ * remove the object from the port list, release its paging
+ * storage, drop the receive right on the port and free the
+ * structure. Also frees every structure parked on
+ * all_pagers.leak_queue.
+ * Panics if the port is unknown, if the object still has a
+ * request port, or if the receive right cannot be released.
+ * `mscount' is not used.
+ */
+void default_pager_no_senders(pager, seqno, mscount)
+ memory_object_t pager;
+ mach_port_seqno_t seqno;
+ mach_port_mscount_t mscount;
+{
+ register default_pager_t ds;
+ kern_return_t kr;
+ static char here[] = "%sno_senders";
+
+ /*
+ * Because we don't give out multiple send rights
+ * for a memory object, there can't be a race
+ * between getting a no-senders notification
+ * and creating a new send right for the object.
+ * Hence we don't keep track of mscount.
+ */
+
+
+ ds = pager_port_lookup(pager);
+ if (ds == DEFAULT_PAGER_NULL)
+ panic(here,my_name);
+ pager_port_lock(ds, seqno);
+
+ /*
+ * We shouldn't get a no-senders notification
+ * when the kernel has the object cached.
+ */
+
+ if (ds->pager_request != MACH_PORT_NULL)
+ panic(here,my_name);
+
+ /*
+ * Unlock the pager (though there should be no one
+ * waiting for it).
+ */
+ dstruct_unlock(ds);
+
+ /*
+ * Remove the memory object port association, and then
+ * destroy the port itself. We must remove the object
+ * from the port list before deallocating the pager,
+ * because of default_pager_objects.
+ */
+
+ pager_port_list_delete(ds);
+ pager_dealloc(&ds->dpager);
+
+ kr = mach_port_mod_refs(default_pager_self, pager,
+ MACH_PORT_RIGHT_RECEIVE, -1);
+ if (kr != KERN_SUCCESS)
+ panic(here,my_name);
+
+ /*
+ * Do this *after* deallocating the port name
+ */
+ kfree((char *) ds, sizeof(*ds));
+
+ /*
+ * Recover memory that we might have wasted because
+ * of name conflicts
+ */
+ mutex_lock(&all_pagers.lock);
+
+ while (!queue_empty(&all_pagers.leak_queue)) {
+
+ /* only the structure is freed; its paging storage is not touched */
+ ds = (default_pager_t) queue_first(&all_pagers.leak_queue);
+ queue_remove_first(&all_pagers.leak_queue, ds, default_pager_t, links);
+ kfree((char *) ds, sizeof(*ds));
+ }
+
+ mutex_unlock(&all_pagers.lock);
+}
+
+/* Statistics: bumped by the data_request and data_write handlers. */
+int default_pager_pagein_count = 0;
+int default_pager_pageout_count = 0;
+
+/*
+ * Routine: memory_object_data_request
+ * Purpose:
+ * Read one page of an object with default_read and hand
+ * the outcome (data, "unavailable" or error) to the
+ * kernel through reply_to.
+ * Notes:
+ * Panics if length is not exactly one page or if the
+ * pager port is unknown. If the object has recorded
+ * errors, the request is answered with a data error
+ * without reading. Always returns KERN_SUCCESS.
+ */
+kern_return_t
+seqnos_memory_object_data_request(pager, seqno, reply_to, offset,
+ length, protection_required)
+ memory_object_t pager;
+ mach_port_seqno_t seqno;
+ mach_port_t reply_to;
+ vm_offset_t offset;
+ vm_size_t length;
+ vm_prot_t protection_required;
+{
+ default_pager_thread_t *dpt;
+ default_pager_t ds;
+ vm_offset_t addr;
+ unsigned int errors;
+ kern_return_t rc;
+ static char here[] = "%sdata_request";
+
+ /* per-thread state stored by default_pager_thread; supplies the read buffer */
+ dpt = (default_pager_thread_t *) cthread_data(cthread_self());
+
+ if (length != vm_page_size)
+ panic(here,my_name);
+
+ ds = pager_port_lookup(pager);
+ if (ds == DEFAULT_PAGER_NULL)
+ panic(here,my_name);
+ pager_port_lock(ds, seqno);
+ pager_port_check_request(ds, reply_to);
+ pager_port_wait_for_writers(ds);
+ pager_port_start_read(ds);
+
+ /*
+ * Get error count while pager locked.
+ */
+ errors = ds->errors;
+
+ pager_port_unlock(ds);
+
+ if (errors) {
+ printf("%s %s\n", my_name,
+ "dropping data_request because of previous paging errors");
+ (void) memory_object_data_error(reply_to,
+ offset, vm_page_size,
+ KERN_FAILURE);
+ goto done;
+ }
+
+ rc = default_read(&ds->dpager, dpt->dpt_buffer,
+ vm_page_size, offset,
+ &addr, protection_required & VM_PROT_WRITE);
+
+ switch (rc) {
+ case PAGER_SUCCESS:
+ /* default_read may return the data somewhere other than our buffer */
+ if (addr != dpt->dpt_buffer) {
+ /*
+ * Deallocates data buffer
+ */
+ (void) memory_object_data_supply(
+ reply_to, offset,
+ addr, vm_page_size, TRUE,
+ VM_PROT_NONE,
+ FALSE, MACH_PORT_NULL);
+ } else {
+ (void) memory_object_data_provided(
+ reply_to, offset,
+ addr, vm_page_size,
+ VM_PROT_NONE);
+ }
+ break;
+
+ case PAGER_ABSENT:
+ (void) memory_object_data_unavailable(
+ reply_to,
+ offset,
+ vm_page_size);
+ break;
+
+ case PAGER_ERROR:
+ (void) memory_object_data_error(
+ reply_to,
+ offset,
+ vm_page_size,
+ KERN_FAILURE);
+ break;
+ }
+
+ default_pager_pagein_count++;
+
+ done:
+ /* pairs with pager_port_start_read above, on every path */
+ pager_port_finish_read(ds);
+ return(KERN_SUCCESS);
+}
+
+/*
+ * memory_object_data_initialize: check whether we already have each page, and
+ * write it if we do not.  The implementation is far from optimized, and
+ * also assumes that the default_pager is single-threaded.
+ *
+ * Pages the pager already holds are left untouched.  A failed write is
+ * reported on stdout and counted in ds->errors.  The data buffer is always
+ * deallocated.  Panics if the pager port is unknown or if the buffer cannot
+ * be deallocated.  Always returns KERN_SUCCESS.
+ */
+kern_return_t
+seqnos_memory_object_data_initialize(pager, seqno, pager_request,
+				     offset, addr, data_cnt)
+	memory_object_t	pager;
+	mach_port_seqno_t seqno;
+	mach_port_t	pager_request;
+	register
+	vm_offset_t	offset;
+	register
+	pointer_t	addr;
+	vm_size_t	data_cnt;
+{
+	vm_offset_t	amount_sent;
+	default_pager_t	ds;
+	static char	here[] = "%sdata_initialize";
+
+#ifdef	lint
+	pager_request++;
+#endif	/* lint */
+
+	ds = pager_port_lookup(pager);
+	if (ds == DEFAULT_PAGER_NULL)
+		panic(here,my_name);
+	pager_port_lock(ds, seqno);
+	pager_port_check_request(ds, pager_request);
+	pager_port_start_write(ds);
+	pager_port_unlock(ds);
+
+	for (amount_sent = 0;
+	     amount_sent < data_cnt;
+	     amount_sent += vm_page_size) {
+
+		if (!default_has_page(&ds->dpager, offset + amount_sent)) {
+			if (default_write(&ds->dpager,
+					  addr + amount_sent,
+					  vm_page_size,
+					  offset + amount_sent)
+			    != PAGER_SUCCESS) {
+				/*
+				 * `here' is itself a format string, so it
+				 * cannot be printed through %s; spell the
+				 * routine name out instead.
+				 */
+				printf("%sdata_initialize write error\n",
+				       my_name);
+				dstruct_lock(ds);
+				ds->errors++;
+				dstruct_unlock(ds);
+			}
+		}
+	}
+
+	pager_port_finish_write(ds);
+	if (vm_deallocate(default_pager_self, addr, data_cnt) != KERN_SUCCESS)
+		panic(here,my_name);
+
+	return(KERN_SUCCESS);
+}
+
+/*
+ * memory_object_data_write: split up the stuff coming in from
+ * a memory_object_data_write call
+ * into individual pages and pass them off to default_write.
+ *
+ * Every page is attempted even after a failure; each failure is
+ * counted in ds->errors.  The data buffer is always deallocated.
+ * Panics if data_cnt is not a multiple of the page size, if the
+ * pager port is unknown, or if the buffer cannot be deallocated.
+ * Always returns KERN_SUCCESS.
+ */
+kern_return_t
+seqnos_memory_object_data_write(pager, seqno, pager_request,
+				offset, addr, data_cnt)
+	memory_object_t	pager;
+	mach_port_seqno_t seqno;
+	mach_port_t	pager_request;
+	register
+	vm_offset_t	offset;
+	register
+	pointer_t	addr;
+	vm_size_t	data_cnt;
+{
+	register
+	vm_size_t	amount_sent;
+	default_pager_t	ds;
+	static char	here[] = "%sdata_write";
+
+#ifdef	lint
+	pager_request++;
+#endif	/* lint */
+
+	if ((data_cnt % vm_page_size) != 0)
+		panic(here,my_name);
+
+	ds = pager_port_lookup(pager);
+	if (ds == DEFAULT_PAGER_NULL)
+		panic(here,my_name);
+	pager_port_lock(ds, seqno);
+	pager_port_check_request(ds, pager_request);
+	pager_port_start_write(ds);
+	pager_port_unlock(ds);
+
+	for (amount_sent = 0;
+	     amount_sent < data_cnt;
+	     amount_sent += vm_page_size) {
+
+		register int result;
+
+		result = default_write(&ds->dpager,
+				       addr + amount_sent,
+				       vm_page_size,
+				       offset + amount_sent);
+		/*
+		 * default_write reports PAGER_* codes (see
+		 * memory_object_data_initialize), not kern_return_t.
+		 */
+		if (result != PAGER_SUCCESS) {
+#if debug
+			printf("%s WRITE ERROR on default_pageout:", my_name);
+			printf(" pager=%x, offset=0x%x, length=0x%x, result=%d\n",
+				pager, offset+amount_sent, vm_page_size, result);
+#endif
+			dstruct_lock(ds);
+			ds->errors++;
+			dstruct_unlock(ds);
+		}
+		default_pager_pageout_count++;
+	}
+
+	pager_port_finish_write(ds);
+	if (vm_deallocate(default_pager_self, addr, data_cnt) != KERN_SUCCESS)
+		panic(here,my_name);
+
+	return(KERN_SUCCESS);
+}
+
+/*
+ * memory_object_copy is not supported by this pager: any such
+ * request panics. The return statement only runs if panic returns.
+ */
+/*ARGSUSED*/
+kern_return_t
+seqnos_memory_object_copy(old_memory_object, seqno, old_memory_control,
+ offset, length, new_memory_object)
+ memory_object_t old_memory_object;
+ mach_port_seqno_t seqno;
+ memory_object_control_t
+ old_memory_control;
+ vm_offset_t offset;
+ vm_size_t length;
+ memory_object_t new_memory_object;
+{
+ panic("%scopy", my_name);
+ return KERN_FAILURE;
+}
+
+/*
+ * memory_object_lock_completed is never expected by this pager:
+ * receiving one panics. All arguments are ignored.
+ */
+kern_return_t
+seqnos_memory_object_lock_completed(pager, seqno, pager_request,
+ offset, length)
+ memory_object_t pager;
+ mach_port_seqno_t seqno;
+ mach_port_t pager_request;
+ vm_offset_t offset;
+ vm_size_t length;
+{
+ /* touch every argument so that lint does not report them as unused */
+#ifdef lint
+ pager++; seqno++; pager_request++; offset++; length++;
+#endif lint
+
+ panic("%slock_completed",my_name);
+ return(KERN_FAILURE);
+}
+
+/*
+ * memory_object_data_unlock is never expected by this pager:
+ * receiving one panics. All arguments are ignored.
+ */
+kern_return_t
+seqnos_memory_object_data_unlock(pager, seqno, pager_request,
+ offset, addr, data_cnt)
+ memory_object_t pager;
+ mach_port_seqno_t seqno;
+ mach_port_t pager_request;
+ vm_offset_t offset;
+ pointer_t addr;
+ vm_size_t data_cnt;
+{
+ panic("%sdata_unlock",my_name);
+ return(KERN_FAILURE);
+}
+
+/*
+ * memory_object_supply_completed is never expected by this pager:
+ * receiving one panics. All arguments are ignored.
+ */
+kern_return_t
+seqnos_memory_object_supply_completed(pager, seqno, pager_request,
+ offset, length,
+ result, error_offset)
+ memory_object_t pager;
+ mach_port_seqno_t seqno;
+ mach_port_t pager_request;
+ vm_offset_t offset;
+ vm_size_t length;
+ kern_return_t result;
+ vm_offset_t error_offset;
+{
+ panic("%ssupply_completed",my_name);
+ return(KERN_FAILURE);
+}
+
+/*
+ * memory_object_data_return is never expected by this pager:
+ * receiving one panics. All arguments are ignored.
+ */
+kern_return_t
+seqnos_memory_object_data_return(pager, seqno, pager_request,
+ offset, addr, data_cnt,
+ dirty, kernel_copy)
+ memory_object_t pager;
+ mach_port_seqno_t seqno;
+ mach_port_t pager_request;
+ vm_offset_t offset;
+ pointer_t addr;
+ vm_size_t data_cnt;
+ boolean_t dirty;
+ boolean_t kernel_copy;
+{
+ panic("%sdata_return",my_name);
+ return(KERN_FAILURE);
+}
+
+/*
+ * memory_object_change_completed is never expected by this pager:
+ * receiving one panics. All arguments are ignored.
+ */
+kern_return_t
+seqnos_memory_object_change_completed(pager, seqno, may_cache, copy_strategy)
+ memory_object_t pager;
+ mach_port_seqno_t seqno;
+ boolean_t may_cache;
+ memory_object_copy_strategy_t copy_strategy;
+{
+ panic("%schange_completed",my_name);
+ return(KERN_FAILURE);
+}
+
+
+/*
+ * Demultiplexer stage for kernel notifications.
+ * Returns FALSE when `in' is not a no-senders notification
+ * addressed to a send-once right; otherwise passes it to
+ * default_pager_no_senders, suppresses the reply by clearing
+ * out->msgh_remote_port, and returns TRUE.
+ */
+boolean_t default_pager_notify_server(in, out)
+	mach_msg_header_t *in, *out;
+{
+	register mach_no_senders_notification_t *notification;
+
+	notification = (mach_no_senders_notification_t *) in;
+
+	/*
+	 *	No-more-senders notifications are the only reason
+	 *	we ever create send-once rights.  A message that
+	 *	arrives through a send-once right can therefore be
+	 *	taken as a genuine no-senders notification from
+	 *	the kernel.
+	 */
+
+	if (notification->not_header.msgh_bits !=
+	    MACH_MSGH_BITS(0, MACH_MSG_TYPE_PORT_SEND_ONCE))
+		return FALSE;
+
+	if (notification->not_header.msgh_id != MACH_NOTIFY_NO_SENDERS)
+		return FALSE;
+
+	/* sanity-check the header ... */
+	assert(notification->not_header.msgh_size == sizeof *notification);
+	assert(notification->not_header.msgh_remote_port == MACH_PORT_NULL);
+
+	/* ... and the type descriptor of the single inline count */
+	assert(notification->not_type.msgt_name == MACH_MSG_TYPE_INTEGER_32);
+	assert(notification->not_type.msgt_size == 32);
+	assert(notification->not_type.msgt_number == 1);
+	assert(notification->not_type.msgt_inline);
+	assert(! notification->not_type.msgt_longform);
+
+	default_pager_no_senders(notification->not_header.msgh_local_port,
+				 notification->not_header.msgh_seqno,
+				 notification->not_count);
+
+	out->msgh_remote_port = MACH_PORT_NULL;
+	return TRUE;
+}
+
+extern boolean_t seqnos_memory_object_server();
+extern boolean_t seqnos_memory_object_default_server();
+extern boolean_t default_pager_server();
+extern boolean_t exc_server();
+extern boolean_t bootstrap_server();
+extern void bootstrap_compat();
+
+/* Message size passed to mach_msg_server by the object-serving threads. */
+mach_msg_size_t default_pager_msg_size_object = 128;
+
+/*
+ * Demultiplexer used by the object-serving threads.  The message
+ * is offered in turn to the memory_object server, the
+ * memory_object_default server and the notification server;
+ * the result is TRUE as soon as one of them accepts it.
+ */
+boolean_t
+default_pager_demux_object(in, out)
+	mach_msg_header_t	*in;
+	mach_msg_header_t	*out;
+{
+	clearerr (stdout);
+	printf ("dpi object message %d\n", in->msgh_id);
+
+	if (seqnos_memory_object_server(in, out))
+		return TRUE;
+
+	/*
+	 *	memory_object_data_initialize messages arrive in
+	 *	the memory_object_default interface.
+	 */
+	if (seqnos_memory_object_default_server(in, out))
+		return TRUE;
+
+	if (default_pager_notify_server(in, out))
+		return TRUE;
+
+	return FALSE;
+}
+
+/* Message size passed to mach_msg_server by the "default" thread. */
+mach_msg_size_t default_pager_msg_size_default = 8 * 1024;
+
+/*
+ * Demultiplexer used by the "default" thread.  The server routine
+ * is selected by the port the message arrived on; a message for
+ * any other port is a panic.
+ */
+boolean_t
+default_pager_demux_default(in, out)
+	mach_msg_header_t	*in;
+	mach_msg_header_t	*out;
+{
+	mach_port_t	dest;
+
+	clearerr (stdout);
+	printf ("dpi message %d\n", in->msgh_id);
+
+	dest = in->msgh_local_port;
+
+	if (dest == default_pager_default_port) {
+		/*
+		 *	memory_object_create messages arrive in
+		 *	the memory_object_default interface.
+		 */
+		if (seqnos_memory_object_default_server(in, out))
+			return TRUE;
+		if (default_pager_server(in, out))
+			return TRUE;
+		return FALSE;
+	}
+
+	if (dest == default_pager_exception_port) {
+		/*
+		 *	Exception messages for ourself and
+		 *	the startup task.
+		 */
+		return exc_server(in, out);
+	}
+
+	if (dest == default_pager_bootstrap_port) {
+		/*
+		 *	Bootstrap requests from the startup task.
+		 */
+		if (in->msgh_id != 999999)
+			return bootstrap_server(in, out);
+
+		/* compatibility for old bootstrap interface */
+		bootstrap_compat(in, out);
+		return TRUE;
+	}
+
+	panic(my_name);
+	return FALSE;
+}
+
+/*
+ * We use multiple threads, for two reasons.
+ *
+ * First, memory objects created by default_pager_object_create
+ * are "external", instead of "internal". This means the kernel
+ * sends data (memory_object_data_write) to the object pageable.
+ * To prevent deadlocks, the external and internal objects must
+ * be managed by different threads.
+ *
+ * Second, the default pager uses synchronous IO operations.
+ * Spreading requests across multiple threads should
+ * recover some of the performance loss from synchronous IO.
+ *
+ * We have 3+ threads.
+ * One receives memory_object_create and
+ * default_pager_object_create requests.
+ * One or more manage internal objects.
+ * One or more manage external objects.
+ */
+
+/*
+ * Give the calling thread the privileges a paging thread needs.
+ * Called by default_pager() for the main thread and by
+ * default_pager_thread() for threads serving internal objects.
+ */
+void
+default_pager_thread_privileges()
+{
+ /*
+ * Set thread privileges.
+ */
+ cthread_wire(); /* attach kernel thread to cthread */
+ wire_thread(); /* grab a kernel stack and memory allocation
+ privileges */
+}
+
+/*
+ * Body of an object-serving thread. `arg' is the thread's
+ * default_pager_thread_t; it is saved as cthread data so that
+ * the request handlers can find the per-thread read buffer.
+ * The thread then serves messages on the internal or external
+ * port set. mach_msg_server is not expected to return; if it
+ * does, the thread panics.
+ */
+any_t
+default_pager_thread(arg)
+ any_t arg;
+{
+ default_pager_thread_t *dpt = (default_pager_thread_t *) arg;
+ mach_port_t pset;
+ kern_return_t kr;
+
+ printf ("dtp\n");
+ cthread_set_data(cthread_self(), (any_t) dpt);
+
+ printf ("dtp %#x\n", dpt);
+
+ /*
+ * Threads handling external objects cannot have
+ * privileges. Otherwise a burst of data-requests for an
+ * external object could empty the free-page queue,
+ * because the fault code only reserves real pages for
+ * requests sent to internal objects.
+ */
+
+ if (dpt->dpt_internal) {
+ default_pager_thread_privileges();
+ pset = default_pager_internal_set;
+ } else {
+ pset = default_pager_external_set;
+ }
+
+ for (;;) {
+ printf ("dtp %#x loop\n", dpt);
+ kr = mach_msg_server(default_pager_demux_object,
+ default_pager_msg_size_object,
+ pset);
+ /* reached only if the server loop returned */
+ panic(my_name, kr);
+ }
+}
+
+/*
+ * Create one object-serving thread. Allocates its
+ * default_pager_thread_t and a wired one-page read buffer, then
+ * forks a cthread running default_pager_thread().
+ * `internal' selects whether the thread serves internal objects.
+ * Panics if either allocation fails.
+ */
+void
+start_default_pager_thread(internal)
+ boolean_t internal;
+{
+ default_pager_thread_t *dpt;
+ kern_return_t kr;
+
+ dpt = (default_pager_thread_t *) kalloc(sizeof *dpt);
+ if (dpt == 0)
+ panic(my_name);
+
+ dpt->dpt_internal = internal;
+
+ kr = vm_allocate(default_pager_self, &dpt->dpt_buffer,
+ vm_page_size, TRUE);
+ if (kr != KERN_SUCCESS)
+ panic(my_name);
+ /* the buffer is wired before any thread can use it */
+ wire_memory(dpt->dpt_buffer, vm_page_size,
+ VM_PROT_READ|VM_PROT_WRITE);
+
+ printf ("starting thread %d\n", internal);
+
+ dpt->dpt_thread = cthread_fork(default_pager_thread, (any_t) dpt);
+}
+
+/*
+ * One-time setup of the default pager task: record the task port,
+ * create the default pager port and register it with the kernel
+ * as default memory manager, create the exception and bootstrap
+ * ports, arrange for wiring privileges and, when the internal
+ * thread count is still zero, derive it from the number of
+ * available CPUs.
+ * host_port is used for vm_set_default_memory_manager, wire_setup
+ * and host_info.
+ * Panics if a port cannot be allocated or if the kernel refuses
+ * the registration (or reports a previous default manager).
+ */
+void
+default_pager_initialize(host_port)
+	mach_port_t host_port;
+{
+	memory_object_t		DMM;
+	kern_return_t		kr;
+
+	/*
+	 *	This task will become the default pager.
+	 */
+	default_pager_self = mach_task_self();
+
+	/*
+	 *	Initialize the "default pager" port.
+	 */
+	kr = mach_port_allocate(default_pager_self, MACH_PORT_RIGHT_RECEIVE,
+				&default_pager_default_port);
+	if (kr != KERN_SUCCESS)
+		panic(my_name);
+
+	DMM = default_pager_default_port;
+	kr = vm_set_default_memory_manager(host_port, &DMM);
+	if ((kr != KERN_SUCCESS) || (DMM != MACH_PORT_NULL))
+		panic(my_name);
+
+	/*
+	 *	Initialize the exception port.
+	 */
+	kr = mach_port_allocate(default_pager_self, MACH_PORT_RIGHT_RECEIVE,
+				&default_pager_exception_port);
+	if (kr != KERN_SUCCESS)
+		panic(my_name);
+
+	/*
+	 *	Initialize the bootstrap port.
+	 */
+	kr = mach_port_allocate(default_pager_self, MACH_PORT_RIGHT_RECEIVE,
+				&default_pager_bootstrap_port);
+	if (kr != KERN_SUCCESS)
+		panic(my_name);
+
+	/*
+	 * Arrange for wiring privileges.
+	 */
+	wire_setup(host_port);
+
+	/*
+	 * Find out how many CPUs we have, to determine the number
+	 * of threads to create.
+	 */
+	if (default_pager_internal_count == 0) {
+		host_basic_info_data_t	h_info;
+		natural_t		h_info_count;
+		int			cpus;
+
+		h_info_count = HOST_BASIC_INFO_COUNT;
+		kr = host_info(host_port, HOST_BASIC_INFO,
+				(host_info_t)&h_info, &h_info_count);
+
+		/*
+		 * h_info is only meaningful when the call succeeded;
+		 * otherwise fall back to a uniprocessor estimate
+		 * instead of reading uninitialized memory.
+		 */
+		cpus = (kr == KERN_SUCCESS) ? h_info.avail_cpus : 1;
+		if (cpus > 32)
+			cpus = 32;
+
+		/*
+		 * Random computation to get more parallelism on
+		 * multiprocessors.
+		 */
+		default_pager_internal_count = cpus / 4 + 3;
+	}
+}
+
+/*
+ * Initialize and Run the default pager
+ *
+ * Wires the calling thread and all of the task's memory, sets up
+ * the pager list, allocates the three port sets, puts the
+ * default, exception and bootstrap ports into the default set,
+ * starts the object-serving threads, and then serves the default
+ * set itself. Does not return: any failure, or a return from
+ * mach_msg_server, is a panic.
+ * The ports and default_pager_self used here are the ones set up
+ * by default_pager_initialize().
+ */
+void
+default_pager()
+{
+ kern_return_t kr;
+ int i;
+
+ printf ("dp1\n");
+ default_pager_thread_privileges();
+
+ /*
+ * Wire down code, data, stack
+ */
+ wire_all_memory();
+
+ printf ("dp2\n");
+
+ /*
+ * Initialize the list of all pagers.
+ */
+ pager_port_list_init();
+
+ printf ("dp3\n");
+
+ /*
+ * This thread will receive memory_object_create
+ * requests from the kernel and default_pager_object_create
+ * requests from the user via default_pager_default_port.
+ */
+
+ default_pager_default_thread = mach_thread_self();
+
+ kr = mach_port_allocate(default_pager_self, MACH_PORT_RIGHT_PORT_SET,
+ &default_pager_internal_set);
+ if (kr != KERN_SUCCESS)
+ panic(my_name);
+
+ kr = mach_port_allocate(default_pager_self, MACH_PORT_RIGHT_PORT_SET,
+ &default_pager_external_set);
+ if (kr != KERN_SUCCESS)
+ panic(my_name);
+
+ kr = mach_port_allocate(default_pager_self, MACH_PORT_RIGHT_PORT_SET,
+ &default_pager_default_set);
+ if (kr != KERN_SUCCESS)
+ panic(my_name);
+
+ /* the default set collects the three ports this thread serves */
+ kr = mach_port_move_member(default_pager_self,
+ default_pager_default_port,
+ default_pager_default_set);
+ if (kr != KERN_SUCCESS)
+ panic(my_name);
+
+ kr = mach_port_move_member(default_pager_self,
+ default_pager_exception_port,
+ default_pager_default_set);
+ if (kr != KERN_SUCCESS)
+ panic(my_name);
+
+ kr = mach_port_move_member(default_pager_self,
+ default_pager_bootstrap_port,
+ default_pager_default_set);
+ if (kr != KERN_SUCCESS)
+ panic(my_name);
+
+ printf ("dp4\n");
+
+ /*
+ * Now we create the threads that will actually
+ * manage objects.
+ */
+
+ for (i = 0; i < default_pager_internal_count; i++)
+ start_default_pager_thread(TRUE);
+
+ printf ("dp5\n");
+
+ for (i = 0; i < default_pager_external_count; i++)
+ start_default_pager_thread(FALSE);
+
+ printf ("dp6\n");
+
+ for (;;) {
+ printf ("dp7\n");
+ kr = mach_msg_server(default_pager_demux_default,
+ default_pager_msg_size_default,
+ default_pager_default_set);
+ /* reached only if the server loop returned */
+ panic(my_name, kr);
+ }
+}
+
+/*
+ * Create an external object.
+ *
+ * Allocates a pager structure for `size' bytes and a receive right
+ * named pnameof(ds) for it, registers the port in the pager list
+ * and the external port set, and returns the port in *mem_obj.
+ * If the name is already taken, the structure is copied to a
+ * freshly allocated one, the old one is parked on
+ * all_pagers.leak_queue, and the allocation is retried.
+ *
+ * Returns KERN_INVALID_ARGUMENT if `pager' is not the default pager
+ * port, KERN_RESOURCE_SHORTAGE if no memory is left for the retry,
+ * the error from mach_port_allocate_name for any failure other than
+ * a name clash, and KERN_SUCCESS otherwise.
+ */
+kern_return_t default_pager_object_create(pager, mem_obj, size)
+	mach_port_t pager;
+	mach_port_t *mem_obj;
+	vm_size_t size;
+{
+	default_pager_t ds;
+	mach_port_t port;
+	kern_return_t result;
+
+	if (pager != default_pager_default_port)
+		return KERN_INVALID_ARGUMENT;
+
+	ds = pager_port_alloc(size);
+rename_it:
+	port = (mach_port_t) pnameof(ds);
+	result = mach_port_allocate_name(default_pager_self,
+					 MACH_PORT_RIGHT_RECEIVE, port);
+	if (result != KERN_SUCCESS) {
+		default_pager_t	ds1;
+
+		if (result == KERN_NAME_EXISTS) {
+			ds1 = (default_pager_t) kalloc(sizeof *ds1);
+			if (ds1 == DEFAULT_PAGER_NULL)
+				result = KERN_RESOURCE_SHORTAGE;
+		}
+
+		if (result != KERN_NAME_EXISTS) {
+			/*
+			 * Nothing else knows about ds yet: give back
+			 * its paging storage and the structure instead
+			 * of leaking them.
+			 */
+			pager_dealloc(&ds->dpager);
+			kfree((char *) ds, sizeof *ds);
+			return (result);
+		}
+
+		*ds1 = *ds;
+		/* the old structure is reclaimed later by no_senders */
+		mutex_lock(&all_pagers.lock);
+		queue_enter(&all_pagers.leak_queue, ds, default_pager_t, links);
+		mutex_unlock(&all_pagers.lock);
+		ds = ds1;
+		goto rename_it;
+	}
+
+	/*
+	 *	Set up associations between these ports
+	 *	and this default_pager structure
+	 */
+
+	ds->pager = port;
+	pager_port_list_insert(port, ds);
+	default_pager_add(ds, FALSE);
+
+	*mem_obj = port;
+	return (KERN_SUCCESS);
+}
+
+/*
+ * Report total and free paging space and the page size in *infop.
+ * The totals are read with all_partitions.lock held and converted
+ * with ptoa() before being stored.
+ * Returns KERN_INVALID_ARGUMENT unless `pager' is the default
+ * pager port.
+ */
+kern_return_t default_pager_info(pager, infop)
+	mach_port_t pager;
+	default_pager_info_t *infop;
+{
+	vm_size_t	pages_total;
+	vm_size_t	pages_free;
+
+	if (pager == default_pager_default_port) {
+		mutex_lock(&all_partitions.lock);
+		paging_space_info(&pages_total, &pages_free);
+		mutex_unlock(&all_partitions.lock);
+
+		infop->dpi_total_space = ptoa(pages_total);
+		infop->dpi_free_space = ptoa(pages_free);
+		infop->dpi_page_size = vm_page_size;
+		return KERN_SUCCESS;
+	}
+
+	return KERN_INVALID_ARGUMENT;
+}
+
+/*
+ * Return one entry per known memory object: its identity and
+ * allocated size in *objectsp, and a send right for its name port
+ * in *portsp.  Objects that are busy or have no name port are
+ * reported as a zero entry with MACH_PORT_NULL.
+ *
+ * On entry *objectsp/*ocountp and *portsp/*pcountp describe the
+ * caller's inline buffers.  If one is too small for the number of
+ * pagers counted before the scan, a larger one is obtained with
+ * vm_allocate and returned in its place, trimmed to the pages
+ * actually used.  Both counts are set to the number of entries
+ * written.
+ *
+ * Returns KERN_INVALID_ARGUMENT unless `pager' is the default pager
+ * port, KERN_RESOURCE_SHORTAGE if a buffer cannot be allocated,
+ * KERN_SUCCESS otherwise.  Panics if more name references cannot
+ * be obtained from the kernel.
+ */
+kern_return_t default_pager_objects(pager, objectsp, ocountp, portsp, pcountp)
+	mach_port_t pager;
+	default_pager_object_array_t *objectsp;
+	natural_t *ocountp;
+	mach_port_array_t *portsp;
+	natural_t *pcountp;
+{
+	vm_offset_t			oaddr = 0; /* memory for objects */
+	vm_size_t			osize = 0; /* current size */
+	default_pager_object_t		*objects;
+	natural_t			opotential;
+
+	vm_offset_t			paddr = 0; /* memory for ports */
+	vm_size_t			psize = 0; /* current size */
+	mach_port_t			*ports;
+	natural_t			ppotential;
+
+	unsigned int			actual;
+	unsigned int			num_pagers;
+	kern_return_t			kr;
+	default_pager_t			entry;
+
+	if (pager != default_pager_default_port)
+		return KERN_INVALID_ARGUMENT;
+
+	/* start with the inline memory */
+
+	num_pagers = 0;
+
+	objects = *objectsp;
+	opotential = *ocountp;
+
+	ports = *portsp;
+	ppotential = *pcountp;
+
+	mutex_lock(&all_pagers.lock);
+	/*
+	 * We will send no more than this many
+	 */
+	actual = all_pagers.count;
+	mutex_unlock(&all_pagers.lock);
+
+	if (opotential < actual) {
+		vm_offset_t	newaddr;
+		vm_size_t	newsize;
+
+		/* twice what is needed now, as the list may grow */
+		newsize = 2 * round_page(actual * sizeof *objects);
+
+		kr = vm_allocate(default_pager_self, &newaddr, newsize, TRUE);
+		if (kr != KERN_SUCCESS)
+			goto nomemory;
+
+		oaddr = newaddr;
+		osize = newsize;
+		opotential = osize/sizeof *objects;
+		objects = (default_pager_object_t *) oaddr;
+	}
+
+	if (ppotential < actual) {
+		vm_offset_t	newaddr;
+		vm_size_t	newsize;
+
+		newsize = 2 * round_page(actual * sizeof *ports);
+
+		kr = vm_allocate(default_pager_self, &newaddr, newsize, TRUE);
+		if (kr != KERN_SUCCESS)
+			goto nomemory;
+
+		paddr = newaddr;
+		psize = newsize;
+		ppotential = psize/sizeof *ports;
+		ports = (mach_port_t *) paddr;
+	}
+
+	/*
+	 * Now scan the list.
+	 */
+
+	mutex_lock(&all_pagers.lock);
+
+	num_pagers = 0;
+	queue_iterate(&all_pagers.queue, entry, default_pager_t, links) {
+
+		mach_port_t		port;
+		vm_size_t		size;
+
+		if ((num_pagers >= opotential) ||
+		    (num_pagers >= ppotential)) {
+			/*
+			 *	This should be rare.  In any case,
+			 *	we will only miss recent objects,
+			 *	because they are added at the end.
+			 */
+			break;
+		}
+
+		/*
+		 * Avoid interfering with normal operations
+		 */
+		if (!mutex_try_lock(&entry->dpager.lock))
+			goto not_this_one;
+		size = pager_allocated(&entry->dpager);
+		mutex_unlock(&entry->dpager.lock);
+
+		dstruct_lock(entry);
+
+		port = entry->pager_name;
+		if (port == MACH_PORT_NULL) {
+			/*
+			 *	The object is waiting for no-senders
+			 *	or memory_object_init.
+			 */
+			dstruct_unlock(entry);
+			goto not_this_one;
+		}
+
+		/*
+		 *	We need a reference for the reply message.
+		 *	While we are unlocked, the bucket queue
+		 *	can change and the object might be terminated.
+		 *	memory_object_terminate will wait for us,
+		 *	preventing deallocation of the entry.
+		 */
+
+		if (--entry->name_refs == 0) {
+			dstruct_unlock(entry);
+
+			/* keep the list locked, wont take long */
+
+			kr = mach_port_mod_refs(default_pager_self,
+					port, MACH_PORT_RIGHT_SEND,
+					default_pager_max_urefs);
+			if (kr != KERN_SUCCESS)
+				panic("%sdefault_pager_objects",my_name);
+
+			dstruct_lock(entry);
+
+			entry->name_refs += default_pager_max_urefs;
+			pager_port_finish_refs(entry);
+		}
+		dstruct_unlock(entry);
+
+		/* the arrays are wired, so no deadlock worries */
+
+		objects[num_pagers].dpo_object = (vm_offset_t) entry;
+		objects[num_pagers].dpo_size = size;
+		ports  [num_pagers++] = port;
+		continue;
+not_this_one:
+		/*
+		 * Do not return garbage
+		 */
+		objects[num_pagers].dpo_object = (vm_offset_t) 0;
+		objects[num_pagers].dpo_size = 0;
+		ports  [num_pagers++] = MACH_PORT_NULL;
+
+	}
+
+	mutex_unlock(&all_pagers.lock);
+
+	/*
+	 *	Deallocate and clear unused memory.
+	 *	(Returned memory will automagically become pageable.)
+	 *
+	 *	The amount kept is computed from num_pagers, the
+	 *	number of entries actually written, not from the
+	 *	earlier estimate `actual': the list may have grown
+	 *	in between, and trimming to the estimate would
+	 *	deallocate entries that are still being returned.
+	 */
+
+	if (objects == *objectsp) {
+		/*
+		 *	Our returned information fit inline.
+		 *	Nothing to deallocate.
+		 */
+
+		*ocountp = num_pagers;
+	} else if (num_pagers == 0) {
+		(void) vm_deallocate(default_pager_self, oaddr, osize);
+
+		/* return zero items inline */
+		*ocountp = 0;
+	} else {
+		vm_offset_t used;
+
+		used = round_page(num_pagers * sizeof *objects);
+
+		if (used != osize)
+			(void) vm_deallocate(default_pager_self,
+					     oaddr + used, osize - used);
+
+		*objectsp = objects;
+		*ocountp = num_pagers;
+	}
+
+	if (ports == *portsp) {
+		/*
+		 *	Our returned information fit inline.
+		 *	Nothing to deallocate.
+		 */
+
+		*pcountp = num_pagers;
+	} else if (num_pagers == 0) {
+		(void) vm_deallocate(default_pager_self, paddr, psize);
+
+		/* return zero items inline */
+		*pcountp = 0;
+	} else {
+		vm_offset_t used;
+
+		used = round_page(num_pagers * sizeof *ports);
+
+		if (used != psize)
+			(void) vm_deallocate(default_pager_self,
+					     paddr + used, psize - used);
+
+		*portsp = ports;
+		*pcountp = num_pagers;
+	}
+
+	return KERN_SUCCESS;
+
+    nomemory:
+
+	/* num_pagers is still zero here, so no port right is released */
+	{
+		register int	i;
+		for (i = 0; i < num_pagers; i++)
+		    (void) mach_port_deallocate(default_pager_self, ports[i]);
+	}
+
+	if (objects != *objectsp)
+		(void) vm_deallocate(default_pager_self, oaddr, osize);
+
+	if (ports != *portsp)
+		(void) vm_deallocate(default_pager_self, paddr, psize);
+
+	return KERN_RESOURCE_SHORTAGE;
+}
+
+
+/*
+ * Return the page list, as reported by pager_pages(), of the
+ * memory object whose name port is `object'.
+ *
+ * On entry *pagesp/*countp describe the caller's inline buffer.
+ * Whenever pager_pages reports more entries than fit, a larger
+ * buffer is obtained with vm_allocate and the lookup is repeated
+ * from the start; the final buffer replaces *pagesp, trimmed to
+ * the pages actually used. *countp is set to the entry count.
+ *
+ * Returns KERN_INVALID_ARGUMENT if `pager' is not the default
+ * pager port or if no object has that name port, the vm_allocate
+ * error if a buffer cannot be obtained, KERN_SUCCESS otherwise.
+ */
+kern_return_t
+default_pager_object_pages(pager, object, pagesp, countp)
+ mach_port_t pager;
+ mach_port_t object;
+ default_pager_page_array_t *pagesp;
+ natural_t *countp;
+{
+ vm_offset_t addr; /* memory for page offsets */
+ vm_size_t size; /* current memory size */
+ default_pager_page_t *pages;
+ natural_t potential, actual;
+ kern_return_t kr;
+
+ if (pager != default_pager_default_port)
+ return KERN_INVALID_ARGUMENT;
+
+ /* we start with the inline space */
+
+ pages = *pagesp;
+ potential = *countp;
+
+ for (;;) {
+ default_pager_t entry;
+
+ mutex_lock(&all_pagers.lock);
+ queue_iterate(&all_pagers.queue, entry, default_pager_t, links) {
+ dstruct_lock(entry);
+ if (entry->pager_name == object) {
+ /* leave the loop with the entry still locked */
+ mutex_unlock(&all_pagers.lock);
+ goto found_object;
+ }
+ dstruct_unlock(entry);
+ }
+ mutex_unlock(&all_pagers.lock);
+
+ /* did not find the object */
+
+ /* addr and size are only valid once a buffer has been allocated */
+ if (pages != *pagesp)
+ (void) vm_deallocate(default_pager_self, addr, size);
+ return KERN_INVALID_ARGUMENT;
+
+ found_object:
+
+ if (!mutex_try_lock(&entry->dpager.lock)) {
+ /* oh well bad luck */
+
+ dstruct_unlock(entry);
+
+ /* yield the processor */
+ (void) thread_switch(MACH_PORT_NULL,
+ SWITCH_OPTION_NONE, 0);
+ continue;
+ }
+
+ actual = pager_pages(&entry->dpager, pages, potential);
+ mutex_unlock(&entry->dpager.lock);
+ dstruct_unlock(entry);
+
+ if (actual <= potential)
+ break;
+
+ /* allocate more memory */
+
+ if (pages != *pagesp)
+ (void) vm_deallocate(default_pager_self, addr, size);
+ size = round_page(actual * sizeof *pages);
+ kr = vm_allocate(default_pager_self, &addr, size, TRUE);
+ if (kr != KERN_SUCCESS)
+ return kr;
+ pages = (default_pager_page_t *) addr;
+ potential = size/sizeof *pages;
+ }
+
+ /*
+ * Deallocate and clear unused memory.
+ * (Returned memory will automagically become pageable.)
+ */
+
+ if (pages == *pagesp) {
+ /*
+ * Our returned information fit inline.
+ * Nothing to deallocate.
+ */
+
+ *countp = actual;
+ } else if (actual == 0) {
+ (void) vm_deallocate(default_pager_self, addr, size);
+
+ /* return zero items inline */
+ *countp = 0;
+ } else {
+ vm_offset_t used;
+
+ used = round_page(actual * sizeof *pages);
+
+ if (used != size)
+ (void) vm_deallocate(default_pager_self,
+ addr + used, size - used);
+
+ *pagesp = pages;
+ *countp = actual;
+ }
+ return KERN_SUCCESS;
+}
+
+/*
+ * Add/remove extra paging space
+ */
+
+extern mach_port_t bootstrap_master_device_port;
+extern mach_port_t bootstrap_master_host_port;
+
+/*
+ * Add (add != 0) or remove (add == 0) the paging file `file_name'.
+ *
+ * Requests not addressed to default_pager_default_port are rejected
+ * with KERN_INVALID_ARGUMENT.  Adding always goes through
+ * bootstrap_master_device_port; `mdport' is only released when it
+ * differs from that port.  The result of the add/remove call is
+ * returned to the caller.
+ */
+kern_return_t
+default_pager_paging_file(pager, mdport, file_name, add)
+	mach_port_t			pager;
+	mach_port_t			mdport;
+	default_pager_filename_t	file_name;
+	boolean_t			add;
+{
+	kern_return_t	result;
+
+	if (pager != default_pager_default_port)
+		return KERN_INVALID_ARGUMENT;
+
+#if 0
+printf("bmd %x md %x\n", bootstrap_master_device_port, mdport);
+#endif
+	result = add
+		? add_paging_file(bootstrap_master_device_port, file_name)
+		: remove_paging_file(file_name);
+
+	/* XXXX more code needed */
+	if (mdport != bootstrap_master_device_port)
+		mach_port_deallocate(mach_task_self(), mdport);
+
+	return result;
+}
+
+/*
+ * Register a file server with the default pager.
+ *
+ * Only the pager-port check is implemented; the remainder is compiled
+ * out under `notyet'.  Returns KERN_INVALID_ARGUMENT when `pager' is
+ * not default_pager_default_port, KERN_SUCCESS otherwise.
+ *
+ * The return type is spelled out (it was implicit int) so that the
+ * routine matches the other default_pager_* entry points, which all
+ * return kern_return_t.
+ */
+kern_return_t
+default_pager_register_fileserver(pager, fileserver)
+	mach_port_t	pager;
+	mach_port_t	fileserver;
+{
+	if (pager != default_pager_default_port)
+		return KERN_INVALID_ARGUMENT;
+#if notyet
+	mach_port_deallocate(mach_task_self(), fileserver);
+	if (0) dp_helper_paging_space(0,0,0);/*just linkit*/
+#endif
+	return KERN_SUCCESS;
+}
+
+/*
+ * Last resort when paging space is exhausted: optionally announce
+ * that memory ran out as well, then panic with the pager's name.
+ */
+no_paging_space(out_of_memory)
+	boolean_t	out_of_memory;
+{
+	if (out_of_memory) {
+		printf("*** OUT OF MEMORY *** ");
+	}
+
+	panic("%s *** NOT ENOUGH PAGING SPACE ***", my_name);
+}
+
+/*
+ * Track and report over-commitment of paging space.
+ *
+ * got_more_space  TRUE when called because paging space was added;
+ *                 the recorded shortage is re-evaluated against the
+ *                 space that is free now.
+ * space           (in pages) the amount that could not be satisfied,
+ *                 used when got_more_space is FALSE.
+ *
+ * The shortage and the "user already warned" flag persist across
+ * calls in function-static variables.
+ *
+ * All arithmetic is done with comparisons before subtraction:
+ * vm_size_t is an unsigned type in Mach, so the previous
+ * "subtract, then test for > 0" form wrapped around instead of going
+ * negative and a shortage was practically never recorded.
+ *
+ * The return type is left implicit on purpose: earlier callers in
+ * this file rely on the implicit declaration.
+ */
+overcommitted(got_more_space, space)
+	boolean_t	got_more_space;
+	vm_size_t	space;	/* in pages */
+{
+	vm_size_t	pages_free, pages_total;
+
+	static boolean_t user_warned = FALSE;
+	static vm_size_t pages_shortage = 0;
+
+	paging_space_info(&pages_total, &pages_free);
+
+	/*
+	 * If user added more space, see if it is enough
+	 */
+	if (got_more_space) {
+		if (pages_free > pages_shortage) {
+			pages_shortage = 0;
+			if (user_warned)
+				printf("%s paging space ok now.\n", my_name);
+		} else {
+			/* still short by whatever the new space did not cover */
+			pages_shortage -= pages_free;
+		}
+		user_warned = FALSE;
+		return;
+	}
+	/*
+	 * We ran out of gas, let user know.
+	 */
+	pages_shortage = (pages_free > space) ? 0 : space - pages_free;
+	if (!user_warned && pages_shortage) {
+		user_warned = TRUE;
+		printf("%s paging space over-committed.\n", my_name);
+	}
+#if debug
+	user_warned = FALSE;
+	printf("%s paging space over-committed [+%d (%d) pages].\n",
+	       my_name, space, pages_shortage);
+#endif
+}
+
+/*
+ * Sum total_size and free over every partition slot that
+ * partition_of() reports as present, and hand both sums back
+ * through totp and freep.
+ */
+paging_space_info(totp, freep)
+	vm_size_t	*totp, *freep;
+{
+	vm_size_t	sum_total = 0;
+	vm_size_t	sum_free = 0;
+	partition_t	p;
+	int		idx;
+
+	for (idx = 0; idx < all_partitions.n_partitions; idx++) {
+		p = partition_of(idx);
+		if (p != 0) {
+			/*
+			 * Read without locking: the figures would be
+			 * stale by the time a remote requestor sees
+			 * them anyway.
+			 */
+			sum_total += p->total_size;
+			sum_free += p->free;
+#if debug
+			printf("Partition %d: x%x total, x%x free\n",
+			       idx, p->total_size, p->free);
+#endif
+		}
+	}
+
+	*totp = sum_total;
+	*freep = sum_free;
+}
+
+/*
+ * Catch exceptions.
+ *
+ * Any exception delivered to the default pager is fatal: the
+ * exception triple is printed and the pager panics.  KERN_FAILURE
+ * is returned only if panic() comes back.
+ */
+
+kern_return_t
+catch_exception_raise(exception_port, thread, task, exception, code, subcode)
+	mach_port_t	exception_port;
+	mach_port_t	thread, task;
+	int		exception, code, subcode;
+{
+	printf("(default_pager)catch_exception_raise(%d,%d,%d)\n",
+	       exception, code, subcode);
+	/*
+	 * panic() takes a printf-style format (see no_paging_space),
+	 * so the name is passed as an argument and never as the
+	 * format string itself.
+	 */
+	panic("%s", my_name);
+
+	/* mach_msg_server will deallocate thread/task for us */
+
+	return KERN_FAILURE;
+}
+
+/*
+ * Handle bootstrap requests.
+ */
+
+/*
+ * Hand the privileged host and device ports back to the requestor.
+ *
+ * bootstrap  request port; not examined here.
+ * hostp      out: receives bootstrap_master_host_port.
+ * devicep    out: receives bootstrap_master_device_port.
+ *
+ * Always returns KERN_SUCCESS.
+ */
+kern_return_t
+do_bootstrap_privileged_ports(bootstrap, hostp, devicep)
+ mach_port_t bootstrap;
+ mach_port_t *hostp, *devicep;
+{
+ *hostp = bootstrap_master_host_port;
+ *devicep = bootstrap_master_device_port;
+ return KERN_SUCCESS;
+}
+
+/*
+ * Answer a bootstrap request with a hand-built reply message.
+ *
+ * in   the request; its remote port, remote-port bits and msgh_id
+ *      are used to address and label the reply.
+ * out  the reply buffer supplied by the server loop; only its
+ *      RetCode is written (MIG_NO_REPLY), because the real reply
+ *      is sent from here with mach_msg.
+ *
+ * The reply carries two inline port items: the master host port
+ * followed by the master device port.
+ */
+void
+bootstrap_compat(in, out)
+ mach_msg_header_t *in, *out;
+{
+ mig_reply_header_t *reply = (mig_reply_header_t *) out;
+ mach_msg_return_t mr;
+
+ /* reply layout: header, then two (type descriptor, port) pairs */
+ struct imsg {
+ mach_msg_header_t hdr;
+ mach_msg_type_t port_desc_1;
+ mach_port_t port_1;
+ mach_msg_type_t port_desc_2;
+ mach_port_t port_2;
+ } imsg;
+
+ /*
+ * Send back the host and device ports.
+ */
+
+ imsg.hdr.msgh_bits = MACH_MSGH_BITS_COMPLEX |
+ MACH_MSGH_BITS(MACH_MSGH_BITS_REMOTE(in->msgh_bits), 0);
+ /* msgh_size doesn't need to be initialized */
+ imsg.hdr.msgh_remote_port = in->msgh_remote_port;
+ imsg.hdr.msgh_local_port = MACH_PORT_NULL;
+ /* msgh_seqno doesn't need to be initialized */
+ imsg.hdr.msgh_id = in->msgh_id + 100; /* this is a reply msg */
+
+ imsg.port_desc_1.msgt_name = MACH_MSG_TYPE_COPY_SEND;
+ imsg.port_desc_1.msgt_size = (sizeof(mach_port_t) * 8);
+ imsg.port_desc_1.msgt_number = 1;
+ imsg.port_desc_1.msgt_inline = TRUE;
+ imsg.port_desc_1.msgt_longform = FALSE;
+ imsg.port_desc_1.msgt_deallocate = FALSE;
+ imsg.port_desc_1.msgt_unused = 0;
+
+ imsg.port_1 = bootstrap_master_host_port;
+
+ /* the second descriptor is identical to the first */
+ imsg.port_desc_2 = imsg.port_desc_1;
+
+ imsg.port_2 = bootstrap_master_device_port;
+
+ /*
+ * Send the reply message.
+ * (mach_msg_server can not do this, because the reply
+ * is not in standard format.)
+ */
+
+ mr = mach_msg(&imsg.hdr, MACH_SEND_MSG,
+ sizeof imsg, 0, MACH_PORT_NULL,
+ MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
+ /* the send failed: release the reply port right ourselves */
+ if (mr != MACH_MSG_SUCCESS)
+ (void) mach_port_deallocate(default_pager_self,
+ imsg.hdr.msgh_remote_port);
+
+ /*
+ * Tell mach_msg_server to do nothing.
+ */
+
+ reply->RetCode = MIG_NO_REPLY;
+}
+
+#ifdef mips
+/*
+ * Local set_ras_address for the default pager, which has no
+ * emulator support to provide one.
+ *
+ * Installs the range [basepc, boundspc) arguments via
+ * task_ras_control on the pager's own task.  Returns 0 when the
+ * call reports KERN_SUCCESS and -1 otherwise.
+ */
+int
+set_ras_address(basepc, boundspc)
+	vm_offset_t	basepc;
+	vm_offset_t	boundspc;
+{
+	if (task_ras_control(mach_task_self(), basepc, boundspc,
+			     TASK_RAS_CONTROL_INSTALL_ONE) != KERN_SUCCESS)
+		return -1;
+
+	return 0;
+}
+#endif
diff --git a/serverboot/defs.h b/serverboot/defs.h
new file mode 100644
index 00000000..7b872fd6
--- /dev/null
+++ b/serverboot/defs.h
@@ -0,0 +1,95 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * Common definitions for Berkeley Fast File System.
+ */
+
+/*
+ * Compatibility definitions for disk IO.
+ */
+
+/*
+ * Disk devices do all IO in 512-byte blocks.
+ */
+#define DEV_BSIZE 512
+
+/*
+ * Conversion between bytes and disk blocks.
+ *
+ * The shift count 9 is log2(DEV_BSIZE) (2^9 == 512); the two must
+ * be changed together.  btodb truncates: a byte offset inside a
+ * block maps to that block's number.
+ */
+#define btodb(byte_offset) ((byte_offset) >> 9)
+#define dbtob(block_number) ((block_number) << 9)
+
+/*
+ * Compatibility definitions for old type names.
+ */
+
+/*
+ * 64-bit quantity stored as two unsigned ints.
+ * NOTE(review): "8-byte" assumes a 4-byte unsigned int -- confirm
+ * for every target this is built on.
+ */
+typedef struct _quad_ {
+ unsigned int val[2]; /* 2 int values make... */
+} quad; /* an 8-byte item */
+
+#if 0
+typedef unsigned char u_char; /* unsigned char */
+typedef unsigned short u_short; /* unsigned short */
+typedef unsigned int u_int; /* unsigned int */
+
+typedef unsigned int time_t; /* an unsigned int */
+typedef unsigned int daddr_t; /* an unsigned int */
+typedef unsigned int off_t; /* another unsigned int */
+
+typedef unsigned short uid_t;
+typedef unsigned short gid_t;
+typedef unsigned int ino_t;
+#endif
+
+#define NBBY 8
+
+/*
+ * The file system is made out of blocks of at most MAXBSIZE units,
+ * with smaller units (fragments) only in the last direct block.
+ * MAXBSIZE primarily determines the size of buffers in the buffer
+ * pool. It may be made larger without any effect on existing
+ * file systems; however, making it smaller may make some file
+ * systems unmountable.
+ *
+ * Note that the disk devices are assumed to have DEV_BSIZE "sectors"
+ * and that fragments must be some multiple of this size.
+ */
+#define MAXBSIZE 8192
+#define MAXFRAG 8
+
+/*
+ * MAXPATHLEN defines the longest permissible path length
+ * after expanding symbolic links.
+ *
+ * MAXSYMLINKS defines the maximum number of symbolic links
+ * that may be expanded in a path name. It should be set
+ * high enough to allow all legitimate uses, but halt infinite
+ * loops reasonably quickly.
+ */
+
+#define MAXPATHLEN 1024
+#define MAXSYMLINKS 8
+
diff --git a/serverboot/dir.h b/serverboot/dir.h
new file mode 100644
index 00000000..208df5ce
--- /dev/null
+++ b/serverboot/dir.h
@@ -0,0 +1,142 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * Copyright (c) 1982, 1986, 1989 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that the above copyright notice and this paragraph are
+ * duplicated in all such forms and that any documentation,
+ * advertising materials, and other materials related to such
+ * distribution and use acknowledge that the software was developed
+ * by the University of California, Berkeley. The name of the
+ * University may not be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * @(#)dir.h 7.6 (Berkeley) 5/9/89
+ */
+
+#ifndef _BOOT_UFS_DIR_H_
+#define _BOOT_UFS_DIR_H_
+
+/*
+ * A directory consists of some number of blocks of DIRBLKSIZ
+ * bytes, where DIRBLKSIZ is chosen such that it can be transferred
+ * to disk in a single atomic operation (e.g. 512 bytes on most machines).
+ *
+ * Each DIRBLKSIZ byte block contains some number of directory entry
+ * structures, which are of variable length. Each directory entry has
+ * a struct direct at the front of it, containing its inode number,
+ * the length of the entry, and the length of the name contained in
+ * the entry. These are followed by the name padded to a 4 byte boundary
+ * with null bytes. All names are guaranteed null terminated.
+ * The maximum length of a name in a directory is MAXNAMLEN.
+ *
+ * The macro DIRSIZ(dp) gives the amount of space required to represent
+ * a directory entry. Free space in a directory is represented by
+ * entries which have dp->d_reclen > DIRSIZ(dp). All DIRBLKSIZ bytes
+ * in a directory block are claimed by the directory entries. This
+ * usually results in the last entry in a directory having a large
+ * dp->d_reclen. When entries are deleted from a directory, the
+ * space is returned to the previous entry in the same directory
+ * block by increasing its dp->d_reclen. If the first entry of
+ * a directory block is free, then its dp->d_ino is set to 0.
+ * Entries other than the first in a directory do not normally have
+ * dp->d_ino set to 0.
+ */
+#define DIRBLKSIZ DEV_BSIZE
+#define MAXNAMLEN 255
+
+struct direct {
+ u_int d_ino; /* inode number of entry */
+ u_short d_reclen; /* length of this record */
+ u_short d_namlen; /* length of string in d_name */
+ char d_name[MAXNAMLEN + 1]; /* name with length <= MAXNAMLEN */
+};
+
+/*
+ * The DIRSIZ macro gives the minimum record length which will hold
+ * the directory entry. This requires the amount of space in struct direct
+ * without the d_name field, plus enough space for the name with a terminating
+ * null byte (dp->d_namlen+1), rounded up to a 4 byte boundary.
+ */
+#undef DIRSIZ
+#define DIRSIZ(dp) \
+ ((sizeof (struct direct) - (MAXNAMLEN+1)) + (((dp)->d_namlen+1 + 3) &~ 3))
+
+#ifdef KERNEL
+/*
+ * Template for manipulating directories.
+ * Should use struct direct's, but the name field
+ * is MAXNAMLEN + 1, and this just won't do.
+ */
+struct dirtemplate {
+ u_int dot_ino;
+ short dot_reclen;
+ short dot_namlen;
+ char dot_name[4]; /* must be multiple of 4 */
+ u_int dotdot_ino;
+ short dotdot_reclen;
+ short dotdot_namlen;
+ char dotdot_name[4]; /* ditto */
+};
+#endif
+
+/*
+ * The following information should be obtained from <dirent.h>
+ * and is provided solely (and temporarily) for backward compatibility.
+ */
+#ifndef KERNEL
+#define d_fileno d_ino /* compatibility with POSIX */
+/*
+ * Fallback for DEV_BSIZE.  DIRBLKSIZ was #defined in terms of
+ * DEV_BSIZE earlier in this header; that is fine because the macro
+ * is not expanded until dd_buf is declared below.
+ */
+#ifndef DEV_BSIZE
+#define DEV_BSIZE 512
+#endif
+/*
+ * Definitions for library routines operating on directories.
+ */
+/* Directory stream state; dd_buf holds one DIRBLKSIZ-byte block. */
+typedef struct _dirdesc {
+ int dd_fd;
+ int dd_loc;
+ int dd_size;
+ char dd_buf[DIRBLKSIZ];
+} DIR;
+
+#define dirfd(dirp) ((dirp)->dd_fd)
+
+#ifndef NULL
+#define NULL 0
+#endif
+/* Old-style (unprototyped) declarations of the directory routines. */
+extern DIR *opendir();
+extern struct direct *readdir();
+extern int telldir();
+extern void seekdir();
+#define rewinddir(dirp) seekdir((dirp), (long)0)
+extern void closedir();
+#endif /* not KERNEL */
+#endif /* _BOOT_UFS_DIR_H_ */
diff --git a/serverboot/disk_inode.h b/serverboot/disk_inode.h
new file mode 100644
index 00000000..e0f49ea3
--- /dev/null
+++ b/serverboot/disk_inode.h
@@ -0,0 +1,101 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * Copyright (c) 1982, 1989 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that the above copyright notice and this paragraph are
+ * duplicated in all such forms and that any documentation,
+ * advertising materials, and other materials related to such
+ * distribution and use acknowledge that the software was developed
+ * by the University of California, Berkeley. The name of the
+ * University may not be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * @(#)inode.h 7.5 (Berkeley) 7/3/89
+ */
+
+#ifndef _BOOT_UFS_DISK_INODE_H_
+#define _BOOT_UFS_DISK_INODE_H_
+
+/*
+ * The I node is the focus of all file activity in the BSD Fast File System.
+ * There is a unique inode allocated for each active file,
+ * each current directory, each mounted-on file, text file, and the root.
+ * An inode is 'named' by its dev/inumber pair. (iget/iget.c)
+ * Data in icommon is read in from permanent inode on volume.
+ */
+
+#define FFS_NDADDR 12 /* direct addresses in inode */
+#define FFS_NIADDR 3 /* indirect addresses in inode */
+
+#define FFS_MAX_FASTLINK_SIZE ((FFS_NDADDR + FFS_NIADDR) * sizeof(daddr_t))
+
+/*
+ * Inode fields as read from the volume.  The numbers that lead the
+ * field comments appear to be byte offsets within the structure.
+ *
+ * The block-address arrays and the in-inode symbolic link name share
+ * storage through the ic_Mun union; use the ic_db, ic_ib and
+ * ic_symlink macros rather than spelling out the union path.
+ *
+ * NOTE(review): the closing "} i_ic;" is a declarator, so including
+ * this header also declares an object named i_ic (disk_inode_ffs.h
+ * separately defines i_ic as a macro) -- confirm this is intended.
+ */
+struct icommon {
+ u_short ic_mode; /* 0: mode and type of file */
+ short ic_nlink; /* 2: number of links to file */
+ uid_t ic_uid; /* 4: owner's user id */
+ gid_t ic_gid; /* 6: owner's group id */
+ quad ic_size; /* 8: number of bytes in file */
+ time_t ic_atime; /* 16: time last accessed */
+ int ic_atspare;
+ time_t ic_mtime; /* 24: time last modified */
+ int ic_mtspare;
+ time_t ic_ctime; /* 32: last time inode changed */
+ int ic_ctspare;
+ union {
+ struct {
+ daddr_t Mb_db[FFS_NDADDR]; /* 40: disk block addresses */
+ daddr_t Mb_ib[FFS_NIADDR]; /* 88: indirect blocks */
+ } ic_Mb;
+ char ic_Msymlink[FFS_MAX_FASTLINK_SIZE];
+ /* 40: symbolic link name */
+ } ic_Mun;
+#define ic_db ic_Mun.ic_Mb.Mb_db
+#define ic_ib ic_Mun.ic_Mb.Mb_ib
+#define ic_symlink ic_Mun.ic_Msymlink
+ int ic_flags; /* 100: status, currently unused */
+ int ic_blocks; /* 104: blocks actually held */
+ int ic_gen; /* 108: generation number */
+ int ic_spare[4]; /* 112: reserved, currently unused */
+} i_ic;
+
+/*
+ * Same structure, but on disk.
+ *
+ * The char array overlays the inode fields so that a dinode is never
+ * smaller than 128 bytes; di_ic names the struct icommon view.
+ */
+struct dinode {
+ union {
+ struct icommon di_com;
+ char di_char[128];
+ } di_un;
+};
+#define di_ic di_un.di_com
+
+#endif /* _BOOT_UFS_DISK_INODE_H_ */
diff --git a/serverboot/disk_inode_ffs.h b/serverboot/disk_inode_ffs.h
new file mode 100644
index 00000000..43690b2f
--- /dev/null
+++ b/serverboot/disk_inode_ffs.h
@@ -0,0 +1,99 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * Copyright (c) 1982, 1989 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that the above copyright notice and this paragraph are
+ * duplicated in all such forms and that any documentation,
+ * advertising materials, and other materials related to such
+ * distribution and use acknowledge that the software was developed
+ * by the University of California, Berkeley. The name of the
+ * University may not be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * @(#)inode.h 7.5 (Berkeley) 7/3/89
+ */
+
+#ifndef _BOOT_UFS_DISK_INODE_FFS_H_
+#define _BOOT_UFS_DISK_INODE_FFS_H_
+
+/* Generic names for the FFS-specific inode constants. */
+#define NDADDR FFS_NDADDR
+#define NIADDR FFS_NIADDR
+
+#define MAX_FASTLINK_SIZE FFS_MAX_FASTLINK_SIZE
+
+#define IC_FASTLINK 0x0001 /* Symbolic link in inode */
+
+/*
+ * Short names for the inode fields.  They all expand through i_ic,
+ * which is itself defined at the end of this header.
+ */
+#define i_mode i_ic.ic_mode
+#define i_nlink i_ic.ic_nlink
+#define i_uid i_ic.ic_uid
+#define i_gid i_ic.ic_gid
+/* i_size is one word of the two-word ic_size, chosen by byte order */
+#if BYTE_MSF
+#define i_size i_ic.ic_size.val[1]
+#else /* BYTE_LSF */
+#define i_size i_ic.ic_size.val[0]
+#endif
+#define i_db i_ic.ic_db
+#define i_ib i_ic.ic_ib
+#define i_atime i_ic.ic_atime
+#define i_mtime i_ic.ic_mtime
+#define i_ctime i_ic.ic_ctime
+#define i_blocks i_ic.ic_blocks
+#define i_rdev i_ic.ic_db[0]
+#define i_symlink i_ic.ic_symlink
+#define i_flags i_ic.ic_flags
+#define i_gen i_ic.ic_gen
+
+/* modes */
+#define IFMT 0xf000 /* type of file */
+#define IFCHR 0x2000 /* character special */
+#define IFDIR 0x4000 /* directory */
+#define IFBLK 0x6000 /* block special */
+#define IFREG 0x8000 /* regular */
+#define IFLNK 0xa000 /* symbolic link */
+#define IFSOCK 0xc000 /* socket */
+
+
+#define ISUID 0x0800 /* set user id on execution */
+#define ISGID 0x0400 /* set group id on execution */
+#define ISVTX 0x0200 /* save swapped text even after use */
+#define IREAD 0x0100 /* read, write, execute permissions */
+#define IWRITE 0x0080
+#define IEXEC 0x0040
+
+/* Map the generic file-structure names onto the u.ffs member. */
+#define f_fs u.ffs.ffs_fs
+#define i_ic u.ffs.ffs_ic
+#define f_nindir u.ffs.ffs_nindir
+#define f_blk u.ffs.ffs_blk
+#define f_blksize u.ffs.ffs_blksize
+#define f_blkno u.ffs.ffs_blkno
+
+#endif /* _BOOT_UFS_DISK_INODE_FFS_H_ */
diff --git a/serverboot/elf-load.c b/serverboot/elf-load.c
new file mode 100644
index 00000000..1d103d3c
--- /dev/null
+++ b/serverboot/elf-load.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 1995, 1994, 1993, 1992, 1991, 1990
+ * Open Software Foundation, Inc.
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby granted,
+ * provided that the above copyright notice appears in all copies and
+ * that both the copyright notice and this permission notice appear in
+ * supporting documentation, and that the name of ("OSF") or Open Software
+ * Foundation not be used in advertising or publicity pertaining to
+ * distribution of the software without specific, written prior permission.
+ *
+ * OSF DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL OSF BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * ACTION OF CONTRACT, NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE
+ */
+/*
+ * OSF Research Institute MK6.1 (unencumbered) 1/31/1995
+ */
+
+#include <alloca.h>
+#include <mach/machine/vm_types.h>
+#include <mach/exec/elf.h>
+#include <mach/exec/exec.h>
+
+/*
+ * Load a 32-bit ELF executable.
+ *
+ * read       callback used to read the ELF header and the program
+ *            header table from the file.
+ * read_exec  callback invoked once per PT_LOAD program header with
+ *            the file offset/size, the virtual address/size and the
+ *            section type flags derived from the segment flags.
+ * handle     opaque cookie passed through to both callbacks.
+ * out_info   receives the entry point (out_info->entry).
+ *
+ * Returns 0 on success; a nonzero result from either callback is
+ * returned as is; EX_NOT_EXECUTABLE for a short or non-ELF header;
+ * EX_WRONG_ARCH for a class/byte-order/machine mismatch; EX_CORRUPT
+ * for a truncated or malformed program header table.
+ */
+int exec_load(exec_read_func_t *read, exec_read_exec_func_t *read_exec,
+	      void *handle, exec_info_t *out_info)
+{
+	vm_size_t actual;
+	Elf32_Ehdr x;
+	Elf32_Phdr *phdr, *ph;
+	vm_size_t phsize;
+	int i;
+	int result;
+
+	/* Read the ELF header.  */
+	if ((result = (*read)(handle, 0, &x, sizeof(x), &actual)) != 0)
+		return result;
+	if (actual < sizeof(x))
+		return EX_NOT_EXECUTABLE;
+
+	if ((x.e_ident[EI_MAG0] != ELFMAG0) ||
+	    (x.e_ident[EI_MAG1] != ELFMAG1) ||
+	    (x.e_ident[EI_MAG2] != ELFMAG2) ||
+	    (x.e_ident[EI_MAG3] != ELFMAG3))
+		return EX_NOT_EXECUTABLE;
+
+	/* Make sure the file is of the right architecture.  */
+	if ((x.e_ident[EI_CLASS] != ELFCLASS32) ||
+	    (x.e_ident[EI_DATA] != MY_EI_DATA) ||
+	    (x.e_machine != MY_E_MACHINE))
+		return EX_WRONG_ARCH;
+
+	/* XXX others */
+	out_info->entry = (vm_offset_t) x.e_entry;
+
+	/*
+	 * Each table entry is accessed below as an Elf32_Phdr, so an
+	 * entry size smaller than that would make us read past the
+	 * end of the table.
+	 */
+	if (x.e_phnum != 0 && x.e_phentsize < sizeof(Elf32_Phdr))
+		return EX_CORRUPT;
+
+	/*
+	 * NOTE(review): the table is placed on the stack and its size
+	 * comes straight from the file; a bound on phsize may be
+	 * worth adding.
+	 */
+	phsize = x.e_phnum * x.e_phentsize;
+	phdr = (Elf32_Phdr *)alloca(phsize);
+
+	result = (*read)(handle, x.e_phoff, phdr, phsize, &actual);
+	if (result)
+		return result;
+	if (actual < phsize)
+		return EX_CORRUPT;
+
+	for (i = 0; i < x.e_phnum; i++)
+	{
+		ph = (Elf32_Phdr *)((vm_offset_t)phdr + i * x.e_phentsize);
+		if (ph->p_type == PT_LOAD)
+		{
+			exec_sectype_t type = EXEC_SECTYPE_ALLOC |
+					      EXEC_SECTYPE_LOAD;
+			if (ph->p_flags & PF_R) type |= EXEC_SECTYPE_READ;
+			if (ph->p_flags & PF_W) type |= EXEC_SECTYPE_WRITE;
+			if (ph->p_flags & PF_X) type |= EXEC_SECTYPE_EXECUTE;
+			result = (*read_exec)(handle,
+					      ph->p_offset, ph->p_filesz,
+					      ph->p_vaddr, ph->p_memsz, type);
+			/* a segment that failed to load must not be
+			   reported as success */
+			if (result)
+				return result;
+		}
+	}
+
+	return 0;
+}
+
diff --git a/serverboot/exec.c b/serverboot/exec.c
new file mode 100644
index 00000000..5b5feedc
--- /dev/null
+++ b/serverboot/exec.c
@@ -0,0 +1,88 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1993-1989 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * i386-specific routines for loading a.out files.
+ */
+
+#include <mach.h>
+#include <mach/machine/vm_param.h>
+#include <mach/machine/eflags.h>
+#include <mach/exec/exec.h>
+
+#include <file_io.h>
+
+/*
+ * Machine-dependent portions of execve() for the i386.
+ */
+
+#define STACK_SIZE (64*1024)
+
+/*
+ * Give a freshly created i386 thread a stack and its initial
+ * registers.
+ *
+ * A STACK_SIZE region ending at VM_MAX_ADDRESS is allocated in
+ * user_task, the thread's eip is set to info->entry and its uesp is
+ * placed arg_size (plus room for 5 ints) below the stack end,
+ * rounded down to an int boundary.  The resulting stack pointer is
+ * returned.  Results of the Mach calls are deliberately ignored.
+ */
+char *set_regs(
+	mach_port_t	user_task,
+	mach_port_t	user_thread,
+	struct exec_info *info,
+	int		arg_size)
+{
+	vm_offset_t	stack_top = VM_MAX_ADDRESS;
+	vm_offset_t	stack_base = VM_MAX_ADDRESS - STACK_SIZE;
+	struct i386_thread_state state;
+	unsigned int	state_count = i386_THREAD_STATE_COUNT;
+
+	/*
+	 * Reserve room for 5 extra ints next to the arguments,
+	 * for the PS program.  XXX
+	 */
+	arg_size += 5 * sizeof(int);
+
+	/* Carve out the stack at the top of the address space. */
+	(void)vm_allocate(user_task,
+			  &stack_base,
+			  (vm_size_t)(stack_top - stack_base),
+			  FALSE);
+
+	/* Start from the thread's current state and patch it. */
+	(void)thread_get_state(user_thread,
+			       i386_THREAD_STATE,
+			       (thread_state_t)&state,
+			       &state_count);
+
+	state.eip = info->entry;
+	state.uesp = (int)((stack_top - arg_size) & ~(sizeof(int)-1));
+
+	/* state.efl |= EFL_TF;	trace flag*/
+
+	(void)thread_set_state(user_thread,
+			       i386_THREAD_STATE,
+			       (thread_state_t)&state,
+			       state_count);
+
+	return (char *)state.uesp;
+}
+
diff --git a/serverboot/ext2_file_io.c b/serverboot/ext2_file_io.c
new file mode 100644
index 00000000..9d743368
--- /dev/null
+++ b/serverboot/ext2_file_io.c
@@ -0,0 +1,1099 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * Stand-alone file reading package.
+ */
+
+#include <device/device_types.h>
+#include <device/device.h>
+
+#include <mach/mach_traps.h>
+#include <mach/mach_interface.h>
+
+#include "file_io.h"
+#include "ffs_compat.h"
+
+void ext2_close_file(); /* forward */
+
+/*
+ * Release every block cached for a file: the per-level indirect block
+ * cache and the data buffer.  The caches are marked empty; the device
+ * and the mounted file system are left alone.
+ */
+static void
+free_file_buffers(struct file *fp)
+{
+    int lvl = 0;
+
+    /* Indirect-block cache, one slot per indirection level. */
+    while (lvl < NIADDR) {
+        vm_offset_t cached = fp->f_blk[lvl];
+
+        if (cached != 0) {
+            (void) vm_deallocate(mach_task_self(),
+                                 cached,
+                                 fp->f_blksize[lvl]);
+            fp->f_blk[lvl] = 0;
+        }
+        fp->f_blkno[lvl] = -1;    /* nothing cached at this level */
+        lvl++;
+    }
+
+    /* Data-block buffer. */
+    if (fp->f_buf != 0) {
+        (void) vm_deallocate(mach_task_self(),
+                             fp->f_buf,
+                             fp->f_buf_size);
+        fp->f_buf = 0;
+    }
+    fp->f_buf_blkno = -1;
+}
+
+/*
+ * Load inode `inumber' from disk into a file structure.
+ *
+ * On success fp->i_ic holds a copy of the on-disk inode, fp->f_size its
+ * size, and all blocks cached for the previously loaded inode have been
+ * released.  Returns 0, or the error code from device_read.
+ */
+static int
+read_inode(ino_t inumber, struct file *fp)
+{
+    struct ext2_super_block *sb = fp->f_fs;
+    struct ext2_inode *ip;
+    vm_offset_t blk;
+    mach_msg_type_number_t blk_size;
+    daddr_t blkno;
+    kern_return_t kr;
+
+    /* Fetch the inode-table block that contains this inode. */
+    blkno = ino2blk(sb, fp->f_gd, inumber);
+    kr = device_read(fp->f_dev,
+                     0,
+                     (recnum_t) fsbtodb(sb, blkno),
+                     (int) EXT2_BLOCK_SIZE(sb),
+                     (char **)&blk,
+                     &blk_size);
+    if (kr != KERN_SUCCESS)
+        return (kr);
+
+    /* Copy the inode out before the block is given back. */
+    ip = (struct ext2_inode *)blk + itoo(sb, inumber);
+    fp->i_ic = *ip;
+    fp->f_size = ip->i_size;
+
+    (void) vm_deallocate(mach_task_self(), blk, blk_size);
+
+    /* Cached blocks belong to the old inode; drop them. */
+    free_file_buffers(fp);
+
+    return (0);
+}
+
+/*
+ * Given a logical block number within a file, find the file-system
+ * block that holds it.
+ *
+ * fp            file whose inode (fp->i_ic) is consulted
+ * file_block    logical block number within the file
+ * disk_block_p  out: file-system block number; 0 is stored when the
+ *               walk reaches a zero block pointer
+ *
+ * Returns 0 on success, FS_NOT_IN_FILE if the block number is outside
+ * the range the inode can address, or the error from device_read.
+ * *disk_block_p is written only on success.
+ */
+static int
+block_map(struct file *fp, daddr_t file_block, daddr_t *disk_block_p)
+{
+    int level;
+    int idx;
+    daddr_t ind_block_num;
+    kern_return_t rc = 0;
+
+    /* Cache blocks displaced during this call; freed after unlocking. */
+    vm_offset_t olddata[NIADDR+1];
+    vm_size_t oldsize[NIADDR+1];
+
+    /*
+     * Index structure of an inode:
+     *
+     * i_block[0..NDADDR-1]    block numbers for file blocks
+     *                         0..NDADDR-1
+     *
+     * i_block[NDADDR+0]       single indirect block: block numbers
+     *                         for the next NINDIR(fs) file blocks
+     *
+     * i_block[NDADDR+1]       double indirect block: block numbers
+     *                         of INDEX blocks for the next
+     *                         NINDIR(fs)**2 file blocks
+     *
+     * i_block[NDADDR+2]       triple indirect block: block numbers
+     *                         of double-indirect blocks for the
+     *                         next NINDIR(fs)**3 file blocks
+     */
+
+    /* A negative block number would index before i_block[]. */
+    if (file_block < 0)
+        return (FS_NOT_IN_FILE);
+
+    mutex_lock(&fp->f_lock);
+
+    if (file_block < NDADDR) {
+        /* Direct block. */
+        *disk_block_p = fp->i_ic.i_block[file_block];
+        mutex_unlock(&fp->f_lock);
+        return (0);
+    }
+
+    file_block -= NDADDR;
+
+    /*
+     * f_nindir[0] = NINDIR
+     * f_nindir[1] = NINDIR**2
+     * f_nindir[2] = NINDIR**3
+     * Find the indirection level that covers file_block.
+     */
+    for (level = 0; level < NIADDR; level++) {
+        if (file_block < fp->f_nindir[level])
+            break;
+        file_block -= fp->f_nindir[level];
+    }
+    if (level == NIADDR) {
+        /* Block number too high */
+        mutex_unlock(&fp->f_lock);
+        return (FS_NOT_IN_FILE);
+    }
+
+    ind_block_num = fp->i_ic.i_block[level + NDADDR];
+
+    /*
+     * Initialize array of blocks to free.
+     */
+    for (idx = 0; idx < NIADDR; idx++)
+        oldsize[idx] = 0;
+
+    for (; level >= 0; level--) {
+
+        vm_offset_t data;
+        mach_msg_type_number_t size;
+
+        if (ind_block_num == 0)
+            break;
+
+        if (fp->f_blkno[level] == ind_block_num) {
+            /*
+             * Cache hit. Just pick up the data.
+             */
+            data = fp->f_blk[level];
+        }
+        else {
+            /*
+             * Drop our lock while doing the read.
+             * (The f_dev and f_fs fields don't change.)
+             */
+            mutex_unlock(&fp->f_lock);
+
+            rc = device_read(fp->f_dev,
+                             0,
+                             (recnum_t) fsbtodb(fp->f_fs, ind_block_num),
+                             EXT2_BLOCK_SIZE(fp->f_fs),
+                             (char **)&data,
+                             &size);
+            if (rc != KERN_SUCCESS) {
+                /*
+                 * The lock is not held here.  Blocks already
+                 * displaced at higher levels must still be
+                 * freed, so leave through the common cleanup.
+                 */
+                goto free_old;
+            }
+
+            /*
+             * Install the new block in the cache.  The block it
+             * displaces is only remembered here; it is deallocated
+             * after the lock has been released.
+             */
+            mutex_lock(&fp->f_lock);
+
+            olddata[level] = fp->f_blk[level];
+            /*
+             * f_blksize may be stale once f_blk has been cleared;
+             * only schedule a free when a block is really cached.
+             */
+            oldsize[level] = (fp->f_blk[level] != 0)
+                                 ? fp->f_blksize[level] : 0;
+
+            fp->f_blkno[level] = ind_block_num;
+            fp->f_blk[level] = data;
+            fp->f_blksize[level] = size;
+        }
+
+        /* Select the entry of this index block that covers file_block. */
+        if (level > 0) {
+            idx = file_block / fp->f_nindir[level-1];
+            file_block %= fp->f_nindir[level-1];
+        }
+        else
+            idx = file_block;
+
+        ind_block_num = ((daddr_t *)data)[idx];
+    }
+
+    mutex_unlock(&fp->f_lock);
+
+    *disk_block_p = ind_block_num;
+    rc = 0;
+
+free_old:
+    /*
+     * With the file unlocked, free any blocks that were pushed
+     * out of the cache (on success and on error alike).
+     */
+    for (idx = 0; idx < NIADDR; idx++)
+        if (oldsize[idx] != 0)
+            (void) vm_deallocate(mach_task_self(),
+                                 olddata[idx],
+                                 oldsize[idx]);
+
+    return (rc);
+}
+
+/*
+ * Read a portion of a file into the file's internal buffer.
+ *
+ * fp      file to read
+ * offset  byte offset within the file
+ * buf_p   out: address, inside fp->f_buf, of the byte at `offset'
+ * size_p  out: number of valid bytes from *buf_p to the end of the
+ *         block, truncated at end of file
+ *
+ * Returns 0 on success, FS_NOT_IN_FILE if offset is at or past end of
+ * file, or an error from block_map, vm_allocate or device_read.  The
+ * buffer remains owned by fp and is replaced by later calls.
+ */
+static int
+buf_read_file(fp, offset, buf_p, size_p)
+    register struct file *fp;
+    vm_offset_t offset;
+    vm_offset_t *buf_p; /* out */
+    vm_size_t *size_p; /* out */
+{
+    register
+    struct ext2_super_block *fs;
+    vm_offset_t off;
+    register daddr_t file_block;
+    daddr_t disk_block;
+    int rc;
+    vm_offset_t block_size;
+
+    if (offset >= fp->i_ic.i_size)
+        return (FS_NOT_IN_FILE);
+
+    fs = fp->f_fs;
+
+    off = blkoff(fs, offset);
+    file_block = lblkno(fs, offset);
+    block_size = blksize(fs, fp, file_block);
+
+    if (file_block != fp->f_buf_blkno) {
+        rc = block_map(fp, file_block, &disk_block);
+        if (rc != 0)
+            return (rc);
+
+        /*
+         * Give the old buffer back and forget it immediately, so
+         * that a failure below cannot leave f_buf pointing at
+         * deallocated memory (later reuse or a second deallocate).
+         */
+        if (fp->f_buf) {
+            (void)vm_deallocate(mach_task_self(),
+                                fp->f_buf,
+                                fp->f_buf_size);
+            fp->f_buf = 0;
+        }
+        fp->f_buf_blkno = -1;
+
+        if (disk_block == 0) {
+            /* No block on disk: supply freshly allocated memory. */
+            rc = vm_allocate(mach_task_self(),
+                             &fp->f_buf,
+                             block_size,
+                             TRUE);
+            if (rc == KERN_SUCCESS)
+                fp->f_buf_size = block_size;
+        }
+        else {
+            rc = device_read(fp->f_dev,
+                             0,
+                             (recnum_t) fsbtodb(fs, disk_block),
+                             (int) block_size,
+                             (char **) &fp->f_buf,
+                             (mach_msg_type_number_t *)&fp->f_buf_size);
+        }
+        if (rc) {
+            fp->f_buf = 0;
+            return (rc);
+        }
+
+        fp->f_buf_blkno = file_block;
+    }
+
+    /*
+     * Return address of byte in buffer corresponding to
+     * offset, and size of remainder of buffer after that
+     * byte.
+     */
+    *buf_p = fp->f_buf + off;
+    *size_p = block_size - off;
+
+    /*
+     * But truncate buffer at end of file.
+     */
+    if (*size_p > fp->i_ic.i_size - offset)
+        *size_p = fp->i_ic.i_size - offset;
+
+    return (0);
+}
+
+/*
+ * Search a directory for a name and return its inode number.
+ *
+ * name       NUL-terminated component to look for
+ * fp         file structure holding the directory's inode
+ * inumber_p  out: inode number of the matching entry
+ *
+ * Returns 0 when found, FS_NO_ENTRY when the directory has no such
+ * entry, FS_INVALID_FS when an entry is malformed (zero record length
+ * or header outside the buffer), or an error from buf_read_file.
+ */
+static int
+search_directory(name, fp, inumber_p)
+    char * name;
+    register struct file *fp;
+    ino_t *inumber_p; /* out */
+{
+    vm_offset_t buf;
+    vm_size_t buf_size;
+    vm_offset_t offset;
+    register struct ext2_dir_entry *dp;
+    vm_size_t hdr_len;
+    int length;
+    int i;
+    kern_return_t rc;
+
+    length = strlen(name);
+
+    offset = 0;
+    while (offset < fp->i_ic.i_size) {
+        rc = buf_read_file(fp, offset, &buf, &buf_size);
+        if (rc != KERN_SUCCESS)
+            return (rc);
+
+        dp = (struct ext2_dir_entry *)buf;
+        hdr_len = (vm_size_t)((char *)dp->name - (char *)dp);
+
+        /*
+         * The entry comes straight from disk.  A zero rec_len
+         * would never advance the scan, and a header that does
+         * not fit in the buffer cannot be examined safely.
+         */
+        if (buf_size < hdr_len || dp->rec_len == 0)
+            return (FS_INVALID_FS);
+
+        /*
+         * Compare in place: name_len is a 16-bit on-disk value, so
+         * copying the name into a fixed local buffer could overrun
+         * it.  Names are matched by length plus exact bytes.
+         */
+        if (dp->inode != 0 &&
+            dp->name_len == length &&
+            hdr_len + dp->name_len <= buf_size)
+        {
+            for (i = 0; i < length; i++)
+                if (name[i] != dp->name[i])
+                    break;
+            if (i == length) {
+                /* found entry */
+                *inumber_p = dp->inode;
+                return (0);
+            }
+        }
+        offset += dp->rec_len;
+    }
+    return (FS_NO_ENTRY);
+}
+
+/*
+ * Read the ext2 super block and group descriptors from a device.
+ *
+ * dev        device port to read from
+ * fsp        out: super block (memory returned by device_read)
+ * gdp        out: group descriptor table (memory from device_read)
+ * gd_size_p  out: size in bytes requested for the descriptor table
+ *
+ * Returns 0 on success, FS_INVALID_FS if the magic number is wrong or
+ * the super block cannot be used to locate the groups, or the error
+ * from device_read.  The out parameters are written only on success;
+ * the caller then owns both buffers.
+ */
+static int
+read_fs(dev, fsp, gdp, gd_size_p)
+    mach_port_t dev;
+    struct ext2_super_block **fsp;
+    struct ext2_group_desc **gdp;
+    vm_size_t *gd_size_p;
+{
+    register
+    struct ext2_super_block *fs;
+    vm_offset_t buf;
+    vm_offset_t buf2;
+    mach_msg_type_number_t buf_size;
+    mach_msg_type_number_t buf2_size;
+    int error;
+    int gd_count;
+    int gd_blocks;
+    int gd_size;
+    int gd_location;
+    int gd_sector;
+
+    /*
+     * Read the super block
+     */
+    error = device_read(dev, 0, (recnum_t) SBLOCK, SBSIZE,
+                        (char **) &buf, &buf_size);
+    if (error)
+        return (error);
+
+    /*
+     * Check the super block.  s_blocks_per_group is used as a
+     * divisor below, so a zero value is rejected as well.
+     */
+    fs = (struct ext2_super_block *)buf;
+    if (fs->s_magic != EXT2_SUPER_MAGIC ||
+        fs->s_blocks_per_group == 0) {
+        (void) vm_deallocate(mach_task_self(), buf, buf_size);
+        return (FS_INVALID_FS);
+    }
+
+    /*
+     * Compute the location and size of the group descriptors:
+     * they start in the block after the first data block.
+     */
+    gd_count = (fs->s_blocks_count - fs->s_first_data_block +
+                fs->s_blocks_per_group - 1) / fs->s_blocks_per_group;
+    gd_blocks = (gd_count + EXT2_DESC_PER_BLOCK(fs) - 1) /
+                EXT2_DESC_PER_BLOCK(fs);
+    gd_size = gd_blocks * EXT2_BLOCK_SIZE(fs);
+    gd_location = fs->s_first_data_block + 1;
+    gd_sector = (gd_location * EXT2_BLOCK_SIZE(fs)) / DEV_BSIZE;
+
+    /*
+     * Read the group descriptors
+     */
+    error = device_read(dev, 0, (recnum_t) gd_sector, gd_size,
+                        (char **) &buf2, &buf2_size);
+    if (error) {
+        (void) vm_deallocate(mach_task_self(), buf, buf_size);
+        return error;
+    }
+
+    /*
+     * Publish the results only now, so that a failure never leaves
+     * the caller holding a pointer to a deallocated super block.
+     */
+    *fsp = fs;
+    *gdp = (struct ext2_group_desc *) buf2;
+    *gd_size_p = gd_size;
+
+    return 0;
+}
+
+/*
+ * Read the file system structures for fp->f_dev and precompute how
+ * many file blocks each indirection level covers:
+ * f_nindir[n] = NINDIR(fs) ** (n + 1).
+ * Returns 0, or the error from read_fs.
+ */
+static int
+mount_fs(struct file *fp)
+{
+    struct ext2_super_block *sb;
+    int covered = 1;
+    int lvl;
+    int err;
+
+    err = read_fs(fp->f_dev, &fp->f_fs, &fp->f_gd, &fp->f_gd_size);
+    if (err)
+        return (err);
+
+    sb = fp->f_fs;
+
+    /* Calculate indirect block levels. */
+    for (lvl = 0; lvl < NIADDR; lvl++) {
+        covered *= NINDIR(sb);
+        fp->f_nindir[lvl] = covered;
+    }
+
+    return (0);
+}
+
+/*
+ * Release the super block and group descriptor table of a structured
+ * file and mark the file as no longer holding a file system.
+ * Does nothing for a file that is not structured.
+ */
+static void
+unmount_fs(struct file *fp)
+{
+    if (!file_is_structured(fp))
+        return;
+
+    (void) vm_deallocate(mach_task_self(),
+                         (vm_offset_t) fp->f_fs,
+                         SBSIZE);
+    (void) vm_deallocate(mach_task_self(),
+                         (vm_offset_t) fp->f_gd,
+                         fp->f_gd_size);
+    fp->f_fs = 0;
+}
+
+/*
+ * Open a file.
+ *
+ * master_device_port  port used to open the device named in the path
+ * path                "/dev/<device>" optionally followed by a path
+ *                     inside the file system on that device
+ * fp                  file structure to fill in; it is zeroed first
+ *
+ * The path is walked one component at a time starting at the root
+ * inode; symbolic links are followed (at most MAXSYMLINKS of them) and
+ * may lead to another "/dev/<device>".  A path that names only the
+ * device opens the device itself, with fp->f_fs left 0.
+ *
+ * Returns 0 on success, otherwise an FS_* code or the error of the
+ * failing device or file system call.
+ */
+int
+ext2_open_file(master_device_port, path, fp)
+    mach_port_t master_device_port;
+    char * path;
+    struct file *fp;
+{
+#define RETURN(code) { rc = (code); goto exit; }
+
+    register char *cp, *component;
+    register int c; /* char */
+    register int rc;
+    ino_t inumber, parent_inumber;
+    int nlinks = 0;
+
+    char namebuf[MAXPATHLEN+1];
+
+    if (path == 0 || *path == '\0') {
+        return FS_NO_ENTRY;
+    }
+
+    /*
+     * namebuf has a fixed size; refuse anything that would not fit
+     * instead of letting strcpy run past the end of it.
+     */
+    if (strlen(path) > MAXPATHLEN) {
+        return FS_NAME_TOO_LONG;
+    }
+
+    /*
+     * Copy name into buffer to allow modifying it.
+     */
+    strcpy(namebuf, path);
+
+    /*
+     * Look for '/dev/xxx' at start of path, for
+     * root device.
+     */
+    if (!strprefix(namebuf, "/dev/")) {
+        printf("no device name\n");
+        return FS_NO_ENTRY;
+    }
+
+    cp = namebuf + 5; /* device */
+    component = cp;
+    while ((c = *cp) != '\0' && c != '/') {
+        cp++;
+    }
+    *cp = '\0';
+
+    bzero (fp, sizeof (struct file));
+
+    rc = device_open(master_device_port,
+                     D_READ|D_WRITE,
+                     component,
+                     &fp->f_dev);
+    if (rc)
+        return rc;
+
+    if (c == 0) {
+        /* Only a device was named: no file system to mount. */
+        fp->f_fs = 0;
+        goto out_ok;
+    }
+
+    *cp = c;
+
+    rc = mount_fs(fp);
+    if (rc)
+        return rc;
+
+    inumber = (ino_t) ROOTINO;
+    if ((rc = read_inode(inumber, fp)) != 0) {
+        printf("can't read root inode\n");
+        goto exit;
+    }
+
+    while (*cp) {
+
+        /*
+         * Check that current node is a directory.
+         */
+        if ((fp->i_ic.i_mode & IFMT) != IFDIR)
+            RETURN (FS_NOT_DIRECTORY);
+
+        /*
+         * Remove extra separators
+         */
+        while (*cp == '/')
+            cp++;
+
+        /*
+         * Get next component of path name.
+         */
+        component = cp;
+        {
+            register int len = 0;
+
+            while ((c = *cp) != '\0' && c != '/') {
+                if (len++ > MAXNAMLEN)
+                    RETURN (FS_NAME_TOO_LONG);
+                if (c & 0200)
+                    RETURN (FS_INVALID_PARAMETER);
+                cp++;
+            }
+            *cp = 0;
+        }
+
+        /*
+         * Look up component in current directory.
+         * Save directory inumber in case we find a
+         * symbolic link.
+         */
+        parent_inumber = inumber;
+        rc = search_directory(component, fp, &inumber);
+        if (rc) {
+            printf("%s: not found\n", path);
+            goto exit;
+        }
+        *cp = c;
+
+        /*
+         * Open next component.
+         */
+        if ((rc = read_inode(inumber, fp)) != 0)
+            goto exit;
+
+        /*
+         * Check for symbolic link.
+         */
+        if ((fp->i_ic.i_mode & IFMT) == IFLNK) {
+
+            int link_len = fp->i_ic.i_size;
+            int len;
+
+            len = strlen(cp) + 1;
+
+            /*
+             * i_size is unsigned on disk; a value that turns
+             * negative as an int must not pass the length check.
+             */
+            if (link_len < 0 ||
+                link_len + len >= MAXPATHLEN - 1)
+                RETURN (FS_NAME_TOO_LONG);
+
+            if (++nlinks > MAXSYMLINKS)
+                RETURN (FS_SYMLINK_LOOP);
+
+            /* Move the unresolved tail up to make room for the link. */
+            ovbcopy(cp, &namebuf[link_len], len);
+
+#ifdef IC_FASTLINK
+            if (fp->i_ic.i_blocks == 0) {
+                bcopy(fp->i_ic.i_block, namebuf, (unsigned) link_len);
+            }
+            else
+#endif /* IC_FASTLINK */
+            {
+                /*
+                 * Read file for symbolic link
+                 */
+                vm_offset_t buf;
+                mach_msg_type_number_t buf_size;
+                daddr_t disk_block;
+                register struct ext2_super_block *fs = fp->f_fs;
+
+                /*
+                 * disk_block is only valid when block_map
+                 * succeeds, so its result must be checked.
+                 */
+                rc = block_map(fp, (daddr_t)0, &disk_block);
+                if (rc)
+                    goto exit;
+
+                rc = device_read(fp->f_dev,
+                                 0,
+                                 (recnum_t) fsbtodb(fs, disk_block),
+                                 (int) blksize(fs, fp, 0),
+                                 (char **) &buf,
+                                 &buf_size);
+                if (rc)
+                    goto exit;
+
+                bcopy((char *)buf, namebuf, (unsigned)link_len);
+                (void) vm_deallocate(mach_task_self(), buf, buf_size);
+            }
+
+            /*
+             * If relative pathname, restart at parent directory.
+             * If absolute pathname, restart at root.
+             * If pathname begins '/dev/<device>/',
+             * restart at root of that device.
+             */
+            cp = namebuf;
+            if (*cp != '/') {
+                inumber = parent_inumber;
+            }
+            else if (!strprefix(cp, "/dev/")) {
+                inumber = (ino_t)ROOTINO;
+            }
+            else {
+                cp += 5;
+                component = cp;
+                while ((c = *cp) != '\0' && c != '/') {
+                    cp++;
+                }
+                *cp = '\0';
+
+                /*
+                 * Unmount current file system and free buffers.
+                 */
+                ext2_close_file(fp);
+
+                /*
+                 * Open new root device.
+                 */
+                rc = device_open(master_device_port,
+                                 D_READ,
+                                 component,
+                                 &fp->f_dev);
+                if (rc)
+                    return (rc);
+
+                if (c == 0) {
+                    fp->f_fs = 0;
+                    goto out_ok;
+                }
+
+                *cp = c;
+
+                rc = mount_fs(fp);
+                if (rc)
+                    return (rc);
+
+                inumber = (ino_t)ROOTINO;
+            }
+            if ((rc = read_inode(inumber, fp)) != 0)
+                goto exit;
+        }
+    }
+
+    /*
+     * Found terminal component.
+     */
+ out_ok:
+    mutex_init(&fp->f_lock);
+    return 0;
+
+    /*
+     * At error exit, close file to free storage.
+     */
+ exit:
+    ext2_close_file(fp);
+    return rc;
+}
+
+/*
+ * Close file - free all storage used.
+ *
+ * Releases the super block and group descriptors (if the file is
+ * structured) and then the cached indirect and data blocks.
+ */
+void
+ext2_close_file(fp)
+    register struct file *fp;
+{
+    /*
+     * Free the disk super-block.
+     */
+    unmount_fs(fp);
+
+    /*
+     * Free the inode and data buffers.
+     */
+    free_file_buffers(fp);
+}
+
+/* Nonzero when the inode loaded in fp has file type IFDIR. */
+int
+ext2_file_is_directory(struct file *fp)
+{
+    int file_type = fp->i_ic.i_mode & IFMT;
+
+    return file_type == IFDIR;
+}
+
+/* Nonzero when the inode loaded in fp has file type IFREG. */
+int
+ext2_file_is_regular(struct file *fp)
+{
+    int file_type = fp->i_ic.i_mode & IFMT;
+
+    return file_type == IFREG;
+}
+
+/*
+ * Copy `size' bytes of a file, starting at `offset', to memory at
+ * `start'.  The copy proceeds block by block through the file's
+ * internal buffer, so it may cross block boundaries.
+ *
+ * If `resid' is not null it receives the number of bytes that were
+ * not copied.  Returns 0, or the first error from buf_read_file (in
+ * which case *resid is not written).
+ */
+int
+ext2_read_file(fp, offset, start, size, resid)
+    register struct file *fp;
+    vm_offset_t offset;
+    vm_offset_t start;
+    vm_size_t size;
+    vm_size_t *resid; /* out */
+{
+    vm_offset_t chunk;
+    vm_size_t chunk_size;
+    vm_size_t n;
+    int err;
+
+    for (; size != 0; offset += n, start += n, size -= n) {
+        err = buf_read_file(fp, offset, &chunk, &chunk_size);
+        if (err)
+            return (err);
+
+        /* Copy no more than what is left in this block. */
+        n = (size > chunk_size) ? chunk_size : size;
+        if (n == 0)
+            break;
+
+        bcopy((char *)chunk, (char *)start, n);
+    }
+
+    if (resid)
+        *resid = size;
+
+    return (0);
+}
+
+/*
+ * Simple utility: base-2 logarithm, meaningful only for powers of two.
+ *
+ * The result is the number of trailing zero bits of n.  For n == 0
+ * there is no set bit to find, so 0 is returned rather than looping
+ * forever.
+ */
+static int
+log2(n)
+    register unsigned int n;
+{
+    register int i = 0;
+
+    if (n == 0)
+        return 0;
+
+    while ((n & 1) == 0) {
+        i++;
+        n >>= 1;
+    }
+    return i;
+}
+
+/*
+ * Make an empty file_direct for a device.
+ *
+ * For an unstructured device the block size is the VM page size and
+ * fd_fsbtodb is left 0 to be filled in later.  For a structured one
+ * the super block is read to obtain the ext2 block size; the super
+ * block and group descriptors are released again before returning.
+ *
+ * Returns 0, or the error from read_fs (fdp is then left untouched).
+ */
+int
+ext2_open_file_direct(dev, fdp, is_structured)
+    mach_port_t dev;
+    register struct file_direct *fdp;
+    boolean_t is_structured;
+{
+    struct ext2_super_block *sb;
+    struct ext2_group_desc *gdesc;
+    vm_size_t gdesc_size;
+    int err;
+
+    if (is_structured) {
+        err = read_fs(dev, &sb, &gdesc, &gdesc_size);
+        if (err)
+            return err;
+
+        fdp->fd_bsize = EXT2_BLOCK_SIZE(sb);
+        fdp->fd_bshift = log2(fdp->fd_bsize);
+        fdp->fd_fsbtodb = log2(fdp->fd_bsize / DEV_BSIZE);
+
+        /* Only the block size was needed from the file system. */
+        (void) vm_deallocate(mach_task_self(),
+                             (vm_offset_t) sb,
+                             SBSIZE);
+        (void) vm_deallocate(mach_task_self(),
+                             (vm_offset_t) gdesc,
+                             gdesc_size);
+    }
+    else {
+        fdp->fd_bsize = vm_page_size;
+        fdp->fd_bshift = log2(vm_page_size);
+        fdp->fd_fsbtodb = 0; /* later */
+    }
+
+    /* Common to both kinds: no blocks recorded yet. */
+    fdp->fd_dev = dev;
+    fdp->fd_blocks = (daddr_t *) 0;
+    fdp->fd_size = 0; /* later, for an unstructured device */
+
+    return 0;
+}
+
+/*
+ * Add blocks from a file to a file_direct.
+ *
+ * The file must live on the device the file_direct was opened for.
+ * For an unstructured file the device size and record size are
+ * queried and stored in fd_size and fd_fsbtodb.  Otherwise the disk
+ * block number of every whole block of the file is appended to
+ * fd_blocks, and the file's cached blocks are released.
+ *
+ * Returns 0, FS_INVALID_FS on a device or block size mismatch, or the
+ * error of the failing Mach or block_map call.
+ */
+int
+ext2_add_file_direct(fdp, fp)
+    register struct file_direct *fdp;
+    register struct file *fp;
+{
+    struct ext2_super_block *sb;
+    daddr_t *table;
+    vm_offset_t buffer;
+    vm_size_t size;
+    long nblocks, blk;
+    int rc;
+
+    /* the file must be on the same device */
+    if (fdp->fd_dev != fp->f_dev)
+        return FS_INVALID_FS;
+
+    if (!file_is_structured(fp)) {
+        int result[DEV_GET_SIZE_COUNT];
+        natural_t count = DEV_GET_SIZE_COUNT;
+
+        rc = device_get_status(fdp->fd_dev, DEV_GET_SIZE,
+                               result, &count);
+        if (rc)
+            return rc;
+        fdp->fd_size = result[DEV_GET_SIZE_DEVICE_SIZE] >> fdp->fd_bshift;
+        fdp->fd_fsbtodb = log2(fdp->fd_bsize/result[DEV_GET_SIZE_RECORD_SIZE]);
+        return 0;
+    }
+
+    /* it must hold a file system with the expected block size */
+    sb = fp->f_fs;
+    if (fdp->fd_bsize != EXT2_BLOCK_SIZE(sb))
+        return FS_INVALID_FS;
+
+    /* number of blocks in the file, ignoring fragments */
+    nblocks = lblkno(sb, fp->i_ic.i_size);
+
+    /* allocate an array large enough for old and new entries */
+    size = (nblocks + fdp->fd_size) * sizeof(daddr_t);
+    rc = vm_allocate(mach_task_self(), &buffer, size, TRUE);
+    if (rc != KERN_SUCCESS)
+        return rc;
+    table = (daddr_t *) buffer;
+
+    /* new block addresses go after the fd_size existing ones */
+    for (blk = 0; blk < nblocks; blk++) {
+        daddr_t disk_block;
+
+        rc = block_map(fp, (daddr_t) blk, &disk_block);
+        if (rc != 0) {
+            (void) vm_deallocate(mach_task_self(), buffer, size);
+            return rc;
+        }
+
+        table[fdp->fd_size + blk] = disk_block;
+    }
+
+    /* carry the old addresses over and retire the old array */
+    if (fdp->fd_blocks != 0) {
+        bcopy((char *) fdp->fd_blocks, (char *) buffer,
+              fdp->fd_size * sizeof(daddr_t));
+
+        (void) vm_deallocate(mach_task_self(),
+                             (vm_offset_t) fdp->fd_blocks,
+                             (vm_size_t) (fdp->fd_size * sizeof(daddr_t)));
+    }
+    fdp->fd_blocks = table;
+    fdp->fd_size += nblocks;
+
+    /* deallocate cached blocks */
+    free_file_buffers(fp);
+
+    return 0;
+}
+
+/*
+ * Release the block list of a file_direct.
+ *
+ * Always returns 0.  (The function is declared int, so it must return
+ * a value; falling off the end left the result undefined.)
+ */
+int
+ext2_remove_file_direct(fdp)
+    struct file_direct *fdp;
+{
+    if (fdp->fd_blocks)
+        (void) vm_deallocate(mach_task_self(),
+                             (vm_offset_t) fdp->fd_blocks,
+                             (vm_size_t) (fdp->fd_size * sizeof(daddr_t)));
+    fdp->fd_blocks = 0; /* sanity */
+    /* xxx should lose a ref to fdp->fd_dev here (and elsewhere) xxx */
+    return 0;
+}
+
+/*
+ * Special read and write routines for default pager.
+ * Assume that all offsets and sizes are multiples
+ * of DEV_BSIZE.
+ */
+
+/* Byte offset of `offset' within its block; relies on fd_bsize being a power of two. */
+#define fdir_blkoff(fdp, offset) /* offset % fd_bsize */ \
+ ((offset) & ((fdp)->fd_bsize - 1))
+/* Logical block number that contains byte `offset'. */
+#define fdir_lblkno(fdp, offset) /* offset / fd_bsize */ \
+ ((offset) >> (fdp)->fd_bshift)
+
+/* Convert a block number to a device record number by shifting left by fd_fsbtodb. */
+#define fdir_fsbtodb(fdp, block) /* block * (fd_bsize / device record size) */ \
+ ((block) << (fdp)->fd_fsbtodb)
+
+/*
+ * Read all or part of one data block through a file_direct.
+ *
+ * offset and size must be multiples of DEV_BSIZE (otherwise panic).
+ * At most one block (fd_bsize bytes) is requested.  On success *addr
+ * and *size_read are filled in by device_read; the caller must
+ * deallocate the returned memory when done.
+ *
+ * Returns FS_NOT_IN_FILE when offset is beyond the recorded blocks or
+ * the block has no disk address, else the result of device_read.
+ */
+int
+ext2_page_read_file_direct(fdp, offset, size, addr, size_read)
+    register struct file_direct *fdp;
+    vm_offset_t offset;
+    vm_size_t size;
+    vm_offset_t *addr; /* out */
+    mach_msg_type_number_t *size_read; /* out */
+{
+    vm_offset_t in_block;
+    daddr_t lblk;
+    daddr_t dblk;
+    recnum_t record;
+
+    if (!(offset % DEV_BSIZE == 0 && size % DEV_BSIZE == 0))
+        panic("page_read_file_direct");
+
+    if (offset >= (fdp->fd_size << fdp->fd_bshift))
+        return (FS_NOT_IN_FILE);
+
+    in_block = fdir_blkoff(fdp, offset);
+    lblk = fdir_lblkno(fdp, offset);
+
+    /* A raw device maps blocks one to one; a file uses its block list. */
+    if (file_is_device(fdp))
+        dblk = lblk;
+    else {
+        dblk = fdp->fd_blocks[lblk];
+        if (dblk == 0)
+            return (FS_NOT_IN_FILE);
+    }
+
+    /* Never read past the end of one block. */
+    if (size > fdp->fd_bsize)
+        size = fdp->fd_bsize;
+
+    record = (recnum_t) (fdir_fsbtodb(fdp, dblk) + btodb(in_block));
+
+    return (device_read(fdp->fd_dev,
+                        0,
+                        record,
+                        (int) size,
+                        (char **) addr,
+                        size_read));
+}
+
+/*
+ * Write all or part of a data block, and
+ * return the amount written.
+ *
+ * offset and size must be multiples of DEV_BSIZE (otherwise panic).
+ * At most one block (fd_bsize bytes) is written.  *size_written is
+ * set to the count reported by device_write, or to 0 if device_write
+ * did not report one.
+ *
+ * Returns FS_NOT_IN_FILE when offset is beyond the recorded blocks or
+ * the block has no disk address, else the result of device_write.
+ */
+int
+ext2_page_write_file_direct(fdp, offset, addr, size, size_written)
+    register struct file_direct *fdp;
+    vm_offset_t offset;
+    vm_offset_t addr;
+    vm_size_t size;
+    vm_offset_t *size_written; /* out */
+{
+    vm_offset_t off;
+    register daddr_t file_block;
+    daddr_t disk_block;
+    int rc;
+    int num_written = 0;    /* defined even if device_write fails */
+
+    if (offset % DEV_BSIZE != 0 ||
+        size % DEV_BSIZE != 0)
+        panic("page_write_file");
+
+    if (offset >= (fdp->fd_size << fdp->fd_bshift))
+        return (FS_NOT_IN_FILE);
+
+    off = fdir_blkoff(fdp, offset);
+    file_block = fdir_lblkno(fdp, offset);
+
+    if (file_is_device(fdp)) {
+        disk_block = file_block;
+    } else {
+        disk_block = fdp->fd_blocks[file_block];
+        if (disk_block == 0)
+            return (FS_NOT_IN_FILE);
+    }
+
+    if (size > fdp->fd_bsize)
+        size = fdp->fd_bsize;
+
+    /*
+     * Write the data. Wait for completion to keep
+     * reads from getting ahead of writes and reading
+     * stale data.
+     */
+    rc = device_write(
+        fdp->fd_dev,
+        0,
+        (recnum_t) (fdir_fsbtodb(fdp, disk_block) + btodb(off)),
+        (char *) addr,
+        size,
+        &num_written);
+    *size_written = num_written;
+    return rc;
+}
+
diff --git a/serverboot/ext2_fs.h b/serverboot/ext2_fs.h
new file mode 100644
index 00000000..4068c002
--- /dev/null
+++ b/serverboot/ext2_fs.h
@@ -0,0 +1,451 @@
+/*
+ * linux/include/linux/ext2_fs.h
+ *
+ * Copyright (C) 1992, 1993, 1994 Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * from
+ *
+ * linux/include/linux/minix_fs.h
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#ifndef _LINUX_EXT2_FS_H
+#define _LINUX_EXT2_FS_H
+
+/*
+ * The second extended filesystem constants/structures
+ */
+
+/*
+ * Define EXT2FS_DEBUG to produce debug messages
+ */
+#undef EXT2FS_DEBUG
+
+/*
+ * Define EXT2FS_DEBUG_CACHE to produce cache debug messages
+ */
+#undef EXT2FS_DEBUG_CACHE
+
+/*
+ * Define EXT2FS_CHECK_CACHE to add some checks to the name cache code
+ */
+#undef EXT2FS_CHECK_CACHE
+
+/*
+ * Define EXT2FS_PRE_02B_COMPAT to convert ext 2 fs prior to 0.2b
+ */
+#undef EXT2FS_PRE_02B_COMPAT
+
+/*
+ * Define DONT_USE_DCACHE to inhibit the directory cache
+ */
+#define DONT_USE_DCACHE
+
+/*
+ * Define EXT2_PREALLOCATE to preallocate data blocks for expanding files
+ */
+#define EXT2_PREALLOCATE
+
+/*
+ * The second extended file system version
+ */
+#define EXT2FS_DATE "94/03/10"
+#define EXT2FS_VERSION "0.5"
+
+/*
+ * Debug code
+ */
+#ifdef EXT2FS_DEBUG
+# define ext2_debug(f, a...) { \
+ printk ("EXT2-fs DEBUG (%s, %d): %s:", \
+ __FILE__, __LINE__, __FUNCTION__); \
+ printk (f, ## a); \
+ }
+#else
+# define ext2_debug(f, a...) /**/
+#endif
+
+/*
+ * Special inodes numbers
+ */
+#define EXT2_BAD_INO 1 /* Bad blocks inode */
+#define EXT2_ROOT_INO 2 /* Root inode */
+#define EXT2_ACL_IDX_INO 3 /* ACL inode */
+#define EXT2_ACL_DATA_INO 4 /* ACL inode */
+#define EXT2_BOOT_LOADER_INO 5 /* Boot loader inode */
+#define EXT2_UNDEL_DIR_INO 6 /* Undelete directory inode */
+#define EXT2_FIRST_INO 11 /* First non reserved inode */
+
+/*
+ * The second extended file system magic number
+ */
+#define EXT2_PRE_02B_MAGIC 0xEF51
+#define EXT2_SUPER_MAGIC 0xEF53
+
+/*
+ * Maximal count of links to a file
+ */
+#define EXT2_LINK_MAX 32000
+
+/*
+ * Macro-instructions used to manage several block sizes
+ *
+ * Outside __KERNEL__ the argument `s' is a struct ext2_super_block *
+ * and the block size is EXT2_MIN_BLOCK_SIZE << s_log_block_size.
+ * The *_PER_BLOCK macros divide the block size by the size of the
+ * corresponding in-memory type.
+ */
+#define EXT2_MIN_BLOCK_SIZE 1024
+#define EXT2_MAX_BLOCK_SIZE 4096
+#define EXT2_MIN_BLOCK_LOG_SIZE 10
+#ifdef __KERNEL__
+# define EXT2_BLOCK_SIZE(s) ((s)->s_blocksize)
+#else
+# define EXT2_BLOCK_SIZE(s) (EXT2_MIN_BLOCK_SIZE << (s)->s_log_block_size)
+#endif
+#define EXT2_ACLE_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (struct ext2_acl_entry))
+/* NOTE(review): assumes sizeof (unsigned long) equals the size of an on-disk block pointer -- confirm for the target. */
+#define EXT2_ADDR_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (unsigned long))
+#ifdef __KERNEL__
+# define EXT2_BLOCK_SIZE_BITS(s) ((s)->u.ext2_sb.s_es->s_log_block_size + 10)
+#else
+# define EXT2_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10)
+#endif
+#define EXT2_INODES_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (struct ext2_inode))
+
+/*
+ * Macro-instructions used to manage fragments
+ */
+#define EXT2_MIN_FRAG_SIZE 1024
+#define EXT2_MAX_FRAG_SIZE 4096
+#define EXT2_MIN_FRAG_LOG_SIZE 10
+#ifdef __KERNEL__
+# define EXT2_FRAG_SIZE(s) ((s)->u.ext2_sb.s_frag_size)
+# define EXT2_FRAGS_PER_BLOCK(s) ((s)->u.ext2_sb.s_frags_per_block)
+#else
+# define EXT2_FRAG_SIZE(s) (EXT2_MIN_FRAG_SIZE << (s)->s_log_frag_size)
+# define EXT2_FRAGS_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / EXT2_FRAG_SIZE(s))
+#endif
+
+/*
+ * ACL structures
+ *
+ * sizeof (struct ext2_acl_entry) determines EXT2_ACLE_PER_BLOCK.
+ * The meaning of the header fields is not documented in this file.
+ */
+struct ext2_acl_header /* Header of Access Control Lists */
+{
+ unsigned long aclh_size;
+ unsigned long aclh_file_count;
+ unsigned long aclh_acle_count;
+ unsigned long aclh_first_acle;
+};
+
+struct ext2_acl_entry /* Access Control List Entry */
+{
+ unsigned long acle_size;
+ unsigned short acle_perms; /* Access permissions */
+ unsigned short acle_type; /* Type of entry */
+ unsigned short acle_tag; /* User or group identity */
+ unsigned short acle_pad1;
+ unsigned long acle_next; /* Pointer on next entry for the */
+ /* same inode or on next free entry */
+};
+
+/*
+ * Structure of a blocks group descriptor
+ *
+ * struct ext2_group_desc extends struct ext2_old_group_desc: the first
+ * five fields are identical and a directory count, padding and three
+ * reserved words follow.  sizeof (struct ext2_group_desc) determines
+ * EXT2_DESC_PER_BLOCK outside __KERNEL__, so the in-memory layout has
+ * to match the on-disk one (32 bytes when unsigned long is 4 bytes).
+ */
+struct ext2_old_group_desc
+{
+ unsigned long bg_block_bitmap; /* Blocks bitmap block */
+ unsigned long bg_inode_bitmap; /* Inodes bitmap block */
+ unsigned long bg_inode_table; /* Inodes table block */
+ unsigned short bg_free_blocks_count; /* Free blocks count */
+ unsigned short bg_free_inodes_count; /* Free inodes count */
+};
+
+struct ext2_group_desc
+{
+ unsigned long bg_block_bitmap; /* Blocks bitmap block */
+ unsigned long bg_inode_bitmap; /* Inodes bitmap block */
+ unsigned long bg_inode_table; /* Inodes table block */
+ unsigned short bg_free_blocks_count; /* Free blocks count */
+ unsigned short bg_free_inodes_count; /* Free inodes count */
+ unsigned short bg_used_dirs_count; /* Directories count */
+ unsigned short bg_pad;
+ unsigned long bg_reserved[3];
+};
+
+/*
+ * Macro-instructions used to manage group descriptors
+ */
+#ifdef __KERNEL__
+# define EXT2_BLOCKS_PER_GROUP(s) ((s)->u.ext2_sb.s_blocks_per_group)
+# define EXT2_DESC_PER_BLOCK(s) ((s)->u.ext2_sb.s_desc_per_block)
+# define EXT2_INODES_PER_GROUP(s) ((s)->u.ext2_sb.s_inodes_per_group)
+#else
+# define EXT2_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group)
+# define EXT2_DESC_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (struct ext2_group_desc))
+# define EXT2_INODES_PER_GROUP(s) ((s)->s_inodes_per_group)
+#endif
+
+/*
+ * Constants relative to the data blocks
+ */
+#define EXT2_NDIR_BLOCKS 12
+#define EXT2_IND_BLOCK EXT2_NDIR_BLOCKS
+#define EXT2_DIND_BLOCK (EXT2_IND_BLOCK + 1)
+#define EXT2_TIND_BLOCK (EXT2_DIND_BLOCK + 1)
+#define EXT2_N_BLOCKS (EXT2_TIND_BLOCK + 1)
+
+/*
+ * Inode flags
+ */
+#define EXT2_SECRM_FL 0x0001 /* Secure deletion */
+#define EXT2_UNRM_FL 0x0002 /* Undelete */
+#define EXT2_COMPR_FL 0x0004 /* Compress file */
+#define EXT2_SYNC_FL 0x0008 /* Synchronous updates */
+
+/*
+ * ioctl commands
+ */
+#define EXT2_IOC_GETFLAGS _IOR('f', 1, long)
+#define EXT2_IOC_SETFLAGS _IOW('f', 2, long)
+#define EXT2_IOC_GETVERSION _IOR('v', 1, long)
+#define EXT2_IOC_SETVERSION _IOW('v', 2, long)
+
+/*
+ * Structure of an inode on the disk
+ *
+ * The fields add up to 128 bytes when unsigned long is 4 bytes and
+ * unsigned short is 2 bytes.  sizeof (struct ext2_inode) determines
+ * EXT2_INODES_PER_BLOCK, so the layout must not change.
+ */
+struct ext2_inode {
+ unsigned short i_mode; /* File mode */
+ unsigned short i_uid; /* Owner Uid */
+ unsigned long i_size; /* Size in bytes */
+ unsigned long i_atime; /* Access time */
+ unsigned long i_ctime; /* Creation time */
+ unsigned long i_mtime; /* Modification time */
+ unsigned long i_dtime; /* Deletion Time */
+ unsigned short i_gid; /* Group Id */
+ unsigned short i_links_count; /* Links count */
+ unsigned long i_blocks; /* Blocks count */
+ unsigned long i_flags; /* File flags */
+ unsigned long i_reserved1;
+ /* EXT2_NDIR_BLOCKS direct pointers, then the single, double and triple indirect blocks */
+ unsigned long i_block[EXT2_N_BLOCKS];/* Pointers to blocks */
+ unsigned long i_version; /* File version (for NFS) */
+ unsigned long i_file_acl; /* File ACL */
+ unsigned long i_dir_acl; /* Directory ACL */
+ unsigned long i_faddr; /* Fragment address */
+ unsigned char i_frag; /* Fragment number */
+ unsigned char i_fsize; /* Fragment size */
+ unsigned short i_pad1;
+ unsigned long i_reserved2[2];
+};
+
+/*
+ * File system states
+ */
+#define EXT2_VALID_FS 0x0001 /* Unmounted cleanly */
+#define EXT2_ERROR_FS 0x0002 /* Errors detected */
+
+/*
+ * Mount flags
+ */
+#define EXT2_MOUNT_CHECK_NORMAL 0x0001 /* Do some more checks */
+#define EXT2_MOUNT_CHECK_STRICT 0x0002 /* Do again more checks */
+#define EXT2_MOUNT_CHECK (EXT2_MOUNT_CHECK_NORMAL | \
+ EXT2_MOUNT_CHECK_STRICT)
+#define EXT2_MOUNT_GRPID 0x0004 /* Create files with directory's group */
+#define EXT2_MOUNT_DEBUG 0x0008 /* Some debugging messages */
+#define EXT2_MOUNT_ERRORS_CONT 0x0010 /* Continue on errors */
+#define EXT2_MOUNT_ERRORS_RO 0x0020 /* Remount fs ro on errors */
+#define EXT2_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */
+
+#define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt
+#define set_opt(o, opt) o |= EXT2_MOUNT_##opt
+#define test_opt(sb, opt) ((sb)->u.ext2_sb.s_mount_opt & \
+ EXT2_MOUNT_##opt)
+/*
+ * Maximal mount counts between two filesystem checks
+ */
+#define EXT2_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */
+#define EXT2_DFL_CHECKINTERVAL 0 /* Don't use interval check */
+
+/*
+ * Behaviour when detecting errors
+ */
+#define EXT2_ERRORS_CONTINUE 1 /* Continue execution */
+#define EXT2_ERRORS_RO 2 /* Remount fs read-only */
+#define EXT2_ERRORS_PANIC 3 /* Panic */
+#define EXT2_ERRORS_DEFAULT EXT2_ERRORS_CONTINUE
+
+/*
+ * Structure of the super block
+ *
+ * The named fields occupy 72 bytes and s_reserved pads the structure
+ * to 1024 bytes when unsigned long is 4 bytes and unsigned short is
+ * 2 bytes.  s_magic is expected to hold EXT2_SUPER_MAGIC.
+ */
+struct ext2_super_block {
+ unsigned long s_inodes_count; /* Inodes count */
+ unsigned long s_blocks_count; /* Blocks count */
+ unsigned long s_r_blocks_count;/* Reserved blocks count */
+ unsigned long s_free_blocks_count;/* Free blocks count */
+ unsigned long s_free_inodes_count;/* Free inodes count */
+ unsigned long s_first_data_block;/* First Data Block */
+ /* Stored as a shift count: block size is EXT2_MIN_BLOCK_SIZE << s_log_block_size */
+ unsigned long s_log_block_size;/* Block size */
+ /* Stored as a shift count: fragment size is EXT2_MIN_FRAG_SIZE << s_log_frag_size */
+ long s_log_frag_size; /* Fragment size */
+ unsigned long s_blocks_per_group;/* # Blocks per group */
+ unsigned long s_frags_per_group;/* # Fragments per group */
+ unsigned long s_inodes_per_group;/* # Inodes per group */
+ unsigned long s_mtime; /* Mount time */
+ unsigned long s_wtime; /* Write time */
+ unsigned short s_mnt_count; /* Mount count */
+ short s_max_mnt_count; /* Maximal mount count */
+ unsigned short s_magic; /* Magic signature */
+ unsigned short s_state; /* File system state */
+ unsigned short s_errors; /* Behaviour when detecting errors */
+ unsigned short s_pad;
+ unsigned long s_lastcheck; /* time of last check */
+ unsigned long s_checkinterval; /* max. time between checks */
+ unsigned long s_reserved[238]; /* Padding to the end of the block */
+};
+
+/*
+ * Structure of a directory entry
+ *
+ * The members in front of `name' (inode, rec_len, name_len) occupy
+ * 8 bytes when `long' is 4 bytes; that is the `+ 8' used by
+ * EXT2_DIR_REC_LEN.  `name' is declared at its maximum size,
+ * EXT2_NAME_LEN.
+ * NOTE(review): presumably an entry only spans rec_len bytes, not
+ * sizeof (struct ext2_dir_entry) -- confirm against the directory
+ * reading code.
+ */
+#define EXT2_NAME_LEN 255
+
+struct ext2_dir_entry {
+ unsigned long inode; /* Inode number */
+ unsigned short rec_len; /* Directory entry length */
+ unsigned short name_len; /* Name length */
+ char name[EXT2_NAME_LEN]; /* File name */
+};
+
+/*
+ * EXT2_DIR_PAD defines the directory entry boundaries
+ *
+ * NOTE: It must be a multiple of 4
+ *
+ * EXT2_DIR_REC_LEN rounds (name_len + 8) up to the next multiple of
+ * EXT2_DIR_PAD by adding EXT2_DIR_ROUND and masking the low bits off;
+ * the mask only rounds correctly while EXT2_DIR_PAD is a power of two.
+ */
+#define EXT2_DIR_PAD 4
+#define EXT2_DIR_ROUND (EXT2_DIR_PAD - 1)
+#define EXT2_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT2_DIR_ROUND) & \
+ ~EXT2_DIR_ROUND)
+
+#ifdef __KERNEL__
+/*
+ * Function prototypes
+ */
+
+/*
+ * Ok, these declarations are also in <linux/kernel.h> but none of the
+ * ext2 source programs needs to include it so they are duplicated here.
+ */
+#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5)
+# define NORET_TYPE __volatile__
+# define ATTRIB_NORET /**/
+# define NORET_AND /**/
+#else
+# define NORET_TYPE /**/
+# define ATTRIB_NORET __attribute__((noreturn))
+# define NORET_AND noreturn,
+#endif
+
+/* acl.c */
+extern int ext2_permission (struct inode *, int);
+
+/* balloc.c */
+extern int ext2_new_block (struct super_block *, unsigned long,
+ unsigned long *, unsigned long *);
+extern void ext2_free_blocks (struct super_block *, unsigned long,
+ unsigned long);
+extern unsigned long ext2_count_free_blocks (struct super_block *);
+extern void ext2_check_blocks_bitmap (struct super_block *);
+
+/* bitmap.c */
+extern unsigned long ext2_count_free (struct buffer_head *, unsigned);
+
+#ifndef DONT_USE_DCACHE
+/* dcache.c */
+extern void ext2_dcache_invalidate (unsigned short);
+extern unsigned long ext2_dcache_lookup (unsigned short, unsigned long,
+ const char *, int);
+extern void ext2_dcache_add (unsigned short, unsigned long, const char *,
+ int, unsigned long);
+extern void ext2_dcache_remove (unsigned short, unsigned long, const char *,
+ int);
+#endif
+
+/* dir.c */
+extern int ext2_check_dir_entry (char *, struct inode *,
+ struct ext2_dir_entry *, struct buffer_head *,
+ unsigned long);
+
+/* file.c */
+extern int ext2_read (struct inode *, struct file *, char *, int);
+extern int ext2_write (struct inode *, struct file *, char *, int);
+
+/* fsync.c */
+extern int ext2_sync_file (struct inode *, struct file *);
+
+/* ialloc.c */
+extern struct inode * ext2_new_inode (const struct inode *, int);
+extern void ext2_free_inode (struct inode *);
+extern unsigned long ext2_count_free_inodes (struct super_block *);
+extern void ext2_check_inodes_bitmap (struct super_block *);
+
+/* inode.c */
+extern int ext2_bmap (struct inode *, int);
+
+extern struct buffer_head * ext2_getblk (struct inode *, long, int, int *);
+extern struct buffer_head * ext2_bread (struct inode *, int, int, int *);
+
+extern int ext2_getcluster (struct inode * inode, long block);
+extern void ext2_read_inode (struct inode *);
+extern void ext2_write_inode (struct inode *);
+extern void ext2_put_inode (struct inode *);
+extern int ext2_sync_inode (struct inode *);
+extern void ext2_discard_prealloc (struct inode *);
+
+/* ioctl.c */
+extern int ext2_ioctl (struct inode *, struct file *, unsigned int,
+ unsigned long);
+
+/* namei.c */
+extern int ext2_open (struct inode *, struct file *);
+extern void ext2_release (struct inode *, struct file *);
+extern int ext2_lookup (struct inode *,const char *, int, struct inode **);
+extern int ext2_create (struct inode *,const char *, int, int,
+ struct inode **);
+extern int ext2_mkdir (struct inode *, const char *, int, int);
+extern int ext2_rmdir (struct inode *, const char *, int);
+extern int ext2_unlink (struct inode *, const char *, int);
+extern int ext2_symlink (struct inode *, const char *, int, const char *);
+extern int ext2_link (struct inode *, struct inode *, const char *, int);
+extern int ext2_mknod (struct inode *, const char *, int, int, int);
+extern int ext2_rename (struct inode *, const char *, int,
+ struct inode *, const char *, int);
+
+/* super.c */
+extern void ext2_error (struct super_block *, const char *, const char *, ...)
+ __attribute__ ((format (printf, 3, 4)));
+extern NORET_TYPE void ext2_panic (struct super_block *, const char *,
+ const char *, ...)
+ __attribute__ ((NORET_AND format (printf, 3, 4)));
+extern void ext2_warning (struct super_block *, const char *, const char *, ...)
+ __attribute__ ((format (printf, 3, 4)));
+extern void ext2_put_super (struct super_block *);
+extern void ext2_write_super (struct super_block *);
+extern int ext2_remount (struct super_block *, int *, char *);
+extern struct super_block * ext2_read_super (struct super_block *,void *,int);
+extern void ext2_statfs (struct super_block *, struct statfs *);
+
+/* truncate.c */
+extern void ext2_truncate (struct inode *);
+
+/*
+ * Inodes and files operations
+ */
+
+/* dir.c */
+extern struct inode_operations ext2_dir_inode_operations;
+
+/* file.c */
+extern struct inode_operations ext2_file_inode_operations;
+
+/* symlink.c */
+extern struct inode_operations ext2_symlink_inode_operations;
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_EXT2_FS_H */
diff --git a/serverboot/ffs_compat.c b/serverboot/ffs_compat.c
new file mode 100644
index 00000000..46644a9b
--- /dev/null
+++ b/serverboot/ffs_compat.c
@@ -0,0 +1,63 @@
+/*
+ * BSD FFS like functions used to ease porting bootstrap to Linux ext2 fs
+ * Copyright (C) 1994 Remy Card
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <device/device_types.h>
+#include <device/device.h>
+
+#include <mach/mach_traps.h>
+#include <mach/mach_interface.h>
+
+#include <file_io.h>
+
+/*
+ * Return the file system block that holds inode `ino': the start of the
+ * inode table of the group owning the inode (taken from `gd') plus the
+ * inode's block offset inside that table.  `ino - 1' is used as the
+ * zero-based inode index.
+ */
+int ino2blk (struct ext2_super_block *fs, struct ext2_group_desc *gd, int ino)
+{
+	int zero_based = ino - 1;
+	int grp = zero_based / EXT2_INODES_PER_GROUP(fs);
+	int result;
+
+	result = gd[grp].bg_inode_table +
+		 ((zero_based % EXT2_INODES_PER_GROUP(fs)) /
+		  EXT2_INODES_PER_BLOCK(fs));
+	return result;
+}
+
+/*
+ * Convert file system block number `b' into a device block number
+ * counted in DEV_BSIZE units.
+ *
+ * The ratio of the two block sizes is computed first, so the
+ * intermediate product b * EXT2_BLOCK_SIZE(fs) can no longer overflow
+ * for blocks that lie far into a large file system.
+ * NOTE(review): this assumes EXT2_BLOCK_SIZE(fs) is a whole multiple of
+ * DEV_BSIZE -- confirm against their definitions.
+ */
+int fsbtodb (struct ext2_super_block *fs, int b)
+{
+	return b * (EXT2_BLOCK_SIZE(fs) / DEV_BSIZE);
+}
+
+/*
+ * Return the index of inode `ino' within the inode-table block that
+ * contains it.
+ */
+int itoo (struct ext2_super_block *fs, int ino)
+{
+	int zero_based = ino - 1;
+
+	return zero_based % EXT2_INODES_PER_BLOCK(fs);
+}
+
+/*
+ * Return the byte offset of file position `offset' inside its block.
+ */
+int blkoff (struct ext2_super_block * fs, vm_offset_t offset)
+{
+	vm_offset_t within_block = offset % EXT2_BLOCK_SIZE(fs);
+
+	return within_block;
+}
+
+/*
+ * Return the logical block number that contains file position `offset'.
+ */
+int lblkno (struct ext2_super_block * fs, vm_offset_t offset)
+{
+	vm_offset_t block_index = offset / EXT2_BLOCK_SIZE(fs);
+
+	return block_index;
+}
+
+/*
+ * Return the size in bytes of block `file_block' of file `fp'.
+ * `fp' and `file_block' are not consulted: every block is reported as
+ * a full file system block.
+ */
+int blksize (struct ext2_super_block *fs, struct file *fp, daddr_t file_block)
+{
+	int full_block = EXT2_BLOCK_SIZE(fs);	/* XXX - fix for fragments */
+
+	return full_block;
+}
diff --git a/serverboot/ffs_compat.h b/serverboot/ffs_compat.h
new file mode 100644
index 00000000..d78840f5
--- /dev/null
+++ b/serverboot/ffs_compat.h
@@ -0,0 +1,54 @@
+/*
+ * BSD FFS like declarations used to ease porting bootstrap to Linux ext2 fs
+ * Copyright (C) 1994 Remy Card
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Superblock size and location, under their FFS names. */
+#define SBSIZE EXT2_MIN_BLOCK_SIZE /* Size of superblock */
+#define SBLOCK ((daddr_t) 2) /* Location of superblock */
+
+/* FFS names for the number of direct and of indirect block pointers. */
+#define NDADDR EXT2_NDIR_BLOCKS
+#define NIADDR (EXT2_N_BLOCKS - EXT2_NDIR_BLOCKS)
+
+#define MAXNAMLEN 255
+
+#define ROOTINO EXT2_ROOT_INO
+
+/* Block numbers held by one indirect block. */
+#define NINDIR(fs) EXT2_ADDR_PER_BLOCK(fs)
+
+/* Defined with no value, so it is only usable in #ifdef tests. */
+#define IC_FASTLINK
+
+/*
+ * File mode bits, in octal.  IFMT is the mask for the file-type field;
+ * presumably the IF* values are the file types and ISUID/ISGID/ISVTX
+ * the set-id and sticky bits -- verify against the inode definition.
+ */
+#define IFMT 00170000
+#define IFSOCK 0140000
+#define IFLNK 0120000
+#define IFREG 0100000
+#define IFBLK 0060000
+#define IFDIR 0040000
+#define IFCHR 0020000
+#define IFIFO 0010000
+#define ISUID 0004000
+#define ISGID 0002000
+#define ISVTX 0001000
+
+/* FFS-style member names mapped onto the u.ext2 members. */
+#define f_fs u.ext2.ext2_fs
+#define f_gd u.ext2.ext2_gd
+#define f_gd_size u.ext2.ext2_gd_size
+#define i_ic u.ext2.ext2_ic
+#define f_nindir u.ext2.ext2_nindir
+#define f_blk u.ext2.ext2_blk
+#define f_blksize u.ext2.ext2_blksize
+#define f_blkno u.ext2.ext2_blkno
+
diff --git a/serverboot/ffs_file_io.c b/serverboot/ffs_file_io.c
new file mode 100644
index 00000000..889ca4e5
--- /dev/null
+++ b/serverboot/ffs_file_io.c
@@ -0,0 +1,1085 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * Stand-alone file reading package.
+ */
+
+#include <device/device_types.h>
+#include <device/device.h>
+
+#include <mach/mach_traps.h>
+#include <mach/mach_interface.h>
+
+#include "file_io.h"
+#include "fs.h"
+#include "dir.h"
+#include "disk_inode_ffs.h"
+
+void close_file(); /* forward */
+
+/*
+ * Release every buffer cached in `fp' (indirect blocks and the data
+ * block) and mark the cache slots empty.  The file itself stays open.
+ */
+static void
+free_file_buffers(fp)
+	struct file	*fp;
+{
+	int	lvl = 0;
+
+	/*
+	 * Indirect blocks: deallocate the ones that are present and
+	 * forget which disk block each slot held.
+	 */
+	while (lvl < NIADDR) {
+		if (fp->f_blk[lvl] != 0) {
+			(void) vm_deallocate(mach_task_self(),
+					     fp->f_blk[lvl],
+					     fp->f_blksize[lvl]);
+			fp->f_blk[lvl] = 0;
+		}
+		fp->f_blkno[lvl] = -1;
+		lvl++;
+	}
+
+	/*
+	 * Data block.
+	 */
+	if (fp->f_buf != 0) {
+		(void) vm_deallocate(mach_task_self(),
+				     fp->f_buf,
+				     fp->f_buf_size);
+		fp->f_buf = 0;
+	}
+	fp->f_buf_blkno = -1;
+}
+
+/*
+ * Load inode `inumber' from fp's device into the file structure.
+ *
+ * The block holding the inode is read, the inode's di_ic part is copied
+ * into fp->i_ic, f_size is set from i_size, and the block is released.
+ * Buffers cached for the previous inode are freed.
+ * Returns 0, or the device_read error code.
+ */
+static int
+read_inode(inumber, fp)
+	ino_t		inumber;
+	struct file	*fp;
+{
+	struct fs		*fs = fp->f_fs;
+	daddr_t			inode_block = itod(fs, inumber);
+	vm_offset_t		block_data;
+	mach_msg_type_number_t	block_len;
+	struct dinode		*on_disk;
+	kern_return_t		kr;
+
+	kr = device_read(fp->f_dev,
+			 0,
+			 (recnum_t) fsbtodb(fp->f_fs, inode_block),
+			 (int) fs->fs_bsize,
+			 (char **)&block_data,
+			 &block_len);
+	if (kr != KERN_SUCCESS)
+		return (kr);
+
+	/* Pick this inode out of the block and copy what we keep. */
+	on_disk = (struct dinode *)block_data + itoo(fs, inumber);
+	fp->i_ic = on_disk->di_ic;
+	fp->f_size = fp->i_size;
+
+	(void) vm_deallocate(mach_task_self(), block_data, block_len);
+
+	/* The cached blocks belonged to the previous inode. */
+	free_file_buffers(fp);
+
+	return (0);
+}
+
+/*
+ * Given a logical block number in a file, find the disk block number
+ * that holds that block.
+ *
+ * fp		open file; fp->f_lock is held while its cached indirect
+ *		blocks are examined or replaced.
+ * file_block	logical block number within the file.
+ * disk_block_p	out: the block number found (0 if an entry on the
+ *		path through the indirect blocks is 0).
+ *
+ * Returns 0 on success, FS_NOT_IN_FILE when file_block lies beyond the
+ * last indirect level, or the device_read error code.  Indirect blocks
+ * pushed out of the cache are deallocated on the error path as well.
+ */
+static int
+block_map(fp, file_block, disk_block_p)
+	struct file	*fp;
+	daddr_t		file_block;
+	daddr_t		*disk_block_p;	/* out */
+{
+	int		level;
+	int		idx;
+	daddr_t		ind_block_num;
+	kern_return_t	rc;
+
+	vm_offset_t	olddata[NIADDR+1];
+	vm_size_t	oldsize[NIADDR+1];
+
+	/*
+	 * Index structure of an inode:
+	 *
+	 * i_db[0..NDADDR-1]	hold block numbers for blocks
+	 *			0..NDADDR-1
+	 *
+	 * i_ib[0]		index block 0 is the single indirect
+	 *			block
+	 *			holds block numbers for blocks
+	 *			NDADDR .. NDADDR + NINDIR(fs)-1
+	 *
+	 * i_ib[1]		index block 1 is the double indirect
+	 *			block
+	 *			holds block numbers for INDEX blocks
+	 *			for blocks
+	 *			NDADDR + NINDIR(fs) ..
+	 *			NDADDR + NINDIR(fs) + NINDIR(fs)**2 - 1
+	 *
+	 * i_ib[2]		index block 2 is the triple indirect
+	 *			block
+	 *			holds block numbers for double-indirect
+	 *			blocks for blocks
+	 *			NDADDR + NINDIR(fs) + NINDIR(fs)**2 ..
+	 *			NDADDR + NINDIR(fs) + NINDIR(fs)**2
+	 *				+ NINDIR(fs)**3 - 1
+	 */
+
+	mutex_lock(&fp->f_lock);
+
+	if (file_block < NDADDR) {
+		/* Direct block. */
+		*disk_block_p = fp->i_db[file_block];
+		mutex_unlock(&fp->f_lock);
+		return (0);
+	}
+
+	file_block -= NDADDR;
+
+	/*
+	 * nindir[0] = NINDIR
+	 * nindir[1] = NINDIR**2
+	 * nindir[2] = NINDIR**3
+	 *	etc
+	 */
+	for (level = 0; level < NIADDR; level++) {
+		if (file_block < fp->f_nindir[level])
+			break;
+		file_block -= fp->f_nindir[level];
+	}
+	if (level == NIADDR) {
+		/* Block number too high */
+		mutex_unlock(&fp->f_lock);
+		return (FS_NOT_IN_FILE);
+	}
+
+	ind_block_num = fp->i_ib[level];
+
+	/*
+	 * Initialize array of blocks to free.
+	 */
+	for (idx = 0; idx < NIADDR; idx++)
+		oldsize[idx] = 0;
+
+	for (; level >= 0; level--) {
+
+		vm_offset_t	data;
+		mach_msg_type_number_t	size;
+
+		if (ind_block_num == 0)
+			break;
+
+		if (fp->f_blkno[level] == ind_block_num) {
+			/*
+			 * Cache hit.  Just pick up the data.
+			 */
+
+			data = fp->f_blk[level];
+		}
+		else {
+			/*
+			 * Drop our lock while doing the read.
+			 * (The f_dev and f_fs fields don't change.)
+			 */
+			mutex_unlock(&fp->f_lock);
+
+			rc = device_read(fp->f_dev,
+					 0,
+					 (recnum_t) fsbtodb(fp->f_fs, ind_block_num),
+					 fp->f_fs->fs_bsize,
+					 (char **)&data,
+					 &size);
+			if (rc != KERN_SUCCESS) {
+				/*
+				 * The lock is not held here.  Blocks
+				 * replaced at earlier levels still have
+				 * to be released, so leave through the
+				 * common cleanup code.
+				 */
+				goto free_old;
+			}
+
+			/*
+			 * Take the lock again to install the new block
+			 * in the cache.  The block it replaces is only
+			 * remembered here and deallocated once the lock
+			 * has been dropped; the original author's note
+			 * says that nothing which might block for
+			 * memory may be done while the lock is held.
+			 */
+
+			mutex_lock(&fp->f_lock);
+
+			olddata[level] = fp->f_blk[level];
+			oldsize[level] = fp->f_blksize[level];
+
+			fp->f_blkno[level] = ind_block_num;
+			fp->f_blk[level] = data;
+			fp->f_blksize[level] = size;
+		}
+
+		if (level > 0) {
+			idx = file_block / fp->f_nindir[level-1];
+			file_block %= fp->f_nindir[level-1];
+		}
+		else
+			idx = file_block;
+
+		ind_block_num = ((daddr_t *)data)[idx];
+	}
+
+	mutex_unlock(&fp->f_lock);
+
+	*disk_block_p = ind_block_num;
+	rc = 0;
+
+    free_old:
+	/*
+	 * With the file unlocked, free any blocks that
+	 * we need to free.
+	 */
+	for (idx = 0; idx < NIADDR; idx++)
+		if (oldsize[idx] != 0)
+			(void) vm_deallocate(mach_task_self(),
+					     olddata[idx],
+					     oldsize[idx]);
+
+	return (rc);
+}
+
+/*
+ * Read a portion of a file into an internal buffer.  Return
+ * the location in the buffer and the amount in the buffer.
+ *
+ * fp		open file; its one-block data cache (f_buf, f_buf_size,
+ *		f_buf_blkno) is reused or replaced.
+ * offset	byte offset within the file.
+ * buf_p	out: address inside fp->f_buf that corresponds to offset.
+ * size_p	out: bytes available from *buf_p, limited to the end of
+ *		the block and to the end of the file.
+ *
+ * Returns 0, FS_NOT_IN_FILE when offset is at or past i_size, or the
+ * error from block_map, vm_allocate or device_read.  When loading a
+ * new block fails the cache is left empty rather than pointing at the
+ * buffer that was just deallocated.
+ */
+static int
+buf_read_file(fp, offset, buf_p, size_p)
+	register struct file	*fp;
+	vm_offset_t		offset;
+	vm_offset_t		*buf_p;		/* out */
+	vm_size_t		*size_p;	/* out */
+{
+	register struct fs	*fs;
+	vm_offset_t		off;
+	register daddr_t	file_block;
+	daddr_t			disk_block;
+	int			rc;
+	vm_offset_t		block_size;
+
+	if (offset >= fp->i_size)
+		return (FS_NOT_IN_FILE);
+
+	fs = fp->f_fs;
+
+	off = blkoff(fs, offset);
+	file_block = lblkno(fs, offset);
+	block_size = blksize(fs, fp, file_block);
+
+	if (file_block != fp->f_buf_blkno) {
+		rc = block_map(fp, file_block, &disk_block);
+		if (rc != 0)
+			return (rc);
+
+		if (fp->f_buf) {
+			(void)vm_deallocate(mach_task_self(),
+					    fp->f_buf,
+					    fp->f_buf_size);
+			/*
+			 * Forget the released buffer right away: if the
+			 * allocation or read below fails, neither a
+			 * stale pointer nor a stale block number may
+			 * survive in the cache.
+			 */
+			fp->f_buf = 0;
+			fp->f_buf_blkno = -1;
+		}
+
+		if (disk_block == 0) {
+			/*
+			 * No disk block is mapped here; hand out a
+			 * freshly allocated buffer instead of reading.
+			 */
+			rc = vm_allocate(mach_task_self(),
+					 &fp->f_buf,
+					 block_size,
+					 TRUE);
+			if (rc == KERN_SUCCESS)
+				fp->f_buf_size = block_size;
+		}
+		else {
+			rc = device_read(fp->f_dev,
+					 0,
+					 (recnum_t) fsbtodb(fs, disk_block),
+					 (int) block_size,
+					 (char **) &fp->f_buf,
+					 (mach_msg_type_number_t *)&fp->f_buf_size);
+		}
+		if (rc)
+			return (rc);
+
+		fp->f_buf_blkno = file_block;
+	}
+
+	/*
+	 * Return address of byte in buffer corresponding to
+	 * offset, and size of remainder of buffer after that
+	 * byte.
+	 */
+	*buf_p = fp->f_buf + off;
+	*size_p = block_size - off;
+
+	/*
+	 * But truncate buffer at end of file.
+	 */
+	if (*size_p > fp->i_size - offset)
+		*size_p = fp->i_size - offset;
+
+	return (0);
+}
+
+/*
+ * 4.4-style directory entry.  d_reclen is still a full unsigned short;
+ * the two one-byte members d_type and d_namlen follow it, where
+ * search_directory otherwise reads d_namlen through struct direct.
+ */
+struct dirent_44 {
+ unsigned long d_fileno;
+ unsigned short d_reclen;
+ unsigned char d_type;
+ unsigned char d_namlen;
+ char d_name[255 + 1];
+};
+
+/*
+ * Search a directory for a name and return its
+ * i_number.
+ *
+ * name		NUL-terminated component to look for.
+ * fp		file structure currently holding the directory inode.
+ * inumber_p	out: inode number of the matching entry.
+ *
+ * Returns 0 when found, FS_NO_ENTRY when the directory has no such
+ * entry, or the error from buf_read_file.  An entry whose d_reclen is
+ * zero ends the scan, because the offset could never advance past it.
+ */
+static int
+search_directory(name, fp, inumber_p)
+	char *		name;
+	register struct file *fp;
+	ino_t		*inumber_p;	/* out */
+{
+	vm_offset_t	buf;
+	vm_size_t	buf_size;
+	vm_offset_t	offset;
+	register struct dirent_44 *dp;
+	int		length;
+	kern_return_t	rc;
+
+	length = strlen(name);
+
+	offset = 0;
+	while (offset < fp->i_size) {
+		rc = buf_read_file(fp, offset, &buf, &buf_size);
+		if (rc != KERN_SUCCESS)
+			return (rc);
+
+		dp = (struct dirent_44 *)buf;
+		if (dp->d_ino != 0) {
+			unsigned short namlen = dp->d_namlen;
+			/*
+			 * If namlen is zero, then either this is a 4.3 file
+			 * system or the namlen is really zero.  In the latter
+			 * case also the 4.3 d_namlen field is zero
+			 * interpreted either way.
+			 */
+			if (namlen == 0)
+				namlen = ((struct direct *)dp)->d_namlen;
+
+			if (namlen == length &&
+			    !strcmp(name, dp->d_name))
+			{
+				/* found entry */
+				*inumber_p = dp->d_ino;
+				return (0);
+			}
+		}
+
+		/*
+		 * A record length of zero would leave `offset' where it
+		 * is and repeat this entry forever; stop scanning.
+		 */
+		if (dp->d_reclen == 0)
+			break;
+
+		offset += dp->d_reclen;
+	}
+	return (FS_NO_ENTRY);
+}
+
+/*
+ * Read the superblock of device `dev' and check it.
+ *
+ * On success *fsp points at the buffer returned by device_read, which
+ * the caller then owns, and 0 is returned.  A superblock with the wrong
+ * magic number or an implausible fs_bsize is released again and
+ * FS_INVALID_FS is returned; a failed read returns the device_read
+ * error code.
+ */
+static int
+read_fs(dev, fsp)
+	mach_port_t	dev;
+	struct fs	**fsp;
+{
+	vm_offset_t		sb_data;
+	mach_msg_type_number_t	sb_len;
+	struct fs		*sb;
+	int			err;
+
+	err = device_read(dev, 0, (recnum_t) SBLOCK, SBSIZE,
+			  (char **) &sb_data, &sb_len);
+	if (err)
+		return (err);
+
+	sb = (struct fs *)sb_data;
+	if (sb->fs_magic == FS_MAGIC &&
+	    sb->fs_bsize <= MAXBSIZE &&
+	    sb->fs_bsize >= sizeof(struct fs)) {
+		/* don't read cylinder groups - we aren't modifying anything */
+		*fsp = sb;
+		return 0;
+	}
+
+	(void) vm_deallocate(mach_task_self(), sb_data, sb_len);
+	return (FS_INVALID_FS);
+}
+
+/*
+ * Attach the file system on fp->f_dev to `fp': read its superblock
+ * into fp->f_fs and fill fp->f_nindir[] with the successive powers of
+ * NINDIR(fs).  Returns 0, or the error from read_fs.
+ */
+static int
+mount_fs(fp)
+	struct file	*fp;
+{
+	struct fs	*fs;
+	int		span;
+	int		lvl;
+	int		err;
+
+	err = read_fs(fp->f_dev, &fp->f_fs);
+	if (err)
+		return (err);
+	fs = fp->f_fs;
+
+	/*
+	 * f_nindir[lvl] becomes NINDIR(fs) ** (lvl + 1).
+	 */
+	span = 1;
+	lvl = 0;
+	while (lvl < NIADDR) {
+		span *= NINDIR(fs);
+		fp->f_nindir[lvl++] = span;
+	}
+
+	return (0);
+}
+
+/*
+ * Release the superblock held in fp->f_fs, if the file has one, and
+ * clear the pointer.
+ */
+static void
+unmount_fs(fp)
+	struct file	*fp;
+{
+	if (!file_is_structured(fp))
+		return;
+
+	(void) vm_deallocate(mach_task_self(),
+			     (vm_offset_t) fp->f_fs,
+			     SBSIZE);
+	fp->f_fs = 0;
+}
+
+/*
+ * Open a file.
+ *
+ * master_device_port	port handed to device_open.
+ * path			must begin with "/dev/<device>"; whatever
+ *			follows the device name is looked up in the
+ *			file system on that device.  With nothing
+ *			after the device name the device itself is
+ *			opened and fp->f_fs is left 0.
+ * fp			zeroed and then filled in.
+ *
+ * Symbolic links are followed, at most MAXSYMLINKS of them.
+ * Returns 0 on success or an FS_* / device error code.  Paths longer
+ * than MAXPATHLEN are refused with FS_NAME_TOO_LONG before they are
+ * copied, and a component longer than MAXNAMLEN is refused as well.
+ */
+int
+ffs_open_file(master_device_port, path, fp)
+	mach_port_t	master_device_port;
+	char *		path;
+	struct file	*fp;
+{
+#define	RETURN(code)	{ rc = (code); goto exit; }
+
+	register char	*cp, *component;
+	register int	c;	/* char */
+	register int	rc;
+	ino_t		inumber, parent_inumber;
+	int		nlinks = 0;
+
+	char	namebuf[MAXPATHLEN+1];
+
+	if (path == 0 || *path == '\0') {
+		return FS_NO_ENTRY;
+	}
+
+	/*
+	 * Copy name into buffer to allow modifying it.  A name that
+	 * does not fit is refused instead of overrunning namebuf.
+	 */
+	if (strlen(path) > MAXPATHLEN)
+		return FS_NAME_TOO_LONG;
+	strcpy(namebuf, path);
+
+	/*
+	 * Look for '/dev/xxx' at start of path, for
+	 * root device.
+	 */
+	if (!strprefix(namebuf, "/dev/")) {
+		printf("no device name\n");
+		return FS_NO_ENTRY;
+	}
+
+	cp = namebuf + 5;	/* device */
+	component = cp;
+	while ((c = *cp) != '\0' && c != '/') {
+		cp++;
+	}
+	*cp = '\0';
+
+	bzero (fp, sizeof (struct file));
+
+	rc = device_open(master_device_port,
+			 D_READ|D_WRITE,
+			 component,
+			 &fp->f_dev);
+	if (rc)
+		return rc;
+
+	if (c == 0) {
+		/* Only a device name was given. */
+		fp->f_fs = 0;
+		goto out_ok;
+	}
+
+	*cp = c;
+
+	rc = mount_fs(fp);
+	if (rc)
+		return rc;
+
+	inumber = (ino_t) ROOTINO;
+	if ((rc = read_inode(inumber, fp)) != 0) {
+		printf("can't read root inode\n");
+		goto exit;
+	}
+
+	while (*cp) {
+
+		/*
+		 * Check that current node is a directory.
+		 */
+		if ((fp->i_mode & IFMT) != IFDIR)
+			RETURN (FS_NOT_DIRECTORY);
+
+		/*
+		 * Remove extra separators
+		 */
+		while (*cp == '/')
+			cp++;
+
+		/*
+		 * Get next component of path name.
+		 */
+		component = cp;
+		{
+			register int	len = 0;
+
+			while ((c = *cp) != '\0' && c != '/') {
+				/* count this character before comparing */
+				if (++len > MAXNAMLEN)
+					RETURN (FS_NAME_TOO_LONG);
+				if (c & 0200)
+					RETURN (FS_INVALID_PARAMETER);
+				cp++;
+			}
+			*cp = 0;
+		}
+
+		/*
+		 * Look up component in current directory.
+		 * Save directory inumber in case we find a
+		 * symbolic link.
+		 */
+		parent_inumber = inumber;
+		rc = search_directory(component, fp, &inumber);
+		if (rc) {
+			printf("%s: not found\n", path);
+			goto exit;
+		}
+		*cp = c;
+
+		/*
+		 * Open next component.
+		 */
+		if ((rc = read_inode(inumber, fp)) != 0)
+			goto exit;
+
+		/*
+		 * Check for symbolic link.
+		 */
+		if ((fp->i_mode & IFMT) == IFLNK) {
+
+			int	link_len = fp->i_size;
+			int	len;
+
+			len = strlen(cp) + 1;
+
+			if (link_len + len >= MAXPATHLEN - 1)
+				RETURN (FS_NAME_TOO_LONG);
+
+			if (++nlinks > MAXSYMLINKS)
+				RETURN (FS_SYMLINK_LOOP);
+
+			/* Move the unresolved rest of the path behind the link text. */
+			ovbcopy(cp, &namebuf[link_len], len);
+
+#ifdef	IC_FASTLINK
+			if ((fp->i_flags & IC_FASTLINK) != 0) {
+				bcopy(fp->i_symlink, namebuf, (unsigned) link_len);
+			}
+			else
+#endif	/* IC_FASTLINK */
+#if !defined(DISABLE_BSD44_FASTLINKS)
+			/*
+			 * There is no bit for fastlinks in 4.4 but instead
+			 * all symlinks that fit into the inode are fastlinks.
+			 * If the second block (ic_db[1]) is zero the symlink
+			 * can't be a fastlink if its length is at least five.
+			 * For symlinks of length one to four there is no easy
+			 * way of knowing whether we are looking at a 4.4
+			 * fastlink or a 4.3 slowlink.  This code always
+			 * guesses the 4.4 way when in doubt. THIS BREAKS 4.3
+			 * SLOWLINKS OF LENGTH FOUR OR LESS.
+			 */
+			if ((link_len <= MAX_FASTLINK_SIZE && fp->i_ic.ic_db[1] != 0)
+			    || (link_len <= 4))
+			{
+				bcopy(fp->i_symlink, namebuf, (unsigned) link_len);
+			}
+			else
+#endif /* !DISABLE_BSD44_FASTLINKS */
+
+			{
+				/*
+				 * Read file for symbolic link
+				 */
+				vm_offset_t	buf;
+				mach_msg_type_number_t	buf_size;
+				daddr_t		disk_block;
+				register struct fs *fs = fp->f_fs;
+
+				(void) block_map(fp, (daddr_t)0, &disk_block);
+				rc = device_read(fp->f_dev,
+						 0,
+						 (recnum_t) fsbtodb(fs, disk_block),
+						 (int) blksize(fs, fp, 0),
+						 (char **) &buf,
+						 &buf_size);
+				if (rc)
+					goto exit;
+
+				bcopy((char *)buf, namebuf, (unsigned)link_len);
+				(void) vm_deallocate(mach_task_self(), buf, buf_size);
+			}
+
+			/*
+			 * If relative pathname, restart at parent directory.
+			 * If absolute pathname, restart at root.
+			 * If pathname begins '/dev/<device>/',
+			 *	restart at root of that device.
+			 */
+			cp = namebuf;
+			if (*cp != '/') {
+				inumber = parent_inumber;
+			}
+			else if (!strprefix(cp, "/dev/")) {
+				inumber = (ino_t)ROOTINO;
+			}
+			else {
+				cp += 5;
+				component = cp;
+				while ((c = *cp) != '\0' && c != '/') {
+					cp++;
+				}
+				*cp = '\0';
+
+				/*
+				 * Unmount current file system and free buffers.
+				 */
+				close_file(fp);
+
+				/*
+				 * Open new root device.
+				 */
+				rc = device_open(master_device_port,
+						 D_READ,
+						 component,
+						 &fp->f_dev);
+				if (rc)
+					return (rc);
+
+				if (c == 0) {
+					fp->f_fs = 0;
+					goto out_ok;
+				}
+
+				*cp = c;
+
+				rc = mount_fs(fp);
+				if (rc)
+					return (rc);
+
+				inumber = (ino_t)ROOTINO;
+			}
+			if ((rc = read_inode(inumber, fp)) != 0)
+				goto exit;
+		}
+	}
+
+	/*
+	 * Found terminal component.
+	 */
+    out_ok:
+	mutex_init(&fp->f_lock);
+	return 0;
+
+	/*
+	 * At error exit, close file to free storage.
+	 */
+    exit:
+	close_file(fp);
+	return rc;
+}
+
+/*
+ * Close file - free all storage used: the superblock copy first, then
+ * the cached indirect and data blocks.
+ */
+void
+ffs_close_file(fp)
+	register struct file	*fp;
+{
+	/*
+	 * Free the disk super-block.
+	 */
+	unmount_fs(fp);
+
+	/*
+	 * Free the inode and data buffers.
+	 */
+	free_file_buffers(fp);
+}
+
+/* Return non-zero when the file-type bits of fp's mode equal IFDIR. */
+int
+ffs_file_is_directory(struct file *fp)
+{
+	int type_bits = fp->i_mode & IFMT;
+
+	return type_bits == IFDIR;
+}
+
+/* Return non-zero when the file-type bits of fp's mode equal IFREG. */
+int
+ffs_file_is_regular(struct file *fp)
+{
+	int type_bits = fp->i_mode & IFMT;
+
+	return type_bits == IFREG;
+}
+
+/*
+ * Copy `size' bytes of file `fp', starting at byte `offset', to memory
+ * at `start', going block by block through buf_read_file.
+ *
+ * If `resid' is not null it receives the number of bytes that were not
+ * copied.  Returns 0, or the first error reported by buf_read_file (in
+ * which case *resid is not written).
+ */
+int
+ffs_read_file(fp, offset, start, size, resid)
+	register struct file	*fp;
+	vm_offset_t		offset;
+	vm_offset_t		start;
+	vm_size_t		size;
+	vm_size_t		*resid;	/* out */
+{
+	vm_offset_t	chunk;
+	vm_size_t	chunk_avail;
+	vm_size_t	take;
+	int		err;
+
+	for (; size != 0; offset += take, start += take, size -= take) {
+		err = buf_read_file(fp, offset, &chunk, &chunk_avail);
+		if (err)
+			return (err);
+
+		/* Never copy more than the current block can supply. */
+		take = (size > chunk_avail) ? chunk_avail : size;
+		if (take == 0)
+			break;
+
+		bcopy((char *)chunk, (char *)start, take);
+	}
+
+	if (resid)
+		*resid = size;
+
+	return (0);
+}
+
+/*
+ * Simple utility: counts the zero bits below the lowest set bit of `n',
+ * which is log2(n) when n is a power of two (the only intended use).
+ * n == 0 has no set bit and would never leave the loop, so it yields 0.
+ */
+static int
+log2(n)
+	register unsigned int n;
+{
+	register int i = 0;
+
+	if (n == 0)
+		return 0;
+
+	while ((n & 1) == 0) {
+		i++;
+		n >>= 1;
+	}
+	return i;
+}
+
+/*
+ * Make an empty file_direct for a device.
+ *
+ * For a structured device the block size, block shift and fsbtodb
+ * shift are copied from its superblock, which is read and released
+ * again.  Otherwise the page size is used as block size and
+ * fd_fsbtodb / fd_size are left at 0 to be filled in later.
+ * Returns 0, or the error from read_fs.
+ */
+int
+ffs_open_file_direct(dev, fdp, is_structured)
+	mach_port_t	dev;
+	register struct file_direct *fdp;
+	boolean_t	is_structured;
+{
+	struct fs	*sb;
+	int		err;
+
+	if (is_structured) {
+		err = read_fs(dev, &sb);
+		if (err)
+			return err;
+
+		fdp->fd_dev = dev;
+		fdp->fd_blocks = (daddr_t *) 0;
+		fdp->fd_size = 0;
+		fdp->fd_bsize = sb->fs_bsize;
+		fdp->fd_bshift = sb->fs_bshift;
+		fdp->fd_fsbtodb = sb->fs_fsbtodb;
+
+		/* Only the three geometry values were needed. */
+		(void) vm_deallocate(mach_task_self(),
+				     (vm_offset_t) sb,
+				     SBSIZE);
+		return 0;
+	}
+
+	/* Raw device. */
+	fdp->fd_dev = dev;
+	fdp->fd_blocks = (daddr_t *) 0;
+	fdp->fd_bsize = vm_page_size;
+	fdp->fd_bshift = log2(vm_page_size);
+	fdp->fd_fsbtodb = 0;	/* later */
+	fdp->fd_size = 0;	/* later */
+	return 0;
+}
+
+/*
+ * Add blocks from a file to a file_direct.
+ *
+ * fdp	file_direct being extended.
+ * fp	open file on the same device as fdp.
+ *
+ * If `fp' is an unstructured device, fd_size and fd_fsbtodb are derived
+ * from the device's DEV_GET_SIZE status instead.  Otherwise the disk
+ * block numbers of the file's whole blocks are appended to
+ * fdp->fd_blocks and the file's cached buffers are released.
+ *
+ * Returns 0, FS_INVALID_FS when the device or geometry does not match
+ * (including a device record size that is not positive or is larger
+ * than fd_bsize), or the error from device_get_status, vm_allocate or
+ * block_map.
+ */
+int
+ffs_add_file_direct(fdp, fp)
+	register struct file_direct *fdp;
+	register struct file *fp;
+{
+	register struct fs *fs;
+	long		num_blocks, i;
+	vm_offset_t	buffer;
+	vm_size_t	size;
+	int		rc;
+
+	/* the file must be on the same device */
+
+	if (fdp->fd_dev != fp->f_dev)
+		return FS_INVALID_FS;
+
+	if (!file_is_structured(fp)) {
+		int	result[DEV_GET_SIZE_COUNT];
+		natural_t count;
+		int	rec_size;
+
+		count = DEV_GET_SIZE_COUNT;
+		rc = device_get_status(	fdp->fd_dev, DEV_GET_SIZE,
+					result, &count);
+		if (rc)
+			return rc;
+
+		/*
+		 * The record size is a divisor below, and a quotient of
+		 * zero has no logarithm; reject both cases up front.
+		 */
+		rec_size = result[DEV_GET_SIZE_RECORD_SIZE];
+		if (rec_size <= 0 || fdp->fd_bsize < rec_size)
+			return FS_INVALID_FS;
+
+		fdp->fd_size = result[DEV_GET_SIZE_DEVICE_SIZE] >> fdp->fd_bshift;
+		fdp->fd_fsbtodb = log2(fdp->fd_bsize/rec_size);
+		return 0;
+	}
+
+	/* it must hold a file system */
+
+	fs = fp->f_fs;
+	if (fdp->fd_bsize != fs->fs_bsize ||
+	    fdp->fd_fsbtodb != fs->fs_fsbtodb)
+		return FS_INVALID_FS;
+
+	/* calculate number of blocks in the file, ignoring fragments */
+
+	num_blocks = lblkno(fs, fp->i_size);
+
+	/* allocate memory for a bigger array */
+
+	size = (num_blocks + fdp->fd_size) * sizeof(daddr_t);
+	rc = vm_allocate(mach_task_self(), &buffer, size, TRUE);
+	if (rc != KERN_SUCCESS)
+		return rc;
+
+	/* lookup new block addresses */
+
+	for (i = 0; i < num_blocks; i++) {
+		daddr_t	disk_block;
+
+		rc = block_map(fp, (daddr_t) i, &disk_block);
+		if (rc != 0) {
+			(void) vm_deallocate(mach_task_self(), buffer, size);
+			return rc;
+		}
+
+		((daddr_t *) buffer)[fdp->fd_size + i] = disk_block;
+	}
+
+	/* copy old addresses and install the new array */
+
+	if (fdp->fd_blocks != 0) {
+		bcopy((char *) fdp->fd_blocks, (char *) buffer,
+		      fdp->fd_size * sizeof(daddr_t));
+
+		(void) vm_deallocate(mach_task_self(),
+				     (vm_offset_t) fdp->fd_blocks,
+				     (vm_size_t) (fdp->fd_size * sizeof(daddr_t)));
+	}
+	fdp->fd_blocks = (daddr_t *) buffer;
+	fdp->fd_size += num_blocks;
+
+	/* deallocate cached blocks */
+
+	free_file_buffers(fp);
+
+	return 0;
+}
+
+/*
+ * Release the block-number array of a file_direct and clear the
+ * pointer.  Always returns 0; the function is declared int, so it must
+ * return a defined value.
+ */
+int
+ffs_remove_file_direct(fdp)
+	struct file_direct	*fdp;
+{
+	if (fdp->fd_blocks)
+		(void) vm_deallocate(mach_task_self(),
+				     (vm_offset_t) fdp->fd_blocks,
+				     (vm_size_t) (fdp->fd_size * sizeof(daddr_t)));
+	fdp->fd_blocks = 0; /* sanity */
+	/* xxx should lose a ref to fdp->fd_dev here (and elsewhere) xxx */
+	return 0;
+}
+
+/*
+ * Special read and write routines for default pager.
+ * Assume that all offsets and sizes are multiples
+ * of DEV_BSIZE.
+ *
+ * The helpers below use a mask and shifts, so fdir_blkoff only equals
+ * the remainder while fd_bsize is a power of two, and fdir_lblkno only
+ * equals the quotient while fd_bshift is the logarithm of fd_bsize.
+ */
+
+#define fdir_blkoff(fdp, offset) /* offset % fd_bsize */ \
+ ((offset) & ((fdp)->fd_bsize - 1))
+#define fdir_lblkno(fdp, offset) /* offset / fd_bsize */ \
+ ((offset) >> (fdp)->fd_bshift)
+
+#define fdir_fsbtodb(fdp, block) /* block * fd_bsize / DEV_BSIZE */ \
+ ((block) << (fdp)->fd_fsbtodb)
+
+/*
+ * Read all or part of one data block through device_read.
+ * Caller must deallocate the returned memory when done.
+ *
+ * fdp		file_direct to read from.
+ * offset	byte offset; must be a multiple of DEV_BSIZE.
+ * size		byte count; must be a multiple of DEV_BSIZE and is
+ *		clipped to fd_bsize.
+ * addr		out: address of the data, set by device_read.
+ * size_read	out: amount read, set by device_read.
+ *
+ * Panics on misaligned arguments.  Returns FS_NOT_IN_FILE when offset
+ * is past the end or the block number recorded for it is 0, otherwise
+ * the result of device_read.
+ */
+int
+ffs_page_read_file_direct(fdp, offset, size, addr, size_read)
+	register struct file_direct *fdp;
+	vm_offset_t offset;
+	vm_size_t size;
+	vm_offset_t *addr;			/* out */
+	mach_msg_type_number_t *size_read;	/* out */
+{
+	vm_offset_t	in_block;
+	daddr_t		logical;
+	daddr_t		physical;
+	recnum_t	record;
+
+	if (offset % DEV_BSIZE != 0 ||
+	    size % DEV_BSIZE != 0)
+		panic("page_read_file_direct");
+
+	if (offset >= (fdp->fd_size << fdp->fd_bshift))
+		return (FS_NOT_IN_FILE);
+
+	in_block = fdir_blkoff(fdp, offset);
+	logical = fdir_lblkno(fdp, offset);
+
+	if (file_is_device(fdp)) {
+		/* A raw device maps its blocks one to one. */
+		physical = logical;
+	} else {
+		physical = fdp->fd_blocks[logical];
+		if (physical == 0)
+			return (FS_NOT_IN_FILE);
+	}
+
+	if (size > fdp->fd_bsize)
+		size = fdp->fd_bsize;
+
+	record = (recnum_t) (fdir_fsbtodb(fdp, physical) + btodb(in_block));
+
+	return (device_read(fdp->fd_dev,
+			    0,
+			    record,
+			    (int) size,
+			    (char **) addr,
+			    size_read));
+}
+
+/*
+ * Write all or part of a data block, and
+ * return the amount written.
+ *
+ * fdp		file_direct to write to.
+ * offset	byte offset; must be a multiple of DEV_BSIZE.
+ * addr		address of the data to write.
+ * size		byte count; must be a multiple of DEV_BSIZE and is
+ *		clipped to fd_bsize.
+ * size_written	out: the count reported by device_write; 0 if
+ *		device_write did not store one.
+ *
+ * Panics on misaligned arguments.  Returns FS_NOT_IN_FILE when offset
+ * is past the end or the block number recorded for it is 0, otherwise
+ * the result of device_write.
+ */
+int
+ffs_page_write_file_direct(fdp, offset, addr, size, size_written)
+	register struct file_direct *fdp;
+	vm_offset_t offset;
+	vm_offset_t addr;
+	vm_size_t size;
+	vm_offset_t *size_written;	/* out */
+{
+	vm_offset_t	off;
+	register daddr_t file_block;
+	daddr_t		disk_block;
+	int		rc;
+	int		num_written = 0; /* defined even if the write fails */
+
+	if (offset % DEV_BSIZE != 0 ||
+	    size % DEV_BSIZE != 0)
+		panic("page_write_file");
+
+	if (offset >= (fdp->fd_size << fdp->fd_bshift))
+		return (FS_NOT_IN_FILE);
+
+	off = fdir_blkoff(fdp, offset);
+	file_block = fdir_lblkno(fdp, offset);
+
+	if (file_is_device(fdp)) {
+		disk_block = file_block;
+	} else {
+		disk_block = fdp->fd_blocks[file_block];
+		if (disk_block == 0)
+			return (FS_NOT_IN_FILE);
+	}
+
+	if (size > fdp->fd_bsize)
+		size = fdp->fd_bsize;
+
+	/*
+	 * Write the data.  Wait for completion to keep
+	 * reads from getting ahead of writes and reading
+	 * stale data.
+	 */
+	rc = device_write(
+			fdp->fd_dev,
+			0,
+			(recnum_t) (fdir_fsbtodb(fdp, disk_block) + btodb(off)),
+			(char *) addr,
+			size,
+			&num_written);
+	*size_written = num_written;
+	return rc;
+}
+
diff --git a/serverboot/file_io.c b/serverboot/file_io.c
new file mode 100644
index 00000000..141fdcfe
--- /dev/null
+++ b/serverboot/file_io.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 1994 The University of Utah and
+ * the Computer Systems Laboratory at the University of Utah (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software is hereby
+ * granted provided that (1) source code retains these copyright, permission,
+ * and disclaimer notices, and (2) redistributions including binaries
+ * reproduce the notices in supporting documentation, and (3) all advertising
+ * materials mentioning features or use of this software display the following
+ * acknowledgement: ``This product includes software developed by the
+ * Computer Systems Laboratory at the University of Utah.''
+ *
+ * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
+ * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
+ * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * CSL requests users of this software to return to csl-dist@cs.utah.edu any
+ * improvements that they make and grant CSL redistribution rights.
+ *
+ * Author: Bryan Ford, University of Utah CSL
+ * MINIX FS patches: Csizmazia Balazs, University ELTE, Hungary
+ */
+/* This is just an icky kludgy "VFS layer" (harhar) for ffs and ext2 and minix. */
+
+#include "file_io.h"
+
+/*
+ * Open PATH, probing the supported file system types in order:
+ * ext2 first, then minix, and finally BSD FFS as the fallback.
+ * A probe's answer is final unless it is FS_INVALID_FS; f_fstype is
+ * recorded only when that probe succeeded (returned 0).
+ */
+int
+open_file(master_device_port, path, fp)
+	mach_port_t master_device_port;
+	char *path;
+	struct file *fp;
+{
+	int status;
+
+	status = ext2_open_file(master_device_port, path, fp);
+	if (status != FS_INVALID_FS) {
+		if (status == 0)
+			fp->f_fstype = EXT2_FS;
+		return status;
+	}
+
+	status = minix_open_file(master_device_port, path, fp);
+	if (status != FS_INVALID_FS) {
+		if (status == 0)
+			fp->f_fstype = MINIX_FS;
+		return status;
+	}
+
+	/* Neither probe claimed the volume: hand it to FFS. */
+	fp->f_fstype = BSD_FFS;
+	return ffs_open_file(master_device_port, path, fp);
+}
+
+/*
+ * Close FP using the close routine of the file system recorded in
+ * fp->f_fstype; any type other than ext2 or minix is treated as FFS.
+ */
+void
+close_file(fp)
+	register struct file *fp;
+{
+	if (fp->f_fstype == EXT2_FS) {
+		ext2_close_file(fp);
+	} else if (fp->f_fstype == MINIX_FS) {
+		minix_close_file(fp);
+	} else {
+		ffs_close_file(fp);
+	}
+}
+
+/*
+ * Forward a read request to the implementation selected by
+ * fp->f_fstype (FFS when the type is neither ext2 nor minix).
+ * Arguments and the return value are passed through unchanged.
+ */
+int
+read_file(fp, offset, start, size, resid)
+	register struct file *fp;
+	vm_offset_t offset;
+	vm_offset_t start;
+	vm_size_t size;
+	vm_size_t *resid;	/* out */
+{
+	int result;
+
+	switch (fp->f_fstype) {
+	case EXT2_FS:
+		result = ext2_read_file(fp, offset, start, size, resid);
+		break;
+	case MINIX_FS:
+		result = minix_read_file(fp, offset, start, size, resid);
+		break;
+	default:
+		result = ffs_read_file(fp, offset, start, size, resid);
+		break;
+	}
+	return result;
+}
+
+/*
+ * Ask the file system selected by f->f_fstype whether F is a
+ * directory; FFS answers for any type other than ext2 or minix.
+ */
+int
+file_is_directory(struct file *f)
+{
+	if (f->f_fstype == EXT2_FS)
+		return ext2_file_is_directory(f);
+	if (f->f_fstype == MINIX_FS)
+		return minix_file_is_directory(f);
+	return ffs_file_is_directory(f);
+}
+
+/*
+ * Ask the file system selected by f->f_fstype whether F is a
+ * regular file; FFS answers for any type other than ext2 or minix.
+ */
+int
+file_is_regular(struct file *f)
+{
+	if (f->f_fstype == EXT2_FS)
+		return ext2_file_is_regular(f);
+	if (f->f_fstype == MINIX_FS)
+		return minix_file_is_regular(f);
+	return ffs_file_is_regular(f);
+}
+
+/*
+ * Set up FDP for direct access to DEV, probing ext2, then minix,
+ * then falling back to BSD FFS.  A probe's answer is final unless it
+ * is FS_INVALID_FS; f_fstype is recorded only when that probe
+ * succeeded (returned 0).
+ */
+int
+open_file_direct(dev, fdp, is_structured)
+	mach_port_t dev;
+	register struct file_direct *fdp;
+	boolean_t is_structured;
+{
+	int status;
+
+	status = ext2_open_file_direct(dev, fdp, is_structured);
+	if (status != FS_INVALID_FS) {
+		if (status == 0)
+			fdp->f_fstype = EXT2_FS;
+		return status;
+	}
+
+	status = minix_open_file_direct(dev, fdp, is_structured);
+	if (status != FS_INVALID_FS) {
+		if (status == 0)
+			fdp->f_fstype = MINIX_FS;
+		return status;
+	}
+
+	/* Neither probe claimed the device: hand it to FFS. */
+	fdp->f_fstype = BSD_FFS;
+	return ffs_open_file_direct(dev, fdp, is_structured);
+}
+
+/*
+ * Forward to the add_file_direct routine of the file system that
+ * FP belongs to.  The choice is made from fp->f_fstype (not from
+ * fdp); FFS handles any type other than ext2 or minix.
+ */
+int
+add_file_direct(fdp, fp)
+	register struct file_direct *fdp;
+	register struct file *fp;
+{
+	if (fp->f_fstype == EXT2_FS)
+		return ext2_add_file_direct(fdp, fp);
+	if (fp->f_fstype == MINIX_FS)
+		return minix_add_file_direct(fdp, fp);
+	return ffs_add_file_direct(fdp, fp);
+}
+
+/*
+ * Forward a direct page read to the implementation selected by
+ * fdp->f_fstype (FFS when the type is neither ext2 nor minix).
+ * Arguments and the return value are passed through unchanged.
+ */
+int
+page_read_file_direct(fdp, offset, size, addr, size_read)
+	register struct file_direct *fdp;
+	vm_offset_t offset;
+	vm_size_t size;
+	vm_offset_t *addr;			/* out */
+	mach_msg_type_number_t *size_read;	/* out */
+{
+	int result;
+
+	switch (fdp->f_fstype) {
+	case EXT2_FS:
+		result = ext2_page_read_file_direct(fdp, offset, size, addr, size_read);
+		break;
+	case MINIX_FS:
+		result = minix_page_read_file_direct(fdp, offset, size, addr, size_read);
+		break;
+	default:
+		result = ffs_page_read_file_direct(fdp, offset, size, addr, size_read);
+		break;
+	}
+	return result;
+}
+
+/*
+ * Forward a direct page write to the implementation selected by
+ * fdp->f_fstype (FFS when the type is neither ext2 nor minix).
+ * Arguments and the return value are passed through unchanged.
+ */
+int
+page_write_file_direct(fdp, offset, addr, size, size_written)
+	register struct file_direct *fdp;
+	vm_offset_t offset;
+	vm_offset_t addr;
+	vm_size_t size;
+	vm_offset_t *size_written;	/* out */
+{
+	int result;
+
+	switch (fdp->f_fstype) {
+	case EXT2_FS:
+		result = ext2_page_write_file_direct(fdp, offset, addr, size, size_written);
+		break;
+	case MINIX_FS:
+		result = minix_page_write_file_direct(fdp, offset, addr, size, size_written);
+		break;
+	default:
+		result = ffs_page_write_file_direct(fdp, offset, addr, size, size_written);
+		break;
+	}
+	return result;
+}
+
+/*
+ * Forward to the remove_file_direct routine selected by
+ * fdp->f_fstype; FFS handles any type other than ext2 or minix.
+ */
+int
+remove_file_direct(fdp)
+	struct file_direct *fdp;
+{
+	if (fdp->f_fstype == EXT2_FS)
+		return ext2_remove_file_direct(fdp);
+	if (fdp->f_fstype == MINIX_FS)
+		return minix_remove_file_direct(fdp);
+	return ffs_remove_file_direct(fdp);
+}
+
+/*
+ * some other stuff, that was previously defined as macro
+ */
+
+/*
+ * Nonzero when FP has a super-block pointer for the file system
+ * named by fp->f_fstype (the ffs member is checked for any type
+ * other than ext2 or minix).
+ */
+int
+file_is_structured(fp)
+	register struct file *fp;
+{
+	if (fp->f_fstype == EXT2_FS)
+		return fp->u.ext2.ext2_fs != 0;
+	if (fp->f_fstype == MINIX_FS)
+		return fp->u.minix.minix_fs != 0;
+	return fp->u.ffs.ffs_fs != 0;
+}
diff --git a/serverboot/file_io.h b/serverboot/file_io.h
new file mode 100644
index 00000000..5706ce5b
--- /dev/null
+++ b/serverboot/file_io.h
@@ -0,0 +1,174 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+
+#ifndef _FILE_IO_H_
+#define _FILE_IO_H_
+
+/*
+ * Read-only file IO.
+ */
+
+#include <mach.h>
+#include <cthreads.h>
+
+#include <device/device_types.h>
+
+#include <defs.h>
+#include "minix_fs.h"
+#include "ext2_fs.h"
+#include "disk_inode.h"
+
+#define BSD_FFS 0
+#define EXT2_FS 1
+#define MINIX_FS 2
+
+#define EXT2_NIADDR (EXT2_N_BLOCKS - EXT2_NDIR_BLOCKS)
+
+/*
+ * In-core open file.
+ *
+ * The members up to f_fstype are common to all file systems.
+ * f_fstype (BSD_FFS, EXT2_FS or MINIX_FS) tells which member of the
+ * union `u' holds the file-system-specific state; the three members
+ * have the same shape (super-block pointer, inode copy, and
+ * per-indirection-level block buffers).
+ */
+struct file {
+ struct mutex f_lock; /* lock */
+ mach_port_t f_dev; /* port to device */
+ vm_offset_t f_buf; /* buffer for data block */
+ vm_size_t f_buf_size; /* size of data block */
+ daddr_t f_buf_blkno; /* block number of data block */
+ vm_size_t f_size; /* size in bytes of the file */
+
+ int f_fstype; /* contains fs-id: BSD_FFS, EXT2_FS or MINIX_FS */
+
+ /* per-file-system state; valid member is selected by f_fstype */
+ union {
+ /* BSD FFS (also used for any f_fstype that is not ext2/minix) */
+ struct {
+ struct fs * ffs_fs; /* pointer to super-block */
+ struct icommon ffs_ic; /* copy of on-disk inode */
+
+ /* number of blocks mapped by
+ indirect block at level i */
+ int ffs_nindir[FFS_NIADDR+1];
+
+ /* buffer for indirect block at level i */
+ vm_offset_t ffs_blk[FFS_NIADDR];
+
+ /* size of buffer */
+ vm_size_t ffs_blksize[FFS_NIADDR];
+
+ /* disk address of block in buffer */
+ daddr_t ffs_blkno[FFS_NIADDR];
+ } ffs;
+ /* ext2 */
+ struct {
+ /* pointer to super-block */
+ struct ext2_super_block*ext2_fs;
+
+ /* pointer to group descriptors */
+ struct ext2_group_desc* ext2_gd;
+
+ /* size of group descriptors */
+ vm_size_t ext2_gd_size;
+
+ /* copy of on-disk inode */
+ struct ext2_inode ext2_ic;
+
+ /* number of blocks mapped by
+ indirect block at level i */
+ int ext2_nindir[EXT2_NIADDR+1];
+
+ /* buffer for indirect block at level i */
+ vm_offset_t ext2_blk[EXT2_NIADDR];
+
+ /* size of buffer */
+ vm_size_t ext2_blksize[EXT2_NIADDR];
+
+ /* disk address of block in buffer */
+ daddr_t ext2_blkno[EXT2_NIADDR];
+ } ext2;
+ /* minix */
+ struct {
+ /* pointer to super-block */
+ struct minix_super_block* minix_fs;
+
+ /* copy of on-disk inode */
+ struct minix_inode minix_ic;
+
+ /* number of blocks mapped by
+ indirect block at level i */
+ int minix_nindir[MINIX_NIADDR+1];
+
+ /* buffer for indirect block at level i */
+ vm_offset_t minix_blk[MINIX_NIADDR];
+
+ /* size of buffer */
+ vm_size_t minix_blksize[MINIX_NIADDR];
+
+ /* disk address of block in buffer */
+ minix_daddr_t minix_blkno[MINIX_NIADDR];
+ } minix;
+ } u;
+};
+
+/*
+ * In-core open file, with in-core block map.
+ *
+ * fd_blocks maps file block numbers to disk block addresses; a zero
+ * entry is reported as FS_NOT_IN_FILE by the page read/write routines.
+ * A null fd_blocks means there is no map at all (see file_is_device).
+ */
+struct file_direct {
+ int f_fstype; /* fs-id: BSD_FFS, EXT2_FS or MINIX_FS
+ (XXX was: true if ext2, false if ffs) */
+
+ mach_port_t fd_dev; /* port to device */
+ daddr_t * fd_blocks; /* array of disk block addresses */
+ long fd_size; /* number of blocks in the array */
+ long fd_bsize; /* disk block size */
+ long fd_bshift; /* log2(fd_bsize) */
+ long fd_fsbtodb; /* log2(fd_bsize / disk sector size) */
+};
+
+/*
+ * True when the file_direct has no block map; the FFS page routines
+ * then use the file block number directly as the disk block number.
+ */
+#define file_is_device(_fd_) ((_fd_)->fd_blocks == 0)
+
+/*
+ * Exported routines.
+ */
+
+extern int open_file();
+extern void close_file();
+extern int read_file();
+
+extern int open_file_direct();
+extern int add_file_direct();
+extern int remove_file_direct();
+extern int file_wire_direct();
+extern int page_read_file_direct();
+extern int page_write_file_direct();
+
+/*
+ * Error codes for file system errors.
+ */
+
+#define FS_NOT_DIRECTORY 5000 /* not a directory */
+#define FS_NO_ENTRY 5001 /* name not found */
+#define FS_NAME_TOO_LONG 5002 /* name too long */
+#define FS_SYMLINK_LOOP 5003 /* symbolic link loop */
+#define FS_INVALID_FS 5004 /* bad file system */
+#define FS_NOT_IN_FILE 5005 /* offset not in file */
+#define FS_INVALID_PARAMETER 5006 /* bad parameter to
+ a routine */
+
+#endif /* _FILE_IO_H_ */
diff --git a/serverboot/fs.h b/serverboot/fs.h
new file mode 100644
index 00000000..5809ed93
--- /dev/null
+++ b/serverboot/fs.h
@@ -0,0 +1,455 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * Copyright (c) 1982, 1986 Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms are permitted
+ * provided that the above copyright notice and this paragraph are
+ * duplicated in all such forms and that any documentation,
+ * advertising materials, and other materials related to such
+ * distribution and use acknowledge that the software was developed
+ * by the University of California, Berkeley. The name of the
+ * University may not be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * @(#)fs.h 7.7 (Berkeley) 5/9/89
+ */
+
+/*
+ * Each disk drive contains some number of file systems.
+ * A file system consists of a number of cylinder groups.
+ * Each cylinder group has inodes and data.
+ *
+ * A file system is described by its super-block, which in turn
+ * describes the cylinder groups. The super-block is critical
+ * data and is replicated in each cylinder group to protect against
+ * catastrophic loss. This is done at `newfs' time and the critical
+ * super-block data does not change, so the copies need not be
+ * referenced further unless disaster strikes.
+ *
+ * For file system fs, the offsets of the various blocks of interest
+ * are given in the super block as:
+ * [fs->fs_sblkno] Super-block
+ * [fs->fs_cblkno] Cylinder group block
+ * [fs->fs_iblkno] Inode blocks
+ * [fs->fs_dblkno] Data blocks
+ * The beginning of cylinder group cg in fs, is given by
+ * the ``cgbase(fs, cg)'' macro.
+ *
+ * The first boot and super blocks are given in absolute disk addresses.
+ * The byte-offset forms are preferred, as they don't imply a sector size.
+ */
+#define BBSIZE 8192
+#define SBSIZE 8192
+#define BBOFF ((off_t)(0))
+#define SBOFF ((off_t)(BBOFF + BBSIZE))
+#define BBLOCK ((daddr_t)(0))
+#define SBLOCK ((daddr_t)(BBLOCK + BBSIZE / DEV_BSIZE))
+
+/*
+ * Addresses stored in inodes are capable of addressing fragments
+ * of `blocks'. File system blocks of at most size MAXBSIZE can
+ * be optionally broken into 2, 4, or 8 pieces, each of which is
+ * addressable; these pieces may be DEV_BSIZE, or some multiple of
+ * a DEV_BSIZE unit.
+ *
+ * Large files consist of exclusively large data blocks. To avoid
+ * undue wasted disk space, the last data block of a small file may be
+ * allocated as only as many fragments of a large block as are
+ * necessary. The file system format retains only a single pointer
+ * to such a fragment, which is a piece of a single large block that
+ * has been divided. The size of such a fragment is determinable from
+ * information in the inode, using the ``blksize(fs, ip, lbn)'' macro.
+ *
+ * The file system records space availability at the fragment level;
+ * to determine block availability, aligned fragments are examined.
+ *
+ * The root inode is the root of the file system.
+ * Inode 0 can't be used for normal purposes and
+ * historically bad blocks were linked to inode 1,
+ * thus the root inode is 2. (inode 1 is no longer used for
+ * this purpose, however numerous dump tapes make this
+ * assumption, so we are stuck with it)
+ */
+#define ROOTINO ((ino_t)2) /* i number of all roots */
+
+/*
+ * MINBSIZE is the smallest allowable block size.
+ * In order to ensure that it is possible to create files of size
+ * 2^32 with only two levels of indirection, MINBSIZE is set to 4096.
+ * MINBSIZE must be big enough to hold a cylinder group block,
+ * thus changes to (struct cg) must keep its size within MINBSIZE.
+ * Note that super blocks are always of size SBSIZE,
+ * and that both SBSIZE and MAXBSIZE must be >= MINBSIZE.
+ */
+#define MINBSIZE 4096
+
+/*
+ * The path name on which the file system is mounted is maintained
+ * in fs_fsmnt. MAXMNTLEN defines the amount of space allocated in
+ * the super block for this name.
+ * The limit on the amount of summary information per file system
+ * is defined by MAXCSBUFS. It is currently parameterized for a
+ * maximum of two million cylinders.
+ */
+#define MAXMNTLEN 512
+#define MAXCSBUFS 32
+
+/*
+ * Per cylinder group information; summarized in blocks allocated
+ * from first cylinder group data blocks. These blocks have to be
+ * read in from fs_csaddr (size fs_cssize) in addition to the
+ * super block.
+ *
+ * N.B. sizeof(struct csum) must be a power of two in order for
+ * the ``fs_cs'' macro to work (see below).
+ */
+struct csum {
+ int cs_ndir; /* number of directories */
+ int cs_nbfree; /* number of free blocks */
+ int cs_nifree; /* number of free inodes */
+ int cs_nffree; /* number of free frags */
+};
+
+/*
+ * Super block for a file system.
+ *
+ * Member order and sizes matter: the field comments describe on-disk
+ * values, and fs_postbl()/fs_rotbl() locate tables by byte offset from
+ * the start of this structure.
+ */
+#define FS_MAGIC 0x011954
+struct fs
+{
+ int xxx1; /* struct fs *fs_link;*/
+ int xxx2; /* struct fs *fs_rlink;*/
+ daddr_t fs_sblkno; /* addr of super-block in filesys */
+ daddr_t fs_cblkno; /* offset of cyl-block in filesys */
+ daddr_t fs_iblkno; /* offset of inode-blocks in filesys */
+ daddr_t fs_dblkno; /* offset of first data after cg */
+ int fs_cgoffset; /* cylinder group offset in cylinder */
+ int fs_cgmask; /* used to calc mod fs_ntrak */
+ time_t fs_time; /* last time written */
+ int fs_size; /* number of blocks in fs */
+ int fs_dsize; /* number of data blocks in fs */
+ int fs_ncg; /* number of cylinder groups */
+ int fs_bsize; /* size of basic blocks in fs */
+ int fs_fsize; /* size of frag blocks in fs */
+ int fs_frag; /* number of frags in a block in fs */
+/* these are configuration parameters */
+ int fs_minfree; /* minimum percentage of free blocks */
+ int fs_rotdelay; /* num of ms for optimal next block */
+ int fs_rps; /* disk revolutions per second */
+/* these fields can be computed from the others */
+ int fs_bmask; /* ``blkoff'' calc of blk offsets */
+ int fs_fmask; /* ``fragoff'' calc of frag offsets */
+ int fs_bshift; /* ``lblkno'' calc of logical blkno */
+ int fs_fshift; /* ``numfrags'' calc number of frags */
+/* these are configuration parameters */
+ int fs_maxcontig; /* max number of contiguous blks */
+ int fs_maxbpg; /* max number of blks per cyl group */
+/* these fields can be computed from the others */
+ int fs_fragshift; /* block to frag shift */
+ int fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */
+ int fs_sbsize; /* actual size of super block */
+ int fs_csmask; /* csum block offset */
+ int fs_csshift; /* csum block number */
+ int fs_nindir; /* value of NINDIR */
+ int fs_inopb; /* value of INOPB */
+ int fs_nspf; /* value of NSPF */
+/* yet another configuration parameter */
+ int fs_optim; /* optimization preference, see below */
+/* these fields are derived from the hardware */
+ int fs_npsect; /* # sectors/track including spares */
+ int fs_interleave; /* hardware sector interleave */
+ int fs_trackskew; /* sector 0 skew, per track */
+ int fs_headswitch; /* head switch time, usec */
+ int fs_trkseek; /* track-to-track seek, usec */
+/* sizes determined by number of cylinder groups and their sizes */
+ daddr_t fs_csaddr; /* blk addr of cyl grp summary area */
+ int fs_cssize; /* size of cyl grp summary area */
+ int fs_cgsize; /* cylinder group size */
+/* these fields are derived from the hardware */
+ int fs_ntrak; /* tracks per cylinder */
+ int fs_nsect; /* sectors per track */
+ int fs_spc; /* sectors per cylinder */
+/* this comes from the disk driver partitioning */
+ int fs_ncyl; /* cylinders in file system */
+/* these fields can be computed from the others */
+ int fs_cpg; /* cylinders per group */
+ int fs_ipg; /* inodes per group */
+ int fs_fpg; /* blocks per group * fs_frag */
+/* this data must be re-computed after crashes */
+ struct csum fs_cstotal; /* cylinder summary information */
+/* these fields are cleared at mount time */
+ char fs_fmod; /* super block modified flag */
+ char fs_clean; /* file system is clean flag */
+ char fs_ronly; /* mounted read-only flag */
+ char fs_flags; /* currently unused flag */
+ char fs_fsmnt[MAXMNTLEN]; /* name mounted on */
+/* these fields retain the current block allocation info */
+ int fs_cgrotor; /* last cg searched */
+/*
+ * The fs_csp pointer array is compiled out in favour of an int array
+ * with the same element count (presumably to hold the slot size fixed
+ * regardless of pointer width -- TODO confirm).  As a result there is
+ * no fs_csp member, which is what the fs_cs() macro expands to.
+ */
+#if 1
+ int was_fs_csp[MAXCSBUFS];
+#else
+ struct csum *fs_csp[MAXCSBUFS];/* list of fs_cs info buffers */
+#endif
+ int fs_cpc; /* cyl per cycle in postbl */
+ short fs_opostbl[16][8]; /* old rotation block list head */
+ long fs_sparecon[50]; /* reserved for future constants */
+ long fs_contigsumsize; /* size of cluster summary array */
+ long fs_maxsymlinklen; /* max length of an internal symlink */
+ long fs_inodefmt; /* format of on-disk inodes */
+ quad fs_maxfilesize; /* maximum representable file size */
+ quad fs_qbmask; /* ~fs_bmask - for use with quad size */
+ quad fs_qfmask; /* ~fs_fmask - for use with quad size */
+ long fs_state; /* validate fs_clean field */
+ int fs_postblformat; /* format of positional layout tables */
+ int fs_nrpos; /* number of rotational positions */
+ int fs_postbloff; /* (short) rotation block list head */
+ int fs_rotbloff; /* (u_char) blocks for each rotation */
+ int fs_magic; /* magic number */
+ u_char fs_space[1]; /* list of blocks for each rotation */
+/* actually longer */
+};
+/*
+ * Preference for optimization.
+ */
+#define FS_OPTTIME 0 /* minimize allocation time */
+#define FS_OPTSPACE 1 /* minimize disk fragmentation */
+
+/*
+ * Rotational layout table format types
+ */
+#define FS_42POSTBLFMT -1 /* 4.2BSD rotational table format */
+#define FS_DYNAMICPOSTBLFMT 1 /* dynamic rotational table format */
+/*
+ * Macros for access to superblock array structures
+ */
+#define fs_postbl(fs, cylno) \
+ (((fs)->fs_postblformat == FS_42POSTBLFMT) \
+ ? ((fs)->fs_opostbl[cylno]) \
+ : ((short *)((char *)(fs) + (fs)->fs_postbloff) + (cylno) * (fs)->fs_nrpos))
+#define fs_rotbl(fs) \
+ (((fs)->fs_postblformat == FS_42POSTBLFMT) \
+ ? ((fs)->fs_space) \
+ : ((u_char *)((char *)(fs) + (fs)->fs_rotbloff)))
+
+/*
+ * Convert cylinder group to base address of its global summary info.
+ *
+ * N.B. This macro assumes that sizeof(struct csum) is a power of two.
+ *
+ * NOTE(review): the expansion names a member called fs_csp, but
+ * struct fs in this header is compiled with `was_fs_csp' (an int
+ * array) in its place, so this macro does not look usable as it
+ * stands -- confirm before relying on it.
+ */
+#define fs_cs(fs, indx) \
+ fs_csp[(indx) >> (fs)->fs_csshift][(indx) & ~(fs)->fs_csmask]
+
+/*
+ * Cylinder group block for a file system.
+ *
+ * The cg_*off members are byte offsets from the start of this
+ * structure; the cg_blktot(), cg_blks(), cg_inosused() and
+ * cg_blksfree() macros add them to the structure's address to reach
+ * the corresponding tables.  cg_magic is the second word here but sits
+ * near the end of the old layout (struct ocg); cg_chkmagic() accepts
+ * CG_MAGIC in either position.
+ */
+#define CG_MAGIC 0x090255
+struct cg {
+ int xxx1; /* struct cg *cg_link;*/
+ int cg_magic; /* magic number */
+ time_t cg_time; /* time last written */
+ int cg_cgx; /* we are the cgx'th cylinder group */
+ short cg_ncyl; /* number of cyl's this cg */
+ short cg_niblk; /* number of inode blocks this cg */
+ int cg_ndblk; /* number of data blocks this cg */
+ struct csum cg_cs; /* cylinder summary information */
+ int cg_rotor; /* position of last used block */
+ int cg_frotor; /* position of last used frag */
+ int cg_irotor; /* position of last used inode */
+ int cg_frsum[MAXFRAG]; /* counts of available frags */
+ int cg_btotoff; /* (long) block totals per cylinder */
+ int cg_boff; /* (short) free block positions */
+ int cg_iusedoff; /* (char) used inode map */
+ int cg_freeoff; /* (u_char) free block map */
+ int cg_nextfreeoff; /* (u_char) next available space */
+ int cg_sparecon[16]; /* reserved for future use */
+ u_char cg_space[1]; /* space for cylinder group maps */
+/* actually longer */
+};
+/*
+ * Macros for access to cylinder group array structures
+ */
+#define cg_blktot(cgp) \
+ (((cgp)->cg_magic != CG_MAGIC) \
+ ? (((struct ocg *)(cgp))->cg_btot) \
+ : ((int *)((char *)(cgp) + (cgp)->cg_btotoff)))
+#define cg_blks(fs, cgp, cylno) \
+ (((cgp)->cg_magic != CG_MAGIC) \
+ ? (((struct ocg *)(cgp))->cg_b[cylno]) \
+ : ((short *)((char *)(cgp) + (cgp)->cg_boff) + (cylno) * (fs)->fs_nrpos))
+#define cg_inosused(cgp) \
+ (((cgp)->cg_magic != CG_MAGIC) \
+ ? (((struct ocg *)(cgp))->cg_iused) \
+ : ((char *)((char *)(cgp) + (cgp)->cg_iusedoff)))
+#define cg_blksfree(cgp) \
+ (((cgp)->cg_magic != CG_MAGIC) \
+ ? (((struct ocg *)(cgp))->cg_free) \
+ : ((u_char *)((char *)(cgp) + (cgp)->cg_freeoff)))
+#define cg_chkmagic(cgp) \
+ ((cgp)->cg_magic == CG_MAGIC || ((struct ocg *)(cgp))->cg_magic == CG_MAGIC)
+
+/*
+ * The following structure is defined
+ * for compatibility with old file systems.
+ *
+ * The cg_* access macros fall back to this layout (fixed-size
+ * cg_btot, cg_b, cg_iused and cg_free arrays) whenever the word at
+ * the new-format cg_magic position is not CG_MAGIC.  Here the magic
+ * number follows the fixed arrays instead of being the second word.
+ */
+struct ocg {
+ int xxx1; /* struct ocg *cg_link;*/
+ int xxx2; /* struct ocg *cg_rlink;*/
+ time_t cg_time; /* time last written */
+ int cg_cgx; /* we are the cgx'th cylinder group */
+ short cg_ncyl; /* number of cyl's this cg */
+ short cg_niblk; /* number of inode blocks this cg */
+ int cg_ndblk; /* number of data blocks this cg */
+ struct csum cg_cs; /* cylinder summary information */
+ int cg_rotor; /* position of last used block */
+ int cg_frotor; /* position of last used frag */
+ int cg_irotor; /* position of last used inode */
+ int cg_frsum[8]; /* counts of available frags */
+ int cg_btot[32]; /* block totals per cylinder */
+ short cg_b[32][8]; /* positions of free blocks */
+ char cg_iused[256]; /* used inode map */
+ int cg_magic; /* magic number */
+ u_char cg_free[1]; /* free block map */
+/* actually longer */
+};
+
+/*
+ * Turn file system block numbers into disk block addresses.
+ * This maps file system blocks to device size blocks.
+ */
+#define fsbtodb(fs, b) ((b) << (fs)->fs_fsbtodb)
+#define dbtofsb(fs, b) ((b) >> (fs)->fs_fsbtodb)
+
+/*
+ * Cylinder group macros to locate things in cylinder groups.
+ * They calc file system addresses of cylinder group data structures.
+ */
+#define cgbase(fs, c) ((daddr_t)((fs)->fs_fpg * (c)))
+#define cgstart(fs, c) \
+ (cgbase(fs, c) + (fs)->fs_cgoffset * ((c) & ~((fs)->fs_cgmask)))
+#define cgsblock(fs, c) (cgstart(fs, c) + (fs)->fs_sblkno) /* super blk */
+#define cgtod(fs, c) (cgstart(fs, c) + (fs)->fs_cblkno) /* cg block */
+#define cgimin(fs, c) (cgstart(fs, c) + (fs)->fs_iblkno) /* inode blk */
+#define cgdmin(fs, c) (cgstart(fs, c) + (fs)->fs_dblkno) /* 1st data */
+
+/*
+ * Macros for handling inode numbers:
+ * inode number to file system block offset.
+ * inode number to cylinder group number.
+ * inode number to file system block address.
+ */
+#define itoo(fs, x) ((x) % INOPB(fs))
+#define itog(fs, x) ((x) / (fs)->fs_ipg)
+#define itod(fs, x) \
+ ((daddr_t)(cgimin(fs, itog(fs, x)) + \
+ (blkstofrags((fs), (((x) % (fs)->fs_ipg) / INOPB(fs))))))
+
+/*
+ * Give cylinder group number for a file system block.
+ * Give cylinder group block number for a file system block.
+ */
+#define dtog(fs, d) ((d) / (fs)->fs_fpg)
+#define dtogd(fs, d) ((d) % (fs)->fs_fpg)
+
+/*
+ * Extract the bits for a block from a map.
+ * Compute the cylinder and rotational position of a cyl block addr.
+ */
+#define blkmap(fs, map, loc) \
+ (((map)[(loc) / NBBY] >> ((loc) % NBBY)) & (0xff >> (NBBY - (fs)->fs_frag)))
+#define cbtocylno(fs, bno) \
+ ((bno) * NSPF(fs) / (fs)->fs_spc)
+#define cbtorpos(fs, bno) \
+ (((bno) * NSPF(fs) % (fs)->fs_spc / (fs)->fs_nsect * (fs)->fs_trackskew + \
+ (bno) * NSPF(fs) % (fs)->fs_spc % (fs)->fs_nsect * (fs)->fs_interleave) % \
+ (fs)->fs_nsect * (fs)->fs_nrpos / (fs)->fs_npsect)
+
+/*
+ * The following macros optimize certain frequently calculated
+ * quantities by using shifts and masks in place of divisions
+ * modulos and multiplications.
+ */
+#define blkoff(fs, loc) /* calculates (loc % fs->fs_bsize) */ \
+ ((loc) & ~(fs)->fs_bmask)
+#define fragoff(fs, loc) /* calculates (loc % fs->fs_fsize) */ \
+ ((loc) & ~(fs)->fs_fmask)
+#define lblkno(fs, loc) /* calculates (loc / fs->fs_bsize) */ \
+ ((loc) >> (fs)->fs_bshift)
+#define numfrags(fs, loc) /* calculates (loc / fs->fs_fsize) */ \
+ ((loc) >> (fs)->fs_fshift)
+#define blkroundup(fs, size) /* calculates roundup(size, fs->fs_bsize) */ \
+ (((size) + (fs)->fs_bsize - 1) & (fs)->fs_bmask)
+#define fragroundup(fs, size) /* calculates roundup(size, fs->fs_fsize) */ \
+ (((size) + (fs)->fs_fsize - 1) & (fs)->fs_fmask)
+#define fragstoblks(fs, frags) /* calculates (frags / fs->fs_frag) */ \
+ ((frags) >> (fs)->fs_fragshift)
+#define blkstofrags(fs, blks) /* calculates (blks * fs->fs_frag) */ \
+ ((blks) << (fs)->fs_fragshift)
+#define fragnum(fs, fsb) /* calculates (fsb % fs->fs_frag) */ \
+ ((fsb) & ((fs)->fs_frag - 1))
+#define blknum(fs, fsb) /* calculates rounddown(fsb, fs->fs_frag) */ \
+ ((fsb) &~ ((fs)->fs_frag - 1))
+
+/*
+ * Determine the number of available frags given a
+ * percentage to hold in reserve
+ */
+#define freespace(fs, percentreserved) \
+ (blkstofrags((fs), (fs)->fs_cstotal.cs_nbfree) + \
+ (fs)->fs_cstotal.cs_nffree - ((fs)->fs_dsize * (percentreserved) / 100))
+
+/*
+ * Determining the size of a file block in the file system.
+ */
+#define blksize(fs, ip, lbn) \
+ (((lbn) >= NDADDR || (ip)->i_size >= ((lbn) + 1) << (fs)->fs_bshift) \
+ ? (fs)->fs_bsize \
+ : (fragroundup(fs, blkoff(fs, (ip)->i_size))))
+#define dblksize(fs, dip, lbn) \
+ (((lbn) >= NDADDR || (dip)->di_size >= ((lbn) + 1) << (fs)->fs_bshift) \
+ ? (fs)->fs_bsize \
+ : (fragroundup(fs, blkoff(fs, (dip)->di_size))))
+
+/*
+ * Number of disk sectors per block; assumes DEV_BSIZE byte sector size.
+ */
+#define NSPB(fs) ((fs)->fs_nspf << (fs)->fs_fragshift)
+#define NSPF(fs) ((fs)->fs_nspf)
+
+/*
+ * INOPB is the number of inodes in a secondary storage block.
+ */
+#define INOPB(fs) ((fs)->fs_inopb)
+#define INOPF(fs) ((fs)->fs_inopb >> (fs)->fs_fragshift)
+
+/*
+ * NINDIR is the number of indirects in a file system block.
+ */
+#define NINDIR(fs) ((fs)->fs_nindir)
+
diff --git a/serverboot/gets.c b/serverboot/gets.c
new file mode 100644
index 00000000..61d14460
--- /dev/null
+++ b/serverboot/gets.c
@@ -0,0 +1,90 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1993-1989 Carnegie Mellon University.
+ * Copyright (c) 1994 The University of Utah and
+ * the Computer Systems Laboratory (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF
+ * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY
+ * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF
+ * THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+
+#include <mach.h>
+#include <device/device.h>
+#include <varargs.h>
+
+extern mach_port_t __libmach_console_port;
+
+/*
+ * Read one line from the console into str, echoing as we go and
+ * storing at most maxlen bytes including the terminating NUL.
+ *
+ *	newline, return		end the line
+ *	backspace, '#', DEL	erase the previous character
+ *	'@', ^U			discard everything typed so far
+ *
+ * Other printable characters are stored and echoed; once the buffer
+ * is full they are dropped with a beep.  All remaining characters
+ * are ignored.
+ *
+ * If maxlen leaves no room for the NUL, nothing is read or written.
+ * If the console read fails, the line gathered so far is terminated
+ * and returned rather than parsing an unfilled input buffer.
+ *
+ * Always returns 0; the value carries no information.
+ */
+int
+safe_gets(str, maxlen)
+	char *str;
+	int maxlen;
+{
+	register char *lp;
+	register int c;
+
+	char inbuf[IO_INBAND_MAX];
+	mach_msg_type_number_t count;
+	kern_return_t kr;
+	register char *ip;
+	char *strmax;
+
+	/* No room even for the terminator: never write through str. */
+	if (maxlen <= 0)
+		return 0;
+
+	strmax = str + maxlen - 1;	/* allow space for trailing 0 */
+
+	lp = str;
+	for (;;) {
+		count = IO_INBAND_MAX;
+		kr = device_read_inband(__libmach_console_port,
+					(dev_mode_t)0, (recnum_t)0,
+					sizeof(inbuf), inbuf, &count);
+		if (kr != KERN_SUCCESS) {
+			/*
+			 * inbuf and count are not meaningful after a
+			 * failed read; end the line here.
+			 */
+			*lp = 0;
+			return 0;
+		}
+		for (ip = inbuf; ip < &inbuf[count]; ip++) {
+			c = *ip;
+			switch (c) {
+			case '\n':
+			case '\r':
+				printf("\n");
+				*lp = 0;
+				return 0;
+
+			case '\b':
+			case '#':
+			case '\177':
+				if (lp > str) {
+					printf("\b \b");
+					lp--;
+				}
+				continue;
+			case '@':
+			case 'u'&037:
+				lp = str;
+				printf("\n\r");
+				continue;
+			default:
+				if (c >= ' ' && c < '\177') {
+					if (lp < strmax) {
+						*lp++ = c;
+						printf("%c", c);
+					}
+					else {
+						printf("%c", '\007'); /* beep */
+					}
+				}
+			}
+		}
+	}
+}
+
diff --git a/serverboot/kalloc.c b/serverboot/kalloc.c
new file mode 100644
index 00000000..80438738
--- /dev/null
+++ b/serverboot/kalloc.c
@@ -0,0 +1,274 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1993-1987 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * File: kern/kalloc.c
+ * Author: Avadis Tevanian, Jr.
+ * Date: 1985
+ *
+ * General kernel memory allocator. This allocator is designed
+ * to be used by the kernel to manage dynamic memory fast.
+ */
+
+#include <mach.h>
+#include <cthreads.h> /* for spin locks */
+
+#define DEBUG
+
+/*
+ * All allocations of size less than kalloc_max are rounded to the
+ * next highest power of 2.
+ */
+vm_size_t kalloc_max; /* max before we use vm_allocate */
+#define MINSIZE 4 /* minimum allocation size */
+
+/*
+ * Per-size-class free list.  kfree_list[] holds one of these for each
+ * power-of-two block size starting at MINSIZE.
+ */
+struct free_list {
+ /* held by kalloc/kfree while head (and count) are updated */
+ spin_lock_t lock;
+ /*
+  * Address of the first free block, or 0 if the list is empty.
+  * The first word of each free block stores the address of the next.
+  */
+ vm_offset_t head; /* head of free list */
+#ifdef DEBUG
+ /* incremented by kfree, decremented when kalloc reuses a block */
+ int count;
+#endif /*DEBUG*/
+};
+
+#define KLIST_MAX 13
+ /* sizes: 4, 8, 16, 32, 64,
+ 128, 256, 512, 1024,
+ 2048, 4096, 8192, 16384 */
+struct free_list kfree_list[KLIST_MAX];
+
+spin_lock_t kget_space_lock;
+vm_offset_t kalloc_next_space = 0;
+vm_offset_t kalloc_end_of_space = 0;
+
+vm_size_t kalloc_wasted_space = 0;
+
+boolean_t kalloc_initialized = FALSE;
+
+/*
+ * Initialize the memory allocator. This should be called only
+ * once on a system wide basis (i.e. first processor to get here
+ * does the initialization).
+ *
+ * This initializes all of the zones.
+ */
+
+void kalloc_init(void)
+{
+ vm_offset_t min, max;
+ vm_size_t size;
+ register int i;
+
+ /*
+ * Support free lists for items up to vm_page_size or
+ * 16Kbytes, whichever is less.
+ */
+
+ if (vm_page_size > 16*1024)
+ kalloc_max = 16*1024;
+ else
+ kalloc_max = vm_page_size;
+
+ for (i = 0; i < KLIST_MAX; i++) {
+ spin_lock_init(&kfree_list[i].lock);
+ kfree_list[i].head = 0;
+ }
+ spin_lock_init(&kget_space_lock);
+
+ /*
+ * Do not allocate memory at address 0.
+ */
+ kalloc_next_space = vm_page_size;
+ kalloc_end_of_space = vm_page_size;
+}
+
+/*
+ * Contiguous space allocator for items of less than a page size.
+ *
+ * Carves `size' bytes out of the region [kalloc_next_space,
+ * kalloc_end_of_space), growing that region with vm_map() and wiring
+ * the new pages when it is too small.  Returns the address of the
+ * carved space, or 0 if vm_map() fails.
+ *
+ * kget_space_lock protects the two region cursors.  It is dropped
+ * around vm_map()/wire_memory(), so the loop re-checks the region
+ * after re-taking the lock.
+ */
+vm_offset_t kget_space(vm_offset_t size)
+{
+ vm_size_t space_to_add;
+ /* nonzero: a region we mapped but have not yet merged in */
+ vm_offset_t new_space = 0;
+ vm_offset_t addr;
+
+ spin_lock(&kget_space_lock);
+ while (kalloc_next_space + size > kalloc_end_of_space) {
+ /*
+ * Add at least one page to allocation area.
+ */
+ space_to_add = round_page(size);
+
+ if (new_space == 0) {
+ /*
+ * Unlock and allocate memory.
+ * Try to make it contiguous with the last
+ * allocation area.
+ */
+ spin_unlock(&kget_space_lock);
+
+ new_space = kalloc_end_of_space;
+ if (vm_map(mach_task_self(),
+ &new_space, space_to_add, (vm_offset_t) 0, TRUE,
+ MEMORY_OBJECT_NULL, (vm_offset_t) 0, FALSE,
+ VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT)
+ != KERN_SUCCESS)
+ return 0;
+ /* the lock is not held on the failure return above */
+ wire_memory(new_space, space_to_add,
+ VM_PROT_READ|VM_PROT_WRITE);
+ spin_lock(&kget_space_lock);
+ /* re-evaluate the loop condition now that we hold the lock again */
+ continue;
+ }
+
+ /*
+ * Memory was allocated in a previous iteration.
+ * Check whether the new region is contiguous with the
+ * old one.
+ */
+ if (new_space != kalloc_end_of_space) {
+ /*
+ * Throw away the remainder of the old space,
+ * and start a new one.
+ */
+ kalloc_wasted_space +=
+ kalloc_end_of_space - kalloc_next_space;
+ kalloc_next_space = new_space;
+ }
+ kalloc_end_of_space = new_space + space_to_add;
+
+ /* the mapped region now belongs to the allocation area */
+ new_space = 0;
+ }
+
+ addr = kalloc_next_space;
+ kalloc_next_space += size;
+ spin_unlock(&kget_space_lock);
+
+ /*
+ * new_space is still set only if the loop ended right after we
+ * mapped it, i.e. the region became large enough while the lock
+ * was dropped; the extra mapping is not needed, so give it back.
+ */
+ if (new_space != 0)
+ (void) vm_deallocate(mach_task_self(), new_space, space_to_add);
+
+ return addr;
+}
+
+/*
+ * Allocate `size' bytes.
+ *
+ * Requests below kalloc_max are rounded up to a power of two (at
+ * least MINSIZE) and served from the matching free list, falling back
+ * to kget_space() when that list is empty.  Anything whose rounded
+ * size reaches kalloc_max goes straight to vm_allocate().
+ * Returns 0 on failure.  The allocator initializes itself on first use.
+ */
+void *kalloc(vm_size_t size)
+{
+    vm_size_t rounded = size;
+    struct free_list *bucket = kfree_list;
+    vm_offset_t block;
+
+    if (!kalloc_initialized) {
+        kalloc_init();
+        kalloc_initialized = TRUE;
+    }
+
+    /* work out the size class: double from MINSIZE until it fits */
+    if (size < kalloc_max) {
+        for (rounded = MINSIZE; rounded < size; rounded <<= 1)
+            bucket++;
+    }
+
+    /* too large for the free lists: take whole pages from the VM system */
+    if (rounded >= kalloc_max) {
+        if (vm_allocate(mach_task_self(), &block, rounded, TRUE)
+            != KERN_SUCCESS)
+            return (void *) 0;
+        return (void *) block;
+    }
+
+    spin_lock(&bucket->lock);
+    block = bucket->head;
+    if (block == 0) {
+        /* nothing cached for this class; carve fresh space */
+        spin_unlock(&bucket->lock);
+        return (void *) kget_space(rounded);
+    }
+
+    /* pop the first block; its first word links to the next one */
+    bucket->head = *(vm_offset_t *)block;
+#ifdef DEBUG
+    bucket->count--;
+#endif
+    spin_unlock(&bucket->lock);
+
+    return (void *) block;
+}
+
+/*
+ * Release a block obtained from kalloc().
+ *
+ * data  start of the block
+ * size  the size that was requested from kalloc(); it is rounded the
+ *       same way to find the free list the block belongs on
+ *
+ * Blocks whose rounded size reaches kalloc_max are returned to the VM
+ * system instead of being cached.
+ */
+void
+kfree( void *data,
+       vm_size_t size)
+{
+    vm_size_t rounded = size;
+    struct free_list *bucket = kfree_list;
+
+    if (size < kalloc_max) {
+        for (rounded = MINSIZE; rounded < size; rounded <<= 1)
+            bucket++;
+    }
+
+    if (rounded >= kalloc_max) {
+        (void) vm_deallocate(mach_task_self(), (vm_offset_t)data, rounded);
+        return;
+    }
+
+    /* push onto the free list, storing the old head in the block itself */
+    spin_lock(&bucket->lock);
+    *(vm_offset_t *)data = bucket->head;
+    bucket->head = (vm_offset_t) data;
+#ifdef DEBUG
+    bucket->count++;
+#endif
+    spin_unlock(&bucket->lock);
+}
+
+/*
+ * malloc() front end: simply forwards to kalloc().
+ */
+void *malloc(vm_size_t size)
+{
+    void *block = kalloc(size);
+
+    return block;
+}
+
+/*
+ * free() counterpart of malloc() above.  Deliberately a no-op: the
+ * block is never returned to the allocator (kfree() needs the block
+ * size, which is not available here).
+ */
+void free(void *addr)
+{
+ /* Just ignore harmless attempts at cleanliness. */
+ /* panic("free not implemented"); */
+}
+
+/*
+ * Fork hooks for the allocator.  All three are empty here.
+ * NOTE(review): presumably present because the threads library
+ * expects the malloc implementation to supply them -- confirm.
+ */
+void malloc_fork_prepare()
+{
+}
+
+void malloc_fork_parent()
+{
+}
+
+void malloc_fork_child()
+{
+}
diff --git a/serverboot/load.c b/serverboot/load.c
new file mode 100644
index 00000000..9a3e3b98
--- /dev/null
+++ b/serverboot/load.c
@@ -0,0 +1,406 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+
+#include <assert.h>
+#include <mach/mach_interface.h>
+#include <varargs.h>
+#include <mach/exec/exec.h>
+#include "../boot/boot_script.h"
+
+#include <file_io.h>
+
+
+boolean_t load_protect_text = TRUE;
+
+
+/*
+ * State handed (as the opaque `handle') to the prog_read and
+ * prog_read_exec callbacks while exec_load() runs.
+ */
+struct stuff
+{
+ /* open executable being loaded */
+ struct file *fp;
+ /* task whose address space receives the sections */
+ task_t user_task;
+
+ /*
+  * File offset and size of the a.out symbol table and string table;
+  * recorded by prog_read_exec when it is shown such a section.
+  */
+ vm_offset_t aout_symtab_ofs;
+ vm_size_t aout_symtab_size;
+ vm_offset_t aout_strtab_ofs;
+ vm_size_t aout_strtab_size;
+};
+
+char *set_regs(
+ mach_port_t user_task,
+ mach_port_t user_thread,
+ struct exec_info *info,
+ int arg_size);
+
+static void read_symtab_from_file(
+ struct file *fp,
+ mach_port_t host_port,
+ task_t task,
+ char * symtab_name,
+ struct stuff *st);
+
+/* Callback functions for reading the executable file. */
+/*
+ * exec_load() read callback: read `size' bytes at `file_ofs' of the
+ * executable into `buf'.  On success stores the number of bytes
+ * actually read in *out_actual and returns 0; otherwise returns the
+ * read_file() error and leaves *out_actual alone.
+ */
+static int prog_read(void *handle, vm_offset_t file_ofs, void *buf, vm_size_t size,
+                     vm_size_t *out_actual)
+{
+    struct stuff *st = handle;
+    vm_size_t resid;
+    int result = read_file(st->fp, file_ofs, buf, size, &resid);
+
+    if (!result)
+        *out_actual = size - resid;
+    return result;
+}
+
+/*
+ * exec_load() section callback: place one section of the executable
+ * into the user task.
+ *
+ * handle     the struct stuff for this load
+ * file_ofs   offset of the section's data in the file
+ * file_size  number of bytes to take from the file
+ * mem_addr   address of the section in the user task
+ * mem_size   size of the section in memory; bytes beyond file_size
+ *            are zero-filled
+ * sec_type   protection bits plus EXEC_SECTYPE_* flags
+ *
+ * Symbol/string table locations are recorded in the struct stuff.
+ * Sections without EXEC_SECTYPE_ALLOC are otherwise ignored.
+ * Returns 0 on success or the failing call's error code
+ * (EX_CORRUPT for a short read).
+ */
+static int prog_read_exec(void *handle, vm_offset_t file_ofs, vm_size_t file_size,
+                          vm_offset_t mem_addr, vm_size_t mem_size,
+                          exec_sectype_t sec_type)
+{
+    struct stuff *st = handle;
+    vm_offset_t page_start = trunc_page(mem_addr);
+    vm_offset_t page_end = round_page(mem_addr + mem_size);
+    vm_size_t map_size = page_end - page_start;
+    vm_prot_t mem_prot = sec_type & EXEC_SECTYPE_PROT_MASK;
+    vm_offset_t area_start = 0;
+    int result;
+
+    if (sec_type & EXEC_SECTYPE_AOUT_SYMTAB)
+    {
+        st->aout_symtab_ofs = file_ofs;
+        st->aout_symtab_size = file_size;
+    }
+    if (sec_type & EXEC_SECTYPE_AOUT_STRTAB)
+    {
+        st->aout_strtab_ofs = file_ofs;
+        st->aout_strtab_size = file_size;
+    }
+
+    if (!(sec_type & EXEC_SECTYPE_ALLOC))
+        return 0;
+
+    assert(mem_size > 0);
+    /* A section with no zero-fill tail has mem_size == file_size. */
+    assert(mem_size >= file_size);
+
+    /*
+    printf("section %08x-%08x-%08x prot %08x (%08x-%08x)\n",
+        mem_addr, mem_addr+file_size, mem_addr+mem_size, mem_prot, page_start, page_end);
+    */
+
+    /* Build the section image in a staging buffer in our own task. */
+    result = vm_allocate(mach_task_self(), &area_start, map_size, TRUE);
+    if (result) return (result);
+
+    if (file_size > 0)
+    {
+        vm_size_t resid;
+
+        result = read_file(st->fp, file_ofs, area_start + (mem_addr - page_start),
+                           file_size, &resid);
+        if (result == 0 && resid != 0)
+            result = EX_CORRUPT;
+        if (result) goto fail;
+    }
+
+    if (mem_size > file_size)
+    {
+        bzero((char *)area_start + (mem_addr + file_size - page_start),
+              mem_size - file_size);
+    }
+
+    /* Claim the exact pages in the user task and copy the image over. */
+    result = vm_allocate(st->user_task, &page_start, map_size, FALSE);
+    if (result) goto fail;
+    assert(page_start == trunc_page(mem_addr));
+
+    result = vm_write(st->user_task, page_start, area_start, map_size);
+    if (result) goto fail;
+
+    result = vm_deallocate(mach_task_self(), area_start, map_size);
+    if (result) return (result);
+
+    /*
+     * Protect the segment.
+     */
+    if (load_protect_text && (mem_prot != VM_PROT_ALL)) {
+        result = vm_protect(st->user_task, page_start, map_size,
+                            FALSE, mem_prot);
+        if (result) return (result);
+    }
+
+    return 0;
+
+fail:
+    /* Release the staging buffer; report the original error. */
+    (void) vm_deallocate(mach_task_self(), area_start, map_size);
+    return (result);
+}
+
+/*
+ * Stub: not implemented here.  Ignores `file' and always returns
+ * MACH_PORT_NULL.
+ */
+mach_port_t boot_script_read_file (const char *file)
+{ return MACH_PORT_NULL; } /* XXX */
+
+/*
+ * Load the program `file_name' into `user_task', build its argument
+ * block on the user stack and start a thread running it.
+ *
+ * user_task   task to load into
+ * file_name   program to run; names not starting with "/dev/" are
+ *             looked up on the root device (/dev/<root_name>/...)
+ * arg_count   number of entries in argv
+ * argv        pointers into argstrings, one per argument
+ * argstrings  the argument strings, laid out contiguously
+ * argslen     total length of argstrings in bytes
+ *
+ * Returns 0.  Failures to open, load or set up the program panic.
+ */
+int
+boot_script_exec_cmd (task_t user_task,
+                      char *file_name,
+                      int arg_count, char **argv,
+                      char *argstrings, int argslen)
+{
+    extern mach_port_t bootstrap_master_device_port, bootstrap_master_host_port;
+    extern char *root_name;
+    int arg_len = argslen;
+    char *arg_pos;
+
+    kern_return_t result;
+    thread_t user_thread;
+    struct file file;
+    char namebuf[MAXPATHLEN+1];
+
+    struct stuff st;
+    struct exec_info info;
+
+    extern char * strbuild();
+
+    /*
+     * Only names that do not already begin with "/dev/" get the
+     * root device prepended.  (A plain strcmp against "/dev/" would
+     * match nothing but the bare string "/dev/".)
+     */
+    if (strncmp (file_name, "/dev/", sizeof "/dev/" - 1) != 0)
+        (void) strbuild(namebuf, "/dev/", root_name, "/", file_name,
+                        (char *)0);
+    else
+        strcpy (namebuf, file_name);
+
+    /*
+     * Open the file
+     */
+    bzero((char *)&file, sizeof(file));
+
+    result = open_file(bootstrap_master_device_port, namebuf, &file);
+    if (result != 0) {
+        panic("openi %d", result);
+    }
+
+    /*
+     * Add space for:
+     *     arg_count
+     *     pointers to arguments
+     *     trailing 0 pointer
+     *     dummy 0 pointer to environment variables
+     *     and align to integer boundary
+     */
+    arg_len += sizeof(integer_t) + (2 + arg_count) * sizeof(char *);
+    arg_len = (arg_len + (sizeof(integer_t) - 1)) & ~(sizeof(integer_t)-1);
+
+    /*
+     * We refrain from checking IEXEC bits to make
+     * things a little easier when things went bad.
+     * Say you have ftp(1) but chmod(1) is gone.
+     */
+    if (!file_is_regular(&file))
+        panic("boot_load_program: %s is not a regular file", namebuf);
+
+    /*
+     * Load the executable file.
+     */
+    st.fp = &file;
+    st.user_task = user_task;
+    st.aout_symtab_size = 0;
+    st.aout_strtab_size = 0;
+    result = exec_load(prog_read, prog_read_exec, &st, &info);
+    if (result)
+        panic("(bootstrap) exec_load %s: error %d", namebuf, result);
+#if 0
+    printf("(bootstrap): loaded %s; entrypoint %08x\n", namebuf, info.entry);
+#endif
+
+    /*
+     * Set up the stack and user registers.
+     */
+    result = thread_create (user_task, &user_thread);
+    if (result)
+        panic ("can't create user thread for %s: %x", namebuf, result);
+    arg_pos = set_regs(user_task, user_thread, &info, arg_len);
+
+    /*
+     * Read symbols from the executable file.
+     */
+#if 0
+    printf("(bootstrap): loading symbols from %s\n", namebuf);
+    read_symtab_from_file(&file, bootstrap_master_host_port, user_task, namebuf, &st);
+#endif
+
+    /*
+     * Copy out the arguments.
+     */
+    {
+        vm_offset_t u_arg_start;
+                    /* user start of argument list block */
+        vm_offset_t k_arg_start;
+                    /* kernel start of argument list block */
+        vm_offset_t u_arg_page_start;
+                    /* user start of args, page-aligned */
+        vm_size_t arg_page_size;
+                    /* page_aligned size of args */
+        vm_offset_t k_arg_page_start;
+                    /* kernel start of args, page-aligned */
+
+        register
+        char ** k_ap;   /* kernel arglist address */
+        char * u_cp;    /* user argument string address */
+        register
+        char * k_cp;    /* kernel argument string address */
+        register
+        int i;
+
+        /*
+         * Get address of argument list in user space
+         */
+        u_arg_start = (vm_offset_t)arg_pos;
+
+        /*
+         * Round to page boundaries, and allocate kernel copy
+         */
+        u_arg_page_start = trunc_page(u_arg_start);
+        arg_page_size = (vm_size_t)(round_page(u_arg_start + arg_len)
+                                    - u_arg_page_start);
+
+        result = vm_allocate(mach_task_self(),
+                             &k_arg_page_start,
+                             (vm_size_t)arg_page_size,
+                             TRUE);
+        if (result)
+            panic("boot_load_program: arg size");
+
+        /*
+         * Set up addresses corresponding to user pointers
+         * in the kernel block
+         */
+        k_arg_start = k_arg_page_start + (u_arg_start - u_arg_page_start);
+
+        k_ap = (char **)k_arg_start;
+
+        /*
+         * Start the strings after the arg-count and pointers
+         */
+        u_cp = (char *)u_arg_start + arg_count * sizeof(char *)
+                                   + 2 * sizeof(char *)
+                                   + sizeof(integer_t);
+        k_cp = (char *)k_arg_start + arg_count * sizeof(char *)
+                                   + 2 * sizeof(char *)
+                                   + sizeof(integer_t);
+
+        /*
+         * first the argument count
+         */
+        *k_ap++ = (char *)arg_count;
+
+        /*
+         * Then the strings and string pointers for each argument.
+         * Each pointer is argv[i]'s offset within argstrings,
+         * rebased onto the user address of the string area.
+         */
+        for (i = 0; i < arg_count; i++)
+            *k_ap++ = argv[i] - argstrings + u_cp;
+        bcopy (argstrings, k_cp, argslen);
+
+        /*
+         * last, the trailing 0 argument and a null environment pointer.
+         */
+        *k_ap++ = (char *)0;
+        *k_ap = (char *)0;
+
+        /*
+         * Now write all of this to user space.
+         */
+        (void) vm_write(user_task,
+                        u_arg_page_start,
+                        k_arg_page_start,
+                        arg_page_size);
+
+        (void) vm_deallocate(mach_task_self(),
+                             k_arg_page_start,
+                             arg_page_size);
+    }
+
+    /*
+     * Close the file.
+     */
+    close_file(&file);
+
+    /* Resume the thread. */
+    thread_resume (user_thread);
+    mach_port_deallocate (mach_task_self (), user_thread);
+
+    return (0);
+}
+
+/*
+ * Load symbols from file into kernel debugger.
+ *
+ * Currently a no-op: the whole body is compiled out with #if 0, so
+ * the parameters and locals are unused.  The disabled code reads the
+ * a.out symbol and string tables recorded in *st into one buffer,
+ * prefixed by the symbol table length, and passes it to
+ * host_load_symbol_table().
+ */
+static void read_symtab_from_file(
+ struct file *fp,
+ mach_port_t host_port,
+ task_t task,
+ char * symtab_name,
+ struct stuff *st)
+{
+ vm_size_t resid;
+ kern_return_t result;
+ vm_size_t table_size;
+ vm_offset_t symtab;
+
+#if 0
+
+ if (!st->aout_symtab_size || !st->aout_strtab_size)
+ return;
+
+ /*
+ * Allocate space for the symbol table.
+ */
+ table_size = sizeof(vm_size_t)
+ + st->aout_symtab_size
+ + st->aout_strtab_size;
+ result= vm_allocate(mach_task_self(),
+ &symtab,
+ table_size,
+ TRUE);
+ if (result) {
+ printf("[ error %d allocating space for %s symbol table ]\n",
+ result, symtab_name);
+ return;
+ }
+
+ /*
+ * Set the symbol table length word,
+ * then read in the symbol table and string table.
+ * (A single read starting at the symbol table offset covers both.)
+ */
+ *(vm_size_t*)symtab = st->aout_symtab_size;
+ result = read_file(fp, st->aout_symtab_ofs,
+ symtab + sizeof(vm_size_t),
+ st->aout_symtab_size + st->aout_strtab_size,
+ &resid);
+ if (result || resid) {
+ printf("[ no valid symbol table present for %s ]\n",
+ symtab_name);
+ }
+ else {
+ /*
+ * Load the symbols into the kernel.
+ */
+ result = host_load_symbol_table(
+ host_port,
+ task,
+ symtab_name,
+ symtab,
+ table_size);
+ }
+ (void) vm_deallocate(mach_task_self(), symtab, table_size);
+#endif
+}
diff --git a/serverboot/minix_ffs_compat.c b/serverboot/minix_ffs_compat.c
new file mode 100644
index 00000000..7d493520
--- /dev/null
+++ b/serverboot/minix_ffs_compat.c
@@ -0,0 +1,62 @@
+/*
+ * BSD FFS like functions used to ease porting bootstrap to MINIX fs
+ * Copyright (C) 1994 Csizmazia Balazs, University ELTE, Hungary
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <device/device_types.h>
+#include <device/device.h>
+
+#include <mach/mach_traps.h>
+#include <mach/mach_interface.h>
+
+#include <file_io.h>
+
+#define MINIX_BLOCK_SIZE 1024
+
+/*
+ * Return the number of the file system block that holds inode `ino'.
+ * The inode area follows the boot block, the superblock, the inode
+ * bitmap and the zone bitmap; inode numbers start at 1.
+ */
+int minix_ino2blk (struct minix_super_block *fs, int ino)
+{
+    /* 2 = boot block + superblock */
+    return 2 + fs->s_imap_blocks +
+        fs->s_zmap_blocks + (ino - 1) / MINIX_INODES_PER_BLOCK;
+}
+
+/*
+ * Convert file system block number `b' to a device block number
+ * (units of DEV_BSIZE).
+ */
+int minix_fsbtodb (struct minix_super_block *fs, int b)
+{
+    int byte_offset = b * MINIX_BLOCK_SIZE;
+
+    return byte_offset / DEV_BSIZE;
+}
+
+/*
+ * Return the index of inode `ino' within its inode block.
+ */
+int minix_itoo (struct minix_super_block *fs, int ino)
+{
+    int zero_based = ino - 1;
+
+    return zero_based % MINIX_INODES_PER_BLOCK;
+}
+
+/*
+ * Return the offset of file position `offset' within its block.
+ */
+int minix_blkoff (struct minix_super_block * fs, vm_offset_t offset)
+{
+    vm_offset_t within_block = offset % MINIX_BLOCK_SIZE;
+
+    return within_block;
+}
+
+/*
+ * Return the logical (file-relative) block number containing file
+ * position `offset'.
+ */
+int minix_lblkno (struct minix_super_block * fs, vm_offset_t offset)
+{
+    vm_offset_t block_index = offset / MINIX_BLOCK_SIZE;
+
+    return block_index;
+}
+
+/*
+ * Return the size of block `file_block' of file `fp'.  Every block
+ * has the same fixed size here, so the arguments are not consulted.
+ */
+int minix_blksize (struct minix_super_block *fs, struct file *fp, minix_daddr_t file_block)
+{
+    int size = MINIX_BLOCK_SIZE;
+
+    return size;
+}
diff --git a/serverboot/minix_ffs_compat.h b/serverboot/minix_ffs_compat.h
new file mode 100644
index 00000000..cc038032
--- /dev/null
+++ b/serverboot/minix_ffs_compat.h
@@ -0,0 +1,43 @@
+/*
+ * BSD FFS like declarations used to ease porting bootstrap to MINIX fs
+ * Copyright (C) 1994 Csizmazia Balazs, University ELTE, Hungary
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define MINIX_SBSIZE MINIX_BLOCK_SIZE /* Size of superblock */
+#define MINIX_SBLOCK ((minix_daddr_t) 2) /* Location of superblock */
+
+#define MINIX_NDADDR 7
+#define MINIX_NIADDR 2
+
+#define MINIX_MAXNAMLEN 14
+
+#define MINIX_ROOTINO 1 /* MINIX ROOT INODE */
+
+#define MINIX_NINDIR(fs) 512 /* DISK_ADDRESSES_PER_BLOCKS */
+
+#define IFMT 00170000
+#define IFREG 0100000
+#define IFDIR 0040000
+#define ISVTX 0001000
+
+#define f_fs u.minix.minix_fs
+#define i_ic u.minix.minix_ic
+#define f_nindir u.minix.minix_nindir
+#define f_blk u.minix.minix_blk
+#define f_blksize u.minix.minix_blksize
+#define f_blkno u.minix.minix_blkno
+
diff --git a/serverboot/minix_file_io.c b/serverboot/minix_file_io.c
new file mode 100644
index 00000000..0a18092b
--- /dev/null
+++ b/serverboot/minix_file_io.c
@@ -0,0 +1,966 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * Stand-alone file reading package.
+ */
+
+#include <device/device_types.h>
+#include <device/device.h>
+
+#include <mach/mach_traps.h>
+#include <mach/mach_interface.h>
+
+#include "file_io.h"
+#include "minix_ffs_compat.h"
+#include "minix_fs.h"
+
+void minix_close_file(); /* forward */
+
+#define MINIX_NAME_LEN 14
+#define MINIX_BLOCK_SIZE 1024
+
+/*
+ * Release the cached indirect blocks and the cached data block of
+ * `fp' and mark both caches empty.  The file itself stays open.
+ */
+static void
+free_file_buffers(fp)
+    register struct file *fp;
+{
+    int lvl = 0;
+
+    /* one cached indirect block per indirection level */
+    while (lvl < MINIX_NIADDR) {
+        if (fp->f_blk[lvl] != 0) {
+            (void) vm_deallocate(mach_task_self(),
+                                 fp->f_blk[lvl],
+                                 fp->f_blksize[lvl]);
+            fp->f_blk[lvl] = 0;
+        }
+        fp->f_blkno[lvl] = -1;    /* no block cached at this level */
+        lvl++;
+    }
+
+    /* the single cached data block */
+    if (fp->f_buf != 0) {
+        (void) vm_deallocate(mach_task_self(),
+                             fp->f_buf,
+                             fp->f_buf_size);
+        fp->f_buf = 0;
+    }
+    fp->f_buf_blkno = -1;
+}
+
+/*
+ * Load inode `inumber' from disk into `fp': copies the on-disk inode
+ * into fp->i_ic, sets fp->f_size, and drops any buffers cached for
+ * the previously loaded inode.
+ * Returns 0, or the device_read() error.
+ */
+static int
+read_inode(inumber, fp)
+    ino_t inumber;
+    register struct file *fp;
+{
+    struct minix_super_block *sb = fp->f_fs;
+    minix_daddr_t inode_blk = minix_ino2blk(sb, inumber);
+    vm_offset_t raw;
+    mach_msg_type_number_t raw_size;
+    struct minix_inode *slot;
+    kern_return_t kr;
+
+    /* fetch the block holding the inode */
+    kr = device_read(fp->f_dev,
+                     0,
+                     (recnum_t) minix_fsbtodb(fp->f_fs, inode_blk),
+                     (int) MINIX_BLOCK_SIZE,
+                     (char **)&raw,
+                     &raw_size);
+    if (kr != KERN_SUCCESS)
+        return (kr);
+
+    /* pick our inode out of the block and copy it */
+    slot = (struct minix_inode *)raw;
+    slot += minix_itoo(sb, inumber);
+    fp->i_ic = *slot;
+    fp->f_size = slot->i_size;
+
+    (void) vm_deallocate(mach_task_self(), raw, raw_size);
+
+    /* buffers cached for the old inode are stale now */
+    free_file_buffers(fp);
+
+    return (0);
+}
+
+/*
+ * Given a block number within a file, find the disk block number that
+ * holds that block.
+ *
+ * fp            file whose inode (fp->i_ic) is consulted
+ * file_block    block number relative to the start of the file
+ * disk_block_p  out: the disk block number; 0 if an indirect block on
+ *               the way (or the final entry) is 0
+ *
+ * Returns 0 on success, FS_NOT_IN_FILE if file_block is beyond what
+ * the indirect levels can address, or a device_read() error.
+ * Indirect blocks read on the way are cached in fp->f_blk[], one per
+ * level, under fp->f_lock.
+ */
+static int
+block_map(fp, file_block, disk_block_p)
+ struct file *fp;
+ minix_daddr_t file_block;
+ minix_daddr_t *disk_block_p; /* out */
+{
+ int level;
+ int idx;
+ minix_daddr_t ind_block_num;
+ kern_return_t rc;
+
+ /* cached indirect blocks displaced during this call, freed at the end */
+ vm_offset_t olddata[MINIX_NIADDR+1];
+ vm_size_t oldsize[MINIX_NIADDR+1];
+
+ /*
+ * Index structure of an inode, as used below:
+ *
+ * i_zone[0..MINIX_NDADDR-1] hold block numbers for blocks
+ * 0..MINIX_NDADDR-1
+ *
+ * i_zone[MINIX_NDADDR] is the single indirect block;
+ * holds block numbers for the next
+ * f_nindir[0] blocks
+ *
+ * i_zone[MINIX_NDADDR+1] is the double indirect block;
+ * holds block numbers for INDEX blocks
+ * covering the next f_nindir[1] blocks
+ *
+ * There are MINIX_NIADDR indirect levels in total.
+ */
+
+ mutex_lock(&fp->f_lock);
+
+ if (file_block < MINIX_NDADDR) {
+ /* Direct block. */
+ *disk_block_p = fp->i_ic.i_zone[file_block];
+ mutex_unlock(&fp->f_lock);
+ return (0);
+ }
+
+ file_block -= MINIX_NDADDR;
+
+ /*
+ * f_nindir[level] is the number of blocks addressable through
+ * indirection level `level' (set up by mount_fs).  Find the
+ * level that covers file_block, making file_block relative to
+ * the start of that level as we go.
+ */
+ for (level = 0; level < MINIX_NIADDR; level++) {
+ if (file_block < fp->f_nindir[level])
+ break;
+ file_block -= fp->f_nindir[level];
+ }
+ if (level == MINIX_NIADDR) {
+ /* Block number too high */
+ mutex_unlock(&fp->f_lock);
+ return (FS_NOT_IN_FILE);
+ }
+
+ ind_block_num = fp->i_ic.i_zone[level + MINIX_NDADDR];
+
+ /*
+ * Initialize array of blocks to free.
+ */
+ for (idx = 0; idx < MINIX_NIADDR; idx++)
+ oldsize[idx] = 0;
+
+ /*
+ * Walk down from the chosen level to level 0.  f_lock is held
+ * at the top of every iteration.
+ */
+ for (; level >= 0; level--) {
+
+ vm_offset_t data;
+ mach_msg_type_number_t size;
+
+ /* a zero entry ends the walk; 0 is what gets returned */
+ if (ind_block_num == 0)
+ break;
+
+ if (fp->f_blkno[level] == ind_block_num) {
+ /*
+ * Cache hit. Just pick up the data.
+ */
+
+ data = fp->f_blk[level];
+ }
+ else {
+ /*
+ * Drop our lock while doing the read.
+ * (The f_dev and f_fs fields don`t change.)
+ */
+ mutex_unlock(&fp->f_lock);
+
+ rc = device_read(fp->f_dev,
+ 0,
+ (recnum_t) minix_fsbtodb(fp->f_fs, ind_block_num),
+ MINIX_BLOCK_SIZE,
+ (char **)&data,
+ &size);
+ /*
+ * NOTE(review): this return skips the cleanup loop at
+ * the end, so blocks already recorded in olddata[] by
+ * earlier iterations are not deallocated -- confirm.
+ */
+ if (rc != KERN_SUCCESS)
+ return (rc);
+
+ /*
+ * Cache the block just read.  The previously cached
+ * block for this level is only remembered here; it is
+ * deallocated after the lock has been released, so
+ * that nothing which might block for memory happens
+ * while f_lock is held.
+ */
+
+ mutex_lock(&fp->f_lock);
+
+ olddata[level] = fp->f_blk[level];
+ oldsize[level] = fp->f_blksize[level];
+
+ fp->f_blkno[level] = ind_block_num;
+ fp->f_blk[level] = data;
+ fp->f_blksize[level] = size;
+
+ /*
+ * The lock stays held for the rest of the iteration.
+ */
+
+ }
+
+ /*
+ * Index into this level's block: above level 0 each entry
+ * covers f_nindir[level-1] file blocks.
+ */
+ if (level > 0) {
+ idx = file_block / fp->f_nindir[level-1];
+ file_block %= fp->f_nindir[level-1];
+ }
+ else
+ idx = file_block;
+
+ ind_block_num = ((minix_daddr_t *)data)[idx];
+ }
+
+ mutex_unlock(&fp->f_lock);
+
+ /*
+ * After unlocking the file, free any blocks that
+ * we need to free.
+ */
+ for (idx = 0; idx < MINIX_NIADDR; idx++)
+ if (oldsize[idx] != 0)
+ (void) vm_deallocate(mach_task_self(),
+ olddata[idx],
+ oldsize[idx]);
+
+ *disk_block_p = ind_block_num;
+ return (0);
+}
+
+/*
+ * Read a portion of a file into an internal buffer.  Return
+ * the location in the buffer and the amount in the buffer.
+ *
+ * fp      file to read; its one-block data cache (f_buf) is reused
+ *         when it already holds the wanted block
+ * offset  byte offset within the file
+ * buf_p   out: address of the byte at `offset' inside the cache
+ * size_p  out: number of valid bytes from *buf_p to the end of the
+ *         block or the end of the file, whichever comes first
+ *
+ * Returns 0, FS_NOT_IN_FILE when offset is at or past end of file,
+ * or the error from block_map/vm_allocate/device_read.  After a
+ * failed refill the cache is left empty.
+ */
+static int
+buf_read_file(fp, offset, buf_p, size_p)
+    register struct file *fp;
+    vm_offset_t offset;
+    vm_offset_t *buf_p;     /* out */
+    vm_size_t *size_p;      /* out */
+{
+    register
+    struct minix_super_block *fs;
+    vm_offset_t off;
+    register minix_daddr_t file_block;
+    minix_daddr_t disk_block;
+    int rc;
+    vm_offset_t block_size;
+
+    if (offset >= fp->i_ic.i_size)
+        return (FS_NOT_IN_FILE);
+
+    fs = fp->f_fs;
+
+    off = minix_blkoff(fs, offset);
+    file_block = minix_lblkno(fs, offset);
+    block_size = minix_blksize(fs, fp, file_block);
+
+    if (((daddr_t) file_block) != fp->f_buf_blkno) {
+        rc = block_map(fp, file_block, &disk_block);
+        if (rc != 0)
+            return (rc);
+
+        /*
+         * Drop the old block and mark the cache empty right away,
+         * so a failure below cannot leave f_buf/f_buf_blkno
+         * describing memory that has been deallocated.
+         */
+        if (fp->f_buf) {
+            (void)vm_deallocate(mach_task_self(),
+                                fp->f_buf,
+                                fp->f_buf_size);
+            fp->f_buf = 0;
+        }
+        fp->f_buf_blkno = -1;
+
+        if (disk_block == 0) {
+            /* hole in the file: hand out a freshly allocated block */
+            rc = vm_allocate(mach_task_self(),
+                             &fp->f_buf,
+                             block_size,
+                             TRUE);
+            if (rc == KERN_SUCCESS)
+                fp->f_buf_size = block_size;
+        }
+        else {
+            rc = device_read(fp->f_dev,
+                             0,
+                             (recnum_t) minix_fsbtodb(fs, disk_block),
+                             (int) block_size,
+                             (char **) &fp->f_buf,
+                             (mach_msg_type_number_t *)&fp->f_buf_size);
+        }
+        if (rc) {
+            fp->f_buf = 0;
+            return (rc);
+        }
+
+        fp->f_buf_blkno = (daddr_t) file_block;
+    }
+
+    /*
+     * Return address of byte in buffer corresponding to
+     * offset, and size of remainder of buffer after that
+     * byte.
+     */
+    *buf_p = fp->f_buf + off;
+    *size_p = block_size - off;
+
+    /*
+     * But truncate buffer at end of file.
+     */
+    if (*size_p > fp->i_ic.i_size - offset)
+        *size_p = fp->i_ic.i_size - offset;
+
+    return (0);
+}
+
+/*
+ * Look `name' up in the directory open on `fp'.
+ *
+ * On a match stores the entry's inode number in *inumber_p and
+ * returns 0.  Returns FS_NO_ENTRY if no entry matches, or the error
+ * from buf_read_file().  Entries with inode 0 are unused slots.
+ */
+static int
+search_directory(name, fp, inumber_p)
+    char * name;
+    register struct file *fp;
+    ino_t *inumber_p;       /* out */
+{
+    vm_offset_t chunk;
+    vm_size_t chunk_size;
+    vm_offset_t pos;
+    struct minix_directory_entry *ent;
+    kern_return_t kr;
+    char entry_name[15];    /* on-disk name plus a terminator */
+    int wanted_len = strlen(name);
+
+    /* entries are 16 bytes each (MINIX dir. entry length - MINIX FS Ver. 1.) */
+    for (pos = 0; pos < fp->i_ic.i_size; pos += 16) {
+        kr = buf_read_file(fp, pos, &chunk, &chunk_size);
+        if (kr != KERN_SUCCESS)
+            return (kr);
+
+        ent = (struct minix_directory_entry *)chunk;
+        if (ent->inode == 0)
+            continue;
+
+        /* the on-disk name is not terminated when it fills the field */
+        strncpy (entry_name, ent->name, MINIX_NAME_LEN /* XXX it's 14 */);
+        entry_name[MINIX_NAME_LEN] = '\0';
+
+        if (strlen(entry_name) != wanted_len ||
+            strcmp(name, entry_name))
+            continue;
+
+        /* found entry */
+        *inumber_p = ent->inode;
+        return (0);
+    }
+    return (FS_NO_ENTRY);
+}
+
+/*
+ * Read the superblock from device `dev' and check its magic number.
+ * On success stores a pointer to the freshly read superblock in *fsp
+ * and returns 0; the buffer then belongs to the caller.
+ * Returns the device_read() error, or FS_INVALID_FS when the magic
+ * number does not match (the buffer is released in that case).
+ */
+static int
+read_fs(dev, fsp)
+    mach_port_t dev;
+    struct minix_super_block **fsp;
+{
+    vm_offset_t raw;
+    mach_msg_type_number_t raw_size;
+    struct minix_super_block *sb;
+    int err;
+
+    err = device_read(dev, 0, (recnum_t) MINIX_SBLOCK, MINIX_SBSIZE,
+                      (char **) &raw, &raw_size);
+    if (err)
+        return (err);
+
+    sb = (struct minix_super_block *)raw;
+    if (sb->s_magic == MINIX_SUPER_MAGIC) {
+        *fsp = sb;
+        return 0;
+    }
+
+    /* not a file system we understand */
+    (void) vm_deallocate(mach_task_self(), raw, raw_size);
+    return (FS_INVALID_FS);
+}
+
+/*
+ * Attach the file system on fp->f_dev to `fp': reads the superblock
+ * into fp->f_fs and fills fp->f_nindir[] with the number of blocks
+ * addressable through each indirection level
+ * (NINDIR, NINDIR**2, ...).
+ * Returns 0, or the error from read_fs().
+ */
+static int
+mount_fs(fp)
+    register struct file *fp;
+{
+    int err;
+    int lvl;
+    int span = 1;
+
+    err = read_fs(fp->f_dev, &fp->f_fs);
+    if (err)
+        return (err);
+
+    /* each level multiplies the reach by the entries per index block */
+    for (lvl = 0; lvl < MINIX_NIADDR; lvl++) {
+        span *= MINIX_NINDIR(fp->f_fs);
+        fp->f_nindir[lvl] = span;
+    }
+
+    return (0);
+}
+
+/*
+ * Release the superblock buffer attached to `fp', if the file is a
+ * structured one, and clear fp->f_fs.
+ */
+static void
+unmount_fs(fp)
+    register struct file *fp;
+{
+    if (!file_is_structured(fp))
+        return;
+
+    (void) vm_deallocate(mach_task_self(),
+                         (vm_offset_t) fp->f_fs,
+                         MINIX_SBSIZE);
+    fp->f_fs = 0;
+}
+
+/*
+ * Open a file.
+ *
+ * master_device_port  port used to open the device
+ * path                "/dev/<device>" for the raw device, or
+ *                     "/dev/<device>/<path on that file system>"
+ * fp                  file structure to fill in; it is zeroed first
+ *
+ * Returns 0 on success.  Errors: FS_NO_ENTRY (empty path, no device
+ * prefix, or component not found), FS_NAME_TOO_LONG (path longer
+ * than MAXPATHLEN, or a component longer than MINIX_MAXNAMLEN),
+ * FS_INVALID_PARAMETER (component with the high bit set),
+ * FS_NOT_DIRECTORY, or an error from the device or file system reads.
+ *
+ * NOTE(review): the device opened here is not closed on the error
+ * returns -- confirm whether callers take care of that.
+ */
+int
+minix_open_file(master_device_port, path, fp)
+    mach_port_t master_device_port;
+    char * path;
+    struct file *fp;
+{
+#define RETURN(code) { rc = (code); goto exit; }
+
+    register char *cp, *component;
+    register int c;         /* char */
+    register int rc;
+    ino_t inumber;
+
+    char namebuf[MAXPATHLEN+1];
+
+    if (path == 0 || *path == '\0') {
+        return FS_NO_ENTRY;
+    }
+
+    /*
+     * namebuf holds MAXPATHLEN characters plus the terminator;
+     * refuse anything that would overflow it.
+     */
+    if (strlen(path) > MAXPATHLEN) {
+        return FS_NAME_TOO_LONG;
+    }
+
+    /*
+     * Copy name into buffer to allow modifying it.
+     */
+    strcpy(namebuf, path);
+
+    /*
+     * Look for '/dev/xxx' at start of path, for
+     * root device.
+     */
+    if (!strprefix(namebuf, "/dev/")) {
+        printf("no device name\n");
+        return FS_NO_ENTRY;
+    }
+
+    cp = namebuf + 5;       /* device */
+    component = cp;
+    while ((c = *cp) != '\0' && c != '/') {
+        cp++;
+    }
+    *cp = '\0';
+
+    bzero (fp, sizeof (struct file));
+
+    /*
+     * The path lookup below goes through block_map(), which takes
+     * f_lock, so the lock must be set up before anything else.
+     */
+    mutex_init(&fp->f_lock);
+
+    rc = device_open(master_device_port,
+                     D_READ|D_WRITE,
+                     component,
+                     &fp->f_dev);
+    if (rc)
+        return rc;
+
+    if (c == 0) {
+        /* just the device: no file system is attached */
+        fp->f_fs = 0;
+        return 0;
+    }
+
+    *cp = c;
+
+    rc = mount_fs(fp);
+    if (rc)
+        return rc;
+
+    inumber = (ino_t) MINIX_ROOTINO;
+    if ((rc = read_inode(inumber, fp)) != 0) {
+        printf("can't read root inode\n");
+        goto exit;
+    }
+
+    while (*cp) {
+
+        /*
+         * Check that current node is a directory.
+         */
+        if ((fp->i_ic.i_mode & IFMT) != IFDIR)
+            RETURN (FS_NOT_DIRECTORY);
+
+        /*
+         * Remove extra separators
+         */
+        while (*cp == '/')
+            cp++;
+
+        /*
+         * Get next component of path name.  A component may be
+         * at most MINIX_MAXNAMLEN characters long.
+         */
+        component = cp;
+        {
+            register int len = 0;
+
+            while ((c = *cp) != '\0' && c != '/') {
+                if (len++ >= MINIX_MAXNAMLEN)
+                    RETURN (FS_NAME_TOO_LONG);
+                if (c & 0200)
+                    RETURN (FS_INVALID_PARAMETER);
+                cp++;
+            }
+            *cp = 0;
+        }
+
+        /*
+         * Look up component in current directory.
+         */
+        rc = search_directory(component, fp, &inumber);
+        if (rc) {
+            printf("%s: not found\n", path);
+            goto exit;
+        }
+        *cp = c;
+
+        /*
+         * Open next component.
+         */
+        if ((rc = read_inode(inumber, fp)) != 0)
+            goto exit;
+
+        /*
+         * Symbolic links are not followed here.
+         */
+    }
+
+    /*
+     * Found terminal component.
+     */
+    return 0;
+
+    /*
+     * At error exit, close file to free storage.
+     */
+    exit:
+    minix_close_file(fp);
+    return rc;
+}
+
+/*
+ * Close file: give back the super-block buffer first, then the
+ * inode and data buffers held by the file.
+ */
+void
+minix_close_file(fp)
+	register struct file	*fp;
+{
+	unmount_fs(fp);
+	free_file_buffers(fp);
+}
+
+/* Return 1 when the inode's file-type bits say "directory", else 0. */
+int
+minix_file_is_directory(struct file *fp)
+{
+	if ((fp->i_ic.i_mode & IFMT) != IFDIR)
+		return 0;
+	return 1;
+}
+
+/* Return 1 when the inode's file-type bits say "regular file", else 0. */
+int
+minix_file_is_regular(struct file *fp)
+{
+	if ((fp->i_ic.i_mode & IFMT) != IFREG)
+		return 0;
+	return 1;
+}
+
+/*
+ * Copy `size' bytes of the file, starting at byte `offset', to the
+ * memory at `start'.  The copy proceeds one buffered chunk at a time
+ * so that block boundaries are crossed transparently.
+ *
+ * Returns 0, or the error from buf_read_file.  When `resid' is not
+ * null and no error occurred it receives the number of bytes that
+ * could not be copied (non-zero when buf_read_file hands back an
+ * empty chunk before `size' is exhausted).
+ */
+int
+minix_read_file(fp, offset, start, size, resid)
+	register struct file	*fp;
+	vm_offset_t		offset;
+	vm_offset_t		start;
+	vm_size_t		size;
+	vm_size_t		*resid;	/* out */
+{
+	vm_offset_t		chunk;
+	vm_size_t		chunk_size;
+	register vm_size_t	n;
+	int			rc;
+
+	for (; size != 0; offset += n, start += n, size -= n) {
+		rc = buf_read_file(fp, offset, &chunk, &chunk_size);
+		if (rc)
+			return (rc);
+
+		/* Never copy more than was asked for or is buffered. */
+		n = (size > chunk_size) ? chunk_size : size;
+		if (n == 0)
+			break;
+
+		bcopy((char *)chunk, (char *)start, n);
+	}
+
+	if (resid)
+		*resid = size;
+
+	return (0);
+}
+
+/*
+ * Simple utility: base-2 logarithm, only meaningful for n == 2^k.
+ * It counts the trailing zero bits of n.  Zero has no set bit, so it
+ * is answered with 0 instead of shifting forever.
+ */
+static int
+log2(n)
+	register unsigned int n;
+{
+	register int i = 0;
+
+	if (n == 0)
+		return 0;
+
+	while ((n & 1) == 0) {
+		i++;
+		n >>= 1;
+	}
+	return i;
+}
+
+/*
+ * Make an empty file_direct for a device.
+ *
+ * For a structured device the super-block is read first (and freed
+ * again before returning); if that read fails its error is returned
+ * and *fdp is left untouched.  For an unstructured device the block
+ * size is the VM page size, and fd_fsbtodb / fd_size are left at 0 to
+ * be filled in later.
+ */
+int
+minix_open_file_direct(dev, fdp, is_structured)
+	mach_port_t			dev;
+	register struct file_direct	*fdp;
+	boolean_t			is_structured;
+{
+	struct minix_super_block	*sb;
+	int				rc;
+
+	if (is_structured) {
+		rc = read_fs(dev, &sb);
+		if (rc)
+			return rc;
+
+		fdp->fd_dev = dev;
+		fdp->fd_blocks = (daddr_t *) 0;
+		fdp->fd_size = 0;
+		fdp->fd_bsize = MINIX_BLOCK_SIZE;
+		fdp->fd_bshift = log2(fdp->fd_bsize);
+		fdp->fd_fsbtodb = log2(fdp->fd_bsize / DEV_BSIZE);
+
+		(void) vm_deallocate(mach_task_self(),
+				     (vm_offset_t) sb,
+				     MINIX_SBSIZE);
+		return 0;
+	}
+
+	fdp->fd_dev = dev;
+	fdp->fd_blocks = (daddr_t *) 0;
+	fdp->fd_bsize = vm_page_size;
+	fdp->fd_bshift = log2(vm_page_size);
+	fdp->fd_fsbtodb = 0;	/* later */
+	fdp->fd_size = 0;	/* later */
+	return 0;
+}
+
+/*
+ * Add blocks from a file to a file_direct.
+ *
+ * fp must be open on the same device as fdp, otherwise FS_INVALID_FS
+ * is returned.  For an unstructured file the device itself is sized:
+ * fd_size and fd_fsbtodb are derived from device_get_status and no
+ * block array is built.  For a structured file the disk address of
+ * every whole block of the file is appended to fdp->fd_blocks, which
+ * is reallocated to hold the old and the new entries.
+ *
+ * Returns 0 on success, or the error from device_get_status,
+ * vm_allocate or block_map.
+ */
+int
+minix_add_file_direct(fdp, fp)
+ register struct file_direct *fdp;
+ register struct file *fp;
+{
+ register struct minix_super_block *fs;
+ long num_blocks, i;
+ vm_offset_t buffer;
+ vm_size_t size;
+ int rc;
+
+ /* the file must be on the same device */
+
+ if (fdp->fd_dev != fp->f_dev)
+ return FS_INVALID_FS;
+
+ if (!file_is_structured(fp)) {
+ int result[DEV_GET_SIZE_COUNT];
+ natural_t count;
+
+ count = DEV_GET_SIZE_COUNT;
+ rc = device_get_status( fdp->fd_dev, DEV_GET_SIZE,
+ result, &count);
+ if (rc)
+ return rc;
+ /* device size expressed in fd_bsize-sized blocks */
+ fdp->fd_size = result[DEV_GET_SIZE_DEVICE_SIZE] >> fdp->fd_bshift;
+ /* shift that converts a block number into a device record number */
+ fdp->fd_fsbtodb = log2(fdp->fd_bsize/result[DEV_GET_SIZE_RECORD_SIZE]);
+ return 0;
+ }
+
+ /* it must hold a file system */
+
+ fs = fp->f_fs;
+/*
+ if (fdp->fd_bsize != fs->fs_bsize ||
+ fdp->fd_fsbtodb != fs->fs_fsbtodb)
+*/
+ if (fdp->fd_bsize != MINIX_BLOCK_SIZE)
+ return FS_INVALID_FS;
+
+ /* calculate number of blocks in the file, ignoring fragments */
+
+ num_blocks = minix_lblkno(fs, fp->i_ic.i_size);
+
+ /* allocate memory for a bigger array */
+
+ /* NOTE(review): size is 0 when both counts are 0 -- confirm vm_allocate accepts that */
+ size = (num_blocks + fdp->fd_size) * sizeof(minix_daddr_t);
+ rc = vm_allocate(mach_task_self(), &buffer, size, TRUE);
+ if (rc != KERN_SUCCESS)
+ return rc;
+
+ /* lookup new block addresses */
+
+ /* new entries go after the fd_size slots reserved for the old ones */
+ for (i = 0; i < num_blocks; i++) {
+ minix_daddr_t disk_block;
+
+ rc = block_map(fp, (minix_daddr_t) i, &disk_block);
+ if (rc != 0) {
+ /* fdp is still unchanged here; only the new array is dropped */
+ (void) vm_deallocate(mach_task_self(), buffer, size);
+ return rc;
+ }
+
+ ((minix_daddr_t *) buffer)[fdp->fd_size + i] = disk_block;
+ }
+
+ /* copy old addresses and install the new array */
+
+ if (fdp->fd_blocks != 0) {
+ bcopy((char *) fdp->fd_blocks, (char *) buffer,
+ fdp->fd_size * sizeof(minix_daddr_t));
+
+ (void) vm_deallocate(mach_task_self(),
+ (vm_offset_t) fdp->fd_blocks,
+ (vm_size_t) (fdp->fd_size * sizeof(minix_daddr_t)));
+ }
+ /* the array holds minix_daddr_t entries although the field is declared daddr_t * */
+ fdp->fd_blocks = (daddr_t *) buffer;
+ fdp->fd_size += num_blocks;
+
+ /* deallocate cached blocks */
+
+ free_file_buffers(fp);
+
+ return 0;
+}
+
+/*
+ * Release the block-address array of a file_direct, if it has one,
+ * and clear the pointer.  Always returns 0; the function is declared
+ * int, so it must not fall off the end without a value.
+ */
+int
+minix_remove_file_direct(fdp)
+	struct file_direct	*fdp;
+{
+	if (fdp->fd_blocks)
+		(void) vm_deallocate(mach_task_self(),
+				     (vm_offset_t) fdp->fd_blocks,
+				     (vm_size_t) (fdp->fd_size * sizeof(minix_daddr_t)));
+	fdp->fd_blocks = 0; /* sanity */
+	/* xxx should lose a ref to fdp->fd_dev here (and elsewhere) xxx */
+	return 0;
+}
+
+/*
+ * Special read and write routines for default pager.
+ * Assume that all offsets and sizes are multiples
+ * of DEV_BSIZE.
+ */
+
+/* Byte offset within a block; the mask equals the modulo only when fd_bsize is a power of two. */
+#define minix_fdir_blkoff(fdp, offset) /* offset % fd_bsize */ \
+ ((offset) & ((fdp)->fd_bsize - 1))
+/* Index of the block that contains byte `offset'. */
+#define minix_fdir_lblkno(fdp, offset) /* offset / fd_bsize */ \
+ ((offset) >> (fdp)->fd_bshift)
+
+/* Convert a block number into a device record number by shifting left fd_fsbtodb bits. */
+#define minix_fdir_fsbtodb(fdp, block) /* block * fd_bsize / DEV_BSIZE */ \
+ ((block) << (fdp)->fd_fsbtodb)
+
+/*
+ * Read all or part of one data block through the device and hand the
+ * resulting buffer back in *addr (its length in *size_read).  The
+ * caller must deallocate that buffer when done.
+ *
+ * offset and size must be multiples of DEV_BSIZE, otherwise we panic.
+ * At most fd_bsize bytes are requested.  Returns FS_NOT_IN_FILE when
+ * offset lies beyond the mapped size or the block has no disk
+ * address; otherwise returns the result of device_read.
+ */
+int
+minix_page_read_file_direct(fdp, offset, size, addr, size_read)
+	register struct file_direct	*fdp;
+	vm_offset_t			offset;
+	vm_size_t			size;
+	vm_offset_t			*addr;		/* out */
+	mach_msg_type_number_t		*size_read;	/* out */
+{
+	register minix_daddr_t	lbn;	/* block index within the file */
+	minix_daddr_t		pbn;	/* block index on the device */
+	vm_offset_t		blkoff;	/* byte offset inside the block */
+
+	if (offset % DEV_BSIZE != 0 || size % DEV_BSIZE != 0)
+		panic("page_read_file_direct");
+
+	if (offset >= (fdp->fd_size << fdp->fd_bshift))
+		return (FS_NOT_IN_FILE);
+
+	blkoff = minix_fdir_blkoff(fdp, offset);
+	lbn = minix_fdir_lblkno(fdp, offset);
+
+	/* A bare device maps one-to-one; a file goes through fd_blocks. */
+	pbn = lbn;
+	if (!file_is_device(fdp)) {
+		pbn = ((minix_daddr_t *)fdp->fd_blocks)[lbn];
+		if (pbn == 0)
+			return (FS_NOT_IN_FILE);
+	}
+
+	if (size > fdp->fd_bsize)
+		size = fdp->fd_bsize;
+
+	return (device_read(fdp->fd_dev,
+			    0,
+			    (recnum_t) (minix_fdir_fsbtodb(fdp, pbn) + btodb(blkoff)),
+			    (int) size,
+			    (char **) addr,
+			    size_read));
+}
+
+/*
+ * Write all or part of a data block, and
+ * return the amount written.
+ *
+ * offset and size must be multiples of DEV_BSIZE, otherwise we panic.
+ * At most fd_bsize bytes are written.  Returns FS_NOT_IN_FILE when
+ * offset lies beyond the mapped size or the block has no disk
+ * address; otherwise returns the result of device_write and stores
+ * the byte count in *size_written (0 if device_write set nothing).
+ */
+int
+minix_page_write_file_direct(fdp, offset, addr, size, size_written)
+	register struct file_direct	*fdp;
+	vm_offset_t			offset;
+	vm_offset_t			addr;
+	vm_size_t			size;
+	vm_offset_t			*size_written;	/* out */
+{
+	vm_offset_t		off;
+	register minix_daddr_t	file_block;
+	minix_daddr_t		disk_block;
+	int			rc;
+	/* start at 0 so a failed write never reports stack garbage */
+	int			num_written = 0;
+
+	if (offset % DEV_BSIZE != 0 ||
+	    size % DEV_BSIZE != 0)
+		panic("page_write_file");
+
+	if (offset >= (fdp->fd_size << fdp->fd_bshift))
+		return (FS_NOT_IN_FILE);
+
+	off = minix_fdir_blkoff(fdp, offset);
+	file_block = minix_fdir_lblkno(fdp, offset);
+
+	if (file_is_device(fdp)) {
+		disk_block = file_block;
+	} else {
+		disk_block = ((minix_daddr_t *)fdp->fd_blocks)[file_block];
+		if (disk_block == 0)
+			return (FS_NOT_IN_FILE);
+	}
+
+	if (size > fdp->fd_bsize)
+		size = fdp->fd_bsize;
+
+	/*
+	 * Write the data. Wait for completion to keep
+	 * reads from getting ahead of writes and reading
+	 * stale data.
+	 */
+	rc = device_write(
+		fdp->fd_dev,
+		0,
+		(recnum_t) (minix_fdir_fsbtodb(fdp, disk_block) + btodb(off)),
+		(char *) addr,
+		size,
+		&num_written);
+	*size_written = num_written;
+	return rc;
+}
diff --git a/serverboot/minix_fs.h b/serverboot/minix_fs.h
new file mode 100644
index 00000000..678f3a0d
--- /dev/null
+++ b/serverboot/minix_fs.h
@@ -0,0 +1,107 @@
+/*
+ * minix_fs.h
+ * stolen (and slightly extended by csb) from the Linux distribution
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _LINUX_MINIX_FS_H
+#define _LINUX_MINIX_FS_H
+
+/*
+ * The minix filesystem constants/structures
+ */
+
+/*
+ * Thanks to Kees J Bot for sending me the definitions of the new
+ * minix filesystem (aka V2) with bigger inodes and 32-bit block
+ * pointers. It's not actually implemented yet, but I'll look into
+ * it.
+ */
+
+/* Inode number of the root directory. */
+#define MINIX_ROOT_INO 1
+
+/* Not the same as the bogus LINK_MAX in <linux/limits.h>. Oh well. */
+#define MINIX_LINK_MAX 250
+
+/* NOTE(review): presumably the number of inode/zone bitmap block slots -- confirm against users */
+#define MINIX_I_MAP_SLOTS 8
+#define MINIX_Z_MAP_SLOTS 8
+/* Values found in the super-block's s_magic field. */
+#define MINIX_SUPER_MAGIC 0x137F /* original minix fs */
+#define MINIX_SUPER_MAGIC2 0x138F /* minix fs, 30 char names */
+#define NEW_MINIX_SUPER_MAGIC 0x2468 /* minix V2 - not implemented */
+/* NOTE(review): presumably flag bits for the super-block's s_state field -- confirm */
+#define MINIX_VALID_FS 0x0001 /* Clean fs. */
+#define MINIX_ERROR_FS 0x0002 /* fs has errors. */
+
+/* How many struct minix_inode fit in one block; MINIX_BLOCK_SIZE is defined outside this header. */
+#define MINIX_INODES_PER_BLOCK ((MINIX_BLOCK_SIZE)/(sizeof (struct minix_inode)))
+
+/*
+ * The original (V1) minix inode.  The field widths add up to the
+ * 32 bytes mentioned below for the old inode only where unsigned long
+ * is 4 bytes and unsigned short is 2 bytes.
+ */
+struct minix_inode {
+ unsigned short i_mode;
+ unsigned short i_uid;
+ unsigned long i_size;
+ unsigned long i_time;
+ unsigned char i_gid;
+ unsigned char i_nlinks;
+ unsigned short i_zone[9]; /* 7+1+1 block numbers, per the comment on the new inode */
+};
+
+/*
+ * The new minix inode has all the time entries, as well as
+ * long block numbers and a third indirect block (7+1+1+1
+ * instead of 7+1+1). Also, some previously 8-bit values are
+ * now 16-bit. The inode is now 64 bytes instead of 32.
+ *
+ * The 64-byte figure holds only where unsigned long is 4 bytes.
+ * The magic-number list marks this format "not implemented".
+ */
+struct new_minix_inode {
+ unsigned short i_mode;
+ unsigned short i_nlinks;
+ unsigned short i_uid;
+ unsigned short i_gid;
+ unsigned long i_size;
+ unsigned long i_atime;
+ unsigned long i_mtime;
+ unsigned long i_ctime;
+ unsigned long i_zone[10]; /* 7+1+1+1 block numbers */
+};
+
+/*
+ * minix super-block data on disk
+ *
+ * The structure mirrors the disk layout, so field order and widths
+ * must not change.  minix_super.h declares an identical structure.
+ */
+struct minix_super_block {
+ unsigned short s_ninodes;
+ unsigned short s_nzones;
+ unsigned short s_imap_blocks;
+ unsigned short s_zmap_blocks;
+ unsigned short s_firstdatazone;
+ unsigned short s_log_zone_size;
+ unsigned long s_max_size;
+ unsigned short s_magic; /* one of the *_SUPER_MAGIC values */
+ unsigned short s_state; /* NOTE(review): presumably MINIX_VALID_FS / MINIX_ERROR_FS bits -- confirm */
+};
+
+/*
+ * Directory entry with a name of unspecified length: an inode number
+ * followed directly by the name bytes.  `name[0]' is the zero-length
+ * array extension, so sizeof covers only the fields before it.
+ */
+struct minix_dir_entry {
+ unsigned short inode;
+ char name[0];
+};
+
+/* Directory entry with a fixed 14-byte name field. */
+struct minix_directory_entry {
+ unsigned short inode;
+ char name[14];
+};
+
+/* Number of indirect-block levels; bounds the f_nindir[] loop in minix_file_io.c. */
+#define MINIX_NIADDR 2
+
+/* A block address as used by the minix code: 16 bits wide, like the i_zone[] entries of struct minix_inode. */
+typedef unsigned short minix_daddr_t;
+
+#endif
diff --git a/serverboot/minix_super.h b/serverboot/minix_super.h
new file mode 100644
index 00000000..144cf064
--- /dev/null
+++ b/serverboot/minix_super.h
@@ -0,0 +1,49 @@
+/*
+ * minix_super.h
+ * stolen from the Linux distribution
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _LINUX_MINIX_FS_H
+#define _LINUX_MINIX_FS_H
+
+/*
+ * minix super-block data on disk; same declaration as in minix_fs.h.
+ * This header uses the same include guard (_LINUX_MINIX_FS_H) as
+ * minix_fs.h, so whichever of the two is included first hides the
+ * other.
+ */
+struct minix_super_block {
+ unsigned short s_ninodes;
+ unsigned short s_nzones;
+ unsigned short s_imap_blocks;
+ unsigned short s_zmap_blocks;
+ unsigned short s_firstdatazone;
+ unsigned short s_log_zone_size;
+ unsigned long s_max_size;
+ unsigned short s_magic;
+ unsigned short s_state;
+};
+
+
+/*
+ * The original minix inode; same declaration as in minix_fs.h.
+ * The layout mirrors the disk format, so field order and widths
+ * must not change.
+ */
+struct minix_inode {
+ unsigned short i_mode;
+ unsigned short i_uid;
+ unsigned long i_size;
+ unsigned long i_time;
+ unsigned char i_gid;
+ unsigned char i_nlinks;
+ unsigned short i_zone[9];
+};
+
+/* Number of indirect-block levels. */
+#define MINIX_NIADDR 2
+
+#endif
diff --git a/serverboot/panic.c b/serverboot/panic.c
new file mode 100644
index 00000000..80197500
--- /dev/null
+++ b/serverboot/panic.c
@@ -0,0 +1,59 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+
+#include <mach/port.h>
+#include <varargs.h>
+#include <stdio.h>
+
+/* Host port that panic() passes to host_reboot(). */
+static mach_port_t master_host_port;
+
+/*
+ * Remember the host port for a later panic().  Declared without a
+ * return type (implicit int) and returns no value.
+ */
+panic_init(port)
+ mach_port_t port;
+{
+ master_host_port = port;
+}
+
+/*
+ * Print "bootstrap/default-pager panic: " followed by the printf-style
+ * message `s' and a newline, then ask the host to enter the debugger
+ * through host_reboot() and spin forever.  Does not return.
+ */
+/*VARARGS1*/
+panic(s, va_alist)
+	char *s;
+	va_dcl
+{
+	va_list	listp;
+
+	/* drop any error state on stdout before printing */
+	clearerr (stdout);
+	printf("bootstrap/default-pager panic: ");
+	va_start(listp);
+	vprintf(s, listp);
+	va_end(listp);
+	printf("\n");
+
+#ifdef	PC532
+	/* busy-wait before rebooting on this platform */
+	{ int l; for (l=0;l < 1000000;l++) ; }
+#endif	/* PC532 */
+#define RB_DEBUGGER	0x1000	/* enter debugger NOW */
+	(void) host_reboot(master_host_port, RB_DEBUGGER);
+	for (;;);
+}
diff --git a/serverboot/queue.h b/serverboot/queue.h
new file mode 100644
index 00000000..3e93476f
--- /dev/null
+++ b/serverboot/queue.h
@@ -0,0 +1,316 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon rights
+ * to redistribute these changes.
+ */
+/*
+ * File: queue.h
+ * Author: Avadis Tevanian, Jr.
+ * Date: 1985
+ *
+ * Type definitions for generic queues.
+ *
+ */
+
+#ifndef _QUEUE_H_
+#define _QUEUE_H_
+
+/*
+ * Queue of abstract objects. Queue is maintained
+ * within that object.
+ *
+ * Supports fast removal from within the queue.
+ *
+ * How to declare a queue of elements of type "foo_t":
+ * In the "*foo_t" type, you must have a field of
+ * type "queue_chain_t" to hold together this queue.
+ * There may be more than one chain through a
+ * "foo_t", for use by different queues.
+ *
+ * Declare the queue as a "queue_t" type.
+ *
+ * Elements of the queue (of type "foo_t", that is)
+ * are referred to by reference, and cast to type
+ * "queue_entry_t" within this module.
+ */
+
+/*
+ * A generic doubly-linked list (queue).
+ *
+ * The same two-pointer structure serves as the queue head and as the
+ * chain embedded in every element; the typedefs below only name the
+ * different roles.
+ */
+
+struct queue_entry {
+ struct queue_entry *next; /* next element */
+ struct queue_entry *prev; /* previous element */
+};
+
+typedef struct queue_entry *queue_t; /* pointer to a queue head */
+typedef struct queue_entry queue_head_t; /* storage for a queue head */
+typedef struct queue_entry queue_chain_t; /* storage for the links inside an element */
+typedef struct queue_entry *queue_entry_t; /* pointer to an element seen as links */
+
+/*
+ *	Macro:		queue_init
+ *	Function:
+ *		Initialize the given queue: both links of the head
+ *		point back at the head itself, which is the empty state.
+ *	Header:
+ *		void queue_init(q)
+ *			queue_t		q;	/ * MODIFIED * /
+ *	Note:
+ *		q is evaluated three times.
+ */
+#define	queue_init(q)	((q)->next = (q)->prev = (q))
+
+/*
+ * Macro: queue_first
+ * Function:
+ * Returns the first entry in the queue.  For an empty
+ * queue this is the queue head itself.
+ * Header:
+ * queue_entry_t queue_first(q)
+ * queue_t q; / * IN * /
+ */
+#define queue_first(q) ((q)->next)
+
+/*
+ * Macro: queue_next
+ * Function:
+ * Returns the entry after an item in the queue.
+ * Header:
+ * queue_entry_t queue_next(qc)
+ * queue_t qc;
+ */
+#define queue_next(qc) ((qc)->next)
+
+/*
+ * Macro: queue_last
+ * Function:
+ * Returns the last entry in the queue.  For an empty
+ * queue this is the queue head itself.
+ * Header:
+ * queue_entry_t queue_last(q)
+ * queue_t q; / * IN * /
+ */
+#define queue_last(q) ((q)->prev)
+
+/*
+ * Macro: queue_prev
+ * Function:
+ * Returns the entry before an item in the queue.
+ * Header:
+ * queue_entry_t queue_prev(qc)
+ * queue_t qc;
+ */
+#define queue_prev(qc) ((qc)->prev)
+
+/*
+ * Macro: queue_end
+ * Function:
+ * Tests whether a new entry is really the end of
+ * the queue, i.e. whether it is the queue head.
+ * Header:
+ * boolean_t queue_end(q, qe)
+ * queue_t q;
+ * queue_entry_t qe;
+ */
+#define queue_end(q, qe) ((q) == (qe))
+
+/*
+ * Macro: queue_empty
+ * Function:
+ * Tests whether a queue is empty.
+ * Header:
+ * boolean_t queue_empty(q)
+ * queue_t q;
+ * Note:
+ * q is evaluated twice.
+ */
+#define queue_empty(q) queue_end((q), queue_first(q))
+
+
+/*----------------------------------------------------------------*/
+/*
+ * Macros that operate on generic structures. The queue
+ * chain may be at any location within the structure, and there
+ * may be more than one chain.
+ */
+
+/*
+ *	Macro:		queue_enter
+ *	Function:
+ *		Insert a new element at the tail of the queue.
+ *	Header:
+ *		void queue_enter(q, elt, type, field)
+ *			queue_t q;
+ *			<type> elt;
+ *			<type> is what's in our queue
+ *			<field> is the chain field in (*<type>)
+ *	Notes:
+ *		Expands to a brace block that declares a local named
+ *		`prev'; the arguments are evaluated several times and
+ *		must not themselves refer to a variable of that name.
+ */
+#define queue_enter(head, elt, type, field)			\
+{ 								\
+	register queue_entry_t prev;				\
+								\
+	prev = (head)->prev;					\
+	if ((head) == prev) {					\
+		(head)->next = (queue_entry_t) (elt);		\
+	}							\
+	else {							\
+		((type)prev)->field.next = (queue_entry_t)(elt);\
+	}							\
+	(elt)->field.prev = prev;				\
+	(elt)->field.next = (head);				\
+	(head)->prev = (queue_entry_t) (elt);			\
+}
+
+/*
+ *	Macro:		queue_enter_first
+ *	Function:
+ *		Insert a new element at the head of the queue.
+ *	Header:
+ *		void queue_enter_first(q, elt, type, field)
+ *			queue_t q;
+ *			<type> elt;
+ *			<type> is what's in our queue
+ *			<field> is the chain field in (*<type>)
+ *	Notes:
+ *		Expands to a brace block that declares a local named
+ *		`next'; the arguments are evaluated several times and
+ *		must not themselves refer to a variable of that name.
+ */
+#define queue_enter_first(head, elt, type, field)		\
+{ 								\
+	register queue_entry_t next;				\
+								\
+	next = (head)->next;					\
+	if ((head) == next) {					\
+		(head)->prev = (queue_entry_t) (elt);		\
+	}							\
+	else {							\
+		((type)next)->field.prev = (queue_entry_t)(elt);\
+	}							\
+	(elt)->field.next = next;				\
+	(elt)->field.prev = (head);				\
+	(head)->next = (queue_entry_t) (elt);			\
+}
+
+/*
+ * Macro: queue_field [internal use only]
+ * Function:
+ * Find the queue_chain_t (or queue_t) for the
+ * given element (thing) in the given queue (head):
+ * the head itself when thing is the head, otherwise
+ * the address of the element's chain field.
+ */
+#define queue_field(head, thing, type, field) \
+ (((head) == (thing)) ? (head) : &((type)(thing))->field)
+
+/*
+ * Macro: queue_remove
+ * Function:
+ * Remove an arbitrary item from the queue.
+ * Header:
+ * void queue_remove(q, qe, type, field)
+ * arguments as in queue_enter
+ * Notes:
+ * The neighbours (or the head, where a neighbour is the
+ * head) are linked to each other; the links stored in
+ * the removed element itself are left as they were.
+ * Expands to a brace block with locals `next' and `prev'.
+ */
+#define queue_remove(head, elt, type, field) \
+{ \
+ register queue_entry_t next, prev; \
+ \
+ next = (elt)->field.next; \
+ prev = (elt)->field.prev; \
+ \
+ if ((head) == next) \
+ (head)->prev = prev; \
+ else \
+ ((type)next)->field.prev = prev; \
+ \
+ if ((head) == prev) \
+ (head)->next = next; \
+ else \
+ ((type)prev)->field.next = next; \
+}
+
+/*
+ * Macro: queue_remove_first
+ * Function:
+ * Remove and return the entry at the head of
+ * the queue.
+ * Header:
+ * queue_remove_first(head, entry, type, field)
+ * entry is returned by reference
+ * Notes:
+ * There is no emptiness check: on an empty queue
+ * (head)->next is the head, which is then cast to
+ * <type> and dereferenced.  Test queue_empty first.
+ */
+#define queue_remove_first(head, entry, type, field) \
+{ \
+ register queue_entry_t next; \
+ \
+ (entry) = (type) ((head)->next); \
+ next = (entry)->field.next; \
+ \
+ if ((head) == next) \
+ (head)->prev = (head); \
+ else \
+ ((type)(next))->field.prev = (head); \
+ (head)->next = next; \
+}
+
+/*
+ * Macro: queue_remove_last
+ * Function:
+ * Remove and return the entry at the tail of
+ * the queue.
+ * Header:
+ * queue_remove_last(head, entry, type, field)
+ * entry is returned by reference
+ * Notes:
+ * There is no emptiness check: on an empty queue
+ * (head)->prev is the head, which is then cast to
+ * <type> and dereferenced.  Test queue_empty first.
+ */
+#define queue_remove_last(head, entry, type, field) \
+{ \
+ register queue_entry_t prev; \
+ \
+ (entry) = (type) ((head)->prev); \
+ prev = (entry)->field.prev; \
+ \
+ if ((head) == prev) \
+ (head)->next = (head); \
+ else \
+ ((type)(prev))->field.next = (head); \
+ (head)->prev = prev; \
+}
+
+/*
+ *	Macro:		queue_assign
+ *	Function:
+ *		Make `to' take the place of `from': the two
+ *		neighbours of `from' are pointed at `to', then the
+ *		links of `from' are copied into `to'.
+ *	Notes:
+ *		Both neighbours are accessed as <type> elements
+ *		through <field>.
+ *		NOTE(review): if a neighbour is a bare queue head
+ *		rather than an element, that access is only right
+ *		when <field> sits at offset 0 -- confirm at call sites.
+ */
+#define	queue_assign(to, from, type, field)			\
+{								\
+	((type)((from)->prev))->field.next = (to);		\
+	((type)((from)->next))->field.prev = (to);		\
+	*(to) = *(from);					\
+}
+
+/*
+ * Macro: queue_iterate
+ * Function:
+ * iterate over each item in the queue.
+ * Generates a 'for' loop, setting elt to
+ * each item in turn (by reference).
+ * Header:
+ * queue_iterate(q, elt, type, field)
+ * queue_t q;
+ * <type> elt;
+ * <type> is what's in our queue
+ * <field> is the chain field in (*<type>)
+ * Notes:
+ * The step reads (elt)->field.next after the loop body
+ * has run, so the body must leave the current element's
+ * chain field usable.
+ */
+#define queue_iterate(head, elt, type, field) \
+ for ((elt) = (type) queue_first(head); \
+ !queue_end((head), (queue_entry_t)(elt)); \
+ (elt) = (type) queue_next(&(elt)->field))
+
+
+
+#endif	/* _QUEUE_H_ */
diff --git a/serverboot/strfcns.c b/serverboot/strfcns.c
new file mode 100644
index 00000000..53c097ba
--- /dev/null
+++ b/serverboot/strfcns.c
@@ -0,0 +1,117 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * Character subroutines
+ */
+
+#include <varargs.h>
+
+#define EXPORT_BOOLEAN
+#include <mach/boolean.h>
+
+/*
+ * Concatenate a group of strings together into a buffer.
+ * Return a pointer to the trailing '\0' character in
+ * the result string.
+ * The list of strings ends with a '(char *)0'.
+ *
+ * No length is passed in, so the caller must make `dest' large
+ * enough for all the source strings plus the terminator.
+ */
+/*VARARGS1*/
+char *
+strbuild(dest, va_alist)
+	register char *	dest;
+	va_dcl
+{
+	va_list	argptr;
+	register char *	src;
+	register int	c;
+
+	va_start(argptr);
+	while ((src = va_arg(argptr, char *)) != (char *)0) {
+
+		while ((c = *src++) != '\0')
+			*dest++ = c;
+	}
+	/* every va_start needs its va_end before the function returns */
+	va_end(argptr);
+	*dest = '\0';
+	return (dest);
+}
+
+/*
+ * Return TRUE if string 2 is a prefix of string 1, FALSE otherwise.
+ * An empty string 2 is a prefix of anything.
+ */
+boolean_t
+strprefix(s1, s2)
+	register char *s1, *s2;
+{
+	/* Walk both strings in step until the prefix is used up. */
+	for (; *s2 != '\0'; s1++, s2++) {
+		if (*s1 != *s2)
+			return (FALSE);
+	}
+	return (TRUE);
+}
+
+/*
+ * ovbcopy - like bcopy, but safe for overlapping ranges.
+ *
+ * bcopy is assumed to copy from low addresses to high ones.  That is
+ * only wrong when the destination starts inside the source range, in
+ * which case the bytes are moved one by one from the top down.
+ */
+ovbcopy(from, to, bytes)
+	char *from, *to;
+	int bytes;			/* num bytes to copy */
+{
+	if (to > from && to < from + bytes) {
+		/* destination overlaps the tail of the source */
+		char *src = from + (bytes - 1);
+		char *dst = to + (bytes - 1);
+
+		for (; bytes > 0; bytes--)
+			*dst-- = *src--;
+		return;
+	}
+
+	/* disjoint, identical, or destination below source */
+	bcopy(from, to, bytes);
+}
+
+/*
+ * Return a pointer to the first occurrence of 'c' in
+ * string s, or 0 if there is none.  Looking for '\0'
+ * yields a pointer to the string's terminator.
+ */
+char *
+index(s, c)
+	char *s;
+	char c;
+{
+	for (;; s++) {
+		if (*s == c)
+			return s;
+		if (*s == 0)
+			return 0;
+	}
+}
+
diff --git a/serverboot/translate_root.c b/serverboot/translate_root.c
new file mode 100644
index 00000000..b544d5c8
--- /dev/null
+++ b/serverboot/translate_root.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 1995 The University of Utah and
+ * the Computer Systems Laboratory at the University of Utah (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software is hereby
+ * granted provided that (1) source code retains these copyright, permission,
+ * and disclaimer notices, and (2) redistributions including binaries
+ * reproduce the notices in supporting documentation, and (3) all advertising
+ * materials mentioning features or use of this software display the following
+ * acknowledgement: ``This product includes software developed by the
+ * Computer Systems Laboratory at the University of Utah.''
+ *
+ * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
+ * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
+ * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * CSL requests users of this software to return to csl-dist@cs.utah.edu any
+ * improvements that they make and grant CSL redistribution rights.
+ *
+ * Author: Stephen Clawson, University of Utah CSL
+ */
+
+
+#include "translate_root.h"
+
+/*
+ * Convert a hexadecimal string to an unsigned int.  Leading blanks
+ * are skipped, both letter cases are accepted, and conversion stops
+ * at the first character that is not a hex digit.  Returns 0 when no
+ * digit is found.  There is no overflow check.
+ */
+unsigned int atoh(ap)
+	char *ap;
+{
+	register char *p = ap;
+	register unsigned int value = 0;
+	register int nibble;
+
+	while (*p == ' ')
+		p++;
+
+	for (;; p++) {
+		if (*p >= '0' && *p <= '9')
+			nibble = *p - '0';
+		else if (*p >= 'a' && *p <= 'f')
+			nibble = 10 + (*p - 'a');
+		else if (*p >= 'A' && *p <= 'F')
+			nibble = 10 + (*p - 'A');
+		else
+			break;
+		value = value * 16 + nibble;
+	}
+	return(value);
+}
+
+/*
+ * Translate the root device from whatever strange encoding we might
+ * be given.  Currently that includes BSD's slightly different name
+ * for IDE devices, and Linux's device number encoding (since that's
+ * what LILO passes us, for whatever reason).
+ *
+ * The string is rewritten in place and returned.  Up to five bytes
+ * (four characters plus the terminator) are stored.
+ * NOTE(review): a three-digit number such as "301" only occupies four
+ * bytes -- confirm every caller passes a buffer with room to spare.
+ */
+char *
+translate_root(root_string)
+	char *root_string;
+{
+	int linuxdev;
+
+	/*
+	 * A name that starts with "fd" can consist solely of hex digits
+	 * ("fd0"), so atoh() below would take it for a Linux device
+	 * number with an unknown major and rewrite it to the hd
+	 * default.  No major handled below can begin with those two
+	 * digits, so such a name is passed through untouched.
+	 */
+	if (root_string[0] == 'f' && root_string[1] == 'd')
+		return root_string;
+
+	linuxdev = atoh(root_string);
+
+	/* LILO passes us a string representing the linux device number of
+	 * our root device.  Since this is _not_ what we want, we'll make
+	 * a stab at converting it.
+	 *
+	 * Linux major numbers we care about:
+	 *
+	 * 2 = fd
+	 * 3 = hd[ab] (IDE channel 1)
+	 * 8 = sd
+	 * 22 = hd[cd] (IDE channel 2)
+	 *
+	 */
+	if (linuxdev) {
+		if (LINUX_MAJOR(linuxdev) == 2) {
+			root_string[0] = 'f';
+			root_string[1] = 'd';
+			root_string[2] = LINUX_FD_DEVICE_NR(linuxdev) + '0';
+			root_string[3] = '\0';
+		} else {
+			/* number of low minor bits that hold the partition */
+			int shift;
+
+			switch (LINUX_MAJOR(linuxdev)) {
+			case 3:
+			case 22:
+				shift = 6;
+				root_string[0] = 'h';
+				break;
+			case 8:
+				shift = 4;
+				root_string[0] = 's';
+				break;
+			default:
+				printf("Unknown linux device"
+				       "(major = %d, minor = %d) passed as "
+				       "root argument!\n"
+				       "using hd0a as default.\n",
+				       LINUX_MAJOR(linuxdev),
+				       LINUX_MINOR(linuxdev));
+				/* minor 1 with shift 1 decodes to unit 0, partition 'a' */
+				shift = 1;
+				root_string[0] = 'h';
+				linuxdev = 1;
+				break;
+			}
+
+			root_string[1] = 'd';
+			root_string[2] = LINUX_DEVICE_NR(linuxdev, shift)+'0';
+			root_string[3] = LINUX_PARTN(linuxdev, shift)+'a' - 1;
+			root_string[4] = '\0';
+		}
+	} else
+		/* This could be handled much simpler in the BSD boot
+		 * adapter code, but baford insists that the boot
+		 * adapter code shouldn't be tainted by Mach's notion
+		 * of the `correct' device naming.  Thus, we get wdxx
+		 * instead of hdxx if booted from the BSD bootblocks,
+		 * and this is the lame hack that tries to convert it.
+		 */
+		if (root_string[0] == 'w' && root_string[1] == 'd')
+			root_string[0] = 'h';
+
+	return root_string;
+}
+
+
+
diff --git a/serverboot/translate_root.h b/serverboot/translate_root.h
new file mode 100644
index 00000000..e5bab70a
--- /dev/null
+++ b/serverboot/translate_root.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 1995 The University of Utah and
+ * the Computer Systems Laboratory at the University of Utah (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software is hereby
+ * granted provided that (1) source code retains these copyright, permission,
+ * and disclaimer notices, and (2) redistributions including binaries
+ * reproduce the notices in supporting documentation, and (3) all advertising
+ * materials mentioning features or use of this software display the following
+ * acknowledgement: ``This product includes software developed by the
+ * Computer Systems Laboratory at the University of Utah.''
+ *
+ * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
+ * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
+ * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * CSL requests users of this software to return to csl-dist@cs.utah.edu any
+ * improvements that they make and grant CSL redistribution rights.
+ *
+ * Author: Stephen Clawson, University of Utah CSL
+ */
+
+#ifndef _TRANSLATE_ROOT_H_
+#define _TRANSLATE_ROOT_H_
+
+#define DEFAULT_ROOT "hd0a"
+
+extern char *translate_root(char *);
+
+/*
+ * A Linux device number is treated as 16 bits: major in the high
+ * byte, minor in the low byte.  Each expansion is wrapped in its own
+ * parentheses so it can be used inside any larger expression.
+ */
+#define LINUX_MAJOR(a) ((int)((unsigned short)(a) >> 8))
+#define LINUX_MINOR(a) ((int)((unsigned short)(a) & 0xFF))
+
+/* The low `shift' bits of the minor number give the partition ... */
+#define LINUX_PARTN(device, shift) \
+	(LINUX_MINOR(device) & ((1 << (shift)) - 1))
+/* ... and the remaining high bits give the unit. */
+#define LINUX_DEVICE_NR(device, shift) \
+	(LINUX_MINOR(device) >> (shift))
+/* Floppy unit: bits 0-1 of the number, plus bit 7 moved down to bit 2. */
+#define LINUX_FD_DEVICE_NR(device) \
+	( ((device) & 3) | (((device) & 0x80 ) >> 5 ))
+
+#endif /* _TRANSLATE_ROOT_H_ */
diff --git a/serverboot/wiring.c b/serverboot/wiring.c
new file mode 100644
index 00000000..550c1bec
--- /dev/null
+++ b/serverboot/wiring.c
@@ -0,0 +1,140 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * Package to wire current task's memory.
+ */
+#include <mach.h>
+#include <mach_init.h>
+#include <mach/machine/vm_param.h>
+
+/* Port for our own task; assigned by wire_setup. */
+mach_port_t	this_task;
+
+/* The privileged host port; stays MACH_PORT_NULL until wire_setup assigns it. */
+mach_port_t	priv_host_port = MACH_PORT_NULL;
+
+/*
+ * Record the ports the rest of this package works with: the port of
+ * the current task and the privileged host port given by the caller.
+ * wire_memory, wire_thread and wire_all_memory return without doing
+ * anything while priv_host_port is still MACH_PORT_NULL.
+ */
+void
+wire_setup(host_priv)
+	mach_port_t	host_priv;
+{
+	this_task = mach_task_self();
+	priv_host_port = host_priv;
+}
+
+/*
+ * Wire `size' bytes of the current task's memory starting at `start',
+ * passing `prot' to vm_wire as the access type.
+ *
+ * Returns without doing anything while priv_host_port is
+ * MACH_PORT_NULL.  Panics, reporting the kernel return code, if
+ * vm_wire fails.
+ */
+void
+wire_memory(start, size, prot)
+	vm_address_t	start;
+	vm_size_t	size;
+	vm_prot_t	prot;
+{
+	kern_return_t	kr;
+
+	/* vm_wire needs the privileged host port; without it, give up quietly. */
+	if (priv_host_port == MACH_PORT_NULL)
+	    return;
+
+	kr = vm_wire(priv_host_port, this_task, start, size, prot);
+	if (kr != KERN_SUCCESS)
+	    /* Name this routine in the message, the way wire_thread does. */
+	    panic("wire_memory: %d", kr);
+}
+
+/*
+ * Mark the calling thread as wired by calling thread_wire on it.
+ *
+ * Returns without doing anything while priv_host_port is
+ * MACH_PORT_NULL.  Panics, reporting the kernel return code, if
+ * thread_wire fails.
+ */
+void
+wire_thread()
+{
+	kern_return_t	kr;
+	mach_port_t	self;
+
+	if (priv_host_port == MACH_PORT_NULL)
+	    return;
+
+	/*
+	 * mach_thread_self() hands back a fresh reference to the thread
+	 * port on every call, so hold on to the name and release it once
+	 * thread_wire is done instead of leaking it.
+	 */
+	self = mach_thread_self();
+	kr = thread_wire(priv_host_port, self, TRUE);
+	(void) mach_port_deallocate(mach_task_self(), self);
+	if (kr != KERN_SUCCESS)
+	    panic("wire_thread: %d", kr);
+}
+
+/*
+ * Wire every accessible region of the current task.
+ *
+ * Starting from address 0, asks vm_region for one region after another
+ * until it stops returning KERN_SUCCESS.  Each region whose current
+ * protection is not VM_PROT_NONE is handed to wire_memory with that
+ * protection.  Returns without doing anything while priv_host_port is
+ * MACH_PORT_NULL.
+ */
+void
+wire_all_memory()
+{
+	kern_return_t		kr;
+	vm_offset_t		address;
+	vm_size_t		size;
+	vm_prot_t		cur_prot;
+	vm_prot_t		max_prot;
+	vm_inherit_t		inherit;
+	boolean_t		shared;
+	memory_object_name_t	name;
+	vm_offset_t		offset;
+
+	if (priv_host_port == MACH_PORT_NULL)
+	    return;
+
+	/* Step from region to region; vm_region updates address and size. */
+	for (address = 0; ; address += size) {
+	    kr = vm_region(this_task, &address, &size,
+			   &cur_prot, &max_prot, &inherit,
+			   &shared, &name, &offset);
+	    if (kr != KERN_SUCCESS)
+		break;
+
+	    /* Only the region's bounds and protection are needed; drop the port. */
+	    if (MACH_PORT_VALID(name))
+		(void) mach_port_deallocate(this_task, name);
+
+	    if (cur_prot != VM_PROT_NONE)
+		wire_memory(address, size, cur_prot);
+	}
+}
+
+/*
+ * Alias for vm_allocate to return wired memory.
+ *
+ * Maps `size' bytes into `task' with vm_map, using no memory object
+ * (MEMORY_OBJECT_NULL), protection VM_PROT_DEFAULT, maximum protection
+ * VM_PROT_ALL and inheritance VM_INHERIT_DEFAULT.  When `anywhere' is
+ * set, *address is first reset to VM_MIN_ADDRESS; vm_map stores the
+ * chosen address back through `address'.
+ *
+ * If `task' is the current task and a privileged host port is known,
+ * the new range is then wired with VM_PROT_DEFAULT.  The wiring is
+ * best effort: its result is ignored.
+ *
+ * Returns the vm_map error on failure, otherwise KERN_SUCCESS.
+ */
+kern_return_t
+vm_allocate(task, address, size, anywhere)
+	task_t		task;
+	vm_address_t	*address;
+	vm_size_t	size;
+	boolean_t	anywhere;
+{
+	kern_return_t	kr;
+
+	if (anywhere)
+	    *address = VM_MIN_ADDRESS;
+	kr = vm_map(task,
+		    address, size, (vm_offset_t) 0, anywhere,
+		    MEMORY_OBJECT_NULL, (vm_offset_t) 0, FALSE,
+		    VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
+	if (kr != KERN_SUCCESS)
+	    return kr;
+
+	/*
+	 * Like the wire_* routines, do not attempt to wire while there
+	 * is no privileged host port to hand to vm_wire.
+	 */
+	if (task == this_task && priv_host_port != MACH_PORT_NULL)
+	    (void) vm_wire(priv_host_port, task, *address, size,
+			   VM_PROT_DEFAULT);
+	return KERN_SUCCESS;
+}
diff --git a/serverboot/wiring.h b/serverboot/wiring.h
new file mode 100644
index 00000000..b5f8e53f
--- /dev/null
+++ b/serverboot/wiring.h
@@ -0,0 +1,35 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon the
+ * rights to redistribute these changes.
+ */
+/*
+ * Package to wire current task's memory.
+ */
+#ifndef _WIRING_H_
+#define _WIRING_H_
+
+#include <mach.h>
+#include <mach_init.h>
+
+/* Record the privileged host port and the current task's port. */
+extern void	wire_setup(/* mach_port_t host_priv */);
+
+/* Wire a range of the current task's memory; panics if vm_wire fails. */
+extern void	wire_memory(/* vm_address_t, vm_size_t, vm_prot_t */);
+
+/* Wire the calling thread; panics if thread_wire fails. */
+extern void	wire_thread();
+
+/* Wire every region of the current task whose protection is not VM_PROT_NONE. */
+extern void	wire_all_memory();
+
+#endif /* _WIRING_H_ */