diff options
Diffstat (limited to 'serverboot')
35 files changed, 11745 insertions, 0 deletions
diff --git a/serverboot/ChangeLog b/serverboot/ChangeLog new file mode 100644 index 00000000..b9ef7604 --- /dev/null +++ b/serverboot/ChangeLog @@ -0,0 +1,336 @@ +2000-01-03 Roland McGrath <roland@baalperazim.frob.com> + + * file_io.h: Include "../ext2fs/ext2_fs.h" instead of "ext2_fs.h". + * ext2_file_io.c (search_directory): Use `struct ext2_dir_entry_2' in + place of `struct ext2_dir_entry', so as to grok newer dir formats. + * ext2_fs.h: File removed. + * Makefile (LCLHDRS): Remove it from the list. + * ffs_compat.c (EXT2_INODES_PER_BLOCK): New macro, no longer in + ext2_fs.h. + +1999-11-18 Roland McGrath <roland@baalperazim.frob.com> + + * default_pager.c (default_pager): Instead of suspending this thread, + just become the default_pager_default_thread thread ourselves. + + * wiring.c (wire_all_memory): Touch pages before wiring. + +1999-11-16 Roland McGrath <roland@baalperazim.frob.com> + + * default_pager.c (default_pager_initialize): Use MACH_PORT_VALID to + check old DMM port, rather than just checking for MACH_PORT_NULL. + + * strfcns.c (index): Function removed. + + * ffs_file_io.c (ffs_open_file): Use memmove instead of ovbcopy. + * ext2_file_io.c (ext2_open_file): Likewise. + * strfcns.c (ovbcopy): Function removed. + +1999-11-08 Roland McGrath <roland@baalperazim.frob.com> + + * bootstrap.c (main): Further spruce up prompting and error reporting + for root device and boot script. + +1999-10-28 Roland McGrath <roland@baalperazim.frob.com> + + * load.c, bootstrap.c: Back out some debugging printfs accidentally + included in the last commit. + * bootstrap.c (main): Increase size of NEW_ROOT buffer. + This change was included in the last commit, but not logged then. + + * file_io.h: Include <errno.h>. + (FS_* macros): Define these to equivalent errno codes. + * def_pager_setup.c (add_paging_file): Put strerror of result code in + error messages. + * bootstrap.c (parse_script): Likewise. 
+ * load.c (boot_script_exec_cmd): Include NAMEBUF in error messages, + and use strerror to format result code. + * panic.c (panic): Use program_invocation_name in message. + +1999-10-08 Thomas Bushnell, BSG <tb@mit.edu> + + * bootstrap.c: (DEFAULT_ROOT): Remove macro. + (main): If no root device was specified, then prompt as if the + user had specified -a. Don't use strcpy to move NEW_ROOT into + ROOT_NAME; that's unsafe. + +1999-10-06 Roland McGrath <roland@baalperazim.frob.com> + + * bootstrap.c (DEFAULT_ROOT): Change to "hd0s1". + +1999-08-20 Roland McGrath <roland@baalperazim.frob.com> + + * bootstrap.c (parse_script): Add one to the buffer size so we can + null-terminate after the end of the file. + +1999-07-20 Roland McGrath <roland@baalperazim.frob.com> + + * ext2_file_io.c, ffs_file_io.c, minix_file_io.c: Remove fs-specific + routines *_page_{read,write}_file_direct. Moved to ... + * file_io.c (page_read_file_direct, page_write_file_direct): Don't + call fs-specific routine, these are not fs-specific. Instead, + replaced with former fs-specific routines (which were all identical). + Read and write more than a fs block when disk blocks are contiguous. + + * default_pager.c (new_partition): When reading Linux signature page, + handle pager_read_file_direct returning sub-page blocks. + +1999-06-03 Mark Kettenis <kettenis@gnu.org> + + * default_pager.c (new_partition): Declare `waste' as `int' + instead of `unsigned int'. + +1999-06-02 Roland McGrath <roland@baalperazim.frob.com> + + * load.c (boot_script_exec_cmd): Copy environment from our `environ' + onto the new task's stack along with its arguments. + + * bootstrap.c (main): If we have a MULTIBOOT_CMDLINE environment + variable, set its value as ${kernel-command-line} for boot scripts. + +1999-05-29 Roland McGrath <roland@baalperazim.frob.com> + + * bootstrap.c (main): Define $(serverboot) function for "serverboot + controls", control command replacing the pathname. 
For command "die", + terminate ourselves after running the boot script, no default pager. + +1999-05-23 Roland McGrath <roland@baalperazim.frob.com> + + * default_pager.c (new_partition): Fix arg order in printf for + linux-2.2 signature page. + +1999-05-20 Roland McGrath <roland@baalperazim.frob.com> + + * default_pager.c (new_partition): Fix SWAP-SPACE page handling. + +1999-05-17 Roland McGrath <roland@baalperazim.frob.com> + + * default_pager.c (new_partition): Deallocate signature page when + rejecting it for wrong version. Fix typo in SWAPSPACE2 v1 bad block + bitmap handling. Free bitmap and partition structure if we reject the + partition signature. + Reported by Kalle Olavi Niemitalo <tosi@ees2.oulu.fi>. + + * default_pager.c (new_partition): Print out for + CHECK_LINUX_SIGNATURE<0 case. + +1999-05-15 Roland McGrath <roland@baalperazim.frob.com> + + * default_pager.c (new_partition): Check if requested partition is + already in our list, and refuse it. + + * bootstrap.c (main): Remove vars HAD_A_PARITION, DOING_DEFAULT_PAGER. + Remove $(default-pager) boot script tag. We always stick around and + act as the default pager (this was already the case, just removed some + dead code). Don't print anything about having no swap partitions, + since that is a fine way to boot (just use swapon later). + + * default_pager.c (new_partition): Print out partition name and size + when no signature. + +1998-09-06 OKUJI Yoshinori <okuji@kuicr.kyoto-u.ac.jp> + + * bunzip2.c: New file. + * load.c (GZIP) (BZIP2): New cpp constants. + (boot_script_exec_cmd): If GZIP is defined, gunzip engine is enabled. + If BZIP2 is defined, bunzip2 engine is enabled. + * Makefile (SRCS): Add bunzip2.c. + (UNZIP_OBJS): Add do-bunzip2.o. + (CPPFLAGS): Add -DGZIP, -DBZIP2 and -DSMALL_BZIP2. + +1998-09-03 OKUJI Yoshinori <okuji@kuicr.kyoto-u.ac.jp> + + * gunzip.c: New file. + Copy libstore/gunzip.c and modify for use in serverboot. 
+ * load.c (struct stuff): Add members, image_addr and image_size. + (mem_read) (mem_read_exec): New functions. + (boot_script_exec_cmd): Add gzexe feature. + * Makefile: Add unzip stuffs. + +1999-03-06 Roland McGrath <roland@baalperazim.frob.com> + + * def_pager_setup.c (default_pager_setup): #if 0 out unused function. + + * default_pager.c (default_pager_paging_file): Pass 0 for new + LINUX_SIGNATURE arg to add_paging_file. + + * bootstrap.c (main: script_paging_file): Pass LINUX_SIGNATURE arg + through to add_paging_file. + +1999-02-27 Roland McGrath <roland@baalperazim.frob.com> + + * def_pager_setup.c (add_paging_file): New arg CHECK_LINUX_SIGNATURE, + pass down to create_paging_partition. + * default_pager.c (create_paging_partition): New arg + CHECK_LINUX_SIGNATURE, pass down new_partition. + (new_partition): New arg CHECK_LINUX_SIGNATURE: if not < 0, + check first page of swap for Linux 2.0 or 2.2 signature page and obey + its bad-block map; if > 0, refuse the partition if no signature found. + * bootstrap.c (main): Add new boot script functions + `add-raw-paging-file', `add-linux-paging-file'. Make those + and `add-paging-file' all call add_paging_file with new + LINUX_SIGNATURE arg of -1, 1, and 0, respectively. + * default_pager.c (create_paging_partition): If new_partition returns + null, return and do nothing more. + +1998-07-25 Roland McGrath <roland@baalperazim.frob.com> + + * default_pager.c (pager_read_offset): Cast NO_BLOCK twice, to real + return type of this function. + +Tue May 12 12:11:36 1998 Thomas Bushnell, n/BSG <tb@mit.edu> + + * bootstrap.c (parse_script): Free BUF before returning. Reported + by Katusya Tanaka (wyvern@pb3.so-net.ne.jp). + +1997-07-15 Miles Bader <miles@gnu.ai.mit.edu> + + * bootstrap.c (main): Rearrange default pager initialization. + + * bootstrap.c (safe_gets): Use strchr instead of index. + : Include <string.h> + + * bootstrap.c (main): Change "(bootstrap)" to "(serverboot)" in msgs. 
+ * load.c (boot_script_exec_cmd): Likewise. + +1997-07-09 Miles Bader <miles@gnu.ai.mit.edu> + + * bootstrap.c (main): Don't exit when no paging partition. + +1997-07-08 Miles Bader <miles@gnu.ai.mit.edu> + + * bootstrap.c: Don't include "translate_root.h". + (main): Don't call translate_root. + Pass more than a single character to safe_gets. + (DEFAULT_ROOT): New macro. + (safe_gets): Make sure a newline exists before removing it. + Ensure that the cursor moves to the beginning of the next line. + * translate_root.c, translate_root.h: Files removed. + * Makefile (SRCS): Remove translate_root.c. + (LCLHDRS): Remove translate_root.h. + +Fri Jun 20 15:37:15 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * bootstrap.c (main/script_paging_file): Instead of returning an + error use new variable `had_a_partition' to record whether we + successfully opened a partition. + (main/script_default_pager): Only start pager if HAD_A_PARTITION; + otherwise print warning message. + +Mon Jun 16 11:52:40 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * Makefile (installationdir): Use `=' to set this instead of :=; + $(prefix) is not yet available and `=' postpones the evaluation + properly. Reported by Marcus G. Daniels, + marcus@cathcart.sysc.pdx.edu. + +Tue Jun 10 21:54:52 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * disk_inode.h (struct icommon): Use short instead of uid_t/gid_t + in structure definition; those are now 32 bit types. + + * bootstrap.c (main): Support running from command line too; this + is useful at least for simple debugging. + +1997-06-09 Miles Bader <miles@gnu.ai.mit.edu> + + * Makefile (LCLHDRS): Add mach-exec.h. + +Thu May 1 18:40:53 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * bootstrap.c (cthread_stack_size): Don't set special value here; + use the default. 
+ +Wed Apr 30 12:01:53 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * default_pager.c (default_pager_default_thread): Now that this is + forked, rather than run directly by the first thread, we have to + gain kernel privileges to acquire reserved pages. + +Fri Apr 18 16:44:49 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * wiring.c (__vm_allocate): New function. + +Wed Apr 16 14:18:28 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * default_pager.c (debug): Turn off. + +Mon Apr 14 12:50:20 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * default_pager.c: Remove debugging printfs. + +Thu Apr 10 15:10:25 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * Makefile (installationdir): Set variable. + +Wed Apr 9 13:57:44 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * default_pager.c (default_pager_default_thread): New function. + Delete old variable with this name, it was never used. + (default_pager): Fork default_pager_default_thread instead of + doing it here to avoid the possibility that we are on a tiny + stack. Current thread dies. + +Thu Apr 3 20:00:58 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * elf-load.c (exec_load): Include <elf.h> instead of + <mach/exec/elf.h>. Include "mach-exec.h" instead of + <mach/exec/elf.h>. + (exec_load) [i386]: Check for i386 types directly; abandon old + MY_EI_DATA and MY_E_MACHINE. + * load.c: Include "mach-exec.h" instead of <mach/exec/elf.h>. + * exec.c: Likewise. + * mach-exec.h: New file. + +Wed Mar 19 14:45:27 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * panic.c (panic): Clear possible errors on stdout before printing + panic string. + +Mon Mar 17 13:13:50 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * wiring.c (wire_all_memory): Don't attempt wire if PROTECTION is + VM_PROT_NONE. + + * panic.c (panic): Be more informative about where the error is + coming from. 
+ + * default_pager.c (create_paging_partition): Don't print + gratuitous output noise. + * load.c (boot_script_exec_cmd): Likewise. + +Wed Mar 12 10:53:00 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * ext2_file_io.c (ext2_open_file): Clear FP before beginning + work. + * ffs_file_io.c (ffs_open_file): Likewise. + * minix_file_io.c (minix_open_file): Likewise. + + * bootstrap.c (printf_init, safe_gets): New functions. + * console.c: Deleted file. + * Makefile (SRCS): Omit console.c and gets.c. + + * load.c (read_symtab_from_file): Comment out body of function. + We don't want this. + + * defs.h: Comment out redefinitions of common types. + + * default_pager.c: Include <cthreads.h> instead of + <mach/cthreads.h>. + * file_io.h: Likewise. + * kalloc.c: Likewise. + + * panic.c: Include <varargs.h> instead of <sys/varargs.h>. + + * default_pager.c (pager_read_offset): Cast return of NO_BLOCK + properly. + +Mon Mar 10 17:07:50 1997 Thomas Bushnell, n/BSG <thomas@gnu.ai.mit.edu> + + * load.c: Find boot_script.h in ../boot. + * bootstrap.c: Likewise. + + * bootstrap.c (boot_panic): Repair syntax. + + * strfcns.c: Include <varargs.h> instead of <sys/varargs.h>. + * load.c: Likewise. diff --git a/serverboot/Makefile b/serverboot/Makefile new file mode 100644 index 00000000..b274717e --- /dev/null +++ b/serverboot/Makefile @@ -0,0 +1,52 @@ +# Copyright (C) 1997, 1999 Free Software Foundation, Inc. +# This file is part of the GNU Hurd. +# +# The GNU Hurd is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# The GNU Hurd is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with the GNU Hurd; see the file COPYING. If not, write to +# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + +dir := serverboot +makemode := utility + +SRCS = bootstrap.c ffs_compat.c load.c wiring.c def_pager_setup.c \ + ffs_file_io.c minix_ffs_compat.c default_pager.c file_io.c\ + minix_file_io.c ext2_file_io.c kalloc.c strfcns.c exec.c \ + panic.c elf-load.c gunzip.c bunzip2.c +LCLHDRS = assert.h disk_inode_ffs.h fs.h queue.h defs.h \ + minix_ffs_compat.h wiring.h dir.h ffs_compat.h minix_fs.h \ + disk_inode.h file_io.h minix_super.h mach-exec.h +target = serverboot +HURDLIBS = threads +installationdir = $(prefix)/boot + +UNZIP_OBJS = unzip.o inflate.o util.o do-bunzip2.o +OBJS = $(subst .c,.o,$(SRCS)) boot_script.o memory_objectServer.o \ + default_pagerServer.o excServer.o bootstrapServer.o \ + memory_object_defaultServer.o $(UNZIP_OBJS) + +vpath boot_script.c $(srcdir)/../boot + +# Look for zip stuff +VPATH += $(srcdir)/../exec +# If SMALL_BZIP2 is defined, use relatively small memory. +# It's crucial for serverboot, because swap is not enabled yet. +CPPFLAGS += -I$(srcdir)/../exec -DGZIP -DBZIP2 -DSMALL_BZIP2 + +MIGSFLAGS = -DSEQNOS + +LDFLAGS += -static + +include ../Makeconf + +# Don't even bother. +CFLAGS := $(filter-out -Wall,$(CFLAGS)) diff --git a/serverboot/assert.h b/serverboot/assert.h new file mode 100644 index 00000000..9bcab69e --- /dev/null +++ b/serverboot/assert.h @@ -0,0 +1,50 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. 
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#ifndef _ASSERT_H_ +#define _ASSERT_H_ + +#ifdef ASSERTIONS +extern void Assert(); + +#define assert(ex) \ + do { \ + if (!(ex)) \ + Assert(__FILE__, __LINE__); \ + } while (0) + +#ifdef lint +#define assert_static(x) +#else lint +#define assert_static(x) assert(x) +#endif lint + +#else /* ASSERTIONS */ +#define assert(ex) +#define assert_static(ex) +#endif /* ASSERTIONS */ + +#endif /* _ASSERT_H_ */ diff --git a/serverboot/bootstrap.c b/serverboot/bootstrap.c new file mode 100644 index 00000000..46935a26 --- /dev/null +++ b/serverboot/bootstrap.c @@ -0,0 +1,485 @@ +/* + * Mach Operating System + * Copyright (c) 1992,1991,1990,1989 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. 
+ * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie the + * rights to redistribute these changes. + */ +/* + * Bootstrap the various built-in servers. + */ + +#include <mach.h> +#include <mach/message.h> +#include <sys/reboot.h> + +#include <file_io.h> + +#include <stdio.h> +#include <string.h> + +#include "../boot/boot_script.h" + +#if 0 +/* + * Use 8 Kbyte stacks instead of the default 64K. + * Use 4 Kbyte waiting stacks instead of the default 8K. + */ +#if defined(alpha) +vm_size_t cthread_stack_size = 16 * 1024; +#else +vm_size_t cthread_stack_size = 8 * 1024; +#endif +#endif + +extern +vm_size_t cthread_wait_stack_size; + +mach_port_t bootstrap_master_device_port; /* local name */ +mach_port_t bootstrap_master_host_port; /* local name */ + +int boot_load_program(); + +char *root_name; +char boot_script_name[MAXPATHLEN]; + +extern void default_pager(); +extern void default_pager_initialize(); +extern void default_pager_setup(); + +/* initialized in default_pager_initialize */ +extern mach_port_t default_pager_exception_port; +extern mach_port_t default_pager_bootstrap_port; + +/* + * Convert ASCII to integer. 
+ */ +int atoi(str) + register const char *str; +{ + register int n; + register int c; + int is_negative = 0; + + n = 0; + while (*str == ' ') + str++; + if (*str == '-') { + is_negative = 1; + str++; + } + while ((c = *str++) >= '0' && c <= '9') { + n = n * 10 + (c - '0'); + } + if (is_negative) + n = -n; + return (n); +} + +__main () +{ +} + +static void +boot_panic (kern_return_t err) +{ +#define PFX "bootstrap: " + char *err_string = boot_script_error_string (err); + char panic_string[strlen (err_string) + sizeof (PFX)]; + strcpy (panic_string, PFX); + strcat (panic_string, err_string); + panic (panic_string); +#undef PFX +} + +void +safe_gets (char *str, int maxlen) +{ + char *c = fgets (str, maxlen, stdin); + if (c == 0) { + perror ("fgets"); + panic ("cannot read from console"); + } + c = strchr (c, '\n'); + if (c) + *c = '\0'; + printf ("\r\n"); +} + +printf_init (device_t master) +{ + mach_port_t cons; + kern_return_t rc; + rc = device_open (master, D_READ|D_WRITE, "console", &cons); + if (rc) + while (1) { + volatile int x = 0; + (void) host_reboot(bootstrap_master_host_port, RB_DEBUGGER); + x = x / x; + } + stdin = mach_open_devstream (cons, "r"); + stdout = stderr = mach_open_devstream (cons, "w"); + mach_port_deallocate (mach_task_self (), cons); + setbuf (stdout, 0); +} + +/* + * Bootstrap task. + * Runs in user space. 
+ * + * Called as 'boot -switches host_port device_port root_name' + * + */ +main(argc, argv) + int argc; + char **argv; +{ + int die = 0; + int script_paging_file (const struct cmd *cmd, int linux_signature) + { + if (add_paging_file (bootstrap_master_device_port, cmd->path, + linux_signature)) + printf ("(serverboot): %s: Cannot add paging file\n", cmd->path); + return 0; + } + int script_add_paging_file (const struct cmd *cmd, int *val) + { + return script_paging_file (cmd, 0); + } + int script_add_raw_paging_file (const struct cmd *cmd, int *val) + { + return script_paging_file (cmd, -1); + } + int script_add_linux_paging_file (const struct cmd *cmd, int *val) + { + return script_paging_file (cmd, 1); + } + int script_serverboot_ctl (const struct cmd *cmd, int *val) + { + const char *const ctl = cmd->path; + if (!strcmp (ctl, "die")) + die = 1; + else + printf ("(serverboot): Unknown control word `%s' ignored\n", ctl); + return 0; + } + + void prompt_for_root () + { + static char new_root[MAXPATHLEN/2]; + + if (!root_name) + root_name = "UNKNOWN"; + printf ("Root device name? [%s] ", root_name); + safe_gets(new_root, sizeof(new_root)); + + if (new_root[0] != '\0') { + root_name = new_root; + (void) strbuild(boot_script_name, + "/dev/", + root_name, + "/boot/servers.boot", + (char *)0); + } + } + + register kern_return_t result; + struct file scriptf; + + task_t my_task = mach_task_self(); + + char *flag_string; + + boolean_t ask_boot_script = 0; + boolean_t ask_root_name = 0; + + /* + * Use 4Kbyte cthread wait stacks. + */ + cthread_wait_stack_size = 4 * 1024; + + /* + * Arg 1 is flags + */ + if (argv[1][0] != '-') + panic("bootstrap: no flags"); + + flag_string = argv[1]; + + /* + * Parse the arguments. 
+ */ + if (argc >= 5) + { + /* + * Arg 0 is program name + */ + + /* + * Arg 2 is host port number + */ + bootstrap_master_host_port = atoi(argv[2]); + + /* + * Arg 3 is device port number + */ + bootstrap_master_device_port = atoi(argv[3]); + + /* + * Arg 4 is root name + */ + root_name = argv[4]; + } + else if (argc == 3) + { + root_name = argv[2]; + + get_privileged_ports (&bootstrap_master_host_port, + &bootstrap_master_device_port); + } + + printf_init(bootstrap_master_device_port); +#ifdef pleasenoXXX + panic_init(bootstrap_master_host_port); +#endif + + + /* + * If the '-a' (ask) switch was specified, or if no + * root device was specified, ask for the root device. + */ + + if (!root_name || root_name [0] == '\0' || index(flag_string, 'a')) + prompt_for_root (); + + (void) strbuild(boot_script_name, + "/dev/", + root_name, + "/boot/servers.boot", + (char *)0); + /* + * If the '-q' (query) switch was specified, ask for the + * server boot script. + */ + + if (index(flag_string, 'q')) + ask_boot_script = TRUE; + + while (TRUE) { + if (ask_root_name) + prompt_for_root (); + + if (ask_boot_script) { + char new_boot_script[MAXPATHLEN]; + + printf("Server boot script? [%s] ", boot_script_name); + safe_gets(new_boot_script, sizeof(new_boot_script)); + if (new_boot_script[0] != '\0') + strcpy(boot_script_name, new_boot_script); + } + + result = open_file(bootstrap_master_device_port, + boot_script_name, + &scriptf); + if (result == D_NO_SUCH_DEVICE) + { + printf ("Root device `%s' does not exist!\n", root_name); + ask_root_name = ask_boot_script = TRUE; + continue; + } + else + ask_root_name = FALSE; + if (result != 0) { + printf("Can't open server boot script %s: %s\n", + boot_script_name, + strerror (result)); + ask_boot_script = TRUE; + continue; + } + break; + } + + /* + * If the server boot script name was changed, + * then use the new device name as the root device. 
+ */ + { + char *dev, *end; + int len; + + dev = boot_script_name; + if (strncmp(dev, "/dev/", 5) == 0) + dev += 5; + end = strchr(dev, '/'); + len = end ? end-dev : strlen(dev); + memcpy(root_name, dev, len); + root_name[len] = 0; + } + + /* + * Set up the default pager. + */ + partition_init(); + + { + char *cmdline; + + /* Initialize boot script variables. */ + if (boot_script_set_variable ("host-port", VAL_PORT, + (int) bootstrap_master_host_port) + || boot_script_set_variable ("device-port", VAL_PORT, + (int) bootstrap_master_device_port) + || boot_script_set_variable ("root-device", VAL_STR, + (int) root_name) + || boot_script_set_variable ("boot-args", VAL_STR, + (int) flag_string) + || boot_script_define_function ("add-paging-file", VAL_NONE, + &script_add_paging_file) + || boot_script_define_function ("add-raw-paging-file", VAL_NONE, + &script_add_raw_paging_file) + || boot_script_define_function ("add-linux-paging-file", + VAL_NONE, + &script_add_linux_paging_file) + || boot_script_define_function ("serverboot", + VAL_NONE, + &script_serverboot_ctl) + ) + panic ("bootstrap: error setting boot script variables"); + + cmdline = getenv ("MULTIBOOT_CMDLINE"); + if (cmdline != NULL + && boot_script_set_variable ("kernel-command-line", + VAL_STR, + (int) cmdline)) + panic ("bootstrap: error setting boot script variables"); + + parse_script (&scriptf); + close_file (&scriptf); + } + + if (index (flag_string, 'd')) + { + char xx[5]; + printf ("Hit return to boot..."); + safe_gets (xx, sizeof xx); + } + + /* + * task_set_exception_port and task_set_bootstrap_port + * both require a send right. + */ + (void) mach_port_insert_right(my_task, default_pager_bootstrap_port, + default_pager_bootstrap_port, + MACH_MSG_TYPE_MAKE_SEND); + (void) mach_port_insert_right(my_task, default_pager_exception_port, + default_pager_exception_port, + MACH_MSG_TYPE_MAKE_SEND); + + /* + * Change our exception port. 
+ */ + (void) task_set_exception_port(my_task, default_pager_exception_port); + + result = boot_script_exec (); + + if (result) + boot_panic (result); + +#if 0 + { + /* + * Delete the old stack (containing only the arguments). + */ + vm_offset_t addr = (vm_offset_t) argv; + + vm_offset_t r_addr; + vm_size_t r_size; + vm_prot_t r_protection, r_max_protection; + vm_inherit_t r_inheritance; + boolean_t r_is_shared; + memory_object_name_t r_object_name; + vm_offset_t r_offset; + kern_return_t kr; + + r_addr = addr; + + kr = vm_region(my_task, + &r_addr, + &r_size, + &r_protection, + &r_max_protection, + &r_inheritance, + &r_is_shared, + &r_object_name, + &r_offset); + if ((kr == KERN_SUCCESS) && MACH_PORT_VALID(r_object_name)) + (void) mach_port_deallocate(my_task, r_object_name); + if ((kr == KERN_SUCCESS) && + (r_addr <= addr) && + ((r_protection & (VM_PROT_READ|VM_PROT_WRITE)) == + (VM_PROT_READ|VM_PROT_WRITE))) + (void) vm_deallocate(my_task, r_addr, r_size); + } +#endif + + if (die) + { + printf ("(serverboot): terminating, not becoming default pager\n"); + while (1) + task_terminate (mach_task_self ()); + } + + default_pager_initialize (bootstrap_master_host_port); + + /* + * Become the default pager + */ + default_pager(); + /*NOTREACHED*/ +} + +/* Parse the boot script. 
*/ +parse_script (struct file *f) +{ + char *p, *line, *buf; + int amt, fd, err; + int n = 0; + + buf = malloc (f->f_size + 1); /* add one for null terminator we will write */ + err = read_file (f, 0, buf, f->f_size, 0); + if (err) + panic ("bootstrap: error reading boot script file: %s", strerror (err)); + + line = p = buf; + while (1) + { + while (p < buf + f->f_size && *p != '\n') + p++; + *p = '\0'; + err = boot_script_parse_line (line); + if (err) + boot_panic (err); + if (p == buf + f->f_size) + break; + line = ++p; + } + free (buf); +} diff --git a/serverboot/bunzip2.c b/serverboot/bunzip2.c new file mode 100644 index 00000000..9f79ade5 --- /dev/null +++ b/serverboot/bunzip2.c @@ -0,0 +1,169 @@ +/* Modified by okuji@kuicr.kyoto-u.ac.jp for use in serverboot. */ +/* Decompressing store backend + + Copyright (C) 1997 Free Software Foundation, Inc. + Written by Miles Bader <miles@gnu.ai.mit.edu> + This file is part of the GNU Hurd. + + The GNU Hurd is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + The GNU Hurd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
 */

#include <stdio.h>
#include <string.h>
#include <setjmp.h>
#include <cthreads.h>
#include <errno.h>

#include <file_io.h>

#define IN_BUFFERING  (256*1024)
#define OUT_BUFFERING (512*1024)

/* Held while the global unzip_* hooks point at one call's nested
   functions (see the lock/assign/unlock sequence below).  */
static struct mutex bunzip2_lock = MUTEX_INITIALIZER;

/* Uncompress the contents of FROM, which should contain a valid bzip2 file,
   into memory, returning the result buffer in BUF & BUF_LEN.

   Returns 0 on success; *BUF then points at memory obtained with
   vm_allocate and *BUF_LEN is the number of uncompressed bytes written
   (whole pages past round_page of that count are deallocated).
   On failure the error code is returned and the buffer is deallocated:
   the code from read_file or vm_allocate, EIO if the engine reports a
   read error, or EINVAL if it reports any other error.

   The hooks below are GNU C nested functions; they report errors by
   storing into ZERR and longjmp'ing back into this frame.  */
int
serverboot_bunzip2 (struct file *from, void **buf, size_t *buf_len)
{
  /* Callbacks from do_bunzip2 for I/O and error interface.  */
  extern int (*unzip_read) (char *buf, size_t maxread);
  extern void (*unzip_write) (const char *buf, size_t nwrite);
  extern void (*unzip_read_error) (void);
  extern void (*unzip_error) (const char *msg);

  /* How we return errors from our hook functions.  */
  jmp_buf zerr_jmp_buf;
  /* NOTE(review): ZERR is modified between setjmp and longjmp and read
     afterwards without being volatile; confirm the compiler keeps it in
     memory (ISO C leaves such values indeterminate).  */
  int zerr;

  size_t offset = 0;		/* Offset of read point in FROM.  */

  /* Read at most MAXREAD (or 0 if eof) bytes into BUF from our current
     position in FROM.  Returns the number of bytes actually read.  */
  int zread (char *buf, size_t maxread)
    {
      vm_size_t resid;
      size_t did_read;

      /* Never ask for more than what is left of the file.  */
      if (from->f_size - offset < maxread)
	did_read = from->f_size - offset;
      else
	did_read = maxread;

      zerr = read_file (from, offset, buf, did_read, &resid);
      if (zerr)
	longjmp (zerr_jmp_buf, 1);

      /* RESID is the part of the request that was not satisfied.  */
      did_read -= resid;
      offset += did_read;

      return did_read;
    }

  size_t out_buf_offs = 0;	/* Position in the output buffer.  */

  /* Write uncompress data to our output buffer.  */
  void zwrite (const char *wbuf, size_t nwrite)
    {
      size_t old_buf_len = *buf_len;

      if (out_buf_offs + nwrite > old_buf_len)
	/* Have to grow the output buffer.  */
	{
	  void *old_buf = *buf;
	  void *new_buf = old_buf + old_buf_len; /* First try.  */
	  /* Roughly double the buffer, plus room for this write.  */
	  size_t new_buf_len = round_page (old_buf_len + old_buf_len + nwrite);

	  /* Try to grow the buffer.  (Last argument 0: allocate exactly at
	     NEW_BUF, i.e. directly after the existing buffer.)  */
	  zerr =
	    vm_allocate (mach_task_self (),
			 (vm_address_t *)&new_buf, new_buf_len - old_buf_len,
			 0);
	  if (zerr)
	    /* Can't do that, try to make a bigger buffer elsewhere.  */
	    {
	      new_buf = old_buf;
	      zerr =
		vm_allocate (mach_task_self (),
			     (vm_address_t *)&new_buf, new_buf_len, 1);
	      if (zerr)
		longjmp (zerr_jmp_buf, 1);

	      if (out_buf_offs > 0)
		/* Copy the old buffer into the start of the new & free it.  */
		bcopy (old_buf, new_buf, out_buf_offs);

	      vm_deallocate (mach_task_self (),
			     (vm_address_t)old_buf, old_buf_len);

	      *buf = new_buf;
	    }

	  *buf_len = new_buf_len;
	}

      bcopy (wbuf, *buf + out_buf_offs, nwrite);
      out_buf_offs += nwrite;
    }

  /* Engine hook for a read error: fail the whole call with EIO.  */
  void zreaderr (void)
    {
      zerr = EIO;
      longjmp (zerr_jmp_buf, 1);
    }
  /* Engine hook for any other error: fail with EINVAL (MSG is unused).  */
  void zerror (const char *msg)
    {
      zerr = EINVAL;
      longjmp (zerr_jmp_buf, 2);
    }

  /* Try to guess a reasonable output buffer size.  */
  *buf_len = round_page (from->f_size * 2);
  zerr = vm_allocate (mach_task_self (), (vm_address_t *)buf, *buf_len, 1);
  if (zerr)
    return zerr;

  mutex_lock (&bunzip2_lock);

  unzip_read = zread;
  unzip_write = zwrite;
  unzip_read_error = zreaderr;
  unzip_error = zerror;

  /* setjmp returns nonzero when one of the hooks above bailed out, in
     which case ZERR already holds the error code.  */
  if (! setjmp (zerr_jmp_buf))
    {
      /* Call the bunzip2 engine.  */
      do_bunzip2 ();
      zerr = 0;
    }

  mutex_unlock (&bunzip2_lock);

  if (zerr)
    {
      if (*buf_len > 0)
	vm_deallocate (mach_task_self (), (vm_address_t)*buf, *buf_len);
    }
  else if (out_buf_offs < *buf_len)
    /* Trim the output buffer to be the right length.  */
    {
      size_t end = round_page (out_buf_offs);
      if (end < *buf_len)
	vm_deallocate (mach_task_self (),
		       (vm_address_t)(*buf + end), *buf_len - end);
      *buf_len = out_buf_offs;
    }

  return zerr;
}
diff --git a/serverboot/def_pager_setup.c b/serverboot/def_pager_setup.c new file mode 100644 index 00000000..8834a379 --- /dev/null +++ b/serverboot/def_pager_setup.c @@ -0,0 +1,140 @@ +/* + * Mach Operating System + * Copyright (c) 1992-1989 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. 
+ */ +#include <mach.h> + +#include <file_io.h> + +extern void *kalloc(); + +/* + * Create a paging partition given a file name + */ +extern void create_paging_partition(); + +kern_return_t +add_paging_file(master_device_port, file_name, linux_signature) + mach_port_t master_device_port; + char *file_name; + int linux_signature; +{ + register struct file_direct *fdp; + register kern_return_t result; + struct file pfile; + boolean_t isa_file; + + bzero((char *) &pfile, sizeof(struct file)); + + result = open_file(master_device_port, file_name, &pfile); + if (result != KERN_SUCCESS) + return result; + + fdp = (struct file_direct *) kalloc(sizeof *fdp); + bzero((char *) fdp, sizeof *fdp); + + isa_file = file_is_structured(&pfile); + + result = open_file_direct(pfile.f_dev, fdp, isa_file); + if (result) + panic("Can't open paging file %s: %s\n", + file_name, strerror (result)); + + result = add_file_direct(fdp, &pfile); + if (result) + panic("Can't read disk addresses: %s\n", strerror (result)); + + close_file(&pfile); + + /* + * Set up the default paging partition + */ + create_paging_partition(file_name, fdp, isa_file, linux_signature); + + return result; +} + +/* + * Destroy a paging_partition given a file name + */ +kern_return_t +remove_paging_file(file_name) + char *file_name; +{ + struct file_direct *fdp = 0; + kern_return_t kr; + + kr = destroy_paging_partition(file_name, &fdp); + if (kr == KERN_SUCCESS) { + remove_file_direct(fdp); + kfree(fdp, sizeof(*fdp)); + } + return kr; +} + +#if 0 /* no longer used */ +/* + * Set up default pager + */ +extern char *strbuild(); + +boolean_t +default_pager_setup(master_device_port, server_dir_name) + mach_port_t master_device_port; + char *server_dir_name; +{ + register kern_return_t result; + + char paging_file_name[MAXPATHLEN+1]; + + (void) strbuild(paging_file_name, + server_dir_name, + "/paging_file", + (char *)0); + + while (TRUE) { + result = add_paging_file(master_device_port, paging_file_name); + if (result == 
KERN_SUCCESS) + break; + printf("Can't open paging file %s: %d\n", + paging_file_name, + result); + + bzero(paging_file_name, sizeof(paging_file_name)); + printf("Paging file name ? "); + safe_gets(paging_file_name, sizeof(paging_file_name)); + + if (paging_file_name[0] == 0) { + printf("*** WARNING: running without paging area!\n"); + return FALSE; + } + } + + /* + * Our caller will become the default pager - later + */ + + return TRUE; +} +#endif diff --git a/serverboot/default_pager.c b/serverboot/default_pager.c new file mode 100644 index 00000000..198bb224 --- /dev/null +++ b/serverboot/default_pager.c @@ -0,0 +1,3844 @@ +/* + * Mach Operating System + * Copyright (c) 1993-1989 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * Default pager. Pages to paging partition. + * + * MUST BE ABLE TO ALLOCATE WIRED-DOWN MEMORY!!! 
+ */ + +#include <mach.h> +#include <mach/message.h> +#include <mach/notify.h> +#include <mach/mig_errors.h> +#include <mach/thread_switch.h> +#include <mach/task_info.h> +#include <mach/default_pager_types.h> + +#include <cthreads.h> + +#include <device/device_types.h> +#include <device/device.h> + +#include <queue.h> +#include <wiring.h> + +#include <assert.h> +#include <stdio.h> + +#include "file_io.h" + +#define debug 0 + +extern void *kalloc(); + +static char my_name[] = "(default pager):"; + +static struct mutex printf_lock = MUTEX_INITIALIZER; + +#define dprintf(f, x...) \ + ({ mutex_lock (&printf_lock); printf (f , ##x); fflush (stdout); mutex_unlock (&printf_lock); }) +#define ddprintf(f, x...) ((void)0) + +/* + * parallel vs serial switch + */ +#define PARALLEL 1 + +#if 0 +#define CHECKSUM 1 +#endif + +#define USE_PRECIOUS 1 + +#define ptoa(p) ((p)*vm_page_size) +#define atop(a) ((a)/vm_page_size) + +/* + + */ +/* + * Bitmap allocation. + */ +typedef unsigned int bm_entry_t; +#define NB_BM 32 +#define BM_MASK 0xffffffff + +#define howmany(a,b) (((a) + (b) - 1)/(b)) + +/* + * Value to indicate no block assigned + */ +#define NO_BLOCK ((vm_offset_t)-1) + +/* + * 'Partition' structure for each paging area. + * Controls allocation of blocks within paging area. 
+ */ +struct part { + struct mutex p_lock; /* for bitmap/free */ + vm_size_t total_size; /* total number of blocks */ + vm_size_t free; /* number of blocks free */ + unsigned int id; /* named lookup */ + bm_entry_t *bitmap; /* allocation map */ + boolean_t going_away; /* destroy attempt in progress */ + struct file_direct *file; /* file paged to */ +}; +typedef struct part *partition_t; + +struct { + struct mutex lock; + int n_partitions; + partition_t *partition_list;/* array, for quick mapping */ +} all_partitions; /* list of all such */ + +typedef unsigned char p_index_t; + +#define P_INDEX_INVALID ((p_index_t)-1) + +#define no_partition(x) ((x) == P_INDEX_INVALID) + +partition_t partition_of(x) + int x; +{ + if (x >= all_partitions.n_partitions || x < 0) + panic("partition_of x%x", x); + return all_partitions.partition_list[x]; +} + +void set_partition_of(x, p) + int x; + partition_t p; +{ + if (x >= all_partitions.n_partitions || x < 0) + panic("set_partition_of x%x", x); + all_partitions.partition_list[x] = p; +} + +/* + * Simple mapping from (file)NAME to id + * Saves space, filenames can be long. 
+ */ +unsigned int +part_id(name) + unsigned char *name; +{ + register unsigned int len, id, xorid; + + len = strlen(name); + id = xorid = 0; + while (len--) { + xorid ^= *name; + id += *name++; + } + return (id << 8) | xorid; +} + +partition_init() +{ + mutex_init(&all_partitions.lock); + all_partitions.n_partitions = 0; +} + +static partition_t +new_partition (const char *name, struct file_direct *fdp, + int check_linux_signature) +{ + register partition_t part; + register vm_size_t size, bmsize; + vm_offset_t raddr; + mach_msg_type_number_t rsize; + int rc; + unsigned int id = part_id(name); + + mutex_lock(&all_partitions.lock); + { + unsigned int i; + for (i = 0; i < all_partitions.n_partitions; i++) + { + part = partition_of(i); + if (part && part->id == id) + { + printf ("(default pager): Already paging to partition %s!\n", + name); + mutex_unlock(&all_partitions.lock); + return 0; + } + } + } + mutex_unlock(&all_partitions.lock); + + size = atop(fdp->fd_size * fdp->fd_bsize); + bmsize = howmany(size, NB_BM) * sizeof(bm_entry_t); + + part = (partition_t) kalloc(sizeof(struct part)); + mutex_init(&part->p_lock); + part->total_size = size; + part->free = size; + part->id = id; + part->bitmap = (bm_entry_t *)kalloc(bmsize); + part->going_away= FALSE; + part->file = fdp; + + bzero((char *)part->bitmap, bmsize); + + if (check_linux_signature < 0) + { + printf("(default pager): " + "Paging to raw partition %s (%uk paging space)\n", + name, part->total_size * (vm_page_size / 1024)); + return part; + } + +#define LINUX_PAGE_SIZE 4096 /* size of pages in Linux swap partitions */ + rc = page_read_file_direct(part->file, + 0, LINUX_PAGE_SIZE, + &raddr, + &rsize); + if (rc) + panic("(default pager): cannot read first page of %s! rc=%#x\n", + name, rc); + while (rsize < LINUX_PAGE_SIZE) + { + /* Filesystem block size is smaller than page size, + so we must do several reads to get the whole page. 
*/ + vm_address_t baddr, bsize; + rc = page_read_file_direct(part->file, + rsize, LINUX_PAGE_SIZE-rsize, + &baddr, + &bsize); + if (rc) + panic("(default pager): " + "cannot read first page of %s! rc=%#x at %#x\n", + name, rc, rsize); + + memcpy ((char *) raddr + rsize, (void *) baddr, bsize); + rsize += bsize; + vm_deallocate (mach_task_self (), baddr, bsize); + } + + if (!memcmp("SWAP-SPACE", (char *) raddr + LINUX_PAGE_SIZE-10, 10)) + { + /* The partition's first page has a Linux swap signature. + This means the beginning of the page contains a bitmap + of good pages, and all others are bad. */ + unsigned int i, j, bad, max; + int waste; + + printf("(default pager): Found Linux 2.0 swap signature in %s\n", + name); + + /* The first page, and the pages corresponding to the bits + occupied by the signature in the final 10 bytes of the page, + are always unavailable ("bad"). */ + *(u_int32_t *)raddr &= ~(u_int32_t) 1; + memset((char *) raddr + LINUX_PAGE_SIZE-10, 0, 10); + + max = LINUX_PAGE_SIZE / sizeof(u_int32_t); + if (max > (part->total_size + 31) / 32) + max = (part->total_size + 31) / 32; + + bad = 0; + for (i = 0; i < max; ++i) + { + u_int32_t bm = ((u_int32_t *) raddr)[i]; + if (bm == ~(u_int32_t) 0) + continue; + /* There are some zero bits in this word. */ + for (j = 0; j < 32; ++j) + if ((bm & (1 << j)) == 0) + { + unsigned int p = i*32 + j; + if (p >= part->total_size) + break; + ++bad; + part->bitmap[p / NB_BM] |= 1 << (p % NB_BM); + } + } + part->free -= bad; + + --bad; /* Don't complain about first page. */ + waste = part->total_size - (8 * (LINUX_PAGE_SIZE-10)); + if (waste > 0) + { + /* The wasted pages were already marked "bad". 
*/ + bad -= waste; + if (bad > 0) + printf("\ +(default pager): Paging to %s, %dk swap-space (%dk bad, %dk wasted at end)\n", + name, + part->free * (LINUX_PAGE_SIZE / 1024), + bad * (LINUX_PAGE_SIZE / 1024), + waste * (LINUX_PAGE_SIZE / 1024)); + else + printf("\ +(default pager): Paging to %s, %dk swap-space (%dk wasted at end)\n", + name, + part->free * (LINUX_PAGE_SIZE / 1024), + waste * (LINUX_PAGE_SIZE / 1024)); + } + else if (bad > 0) + printf("\ +(default pager): Paging to %s, %dk swap-space (excludes %dk marked bad)\n", + name, + part->free * (LINUX_PAGE_SIZE / 1024), + bad * (LINUX_PAGE_SIZE / 1024)); + else + printf("\ +(default pager): Paging to %s, %dk swap-space\n", + name, + part->free * (LINUX_PAGE_SIZE / 1024)); + } + else if (!memcmp("SWAPSPACE2", + (char *) raddr + LINUX_PAGE_SIZE-10, 10)) + { + struct + { + u_int8_t bootbits[1024]; + u_int32_t version; + u_int32_t last_page; + u_int32_t nr_badpages; + u_int32_t padding[125]; + u_int32_t badpages[1]; + } *hdr = (void *) raddr; + + printf("\ +(default pager): Found Linux 2.2 swap signature (v%u) in %s...", + hdr->version, name); + + part->bitmap[0] |= 1; /* first page unusable */ + part->free--; + + switch (hdr->version) + { + default: + if (check_linux_signature) + { + printf ("version %u unknown! SKIPPING %s!\n", + hdr->version, + name); + vm_deallocate(mach_task_self(), raddr, rsize); + kfree(part->bitmap, bmsize); + kfree(part, sizeof *part); + return 0; + } + else + printf ("version %u unknown! IGNORING SIGNATURE PAGE!" + " %dk swap-space\n", + hdr->version, + part->free * (LINUX_PAGE_SIZE / 1024)); + break; + + case 1: + { + unsigned int waste, i; + if (hdr->last_page > part->total_size) + { + printf ("signature says %uk, partition has only %uk! 
", + hdr->last_page * (LINUX_PAGE_SIZE / 1024), + part->total_size * (LINUX_PAGE_SIZE / 1024)); + waste = 0; + } + else + { + waste = part->total_size - hdr->last_page; + part->total_size = hdr->last_page; + part->free = part->total_size - 1; + } + for (i = 0; i < hdr->nr_badpages; ++i) + { + const u_int32_t bad = hdr->badpages[i]; + part->bitmap[bad / NB_BM] |= 1 << (bad % NB_BM); + part->free--; + } + printf ("%uk swap-space", + part->free * (LINUX_PAGE_SIZE / 1024)); + if (hdr->nr_badpages != 0) + printf (" (excludes %uk marked bad)", + hdr->nr_badpages * (LINUX_PAGE_SIZE / 1024)); + if (waste != 0) + printf (" (excludes %uk at end of partition)", + waste * (LINUX_PAGE_SIZE / 1024)); + printf ("\n"); + } + } + } + else if (check_linux_signature) + { + printf ("(default pager): " + "Cannot find Linux swap signature page! " + "SKIPPING %s (%uk partition)!", + name, part->total_size * (vm_page_size / 1024)); + kfree(part->bitmap, bmsize); + kfree(part, sizeof *part); + part = 0; + } + else + printf("(default pager): " + "Paging to raw partition %s (%uk paging space)\n", + name, part->total_size * (vm_page_size / 1024)); + + vm_deallocate(mach_task_self(), raddr, rsize); + + return part; +} + +/* + * Create a partition descriptor, + * add it to the list of all such. + * size is in BYTES. + */ +void +create_paging_partition(const char *name, + struct file_direct *fdp, int isa_file, + int linux_signature) +{ + register partition_t part; + + part = new_partition (name, fdp, linux_signature); + if (!part) + return; + + mutex_lock(&all_partitions.lock); + { + register int i; + + for (i = 0; i < all_partitions.n_partitions; i++) + if (partition_of(i) == 0) break; + + if (i == all_partitions.n_partitions) { + register partition_t *new_list, *old_list; + register int n; + + n = i ? 
(i<<1) : 2; + new_list = (partition_t *) + kalloc( n * sizeof(partition_t) ); + if (new_list == 0) no_paging_space(TRUE); + bzero(new_list, n*sizeof(partition_t)); + if (i) { + old_list = all_partitions.partition_list; + bcopy(old_list, new_list, i*sizeof(partition_t)); + } + all_partitions.partition_list = new_list; + all_partitions.n_partitions = n; + if (i) kfree(old_list, i*sizeof(partition_t)); + } + set_partition_of(i, part); + } + mutex_unlock(&all_partitions.lock); + +#if 0 + dprintf("%s Added paging %s %s\n", my_name, + (isa_file) ? "file" : "device", name); +#endif + overcommitted(TRUE, part->free); +} + +/* + * Choose the most appropriate default partition + * for an object of SIZE bytes. + * Return the partition locked, unless + * the object has no CUR_PARTition. + */ +p_index_t +choose_partition(size, cur_part) + unsigned int size; + register p_index_t cur_part; +{ + register partition_t part; + register boolean_t found = FALSE; + register int i; + + mutex_lock(&all_partitions.lock); + for (i = 0; i < all_partitions.n_partitions; i++) { + + /* the undesireable one ? */ + if (i == cur_part) + continue; + +ddprintf ("choose_partition(%x,%d,%d)\n",size,cur_part,i); + /* one that was removed ? */ + if ((part = partition_of(i)) == 0) + continue; + + /* one that is being removed ? */ + if (part->going_away) + continue; + + /* is it big enough ? */ + mutex_lock(&part->p_lock); + if (ptoa(part->free) >= size) { + if (cur_part != P_INDEX_INVALID) { + mutex_unlock(&all_partitions.lock); + return (p_index_t)i; + } else + found = TRUE; + } + mutex_unlock(&part->p_lock); + + if (found) break; + } + mutex_unlock(&all_partitions.lock); + return (found) ? (p_index_t)i : P_INDEX_INVALID; +} + +/* + * Allocate a page in a paging partition + * The partition is returned unlocked. 
+ */ +vm_offset_t +pager_alloc_page(pindex, lock_it) + p_index_t pindex; +{ + register int bm_e; + register int bit; + register int limit; + register bm_entry_t *bm; + partition_t part; + static char here[] = "%spager_alloc_page"; + + if (no_partition(pindex)) + return (NO_BLOCK); +ddprintf ("pager_alloc_page(%d,%d)\n",pindex,lock_it); + part = partition_of(pindex); + + /* unlikely, but possible deadlock against destroy_partition */ + if (!part || part->going_away) + return (NO_BLOCK); + + if (lock_it) + mutex_lock(&part->p_lock); + + if (part->free == 0) { + /* out of paging space */ + mutex_unlock(&part->p_lock); + return (NO_BLOCK); + } + + limit = howmany(part->total_size, NB_BM); + bm = part->bitmap; + for (bm_e = 0; bm_e < limit; bm_e++, bm++) + if (*bm != BM_MASK) + break; + + if (bm_e == limit) + panic(here,my_name); + + /* + * Find and set the proper bit + */ + { + register bm_entry_t b = *bm; + + for (bit = 0; bit < NB_BM; bit++) + if ((b & (1<<bit)) == 0) + break; + if (bit == NB_BM) + panic(here,my_name); + + *bm = b | (1<<bit); + part->free--; + + } + + mutex_unlock(&part->p_lock); + + return (bm_e*NB_BM+bit); +} + +/* + * Deallocate a page in a paging partition + */ +void +pager_dealloc_page(pindex, page, lock_it) + p_index_t pindex; + register vm_offset_t page; +{ + register partition_t part; + register int bit, bm_e; + + /* be paranoid */ + if (no_partition(pindex)) + panic("%sdealloc_page",my_name); +ddprintf ("pager_dealloc_page(%d,%x,%d)\n",pindex,page,lock_it); + part = partition_of(pindex); + + if (page >= part->total_size) + panic("%sdealloc_page",my_name); + + bm_e = page / NB_BM; + bit = page % NB_BM; + + if (lock_it) + mutex_lock(&part->p_lock); + + part->bitmap[bm_e] &= ~(1<<bit); + part->free++; + + if (lock_it) + mutex_unlock(&part->p_lock); +} + +/* + + */ +/* + * Allocation info for each paging object. + * + * Most operations, even pager_write_offset and pager_put_checksum, + * just need a read lock. 
Higher-level considerations prevent
 * conflicting operations on a single page.  The lock really protects
 * the underlying size and block map memory, so pager_extend needs a
 * write lock.
 *
 * An object can now span multiple paging partitions.  The allocation
 * info we keep is a pair (offset,p_index) where the index is in the
 * array of all partition ptrs, and the offset is partition-relative.
 * Size wise we are doing ok fitting the pair into a single integer:
 * the offset really is in pages so we have vm_page_size bits available
 * for the partition index.
 */
#define	DEBUG_READER_CONFLICTS	0

#if	DEBUG_READER_CONFLICTS
int	default_pager_read_conflicts = 0;
#endif

/*
 * One block map entry.  In a first-level (direct) map, or in a
 * second-level map, the `block' view is used; in the top level of a
 * two-level map the `indirect' view points at a second-level map.
 */
union dp_map {

	struct {
	  unsigned int	p_offset : 24,	/* page number within the partition */
			p_index : 8;	/* partition number (a p_index_t) */
	} block;

	union dp_map	*indirect;	/* second-level map, or 0 if none */
};
typedef union dp_map	*dp_map_t;

/* quick check for part==block==invalid */
/* (An entry is unassigned when its pointer view equals NO_BLOCK.) */
#define	no_block(e)		((e).indirect == (dp_map_t)NO_BLOCK)
#define	invalidate_block(e)	((e).indirect = (dp_map_t)NO_BLOCK)

/*
 * Per paging object state: the block map and the partition new
 * blocks are currently taken from.
 */
struct dpager {
	struct mutex	lock;		/* lock for extending block map */
					/* XXX should be read-write lock */
#if	DEBUG_READER_CONFLICTS
	int		readers;
	boolean_t	writer;
#endif
	dp_map_t	map;		/* block map */
	vm_size_t	size;		/* size of paging object, in pages */
	p_index_t	cur_partition;	/* may be P_INDEX_INVALID */
#ifdef	CHECKSUM
	vm_offset_t	*checksum;	/* checksum - parallel to block map */
#define	NO_CHECKSUM	((vm_offset_t)-1)
#endif	CHECKSUM
};
typedef struct dpager	*dpager_t;

/*
 * A paging object uses either a one- or a two-level map of offsets
 * into a paging partition.
+ */ +#define PAGEMAP_ENTRIES 64 + /* number of pages in a second-level map */ +#define PAGEMAP_SIZE(npgs) ((npgs)*sizeof(vm_offset_t)) + +#define INDIRECT_PAGEMAP_ENTRIES(npgs) \ + ((((npgs)-1)/PAGEMAP_ENTRIES) + 1) +#define INDIRECT_PAGEMAP_SIZE(npgs) \ + (INDIRECT_PAGEMAP_ENTRIES(npgs) * sizeof(vm_offset_t *)) +#define INDIRECT_PAGEMAP(size) \ + (size > PAGEMAP_ENTRIES) + +#define ROUNDUP_TO_PAGEMAP(npgs) \ + (((npgs) + PAGEMAP_ENTRIES - 1) & ~(PAGEMAP_ENTRIES - 1)) + +/* + * Object sizes are rounded up to the next power of 2, + * unless they are bigger than a given maximum size. + */ +vm_size_t max_doubled_size = 4 * 1024 * 1024; /* 4 meg */ + +/* + * Attach a new paging object to a paging partition + */ +void +pager_alloc(pager, part, size) + register dpager_t pager; + p_index_t part; + register vm_size_t size; /* in BYTES */ +{ + register int i; + register dp_map_t mapptr, emapptr; + + mutex_init(&pager->lock); +#if DEBUG_READER_CONFLICTS + pager->readers = 0; + pager->writer = FALSE; +#endif + pager->cur_partition = part; + + /* + * Convert byte size to number of pages, then increase to the nearest + * power of 2. 
+ */ + size = atop(size); + if (size <= atop(max_doubled_size)) { + i = 1; + while (i < size) + i <<= 1; + size = i; + } else + size = ROUNDUP_TO_PAGEMAP(size); + + /* + * Allocate and initialize the block map + */ + { + register vm_size_t alloc_size; + dp_map_t init_value; + + if (INDIRECT_PAGEMAP(size)) { + alloc_size = INDIRECT_PAGEMAP_SIZE(size); + init_value = (dp_map_t)0; + } else { + alloc_size = PAGEMAP_SIZE(size); + init_value = (dp_map_t)NO_BLOCK; + } + + mapptr = (dp_map_t) kalloc(alloc_size); + for (emapptr = &mapptr[(alloc_size-1) / sizeof(vm_offset_t)]; + emapptr >= mapptr; + emapptr--) + emapptr->indirect = init_value; + + } + pager->map = mapptr; + pager->size = size; + +#ifdef CHECKSUM + if (INDIRECT_PAGEMAP(size)) { + mapptr = (vm_offset_t *) + kalloc(INDIRECT_PAGEMAP_SIZE(size)); + for (i = INDIRECT_PAGEMAP_ENTRIES(size); --i >= 0;) + mapptr[i] = 0; + } else { + mapptr = (vm_offset_t *) kalloc(PAGEMAP_SIZE(size)); + for (i = 0; i < size; i++) + mapptr[i] = NO_CHECKSUM; + } + pager->checksum = mapptr; +#endif CHECKSUM +} + +/* + * Return size (in bytes) of space actually allocated to this pager. + * The pager is read-locked. + */ + +vm_size_t +pager_allocated(pager) + register dpager_t pager; +{ + vm_size_t size; + register dp_map_t map, emap; + vm_size_t asize; + + size = pager->size; /* in pages */ + asize = 0; /* allocated, in pages */ + map = pager->map; + + if (INDIRECT_PAGEMAP(size)) { + for (emap = &map[INDIRECT_PAGEMAP_ENTRIES(size)]; + map < emap; map++) { + + register dp_map_t map2, emap2; + + if ((map2 = map->indirect) == 0) + continue; + + for (emap2 = &map2[PAGEMAP_ENTRIES]; + map2 < emap2; map2++) + if ( ! no_block(*map2) ) + asize++; + + } + } else { + for (emap = &map[size]; map < emap; map++) + if ( ! no_block(*map) ) + asize++; + } + + return ptoa(asize); +} + +/* + * Find offsets (in the object) of pages actually allocated to this pager. + * Returns the number of allocated pages, whether or not they all fit. 
+ * The pager is read-locked. + */ + +unsigned int +pager_pages(pager, pages, numpages) + dpager_t pager; + register default_pager_page_t *pages; + unsigned int numpages; +{ + vm_size_t size; + dp_map_t map, emap; + unsigned int actual; + vm_offset_t offset; + + size = pager->size; /* in pages */ + map = pager->map; + actual = 0; + offset = 0; + + if (INDIRECT_PAGEMAP(size)) { + for (emap = &map[INDIRECT_PAGEMAP_ENTRIES(size)]; + map < emap; map++) { + + register dp_map_t map2, emap2; + + if ((map2 = map->indirect) == 0) { + offset += vm_page_size * PAGEMAP_ENTRIES; + continue; + } + for (emap2 = &map2[PAGEMAP_ENTRIES]; + map2 < emap2; map2++) + if ( ! no_block(*map2) ) { + if (actual++ < numpages) + pages++->dpp_offset = offset; + } + offset += vm_page_size; + } + } else { + for (emap = &map[size]; map < emap; map++) + if ( ! no_block(*map) ) { + if (actual++ < numpages) + pages++->dpp_offset = offset; + } + offset += vm_page_size; + } + return actual; +} + +/* + * Extend the map for a paging object. + * + * XXX This implementation can allocate an arbitrary large amount + * of wired memory when extending a big block map. Because vm-privileged + * threads call pager_extend, this can crash the system by exhausting + * system memory. + */ +void +pager_extend(pager, new_size) + register dpager_t pager; + register vm_size_t new_size; /* in pages */ +{ + register dp_map_t new_mapptr; + register dp_map_t old_mapptr; + register int i; + register vm_size_t old_size; + + mutex_lock(&pager->lock); /* XXX lock_write */ +#if DEBUG_READER_CONFLICTS + pager->writer = TRUE; +#endif + /* + * Double current size until we cover new size. + * If object is 'too big' just use new size. + */ + old_size = pager->size; + + if (new_size <= atop(max_doubled_size)) { + i = old_size; + while (i < new_size) + i <<= 1; + new_size = i; + } else + new_size = ROUNDUP_TO_PAGEMAP(new_size); + + if (INDIRECT_PAGEMAP(old_size)) { + /* + * Pager already uses two levels. 
Allocate + * a larger indirect block. + */ + new_mapptr = (dp_map_t) + kalloc(INDIRECT_PAGEMAP_SIZE(new_size)); + old_mapptr = pager->map; + for (i = 0; i < INDIRECT_PAGEMAP_ENTRIES(old_size); i++) + new_mapptr[i] = old_mapptr[i]; + for (; i < INDIRECT_PAGEMAP_ENTRIES(new_size); i++) + new_mapptr[i].indirect = (dp_map_t)0; + kfree((char *)old_mapptr, INDIRECT_PAGEMAP_SIZE(old_size)); + pager->map = new_mapptr; + pager->size = new_size; +#ifdef CHECKSUM + new_mapptr = (vm_offset_t *) + kalloc(INDIRECT_PAGEMAP_SIZE(new_size)); + old_mapptr = pager->checksum; + for (i = 0; i < INDIRECT_PAGEMAP_ENTRIES(old_size); i++) + new_mapptr[i] = old_mapptr[i]; + for (; i < INDIRECT_PAGEMAP_ENTRIES(new_size); i++) + new_mapptr[i] = 0; + kfree((char *)old_mapptr, INDIRECT_PAGEMAP_SIZE(old_size)); + pager->checksum = new_mapptr; +#endif CHECKSUM +#if DEBUG_READER_CONFLICTS + pager->writer = FALSE; +#endif + mutex_unlock(&pager->lock); + ddprintf ("pager_extend 1 mapptr %x [3b] = %x\n", new_mapptr, + new_mapptr[0x3b]); + if (new_mapptr[0x3b].indirect > 0x10000 + && new_mapptr[0x3b].indirect != NO_BLOCK) + panic ("debug panic"); + return; + } + + if (INDIRECT_PAGEMAP(new_size)) { + /* + * Changing from direct map to indirect map. + * Allocate both indirect and direct map blocks, + * since second-level (direct) block must be + * full size (PAGEMAP_SIZE(PAGEMAP_ENTRIES)). + */ + + /* + * Allocate new second-level map first. + */ + new_mapptr = (dp_map_t) kalloc(PAGEMAP_SIZE(PAGEMAP_ENTRIES)); + old_mapptr = pager->map; + for (i = 0; i < old_size; i++) + new_mapptr[i] = old_mapptr[i]; + for (; i < PAGEMAP_ENTRIES; i++) + invalidate_block(new_mapptr[i]); + kfree((char *)old_mapptr, PAGEMAP_SIZE(old_size)); + old_mapptr = new_mapptr; + + ddprintf ("pager_extend 2 mapptr %x [3b] = %x\n", new_mapptr, + new_mapptr[0x3b]); + if (new_mapptr[0x3b].indirect > 0x10000 + && new_mapptr[0x3b].indirect != NO_BLOCK) + panic ("debug panic"); + + /* + * Now allocate indirect map. 
+ */ + new_mapptr = (dp_map_t) + kalloc(INDIRECT_PAGEMAP_SIZE(new_size)); + new_mapptr[0].indirect = old_mapptr; + for (i = 1; i < INDIRECT_PAGEMAP_ENTRIES(new_size); i++) + new_mapptr[i].indirect = 0; + pager->map = new_mapptr; + pager->size = new_size; +#ifdef CHECKSUM + /* + * Allocate new second-level map first. + */ + new_mapptr = (vm_offset_t *)kalloc(PAGEMAP_SIZE(PAGEMAP_ENTRIES)); + old_mapptr = pager->checksum; + for (i = 0; i < old_size; i++) + new_mapptr[i] = old_mapptr[i]; + for (; i < PAGEMAP_ENTRIES; i++) + new_mapptr[i] = NO_CHECKSUM; + kfree((char *)old_mapptr, PAGEMAP_SIZE(old_size)); + old_mapptr = new_mapptr; + + /* + * Now allocate indirect map. + */ + new_mapptr = (vm_offset_t *) + kalloc(INDIRECT_PAGEMAP_SIZE(new_size)); + new_mapptr[0] = (vm_offset_t) old_mapptr; + for (i = 1; i < INDIRECT_PAGEMAP_ENTRIES(new_size); i++) + new_mapptr[i] = 0; + pager->checksum = new_mapptr; +#endif CHECKSUM +#if DEBUG_READER_CONFLICTS + pager->writer = FALSE; +#endif + mutex_unlock(&pager->lock); + return; + } + /* + * Enlarging a direct block. + */ + new_mapptr = (dp_map_t) kalloc(PAGEMAP_SIZE(new_size)); + old_mapptr = pager->map; + for (i = 0; i < old_size; i++) + new_mapptr[i] = old_mapptr[i]; + for (; i < new_size; i++) + invalidate_block(new_mapptr[i]); + kfree((char *)old_mapptr, PAGEMAP_SIZE(old_size)); + pager->map = new_mapptr; + pager->size = new_size; +#ifdef CHECKSUM + new_mapptr = (vm_offset_t *) + kalloc(PAGEMAP_SIZE(new_size)); + old_mapptr = pager->checksum; + for (i = 0; i < old_size; i++) + new_mapptr[i] = old_mapptr[i]; + for (; i < new_size; i++) + new_mapptr[i] = NO_CHECKSUM; + kfree((char *)old_mapptr, PAGEMAP_SIZE(old_size)); + pager->checksum = new_mapptr; +#endif CHECKSUM +#if DEBUG_READER_CONFLICTS + pager->writer = FALSE; +#endif + mutex_unlock(&pager->lock); +} + +/* + * Given an offset within a paging object, find the + * corresponding block within the paging partition. + * Return NO_BLOCK if none allocated. 
+ */ +union dp_map +pager_read_offset(pager, offset) + register dpager_t pager; + vm_offset_t offset; +{ + register vm_offset_t f_page; + union dp_map pager_offset; + + f_page = atop(offset); + +#if DEBUG_READER_CONFLICTS + if (pager->readers > 0) + default_pager_read_conflicts++; /* would have proceeded with + read/write lock */ +#endif + mutex_lock(&pager->lock); /* XXX lock_read */ +#if DEBUG_READER_CONFLICTS + pager->readers++; +#endif + if (f_page >= pager->size) + { + ddprintf ("%spager_read_offset pager %x: bad page %d >= size %d", + my_name, pager, f_page, pager->size); + return (union dp_map) (union dp_map *) NO_BLOCK; +#if 0 + panic("%spager_read_offset",my_name); +#endif + } + + if (INDIRECT_PAGEMAP(pager->size)) { + register dp_map_t mapptr; + + mapptr = pager->map[f_page/PAGEMAP_ENTRIES].indirect; + if (mapptr == 0) + invalidate_block(pager_offset); + else + pager_offset = mapptr[f_page%PAGEMAP_ENTRIES]; + } + else { + pager_offset = pager->map[f_page]; + } + +#if DEBUG_READER_CONFLICTS + pager->readers--; +#endif + mutex_unlock(&pager->lock); + return (pager_offset); +} + +#if USE_PRECIOUS +/* + * Release a single disk block. + */ +pager_release_offset(pager, offset) + register dpager_t pager; + vm_offset_t offset; +{ + register union dp_map entry; + + offset = atop(offset); + + mutex_lock(&pager->lock); /* XXX lock_read */ + + if (INDIRECT_PAGEMAP(pager->size)) { + register dp_map_t mapptr; + + mapptr = pager->map[offset / PAGEMAP_ENTRIES].indirect; + entry = mapptr[offset % PAGEMAP_ENTRIES]; + invalidate_block(mapptr[offset % PAGEMAP_ENTRIES]); + } else { + entry = pager->map[offset]; + invalidate_block(pager->map[offset]); + } + + mutex_unlock(&pager->lock); + + pager_dealloc_page(entry.block.p_index, entry.block.p_offset, TRUE); +} +#endif /*USE_PRECIOUS*/ + + +/* + * Move a page from one partition to another + * New partition is locked, old partition is + * locked unless LOCK_OLD sez otherwise. 
+ */ +union dp_map +pager_move_page(block) + union dp_map block; +{ + partition_t old_part, new_part; + p_index_t old_pindex, new_pindex; + union dp_map ret; + vm_size_t size; + vm_offset_t raddr, offset, new_offset; + kern_return_t rc; + static char here[] = "%spager_move_page"; + + old_pindex = block.block.p_index; + invalidate_block(ret); + + /* See if we have room to put it anywhere else */ + new_pindex = choose_partition( ptoa(1), old_pindex); + if (no_partition(new_pindex)) + return ret; + + /* this unlocks the new partition */ + new_offset = pager_alloc_page(new_pindex, FALSE); + if (new_offset == NO_BLOCK) + panic(here,my_name); + + /* + * Got the resources, now move the data + */ +ddprintf ("pager_move_page(%x,%d,%d)\n",block.block.p_offset,old_pindex,new_pindex); + old_part = partition_of(old_pindex); + offset = ptoa(block.block.p_offset); + rc = page_read_file_direct (old_part->file, + offset, + vm_page_size, + &raddr, + &size); + if (rc != 0) + panic(here,my_name); + + /* release old */ + pager_dealloc_page(old_pindex, block.block.p_offset, FALSE); + + new_part = partition_of(new_pindex); + offset = ptoa(new_offset); + rc = page_write_file_direct (new_part->file, + offset, + raddr, + size, + &size); + if (rc != 0) + panic(here,my_name); + + (void) vm_deallocate( mach_task_self(), raddr, size); + + ret.block.p_offset = new_offset; + ret.block.p_index = new_pindex; + + return ret; +} + +#ifdef CHECKSUM +/* + * Return the checksum for a block. 
+ */ +int +pager_get_checksum(pager, offset) + register dpager_t pager; + vm_offset_t offset; +{ + register vm_offset_t f_page; + int checksum; + + f_page = atop(offset); + + mutex_lock(&pager->lock); /* XXX lock_read */ + if (f_page >= pager->size) + panic("%spager_get_checksum",my_name); + + if (INDIRECT_PAGEMAP(pager->size)) { + register vm_offset_t *mapptr; + + mapptr = (vm_offset_t *)pager->checksum[f_page/PAGEMAP_ENTRIES]; + if (mapptr == 0) + checksum = NO_CHECKSUM; + else + checksum = mapptr[f_page%PAGEMAP_ENTRIES]; + } + else { + checksum = pager->checksum[f_page]; + } + + mutex_unlock(&pager->lock); + return (checksum); +} + +/* + * Remember the checksum for a block. + */ +int +pager_put_checksum(pager, offset, checksum) + register dpager_t pager; + vm_offset_t offset; + int checksum; +{ + register vm_offset_t f_page; + static char here[] = "%spager_put_checksum"; + + f_page = atop(offset); + + mutex_lock(&pager->lock); /* XXX lock_read */ + if (f_page >= pager->size) + panic(here,my_name); + + if (INDIRECT_PAGEMAP(pager->size)) { + register vm_offset_t *mapptr; + + mapptr = (vm_offset_t *)pager->checksum[f_page/PAGEMAP_ENTRIES]; + if (mapptr == 0) + panic(here,my_name); + + mapptr[f_page%PAGEMAP_ENTRIES] = checksum; + } + else { + pager->checksum[f_page] = checksum; + } + mutex_unlock(&pager->lock); +} + +/* + * Compute a checksum - XOR each 32-bit word. + */ +int +compute_checksum(addr, size) + vm_offset_t addr; + vm_size_t size; +{ + register int checksum = NO_CHECKSUM; + register int *ptr; + register int count; + + ptr = (int *)addr; + count = size / sizeof(int); + + while (--count >= 0) + checksum ^= *ptr++; + + return (checksum); +} +#endif CHECKSUM + +/* + * Given an offset within a paging object, find the + * corresponding block within the paging partition. + * Allocate a new block if necessary. + * + * WARNING: paging objects apparently may be extended + * without notice! 
+ */ +union dp_map +pager_write_offset(pager, offset) + register dpager_t pager; + vm_offset_t offset; +{ + register vm_offset_t f_page; + register dp_map_t mapptr; + register union dp_map block; + + invalidate_block(block); + + f_page = atop(offset); + +#if DEBUG_READER_CONFLICTS + if (pager->readers > 0) + default_pager_read_conflicts++; /* would have proceeded with + read/write lock */ +#endif + mutex_lock(&pager->lock); /* XXX lock_read */ +#if DEBUG_READER_CONFLICTS + pager->readers++; +#endif + + /* Catch the case where we had no initial fit partition + for this object, but one was added later on */ + if (no_partition(pager->cur_partition)) { + p_index_t new_part; + vm_size_t size; + + size = (f_page > pager->size) ? f_page : pager->size; + new_part = choose_partition(ptoa(size), P_INDEX_INVALID); + if (no_partition(new_part)) + new_part = choose_partition(ptoa(1), P_INDEX_INVALID); + if (no_partition(new_part)) + /* give up right now to avoid confusion */ + goto out; + else + pager->cur_partition = new_part; + } + + while (f_page >= pager->size) { + ddprintf ("pager_write_offset: extending: %x %x\n", f_page, pager->size); + + /* + * Paging object must be extended. + * Remember that offset is 0-based, but size is 1-based. 
+ */ +#if DEBUG_READER_CONFLICTS + pager->readers--; +#endif + mutex_unlock(&pager->lock); + pager_extend(pager, f_page + 1); +#if DEBUG_READER_CONFLICTS + if (pager->readers > 0) + default_pager_read_conflicts++; /* would have proceeded with + read/write lock */ +#endif + mutex_lock(&pager->lock); /* XXX lock_read */ +#if DEBUG_READER_CONFLICTS + pager->readers++; +#endif + ddprintf ("pager_write_offset: done extending: %x %x\n", f_page, pager->size); + } + + if (INDIRECT_PAGEMAP(pager->size)) { + ddprintf ("pager_write_offset: indirect\n"); + mapptr = pager->map[f_page/PAGEMAP_ENTRIES].indirect; + if (mapptr == 0) { + /* + * Allocate the indirect block + */ + register int i; + ddprintf ("pager_write_offset: allocating indirect\n"); + + mapptr = (dp_map_t) kalloc(PAGEMAP_SIZE(PAGEMAP_ENTRIES)); + if (mapptr == 0) { + /* out of space! */ + no_paging_space(TRUE); + goto out; + } + pager->map[f_page/PAGEMAP_ENTRIES].indirect = mapptr; + for (i = 0; i < PAGEMAP_ENTRIES; i++) + invalidate_block(mapptr[i]); +#ifdef CHECKSUM + { + register vm_offset_t *cksumptr; + register int j; + + cksumptr = (vm_offset_t *) + kalloc(PAGEMAP_SIZE(PAGEMAP_ENTRIES)); + if (cksumptr == 0) { + /* out of space! */ + no_paging_space(TRUE); + goto out; + } + pager->checksum[f_page/PAGEMAP_ENTRIES] + = (vm_offset_t)cksumptr; + for (j = 0; j < PAGEMAP_ENTRIES; j++) + cksumptr[j] = NO_CHECKSUM; + } +#endif CHECKSUM + } + f_page %= PAGEMAP_ENTRIES; + } + else { + mapptr = pager->map; + } + + block = mapptr[f_page]; + ddprintf ("pager_write_offset: block starts as %x[%x] %x\n", mapptr, f_page, block); + if (no_block(block)) { + vm_offset_t off; + + /* get room now */ + off = pager_alloc_page(pager->cur_partition, TRUE); + if (off == NO_BLOCK) { + /* + * Before giving up, try all other partitions. 
+ */ + p_index_t new_part; + + ddprintf ("pager_write_offset: could not allocate block\n"); + /* returns it locked (if any one is non-full) */ + new_part = choose_partition( ptoa(1), pager->cur_partition); + if ( ! no_partition(new_part) ) { + +#if debug +dprintf("%s partition %x filled,", my_name, pager->cur_partition); +dprintf("extending object %x (size %x) to %x.\n", + pager, pager->size, new_part); +#endif + + /* this one tastes better */ + pager->cur_partition = new_part; + + /* this unlocks the partition too */ + off = pager_alloc_page(pager->cur_partition, FALSE); + + } + + if (off == NO_BLOCK) { + /* + * Oh well. + */ + overcommitted(FALSE, 1); + goto out; + } + ddprintf ("pager_write_offset: decided to allocate block\n"); + } + block.block.p_offset = off; + block.block.p_index = pager->cur_partition; + mapptr[f_page] = block; + ddprintf ("pager_write_offset: mapptr %x [3b] = %x\n", mapptr, + mapptr[0x3b]); + ddprintf ("pager_write_offset: block is finally %x\n", block); + } + +out: + +#if DEBUG_READER_CONFLICTS + pager->readers--; +#endif + mutex_unlock(&pager->lock); + return (block); +} + +/* + * Deallocate all of the blocks belonging to a paging object. + * No locking needed because no other operations can be in progress. + */ +void +pager_dealloc(pager) + register dpager_t pager; +{ + register int i, j; + register dp_map_t mapptr; + register union dp_map block; + + if (INDIRECT_PAGEMAP(pager->size)) { + for (i = INDIRECT_PAGEMAP_ENTRIES(pager->size); --i >= 0; ) { + mapptr = pager->map[i].indirect; + if (mapptr != 0) { + for (j = 0; j < PAGEMAP_ENTRIES; j++) { + block = mapptr[j]; + if ( ! 
no_block(block) ) + pager_dealloc_page(block.block.p_index, + block.block.p_offset, TRUE); + } + kfree((char *)mapptr, PAGEMAP_SIZE(PAGEMAP_ENTRIES)); + } + } + kfree((char *)pager->map, INDIRECT_PAGEMAP_SIZE(pager->size)); +#ifdef CHECKSUM + for (i = INDIRECT_PAGEMAP_ENTRIES(pager->size); --i >= 0; ) { + mapptr = (vm_offset_t *)pager->checksum[i]; + if (mapptr) { + kfree((char *)mapptr, PAGEMAP_SIZE(PAGEMAP_ENTRIES)); + } + } + kfree((char *)pager->checksum, + INDIRECT_PAGEMAP_SIZE(pager->size)); +#endif CHECKSUM + } + else { + mapptr = pager->map; + for (i = 0; i < pager->size; i++ ) { + block = mapptr[i]; + if ( ! no_block(block) ) + pager_dealloc_page(block.block.p_index, + block.block.p_offset, TRUE); + } + kfree((char *)pager->map, PAGEMAP_SIZE(pager->size)); +#ifdef CHECKSUM + kfree((char *)pager->checksum, PAGEMAP_SIZE(pager->size)); +#endif CHECKSUM + } +} + +/* + * Move all the pages of a PAGER that live in a + * partition PINDEX somewhere else. + * Pager should be write-locked, partition too. + * Returns FALSE if it could not do it, but + * some pages might have been moved nonetheless. 
+ */ +boolean_t +pager_realloc(pager, pindex) + register dpager_t pager; + p_index_t pindex; +{ + register dp_map_t map, emap; + vm_size_t size; + union dp_map block; + + size = pager->size; /* in pages */ + map = pager->map; + + if (INDIRECT_PAGEMAP(size)) { + for (emap = &map[INDIRECT_PAGEMAP_ENTRIES(size)]; + map < emap; map++) { + + register dp_map_t map2, emap2; + + if ((map2 = map->indirect) == 0) + continue; + + for (emap2 = &map2[PAGEMAP_ENTRIES]; + map2 < emap2; map2++) + if ( map2->block.p_index == pindex) { + + block = pager_move_page(*map2); + if (!no_block(block)) + *map2 = block; + else + return FALSE; + } + + } + goto ok; + } + + /* A small one */ + for (emap = &map[size]; map < emap; map++) + if (map->block.p_index == pindex) { + block = pager_move_page(*map); + if (!no_block(block)) + *map = block; + else + return FALSE; + } +ok: + pager->cur_partition = choose_partition(0, P_INDEX_INVALID); + return TRUE; +} + +/* + + */ + +/* + * Read/write routines. + */ +#define PAGER_SUCCESS 0 +#define PAGER_ABSENT 1 +#define PAGER_ERROR 2 + +/* + * Read data from a default pager. Addr is the address of a buffer + * to fill. Out_addr returns the buffer that contains the data; + * if it is different from <addr>, it must be deallocated after use. 
+ */ +int +default_read(ds, addr, size, offset, out_addr, deallocate) + register dpager_t ds; + vm_offset_t addr; /* pointer to block to fill */ + register vm_size_t size; + register vm_offset_t offset; + vm_offset_t *out_addr; + /* returns pointer to data */ + boolean_t deallocate; +{ + register union dp_map block; + vm_offset_t raddr; + vm_size_t rsize; + register int rc; + boolean_t first_time; + register partition_t part; +#ifdef CHECKSUM + vm_size_t original_size = size; +#endif CHECKSUM + vm_offset_t original_offset = offset; + + /* + * Find the block in the paging partition + */ + block = pager_read_offset(ds, offset); + if ( no_block(block) ) + return (PAGER_ABSENT); + + /* + * Read it, trying for the entire page. + */ + offset = ptoa(block.block.p_offset); +ddprintf ("default_read(%x,%x,%x,%d)\n",addr,size,offset,block.block.p_index); + part = partition_of(block.block.p_index); + first_time = TRUE; + *out_addr = addr; + + do { + rc = page_read_file_direct(part->file, + offset, + size, + &raddr, + &rsize); + if (rc != 0) + return (PAGER_ERROR); + + /* + * If we got the entire page on the first read, return it. + */ + if (first_time && rsize == size) { + *out_addr = raddr; + break; + } + /* + * Otherwise, copy the data into the + * buffer we were passed, and try for + * the next piece. 
+ */ + first_time = FALSE; + bcopy((char *)raddr, (char *)addr, rsize); + addr += rsize; + offset += rsize; + size -= rsize; + } while (size != 0); + +#if USE_PRECIOUS + if (deallocate) + pager_release_offset(ds, original_offset); +#endif /*USE_PRECIOUS*/ + +#ifdef CHECKSUM + { + int write_checksum, + read_checksum; + + write_checksum = pager_get_checksum(ds, original_offset); + read_checksum = compute_checksum(*out_addr, original_size); + if (write_checksum != read_checksum) { + panic( + "PAGER CHECKSUM ERROR: offset 0x%x, written 0x%x, read 0x%x", + original_offset, write_checksum, read_checksum); + } + } +#endif CHECKSUM + return (PAGER_SUCCESS); +} + +int +default_write(ds, addr, size, offset) + register dpager_t ds; + register vm_offset_t addr; + register vm_size_t size; + register vm_offset_t offset; +{ + register union dp_map block; + partition_t part; + vm_size_t wsize; + register int rc; + + ddprintf ("default_write: pager offset %x\n", offset); + + /* + * Find block in paging partition + */ + block = pager_write_offset(ds, offset); + if ( no_block(block) ) + return (PAGER_ERROR); + +#ifdef CHECKSUM + /* + * Save checksum + */ + { + int checksum; + + checksum = compute_checksum(addr, size); + pager_put_checksum(ds, offset, checksum); + } +#endif CHECKSUM + offset = ptoa(block.block.p_offset); +ddprintf ("default_write(%x,%x,%x,%d)\n",addr,size,offset,block.block.p_index); + part = partition_of(block.block.p_index); + + /* + * There are various assumptions made here,we + * will not get into the next disk 'block' by + * accident. It might well be non-contiguous. 
+ */ + do { + rc = page_write_file_direct(part->file, + offset, + addr, + size, + &wsize); + if (rc != 0) { + dprintf("*** PAGER ERROR: default_write: "); + dprintf("ds=0x%x addr=0x%x size=0x%x offset=0x%x resid=0x%x\n", + ds, addr, size, offset, wsize); + return (PAGER_ERROR); + } + addr += wsize; + offset += wsize; + size -= wsize; + } while (size != 0); + return (PAGER_SUCCESS); +} + +boolean_t +default_has_page(ds, offset) + dpager_t ds; + vm_offset_t offset; +{ + return ( ! no_block(pager_read_offset(ds, offset)) ); +} + +/* + + */ + +/* + * Mapping between pager port and paging object. + */ +struct dstruct { + queue_chain_t links; /* Link in pager-port list */ + + struct mutex lock; /* Lock for the structure */ + struct condition + waiting_seqno, /* someone waiting on seqno */ + waiting_read, /* someone waiting on readers */ + waiting_write, /* someone waiting on writers */ + waiting_refs; /* someone waiting on refs */ + + memory_object_t pager; /* Pager port */ + mach_port_seqno_t seqno; /* Pager port sequence number */ + mach_port_t pager_request; /* Request port */ + mach_port_urefs_t request_refs; /* Request port user-refs */ + mach_port_t pager_name; /* Name port */ + mach_port_urefs_t name_refs; /* Name port user-refs */ + + unsigned int readers; /* Reads in progress */ + unsigned int writers; /* Writes in progress */ + + unsigned int errors; /* Pageout error count */ + struct dpager dpager; /* Actual pager */ +}; +typedef struct dstruct * default_pager_t; +#define DEFAULT_PAGER_NULL ((default_pager_t)0) + +#if PARALLEL +#define dstruct_lock_init(ds) mutex_init(&ds->lock) +#define dstruct_lock(ds) mutex_lock(&ds->lock) +#define dstruct_unlock(ds) mutex_unlock(&ds->lock) +#else /* PARALLEL */ +#define dstruct_lock_init(ds) +#define dstruct_lock(ds) +#define dstruct_unlock(ds) +#endif /* PARALLEL */ + +/* + * List of all pagers. 
A specific pager is + * found directly via its port, this list is + * only used for monitoring purposes by the + * default_pager_object* calls + */ +struct pager_port { + queue_head_t queue; + struct mutex lock; + int count; /* saves code */ + queue_head_t leak_queue; +} all_pagers; + +#define pager_port_list_init() \ +{ \ + mutex_init(&all_pagers.lock); \ + queue_init(&all_pagers.queue); \ + queue_init(&all_pagers.leak_queue); \ + all_pagers.count = 0; \ +} + +void pager_port_list_insert(port, ds) + mach_port_t port; + default_pager_t ds; +{ + mutex_lock(&all_pagers.lock); + queue_enter(&all_pagers.queue, ds, default_pager_t, links); + all_pagers.count++; + mutex_unlock(&all_pagers.lock); +} + +/* given a data structure return a good port-name to associate it to */ +#define pnameof(_x_) (((vm_offset_t)(_x_))+1) +/* reverse, assumes no-odd-pointers */ +#define dnameof(_x_) (((vm_offset_t)(_x_))&~1) + +/* The magic typecast */ +#define pager_port_lookup(_port_) \ + ((! MACH_PORT_VALID(_port_) || \ + ((default_pager_t)dnameof(_port_))->pager != (_port_)) ? \ + DEFAULT_PAGER_NULL : (default_pager_t)dnameof(_port_)) + +void pager_port_list_delete(ds) + default_pager_t ds; +{ + mutex_lock(&all_pagers.lock); + queue_remove(&all_pagers.queue, ds, default_pager_t, links); + all_pagers.count--; + mutex_unlock(&all_pagers.lock); +} + +/* + * Destroy a paging partition. + * XXX this is not re-entrant XXX + */ +kern_return_t +destroy_paging_partition(name, pp_private) + char *name; + void **pp_private; +{ + register unsigned int id = part_id(name); + register partition_t part; + boolean_t all_ok = TRUE; + default_pager_t entry; + int pindex; + + /* + * Find and take partition out of list + * This prevents choose_partition from + * getting in the way. 
+ */ + mutex_lock(&all_partitions.lock); + for (pindex = 0; pindex < all_partitions.n_partitions; pindex++) { + part = partition_of(pindex); + if (part && (part->id == id)) break; + } + if (pindex == all_partitions.n_partitions) { + mutex_unlock(&all_partitions.lock); + return KERN_INVALID_ARGUMENT; + } + part->going_away = TRUE; + mutex_unlock(&all_partitions.lock); + + /* + * This might take a while.. + */ +all_over_again: +#if debug +dprintf("Partition x%x (id x%x) for %s, all_ok %d\n", part, id, name, all_ok); +#endif + all_ok = TRUE; + mutex_lock(&part->p_lock); + + mutex_lock(&all_pagers.lock); + queue_iterate(&all_pagers.queue, entry, default_pager_t, links) { + + dstruct_lock(entry); + + if (!mutex_try_lock(&entry->dpager.lock)) { + + dstruct_unlock(entry); + mutex_unlock(&all_pagers.lock); + mutex_unlock(&part->p_lock); + + /* yield the processor */ + (void) thread_switch(MACH_PORT_NULL, + SWITCH_OPTION_NONE, 0); + + goto all_over_again; + + } + + /* + * See if we can relocate all the pages of this object + * currently on this partition on some other partition + */ + all_ok = pager_realloc(&entry->dpager, pindex); + + mutex_unlock(&entry->dpager.lock); + dstruct_unlock(entry); + + if (!all_ok) break; + + } + mutex_unlock(&all_pagers.lock); + + if (all_ok) { + /* No need to unlock partition, there are no refs left */ + + set_partition_of(pindex, 0); + *pp_private = part->file; + kfree(part->bitmap, howmany(part->total_size, NB_BM) * sizeof(bm_entry_t)); + kfree(part, sizeof(struct part)); + dprintf("%s Removed paging partition %s\n", my_name, name); + return KERN_SUCCESS; + } + + /* + * Put partition back in. + */ + part->going_away = FALSE; + + return KERN_FAILURE; +} + + +/* + * We use the sequence numbers on requests to regulate + * our parallelism. In general, we allow multiple reads and writes + * to proceed in parallel, with the exception that reads must + * wait for previous writes to finish. 
(Because the kernel might + * generate a data-request for a page on the heels of a data-write + * for the same page, and we must avoid returning stale data.) + * terminate requests wait for proceeding reads and writes to finish. + */ + +unsigned int default_pager_total = 0; /* debugging */ +unsigned int default_pager_wait_seqno = 0; /* debugging */ +unsigned int default_pager_wait_read = 0; /* debugging */ +unsigned int default_pager_wait_write = 0; /* debugging */ +unsigned int default_pager_wait_refs = 0; /* debugging */ + +#if PARALLEL +/* + * Waits for correct sequence number. Leaves pager locked. + */ +void pager_port_lock(ds, seqno) + default_pager_t ds; + mach_port_seqno_t seqno; +{ + default_pager_total++; +ddprintf ("pager_port_lock <%p>: <%p>: %d: 1\n", &ds, ds, seqno); + dstruct_lock(ds); +ddprintf ("pager_port_lock <%p>: <%p>: %d: 2\n", &ds, ds, seqno); + while (ds->seqno != seqno) { +ddprintf ("pager_port_lock <%p>: <%p>: %d: 3\n", &ds, ds, seqno); + default_pager_wait_seqno++; + condition_wait(&ds->waiting_seqno, &ds->lock); +ddprintf ("pager_port_lock <%p>: <%p>: %d: 4\n", &ds, ds, seqno); + } +} + +/* + * Increments sequence number and unlocks pager. + */ +void pager_port_unlock(ds) + default_pager_t ds; +{ + ds->seqno++; +ddprintf ("pager_port_unlock <%p>: <%p>: seqno => %d\n", &ds, ds, ds->seqno); + dstruct_unlock(ds); +ddprintf ("pager_port_unlock <%p>: <%p>: 2\n", &ds, ds); + condition_broadcast(&ds->waiting_seqno); +ddprintf ("pager_port_unlock <%p>: <%p>: 3\n", &ds, ds); +} + +/* + * Start a read - one more reader. Pager must be locked. + */ +void pager_port_start_read(ds) + default_pager_t ds; +{ + ds->readers++; +} + +/* + * Wait for readers. Unlocks and relocks pager if wait needed. + */ +void pager_port_wait_for_readers(ds) + default_pager_t ds; +{ + while (ds->readers != 0) { + default_pager_wait_read++; + condition_wait(&ds->waiting_read, &ds->lock); + } +} + +/* + * Finish a read. Pager is unlocked and returns unlocked. 
+ */ +void pager_port_finish_read(ds) + default_pager_t ds; +{ + dstruct_lock(ds); + if (--ds->readers == 0) { + dstruct_unlock(ds); + condition_broadcast(&ds->waiting_read); + } + else { + dstruct_unlock(ds); + } +} + +/* + * Start a write - one more writer. Pager must be locked. + */ +void pager_port_start_write(ds) + default_pager_t ds; +{ + ds->writers++; +} + +/* + * Wait for writers. Unlocks and relocks pager if wait needed. + */ +void pager_port_wait_for_writers(ds) + default_pager_t ds; +{ + while (ds->writers != 0) { + default_pager_wait_write++; + condition_wait(&ds->waiting_write, &ds->lock); + } +} + +/* + * Finish a write. Pager is unlocked and returns unlocked. + */ +void pager_port_finish_write(ds) + default_pager_t ds; +{ + dstruct_lock(ds); + if (--ds->writers == 0) { + dstruct_unlock(ds); + condition_broadcast(&ds->waiting_write); + } + else { + dstruct_unlock(ds); + } +} + +/* + * Wait for concurrent default_pager_objects. + * Unlocks and relocks pager if wait needed. + */ +void pager_port_wait_for_refs(ds) + default_pager_t ds; +{ + while (ds->name_refs == 0) { + default_pager_wait_refs++; + condition_wait(&ds->waiting_refs, &ds->lock); + } +} + +/* + * Finished creating name refs - wake up waiters. + */ +void pager_port_finish_refs(ds) + default_pager_t ds; +{ + condition_broadcast(&ds->waiting_refs); +} + +#else /* PARALLEL */ + +#define pager_port_lock(ds,seqno) +#define pager_port_unlock(ds) +#define pager_port_start_read(ds) +#define pager_port_wait_for_readers(ds) +#define pager_port_finish_read(ds) +#define pager_port_start_write(ds) +#define pager_port_wait_for_writers(ds) +#define pager_port_finish_write(ds) +#define pager_port_wait_for_refs(ds) +#define pager_port_finish_refs(ds) + +#endif /* PARALLEL */ + +/* + * Default pager. + */ +task_t default_pager_self; /* Our task port. */ + +mach_port_t default_pager_default_port; /* Port for memory_object_create. */ + +/* We catch exceptions on ourself & startup using this port. 
*/ +mach_port_t default_pager_exception_port; +/* We receive bootstrap requests on this port. */ +mach_port_t default_pager_bootstrap_port; + +mach_port_t default_pager_internal_set; /* Port set for internal objects. */ +mach_port_t default_pager_external_set; /* Port set for external objects. */ +mach_port_t default_pager_default_set; /* Port set for "default" thread. */ + +typedef struct default_pager_thread { + cthread_t dpt_thread; /* Server thread. */ + vm_offset_t dpt_buffer; /* Read buffer. */ + boolean_t dpt_internal; /* Do we handle internal objects? */ +} default_pager_thread_t; + +#if PARALLEL + /* determine number of threads at run time */ +#define DEFAULT_PAGER_INTERNAL_COUNT (0) + +#else /* PARALLEL */ +#define DEFAULT_PAGER_INTERNAL_COUNT (1) +#endif /* PARALLEL */ + +/* Memory created by default_pager_object_create should mostly be resident. */ +#define DEFAULT_PAGER_EXTERNAL_COUNT (1) + +unsigned int default_pager_internal_count = DEFAULT_PAGER_INTERNAL_COUNT; + /* Number of "internal" threads. */ +unsigned int default_pager_external_count = DEFAULT_PAGER_EXTERNAL_COUNT; + /* Number of "external" threads. */ + +default_pager_t pager_port_alloc(size) + vm_size_t size; +{ + default_pager_t ds; + p_index_t part; + + ds = (default_pager_t) kalloc(sizeof *ds); + if (ds == DEFAULT_PAGER_NULL) + panic("%spager_port_alloc",my_name); + bzero((char *) ds, sizeof *ds); + + dstruct_lock_init(ds); + + /* + * Get a suitable partition. If none big enough + * just pick one and overcommit. If no partitions + * at all.. well just fake one so that we will + * kill specific objects on pageouts rather than + * panicing the system now. 
+ */ + part = choose_partition(size, P_INDEX_INVALID); + if (no_partition(part)) { + overcommitted(FALSE, atop(size)); + part = choose_partition(0,P_INDEX_INVALID); +#if debug + if (no_partition(part)) + dprintf("%s No paging space at all !!\n", my_name); +#endif + } + pager_alloc(&ds->dpager, part, size); + + return ds; +} + +mach_port_urefs_t default_pager_max_urefs = 10000; + +/* + * Check user reference count on pager_request port. + * Pager must be locked. + * Unlocks and re-locks pager if needs to call kernel. + */ +void pager_port_check_request(ds, pager_request) + default_pager_t ds; + mach_port_t pager_request; +{ + mach_port_delta_t delta; + kern_return_t kr; + + assert(ds->pager_request == pager_request); + + if (++ds->request_refs > default_pager_max_urefs) { + delta = 1 - ds->request_refs; + ds->request_refs = 1; + + dstruct_unlock(ds); + + /* + * Deallocate excess user references. + */ + + kr = mach_port_mod_refs(default_pager_self, pager_request, + MACH_PORT_RIGHT_SEND, delta); + if (kr != KERN_SUCCESS) + panic("%spager_port_check_request",my_name); + + dstruct_lock(ds); + } +} + +void default_pager_add(ds, internal) + default_pager_t ds; + boolean_t internal; +{ + mach_port_t pager = ds->pager; + mach_port_t pset; + mach_port_mscount_t sync; + mach_port_t previous; + kern_return_t kr; + static char here[] = "%sdefault_pager_add"; + + /* + * The port currently has a make-send count of zero, + * because either we just created the port or we just + * received the port in a memory_object_create request. 
+ */ + + if (internal) { + /* possibly generate an immediate no-senders notification */ + sync = 0; + pset = default_pager_internal_set; + } else { + /* delay notification till send right is created */ + sync = 1; + pset = default_pager_external_set; + } + + kr = mach_port_request_notification(default_pager_self, pager, + MACH_NOTIFY_NO_SENDERS, sync, + pager, MACH_MSG_TYPE_MAKE_SEND_ONCE, + &previous); + if ((kr != KERN_SUCCESS) || (previous != MACH_PORT_NULL)) + panic(here,my_name); + + kr = mach_port_move_member(default_pager_self, pager, pset); + if (kr != KERN_SUCCESS) + panic(here,my_name); +} + +/* + * Routine: memory_object_create + * Purpose: + * Handle requests for memory objects from the + * kernel. + * Notes: + * Because we only give out the default memory + * manager port to the kernel, we don't have to + * be so paranoid about the contents. + */ +kern_return_t +seqnos_memory_object_create(old_pager, seqno, new_pager, new_size, + new_pager_request, new_pager_name, new_page_size) + mach_port_t old_pager; + mach_port_seqno_t seqno; + mach_port_t new_pager; + vm_size_t new_size; + mach_port_t new_pager_request; + mach_port_t new_pager_name; + vm_size_t new_page_size; +{ + register default_pager_t ds; + kern_return_t kr; + + assert(old_pager == default_pager_default_port); + assert(MACH_PORT_VALID(new_pager_request)); + assert(MACH_PORT_VALID(new_pager_name)); + assert(new_page_size == vm_page_size); + + ds = pager_port_alloc(new_size); +rename_it: + kr = mach_port_rename( default_pager_self, + new_pager, (mach_port_t)pnameof(ds)); + if (kr != KERN_SUCCESS) { + default_pager_t ds1; + + if (kr != KERN_NAME_EXISTS) + panic("%s m_o_create", my_name); + ds1 = (default_pager_t) kalloc(sizeof *ds1); + *ds1 = *ds; + mutex_lock(&all_pagers.lock); + queue_enter(&all_pagers.leak_queue, ds, default_pager_t, links); + mutex_unlock(&all_pagers.lock); + ds = ds1; + goto rename_it; + } + + new_pager = (mach_port_t) pnameof(ds); + + /* + * Set up associations between 
these ports + * and this default_pager structure + */ + + ds->pager = new_pager; + ds->pager_request = new_pager_request; + ds->request_refs = 1; + ds->pager_name = new_pager_name; + ds->name_refs = 1; + + /* + * After this, other threads might receive requests + * for this memory object or find it in the port list. + */ + + pager_port_list_insert(new_pager, ds); + default_pager_add(ds, TRUE); + + return(KERN_SUCCESS); +} + +memory_object_copy_strategy_t default_pager_copy_strategy = + MEMORY_OBJECT_COPY_DELAY; + +kern_return_t +seqnos_memory_object_init(pager, seqno, pager_request, pager_name, + pager_page_size) + mach_port_t pager; + mach_port_seqno_t seqno; + mach_port_t pager_request; + mach_port_t pager_name; + vm_size_t pager_page_size; +{ + register default_pager_t ds; + kern_return_t kr; + static char here[] = "%sinit"; + + assert(MACH_PORT_VALID(pager_request)); + assert(MACH_PORT_VALID(pager_name)); + assert(pager_page_size == vm_page_size); + + ds = pager_port_lookup(pager); + if (ds == DEFAULT_PAGER_NULL) + panic(here, my_name); + pager_port_lock(ds, seqno); + + if (ds->pager_request != MACH_PORT_NULL) + panic(here, my_name); + + ds->pager_request = pager_request; + ds->request_refs = 1; + ds->pager_name = pager_name; + ds->name_refs = 1; + + /* + * Even if the kernel immediately terminates the object, + * the pager_request port won't be destroyed until + * we process the terminate request, which won't happen + * until we unlock the object. 
+ */ + + kr = memory_object_set_attributes(pager_request, + TRUE, + FALSE, /* do not cache */ + default_pager_copy_strategy); + if (kr != KERN_SUCCESS) + panic(here, my_name); + + pager_port_unlock(ds); + + return(KERN_SUCCESS); +} + +kern_return_t +seqnos_memory_object_terminate(pager, seqno, pager_request, pager_name) + mach_port_t pager; + mach_port_seqno_t seqno; + mach_port_t pager_request; + mach_port_t pager_name; +{ + register default_pager_t ds; + mach_port_urefs_t request_refs, name_refs; + kern_return_t kr; + static char here[] = "%sterminate"; + + /* + * pager_request and pager_name are receive rights, + * not send rights. + */ + + ds = pager_port_lookup(pager); + if (ds == DEFAULT_PAGER_NULL) + panic(here, my_name); +ddprintf ("seqnos_memory_object_terminate <%p>: pager_port_lock: <%p>[s:%d,r:%d,w:%d,l:%d], %d\n", + &kr, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held, seqno); + pager_port_lock(ds, seqno); + + /* + * Wait for read and write requests to terminate. + */ + + pager_port_wait_for_readers(ds); + pager_port_wait_for_writers(ds); + + /* + * After memory_object_terminate both memory_object_init + * and a no-senders notification are possible, so we need + * to clean up the request and name ports but leave + * the pager port. + * + * A concurrent default_pager_objects might be allocating + * more references for the name port. In this case, + * we must first wait for it to finish. + */ + + pager_port_wait_for_refs(ds); + + ds->pager_request = MACH_PORT_NULL; + request_refs = ds->request_refs; + ds->request_refs = 0; + assert(ds->pager_name == pager_name); + ds->pager_name = MACH_PORT_NULL; + name_refs = ds->name_refs; + ds->name_refs = 0; +ddprintf ("seqnos_memory_object_terminate <%p>: pager_port_unlock: <%p>[s:%d,r:%d,w:%d,l:%d]\n", + &kr, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held); + pager_port_unlock(ds); + + /* + * Now we deallocate our various port rights. 
+ */ + + kr = mach_port_mod_refs(default_pager_self, pager_request, + MACH_PORT_RIGHT_SEND, -request_refs); + if (kr != KERN_SUCCESS) + panic(here,my_name); + + kr = mach_port_mod_refs(default_pager_self, pager_request, + MACH_PORT_RIGHT_RECEIVE, -1); + if (kr != KERN_SUCCESS) + panic(here,my_name); + + kr = mach_port_mod_refs(default_pager_self, pager_name, + MACH_PORT_RIGHT_SEND, -name_refs); + if (kr != KERN_SUCCESS) + panic(here,my_name); + + kr = mach_port_mod_refs(default_pager_self, pager_name, + MACH_PORT_RIGHT_RECEIVE, -1); + if (kr != KERN_SUCCESS) + panic(here,my_name); + + return (KERN_SUCCESS); +} + +void default_pager_no_senders(pager, seqno, mscount) + memory_object_t pager; + mach_port_seqno_t seqno; + mach_port_mscount_t mscount; +{ + register default_pager_t ds; + kern_return_t kr; + static char here[] = "%sno_senders"; + + /* + * Because we don't give out multiple send rights + * for a memory object, there can't be a race + * between getting a no-senders notification + * and creating a new send right for the object. + * Hence we don't keep track of mscount. + */ + + + ds = pager_port_lookup(pager); + if (ds == DEFAULT_PAGER_NULL) + panic(here,my_name); + pager_port_lock(ds, seqno); + + /* + * We shouldn't get a no-senders notification + * when the kernel has the object cached. + */ + + if (ds->pager_request != MACH_PORT_NULL) + panic(here,my_name); + + /* + * Unlock the pager (though there should be no one + * waiting for it). + */ + dstruct_unlock(ds); + + /* + * Remove the memory object port association, and then + * the destroy the port itself. We must remove the object + * from the port list before deallocating the pager, + * because of default_pager_objects. 
+ */ + + pager_port_list_delete(ds); + pager_dealloc(&ds->dpager); + + kr = mach_port_mod_refs(default_pager_self, pager, + MACH_PORT_RIGHT_RECEIVE, -1); + if (kr != KERN_SUCCESS) + panic(here,my_name); + + /* + * Do this *after* deallocating the port name + */ + kfree((char *) ds, sizeof(*ds)); + + /* + * Recover memory that we might have wasted because + * of name conflicts + */ + mutex_lock(&all_pagers.lock); + + while (!queue_empty(&all_pagers.leak_queue)) { + + ds = (default_pager_t) queue_first(&all_pagers.leak_queue); + queue_remove_first(&all_pagers.leak_queue, ds, default_pager_t, links); + kfree((char *) ds, sizeof(*ds)); + } + + mutex_unlock(&all_pagers.lock); +} + +int default_pager_pagein_count = 0; +int default_pager_pageout_count = 0; + +kern_return_t +seqnos_memory_object_data_request(pager, seqno, reply_to, offset, + length, protection_required) + memory_object_t pager; + mach_port_seqno_t seqno; + mach_port_t reply_to; + vm_offset_t offset; + vm_size_t length; + vm_prot_t protection_required; +{ + default_pager_thread_t *dpt; + default_pager_t ds; + vm_offset_t addr; + unsigned int errors; + kern_return_t rc; + static char here[] = "%sdata_request"; + + dpt = (default_pager_thread_t *) cthread_data(cthread_self()); + + if (length != vm_page_size) + panic(here,my_name); + + ds = pager_port_lookup(pager); + if (ds == DEFAULT_PAGER_NULL) + panic(here,my_name); +ddprintf ("seqnos_memory_object_data_request <%p>: pager_port_lock: <%p>[s:%d,r:%d,w:%d,l:%d], %d\n", + &ds, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held, seqno); + pager_port_lock(ds, seqno); + pager_port_check_request(ds, reply_to); + pager_port_wait_for_writers(ds); + pager_port_start_read(ds); + + /* + * Get error count while pager locked. 
+ */ + errors = ds->errors; + +ddprintf ("seqnos_memory_object_data_request <%p>: pager_port_unlock: <%p>[s:%d,r:%d,w:%d,l:%d]\n", + &ds, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held); + pager_port_unlock(ds); + + if (errors) { + dprintf("%s %s\n", my_name, + "dropping data_request because of previous paging errors"); + (void) memory_object_data_error(reply_to, + offset, vm_page_size, + KERN_FAILURE); + goto done; + } + + rc = default_read(&ds->dpager, dpt->dpt_buffer, + vm_page_size, offset, + &addr, protection_required & VM_PROT_WRITE); + + switch (rc) { + case PAGER_SUCCESS: + if (addr != dpt->dpt_buffer) { + /* + * Deallocates data buffer + */ + (void) memory_object_data_supply( + reply_to, offset, + addr, vm_page_size, TRUE, + VM_PROT_NONE, + FALSE, MACH_PORT_NULL); + } else { + (void) memory_object_data_provided( + reply_to, offset, + addr, vm_page_size, + VM_PROT_NONE); + } + break; + + case PAGER_ABSENT: + (void) memory_object_data_unavailable( + reply_to, + offset, + vm_page_size); + break; + + case PAGER_ERROR: + (void) memory_object_data_error( + reply_to, + offset, + vm_page_size, + KERN_FAILURE); + break; + } + + default_pager_pagein_count++; + + done: + pager_port_finish_read(ds); + return(KERN_SUCCESS); +} + +/* + * memory_object_data_initialize: check whether we already have each page, and + * write it if we do not. The implementation is far from optimized, and + * also assumes that the default_pager is single-threaded. 
+ */ +kern_return_t +seqnos_memory_object_data_initialize(pager, seqno, pager_request, + offset, addr, data_cnt) + memory_object_t pager; + mach_port_seqno_t seqno; + mach_port_t pager_request; + register + vm_offset_t offset; + register + pointer_t addr; + vm_size_t data_cnt; +{ + vm_offset_t amount_sent; + default_pager_t ds; + static char here[] = "%sdata_initialize"; + +#ifdef lint + pager_request++; +#endif lint + + ds = pager_port_lookup(pager); + if (ds == DEFAULT_PAGER_NULL) + panic(here,my_name); +ddprintf ("seqnos_memory_object_data_initialize <%p>: pager_port_lock: <%p>[s:%d,r:%d,w:%d,l:%d], %d\n", + &ds, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held, seqno); + pager_port_lock(ds, seqno); + pager_port_check_request(ds, pager_request); + pager_port_start_write(ds); +ddprintf ("seqnos_memory_object_data_initialize <%p>: pager_port_unlock: <%p>[s:%d,r:%d,w:%d,l:%d]\n", + &ds, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held); + pager_port_unlock(ds); + + for (amount_sent = 0; + amount_sent < data_cnt; + amount_sent += vm_page_size) { + + if (!default_has_page(&ds->dpager, offset + amount_sent)) { + if (default_write(&ds->dpager, + addr + amount_sent, + vm_page_size, + offset + amount_sent) + != PAGER_SUCCESS) { + dprintf("%s%s write error\n", my_name, here); + dstruct_lock(ds); + ds->errors++; + dstruct_unlock(ds); + } + } + } + + pager_port_finish_write(ds); + if (vm_deallocate(default_pager_self, addr, data_cnt) != KERN_SUCCESS) + panic(here,my_name); + + return(KERN_SUCCESS); +} + +/* + * memory_object_data_write: split up the stuff coming in from + * a memory_object_data_write call + * into individual pages and pass them off to default_write. 
+ */ +kern_return_t +seqnos_memory_object_data_write(pager, seqno, pager_request, + offset, addr, data_cnt) + memory_object_t pager; + mach_port_seqno_t seqno; + mach_port_t pager_request; + register + vm_offset_t offset; + register + pointer_t addr; + vm_size_t data_cnt; +{ + register + vm_size_t amount_sent; + default_pager_t ds; + static char here[] = "%sdata_write"; + int err; + +#ifdef lint + pager_request++; +#endif lint + +ddprintf ("seqnos_memory_object_data_write <%p>: 1\n", &err); + if ((data_cnt % vm_page_size) != 0) + { + ddprintf ("fail 1: %d %d\n", data_cnt, vm_page_size); + panic(here,my_name); + } + + +ddprintf ("seqnos_memory_object_data_write <%p>: 2\n", &err); + ds = pager_port_lookup(pager); +ddprintf ("seqnos_memory_object_data_write <%p>: 3\n", &err); + if (ds == DEFAULT_PAGER_NULL) + { + ddprintf ("fail 2: %d %d\n", pager, ds); + panic(here,my_name); + } + +ddprintf ("seqnos_memory_object_data_write <%p>: 4\n", &err); +ddprintf ("seqnos_memory_object_data_write <%p>: pager_port_lock: <%p>[s:%d,r:%d,w:%d,l:%d], %d\n", + &err, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held, seqno); + pager_port_lock(ds, seqno); +ddprintf ("seqnos_memory_object_data_write <%p>: 5\n", &err); + pager_port_check_request(ds, pager_request); +ddprintf ("seqnos_memory_object_data_write <%p>: 6\n", &err); + pager_port_start_write(ds); +ddprintf ("seqnos_memory_object_data_write <%p>: 7\n", &err); +ddprintf ("seqnos_memory_object_data_write <%p>: pager_port_unlock: <%p>[s:%d,r:%d,w:%d,l:%d]\n", + &err, ds, ds->seqno, ds->readers, ds->writers, ds->lock.held); + pager_port_unlock(ds); + +ddprintf ("seqnos_memory_object_data_write <%p>: 8\n", &err); + for (amount_sent = 0; + amount_sent < data_cnt; + amount_sent += vm_page_size) { + + register int result; + +ddprintf ("seqnos_memory_object_data_write <%p>: 9\n", &err); + result = default_write(&ds->dpager, + addr + amount_sent, + vm_page_size, + offset + amount_sent); +ddprintf ("seqnos_memory_object_data_write 
<%p>: 10\n", &err); + if (result != KERN_SUCCESS) { +ddprintf ("seqnos_memory_object_data_write <%p>: 11\n", &err); +#if debug + dprintf("%s WRITE ERROR on default_pageout:", my_name); + dprintf(" pager=%x, offset=0x%x, length=0x%x, result=%d\n", + pager, offset+amount_sent, vm_page_size, result); +#endif + dstruct_lock(ds); + ds->errors++; + dstruct_unlock(ds); + } + default_pager_pageout_count++; + } + +ddprintf ("seqnos_memory_object_data_write <%p>: 12\n", &err); + pager_port_finish_write(ds); +ddprintf ("seqnos_memory_object_data_write <%p>: 13\n", &err); + err = vm_deallocate(default_pager_self, addr, data_cnt); +ddprintf ("seqnos_memory_object_data_write <%p>: 14\n", &err); + if (err != KERN_SUCCESS) + { + ddprintf ("fail 3: %s %s %s %s\n", default_pager_self, addr, data_cnt, &err); + + panic(here,my_name); + } + + +ddprintf ("seqnos_memory_object_data_write <%p>: 15\n", &err); + return(KERN_SUCCESS); +} + +/*ARGSUSED*/ +kern_return_t +seqnos_memory_object_copy(old_memory_object, seqno, old_memory_control, + offset, length, new_memory_object) + memory_object_t old_memory_object; + mach_port_seqno_t seqno; + memory_object_control_t + old_memory_control; + vm_offset_t offset; + vm_size_t length; + memory_object_t new_memory_object; +{ + panic("%scopy", my_name); + return KERN_FAILURE; +} + +kern_return_t +seqnos_memory_object_lock_completed(pager, seqno, pager_request, + offset, length) + memory_object_t pager; + mach_port_seqno_t seqno; + mach_port_t pager_request; + vm_offset_t offset; + vm_size_t length; +{ +#ifdef lint + pager++; seqno++; pager_request++; offset++; length++; +#endif lint + + panic("%slock_completed",my_name); + return(KERN_FAILURE); +} + +kern_return_t +seqnos_memory_object_data_unlock(pager, seqno, pager_request, + offset, addr, data_cnt) + memory_object_t pager; + mach_port_seqno_t seqno; + mach_port_t pager_request; + vm_offset_t offset; + pointer_t addr; + vm_size_t data_cnt; +{ + panic("%sdata_unlock",my_name); + return(KERN_FAILURE); 
+} + +kern_return_t +seqnos_memory_object_supply_completed(pager, seqno, pager_request, + offset, length, + result, error_offset) + memory_object_t pager; + mach_port_seqno_t seqno; + mach_port_t pager_request; + vm_offset_t offset; + vm_size_t length; + kern_return_t result; + vm_offset_t error_offset; +{ + panic("%ssupply_completed",my_name); + return(KERN_FAILURE); +} + +kern_return_t +seqnos_memory_object_data_return(pager, seqno, pager_request, + offset, addr, data_cnt, + dirty, kernel_copy) + memory_object_t pager; + mach_port_seqno_t seqno; + mach_port_t pager_request; + vm_offset_t offset; + pointer_t addr; + vm_size_t data_cnt; + boolean_t dirty; + boolean_t kernel_copy; +{ + panic("%sdata_return",my_name); + return(KERN_FAILURE); +} + +kern_return_t +seqnos_memory_object_change_completed(pager, seqno, may_cache, copy_strategy) + memory_object_t pager; + mach_port_seqno_t seqno; + boolean_t may_cache; + memory_object_copy_strategy_t copy_strategy; +{ + panic("%schange_completed",my_name); + return(KERN_FAILURE); +} + + +boolean_t default_pager_notify_server(in, out) + mach_msg_header_t *in, *out; +{ + register mach_no_senders_notification_t *n = + (mach_no_senders_notification_t *) in; + + /* + * The only send-once rights we create are for + * receiving no-more-senders notifications. + * Hence, if we receive a message directed to + * a send-once right, we can assume it is + * a genuine no-senders notification from the kernel. + */ + + if ((n->not_header.msgh_bits != + MACH_MSGH_BITS(0, MACH_MSG_TYPE_PORT_SEND_ONCE)) || + (n->not_header.msgh_id != MACH_NOTIFY_NO_SENDERS)) + return FALSE; + + assert(n->not_header.msgh_size == sizeof *n); + assert(n->not_header.msgh_remote_port == MACH_PORT_NULL); + + assert(n->not_type.msgt_name == MACH_MSG_TYPE_INTEGER_32); + assert(n->not_type.msgt_size == 32); + assert(n->not_type.msgt_number == 1); + assert(n->not_type.msgt_inline); + assert(! 
n->not_type.msgt_longform); + + default_pager_no_senders(n->not_header.msgh_local_port, + n->not_header.msgh_seqno, n->not_count); + + out->msgh_remote_port = MACH_PORT_NULL; + return TRUE; +} + +extern boolean_t seqnos_memory_object_server(); +extern boolean_t seqnos_memory_object_default_server(); +extern boolean_t default_pager_server(); +extern boolean_t exc_server(); +extern boolean_t bootstrap_server(); +extern void bootstrap_compat(); + +mach_msg_size_t default_pager_msg_size_object = 128; + +boolean_t +default_pager_demux_object(in, out) + mach_msg_header_t *in; + mach_msg_header_t *out; +{ + /* + * We receive memory_object_data_initialize messages in + * the memory_object_default interface. + */ + +int rval; +ddprintf ("DPAGER DEMUX OBJECT <%p>: %d\n", in, in->msgh_id); +rval = + (seqnos_memory_object_server(in, out) || + seqnos_memory_object_default_server(in, out) || + default_pager_notify_server(in, out)); +ddprintf ("DPAGER DEMUX OBJECT DONE <%p>: %d\n", in, in->msgh_id); +return rval; +} + +mach_msg_size_t default_pager_msg_size_default = 8 * 1024; + +boolean_t +default_pager_demux_default(in, out) + mach_msg_header_t *in; + mach_msg_header_t *out; +{ + if (in->msgh_local_port == default_pager_default_port) { + /* + * We receive memory_object_create messages in + * the memory_object_default interface. + */ + +int rval; +ddprintf ("DPAGER DEMUX DEFAULT <%p>: %d\n", in, in->msgh_id); +rval = + (seqnos_memory_object_default_server(in, out) || + default_pager_server(in, out)); +ddprintf ("DPAGER DEMUX DEFAULT DONE <%p>: %d\n", in, in->msgh_id); +return rval; + } else if (in->msgh_local_port == default_pager_exception_port) { + /* + * We receive exception messages for + * ourself and the startup task. + */ + + return exc_server(in, out); + } else if (in->msgh_local_port == default_pager_bootstrap_port) { + /* + * We receive bootstrap requests + * from the startup task. 
+ */ + + if (in->msgh_id == 999999) { + /* compatibility for old bootstrap interface */ + + bootstrap_compat(in, out); + return TRUE; + } + + return bootstrap_server(in, out); + } else { + panic(my_name); + return FALSE; + } +} + +/* + * We use multiple threads, for two reasons. + * + * First, memory objects created by default_pager_object_create + * are "external", instead of "internal". This means the kernel + * sends data (memory_object_data_write) to the object pageable. + * To prevent deadlocks, the external and internal objects must + * be managed by different threads. + * + * Second, the default pager uses synchronous IO operations. + * Spreading requests across multiple threads should + * recover some of the performance loss from synchronous IO. + * + * We have 3+ threads. + * One receives memory_object_create and + * default_pager_object_create requests. + * One or more manage internal objects. + * One or more manage external objects. + */ + +void +default_pager_thread_privileges() +{ + /* + * Set thread privileges. + */ + cthread_wire(); /* attach kernel thread to cthread */ + wire_thread(); /* grab a kernel stack and memory allocation + privileges */ +} + +any_t +default_pager_default_thread (arg) + any_t arg; +{ + kern_return_t kr; + default_pager_thread_privileges (); + for (;;) { + kr = mach_msg_server(default_pager_demux_default, + default_pager_msg_size_default, + default_pager_default_set); + panic(my_name, kr); + } +} + + + +any_t +default_pager_thread(arg) + any_t arg; +{ + default_pager_thread_t *dpt = (default_pager_thread_t *) arg; + mach_port_t pset; + kern_return_t kr; + + cthread_set_data(cthread_self(), (any_t) dpt); + + + /* + * Threads handling external objects cannot have + * privileges. Otherwise a burst of data-requests for an + * external object could empty the free-page queue, + * because the fault code only reserves real pages for + * requests sent to internal objects. 
+ */ + + if (dpt->dpt_internal) { + default_pager_thread_privileges(); + pset = default_pager_internal_set; + } else { + pset = default_pager_external_set; + } + + for (;;) { + kr = mach_msg_server(default_pager_demux_object, + default_pager_msg_size_object, + pset); + panic(my_name, kr); + } +} + +void +start_default_pager_thread(internal) + boolean_t internal; +{ + default_pager_thread_t *dpt; + kern_return_t kr; + + dpt = (default_pager_thread_t *) kalloc(sizeof *dpt); + if (dpt == 0) + panic(my_name); + + dpt->dpt_internal = internal; + + kr = vm_allocate(default_pager_self, &dpt->dpt_buffer, + vm_page_size, TRUE); + if (kr != KERN_SUCCESS) + panic(my_name); + wire_memory(dpt->dpt_buffer, vm_page_size, + VM_PROT_READ|VM_PROT_WRITE); + + dpt->dpt_thread = cthread_fork(default_pager_thread, (any_t) dpt); +} + +void +default_pager_initialize(host_port) + mach_port_t host_port; +{ + memory_object_t DMM; + kern_return_t kr; + + /* + * This task will become the default pager. + */ + default_pager_self = mach_task_self(); + + /* + * Initialize the "default pager" port. + */ + kr = mach_port_allocate(default_pager_self, MACH_PORT_RIGHT_RECEIVE, + &default_pager_default_port); + if (kr != KERN_SUCCESS) + panic(my_name); + + DMM = default_pager_default_port; + kr = vm_set_default_memory_manager(host_port, &DMM); + if ((kr != KERN_SUCCESS) || MACH_PORT_VALID(DMM)) + panic(my_name); + + /* + * Initialize the exception port. + */ + kr = mach_port_allocate(default_pager_self, MACH_PORT_RIGHT_RECEIVE, + &default_pager_exception_port); + if (kr != KERN_SUCCESS) + panic(my_name); + + /* + * Initialize the bootstrap port. + */ + kr = mach_port_allocate(default_pager_self, MACH_PORT_RIGHT_RECEIVE, + &default_pager_bootstrap_port); + if (kr != KERN_SUCCESS) + panic(my_name); + + /* + * Arrange for wiring privileges. + */ + wire_setup(host_port); + + /* + * Find out how many CPUs we have, to determine the number + * of threads to create. 
+ */ + if (default_pager_internal_count == 0) { + host_basic_info_data_t h_info; + natural_t h_info_count; + + h_info_count = HOST_BASIC_INFO_COUNT; + (void) host_info(host_port, HOST_BASIC_INFO, + (host_info_t)&h_info, &h_info_count); + + /* + * Random computation to get more parallelism on + * multiprocessors. + */ + default_pager_internal_count = + (h_info.avail_cpus > 32 ? 32 : h_info.avail_cpus) / 4 + 3; + } +} + +/* + * Initialize and Run the default pager + */ +void +default_pager() +{ + kern_return_t kr; + int i; + + default_pager_thread_privileges(); + + /* + * Wire down code, data, stack + */ + wire_all_memory(); + + + /* + * Initialize the list of all pagers. + */ + pager_port_list_init(); + + kr = mach_port_allocate(default_pager_self, MACH_PORT_RIGHT_PORT_SET, + &default_pager_internal_set); + if (kr != KERN_SUCCESS) + panic(my_name); + + kr = mach_port_allocate(default_pager_self, MACH_PORT_RIGHT_PORT_SET, + &default_pager_external_set); + if (kr != KERN_SUCCESS) + panic(my_name); + + kr = mach_port_allocate(default_pager_self, MACH_PORT_RIGHT_PORT_SET, + &default_pager_default_set); + if (kr != KERN_SUCCESS) + panic(my_name); + + kr = mach_port_move_member(default_pager_self, + default_pager_default_port, + default_pager_default_set); + if (kr != KERN_SUCCESS) + panic(my_name); + + kr = mach_port_move_member(default_pager_self, + default_pager_exception_port, + default_pager_default_set); + if (kr != KERN_SUCCESS) + panic(my_name); + + kr = mach_port_move_member(default_pager_self, + default_pager_bootstrap_port, + default_pager_default_set); + if (kr != KERN_SUCCESS) + panic(my_name); + + /* + * Now we create the threads that will actually + * manage objects. 
+ */ + + for (i = 0; i < default_pager_internal_count; i++) + start_default_pager_thread(TRUE); + + for (i = 0; i < default_pager_external_count; i++) + start_default_pager_thread(FALSE); + + default_pager_default_thread(0); /* Become the default_pager server */ +#if 0 + cthread_fork (default_pager_default_thread, 0); + /* cthread_exit (cthread_self ()); */ + thread_suspend (mach_thread_self ()); +#endif +} + +/* + * Create an external object. + */ +kern_return_t default_pager_object_create(pager, mem_obj, size) + mach_port_t pager; + mach_port_t *mem_obj; + vm_size_t size; +{ + default_pager_t ds; + mach_port_t port; + kern_return_t result; + + if (pager != default_pager_default_port) + return KERN_INVALID_ARGUMENT; + + ds = pager_port_alloc(size); +rename_it: + port = (mach_port_t) pnameof(ds); + result = mach_port_allocate_name(default_pager_self, + MACH_PORT_RIGHT_RECEIVE, port); + if (result != KERN_SUCCESS) { + default_pager_t ds1; + + if (result != KERN_NAME_EXISTS) return (result); + + ds1 = (default_pager_t) kalloc(sizeof *ds1); + *ds1 = *ds; + mutex_lock(&all_pagers.lock); + queue_enter(&all_pagers.leak_queue, ds, default_pager_t, links); + mutex_unlock(&all_pagers.lock); + ds = ds1; + goto rename_it; + } + + /* + * Set up associations between these ports + * and this default_pager structure + */ + + ds->pager = port; + pager_port_list_insert(port, ds); + default_pager_add(ds, FALSE); + + *mem_obj = port; + return (KERN_SUCCESS); +} + +kern_return_t default_pager_info(pager, infop) + mach_port_t pager; + default_pager_info_t *infop; +{ + vm_size_t total, free; + + if (pager != default_pager_default_port) + return KERN_INVALID_ARGUMENT; + + mutex_lock(&all_partitions.lock); + paging_space_info(&total, &free); + mutex_unlock(&all_partitions.lock); + + infop->dpi_total_space = ptoa(total); + infop->dpi_free_space = ptoa(free); + infop->dpi_page_size = vm_page_size; + return KERN_SUCCESS; +} + +kern_return_t default_pager_objects(pager, objectsp, ocountp, 
portsp, pcountp) + mach_port_t pager; + default_pager_object_array_t *objectsp; + natural_t *ocountp; + mach_port_array_t *portsp; + natural_t *pcountp; +{ + vm_offset_t oaddr; /* memory for objects */ + vm_size_t osize; /* current size */ + default_pager_object_t *objects; + natural_t opotential; + + vm_offset_t paddr; /* memory for ports */ + vm_size_t psize; /* current size */ + mach_port_t *ports; + natural_t ppotential; + + unsigned int actual; + unsigned int num_pagers; + kern_return_t kr; + default_pager_t entry; + + if (pager != default_pager_default_port) + return KERN_INVALID_ARGUMENT; + + /* start with the inline memory */ + + num_pagers = 0; + + objects = *objectsp; + opotential = *ocountp; + + ports = *portsp; + ppotential = *pcountp; + + mutex_lock(&all_pagers.lock); + /* + * We will send no more than this many + */ + actual = all_pagers.count; + mutex_unlock(&all_pagers.lock); + + if (opotential < actual) { + vm_offset_t newaddr; + vm_size_t newsize; + + newsize = 2 * round_page(actual * sizeof *objects); + + kr = vm_allocate(default_pager_self, &newaddr, newsize, TRUE); + if (kr != KERN_SUCCESS) + goto nomemory; + + oaddr = newaddr; + osize = newsize; + opotential = osize/sizeof *objects; + objects = (default_pager_object_t *) oaddr; + } + + if (ppotential < actual) { + vm_offset_t newaddr; + vm_size_t newsize; + + newsize = 2 * round_page(actual * sizeof *ports); + + kr = vm_allocate(default_pager_self, &newaddr, newsize, TRUE); + if (kr != KERN_SUCCESS) + goto nomemory; + + paddr = newaddr; + psize = newsize; + ppotential = psize/sizeof *ports; + ports = (mach_port_t *) paddr; + } + + /* + * Now scan the list. + */ + + mutex_lock(&all_pagers.lock); + + num_pagers = 0; + queue_iterate(&all_pagers.queue, entry, default_pager_t, links) { + + mach_port_t port; + vm_size_t size; + + if ((num_pagers >= opotential) || + (num_pagers >= ppotential)) { + /* + * This should be rare. 
In any case, + * we will only miss recent objects, + * because they are added at the end. + */ + break; + } + + /* + * Avoid interfering with normal operations + */ + if (!mutex_try_lock(&entry->dpager.lock)) + goto not_this_one; + size = pager_allocated(&entry->dpager); + mutex_unlock(&entry->dpager.lock); + + dstruct_lock(entry); + + port = entry->pager_name; + if (port == MACH_PORT_NULL) { + /* + * The object is waiting for no-senders + * or memory_object_init. + */ + dstruct_unlock(entry); + goto not_this_one; + } + + /* + * We need a reference for the reply message. + * While we are unlocked, the bucket queue + * can change and the object might be terminated. + * memory_object_terminate will wait for us, + * preventing deallocation of the entry. + */ + + if (--entry->name_refs == 0) { + dstruct_unlock(entry); + + /* keep the list locked, wont take long */ + + kr = mach_port_mod_refs(default_pager_self, + port, MACH_PORT_RIGHT_SEND, + default_pager_max_urefs); + if (kr != KERN_SUCCESS) + panic("%sdefault_pager_objects",my_name); + + dstruct_lock(entry); + + entry->name_refs += default_pager_max_urefs; + pager_port_finish_refs(entry); + } + dstruct_unlock(entry); + + /* the arrays are wired, so no deadlock worries */ + + objects[num_pagers].dpo_object = (vm_offset_t) entry; + objects[num_pagers].dpo_size = size; + ports [num_pagers++] = port; + continue; +not_this_one: + /* + * Do not return garbage + */ + objects[num_pagers].dpo_object = (vm_offset_t) 0; + objects[num_pagers].dpo_size = 0; + ports [num_pagers++] = MACH_PORT_NULL; + + } + + mutex_unlock(&all_pagers.lock); + + /* + * Deallocate and clear unused memory. + * (Returned memory will automagically become pageable.) + */ + + if (objects == *objectsp) { + /* + * Our returned information fit inline. + * Nothing to deallocate. 
+ */ + + *ocountp = num_pagers; + } else if (actual == 0) { + (void) vm_deallocate(default_pager_self, oaddr, osize); + + /* return zero items inline */ + *ocountp = 0; + } else { + vm_offset_t used; + + used = round_page(actual * sizeof *objects); + + if (used != osize) + (void) vm_deallocate(default_pager_self, + oaddr + used, osize - used); + + *objectsp = objects; + *ocountp = num_pagers; + } + + if (ports == *portsp) { + /* + * Our returned information fit inline. + * Nothing to deallocate. + */ + + *pcountp = num_pagers; + } else if (actual == 0) { + (void) vm_deallocate(default_pager_self, paddr, psize); + + /* return zero items inline */ + *pcountp = 0; + } else { + vm_offset_t used; + + used = round_page(actual * sizeof *ports); + + if (used != psize) + (void) vm_deallocate(default_pager_self, + paddr + used, psize - used); + + *portsp = ports; + *pcountp = num_pagers; + } + + return KERN_SUCCESS; + + nomemory: + + { + register int i; + for (i = 0; i < num_pagers; i++) + (void) mach_port_deallocate(default_pager_self, ports[i]); + } + + if (objects != *objectsp) + (void) vm_deallocate(default_pager_self, oaddr, osize); + + if (ports != *portsp) + (void) vm_deallocate(default_pager_self, paddr, psize); + + return KERN_RESOURCE_SHORTAGE; +} + + +kern_return_t +default_pager_object_pages(pager, object, pagesp, countp) + mach_port_t pager; + mach_port_t object; + default_pager_page_array_t *pagesp; + natural_t *countp; +{ + vm_offset_t addr; /* memory for page offsets */ + vm_size_t size; /* current memory size */ + default_pager_page_t *pages; + natural_t potential, actual; + kern_return_t kr; + + if (pager != default_pager_default_port) + return KERN_INVALID_ARGUMENT; + + /* we start with the inline space */ + + pages = *pagesp; + potential = *countp; + + for (;;) { + default_pager_t entry; + + mutex_lock(&all_pagers.lock); + queue_iterate(&all_pagers.queue, entry, default_pager_t, links) { + dstruct_lock(entry); + if (entry->pager_name == object) { + 
mutex_unlock(&all_pagers.lock); + goto found_object; + } + dstruct_unlock(entry); + } + mutex_unlock(&all_pagers.lock); + + /* did not find the object */ + + if (pages != *pagesp) + (void) vm_deallocate(default_pager_self, addr, size); + return KERN_INVALID_ARGUMENT; + + found_object: + + if (!mutex_try_lock(&entry->dpager.lock)) { + /* oh well bad luck */ + + dstruct_unlock(entry); + + /* yield the processor */ + (void) thread_switch(MACH_PORT_NULL, + SWITCH_OPTION_NONE, 0); + continue; + } + + actual = pager_pages(&entry->dpager, pages, potential); + mutex_unlock(&entry->dpager.lock); + dstruct_unlock(entry); + + if (actual <= potential) + break; + + /* allocate more memory */ + + if (pages != *pagesp) + (void) vm_deallocate(default_pager_self, addr, size); + size = round_page(actual * sizeof *pages); + kr = vm_allocate(default_pager_self, &addr, size, TRUE); + if (kr != KERN_SUCCESS) + return kr; + pages = (default_pager_page_t *) addr; + potential = size/sizeof *pages; + } + + /* + * Deallocate and clear unused memory. + * (Returned memory will automagically become pageable.) + */ + + if (pages == *pagesp) { + /* + * Our returned information fit inline. + * Nothing to deallocate. 
+ */ + + *countp = actual; + } else if (actual == 0) { + (void) vm_deallocate(default_pager_self, addr, size); + + /* return zero items inline */ + *countp = 0; + } else { + vm_offset_t used; + + used = round_page(actual * sizeof *pages); + + if (used != size) + (void) vm_deallocate(default_pager_self, + addr + used, size - used); + + *pagesp = pages; + *countp = actual; + } + return KERN_SUCCESS; +} + +/* + * Add/remove extra paging space + */ + +extern mach_port_t bootstrap_master_device_port; +extern mach_port_t bootstrap_master_host_port; + +kern_return_t +default_pager_paging_file(pager, mdport, file_name, add) + mach_port_t pager; + mach_port_t mdport; + default_pager_filename_t file_name; + boolean_t add; +{ + kern_return_t kr; + + if (pager != default_pager_default_port) + return KERN_INVALID_ARGUMENT; + +#if 0 +dprintf("bmd %x md %x\n", bootstrap_master_device_port, mdport); +#endif + if (add) { + kr = add_paging_file(bootstrap_master_device_port, + file_name, 0); + } else { + kr = remove_paging_file(file_name); + } + + /* XXXX more code needed */ + if (mdport != bootstrap_master_device_port) + mach_port_deallocate( mach_task_self(), mdport); + + return kr; +} + +default_pager_register_fileserver(pager, fileserver) + mach_port_t pager; + mach_port_t fileserver; +{ + if (pager != default_pager_default_port) + return KERN_INVALID_ARGUMENT; +#if notyet + mach_port_deallocate(mach_task_self(), fileserver); + if (0) dp_helper_paging_space(0,0,0);/*just linkit*/ +#endif + return KERN_SUCCESS; +} + +/* + * When things do not quite workout... 
+ */ +no_paging_space(out_of_memory) + boolean_t out_of_memory; +{ + static char here[] = "%s *** NOT ENOUGH PAGING SPACE ***"; + + if (out_of_memory) + dprintf("*** OUT OF MEMORY *** "); + panic(here, my_name); +} + +overcommitted(got_more_space, space) + boolean_t got_more_space; + vm_size_t space; /* in pages */ +{ + vm_size_t pages_free, pages_total; + + static boolean_t user_warned = FALSE; + static vm_size_t pages_shortage = 0; + + paging_space_info(&pages_total, &pages_free); + + /* + * If user added more space, see if it is enough + */ + if (got_more_space) { + pages_free -= pages_shortage; + if (pages_free > 0) { + pages_shortage = 0; + if (user_warned) + dprintf("%s paging space ok now.\n", my_name); + } else + pages_shortage = pages_free; + user_warned = FALSE; + return; + } + /* + * We ran out of gas, let user know. + */ + pages_free -= space; + pages_shortage = (pages_free > 0) ? 0 : -pages_free; + if (!user_warned && pages_shortage) { + user_warned = TRUE; + dprintf("%s paging space over-committed.\n", my_name); + } +#if debug + user_warned = FALSE; + dprintf("%s paging space over-committed [+%d (%d) pages].\n", + my_name, space, pages_shortage); +#endif +} + +paging_space_info(totp, freep) + vm_size_t *totp, *freep; +{ + register vm_size_t total, free; + register partition_t part; + register int i; + + total = free = 0; + for (i = 0; i < all_partitions.n_partitions; i++) { + + if ((part = partition_of(i)) == 0) continue; + + /* no need to lock: by the time this data + gets back to any remote requestor it + will be obsolete anyways */ + total += part->total_size; + free += part->free; +#if debug + dprintf("Partition %d: x%x total, x%x free\n", + i, part->total_size, part->free); +#endif + } + *totp = total; + *freep = free; +} + +/* + * Catch exceptions. 
+ */ + +kern_return_t +catch_exception_raise(exception_port, thread, task, exception, code, subcode) + mach_port_t exception_port; + mach_port_t thread, task; + int exception, code, subcode; +{ + ddprintf ("(default_pager)catch_exception_raise(%d,%d,%d)\n", + exception, code, subcode); + panic(my_name); + + /* mach_msg_server will deallocate thread/task for us */ + + return KERN_FAILURE; +} + +/* + * Handle bootstrap requests. + */ + +kern_return_t +do_bootstrap_privileged_ports(bootstrap, hostp, devicep) + mach_port_t bootstrap; + mach_port_t *hostp, *devicep; +{ + *hostp = bootstrap_master_host_port; + *devicep = bootstrap_master_device_port; + return KERN_SUCCESS; +} + +void +bootstrap_compat(in, out) + mach_msg_header_t *in, *out; +{ + mig_reply_header_t *reply = (mig_reply_header_t *) out; + mach_msg_return_t mr; + + struct imsg { + mach_msg_header_t hdr; + mach_msg_type_t port_desc_1; + mach_port_t port_1; + mach_msg_type_t port_desc_2; + mach_port_t port_2; + } imsg; + + /* + * Send back the host and device ports. + */ + + imsg.hdr.msgh_bits = MACH_MSGH_BITS_COMPLEX | + MACH_MSGH_BITS(MACH_MSGH_BITS_REMOTE(in->msgh_bits), 0); + /* msgh_size doesn't need to be initialized */ + imsg.hdr.msgh_remote_port = in->msgh_remote_port; + imsg.hdr.msgh_local_port = MACH_PORT_NULL; + /* msgh_seqno doesn't need to be initialized */ + imsg.hdr.msgh_id = in->msgh_id + 100; /* this is a reply msg */ + + imsg.port_desc_1.msgt_name = MACH_MSG_TYPE_COPY_SEND; + imsg.port_desc_1.msgt_size = (sizeof(mach_port_t) * 8); + imsg.port_desc_1.msgt_number = 1; + imsg.port_desc_1.msgt_inline = TRUE; + imsg.port_desc_1.msgt_longform = FALSE; + imsg.port_desc_1.msgt_deallocate = FALSE; + imsg.port_desc_1.msgt_unused = 0; + + imsg.port_1 = bootstrap_master_host_port; + + imsg.port_desc_2 = imsg.port_desc_1; + + imsg.port_2 = bootstrap_master_device_port; + + /* + * Send the reply message. + * (mach_msg_server can not do this, because the reply + * is not in standard format.) 
+ */ + + mr = mach_msg(&imsg.hdr, MACH_SEND_MSG, + sizeof imsg, 0, MACH_PORT_NULL, + MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL); + if (mr != MACH_MSG_SUCCESS) + (void) mach_port_deallocate(default_pager_self, + imsg.hdr.msgh_remote_port); + + /* + * Tell mach_msg_server to do nothing. + */ + + reply->RetCode = MIG_NO_REPLY; +} + +#ifdef mips +/* + * set_ras_address for default pager + * Default pager does not have emulator support + * so it needs a local version of set_ras_address. + */ +int +set_ras_address(basepc, boundspc) + vm_offset_t basepc; + vm_offset_t boundspc; +{ + kern_return_t status; + + status = task_ras_control(mach_task_self(), basepc, boundspc, + TASK_RAS_CONTROL_INSTALL_ONE); + if (status != KERN_SUCCESS) + return -1; + return 0; +} +#endif diff --git a/serverboot/defs.h b/serverboot/defs.h new file mode 100644 index 00000000..7b872fd6 --- /dev/null +++ b/serverboot/defs.h @@ -0,0 +1,95 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. 
+ */ +/* + * Common definitions for Berkeley Fast File System. + */ + +/* + * Compatibility definitions for disk IO. + */ + +/* + * Disk devices do all IO in 512-byte blocks. + */ +#define DEV_BSIZE 512 + +/* + * Conversion between bytes and disk blocks. + */ +#define btodb(byte_offset) ((byte_offset) >> 9) +#define dbtob(block_number) ((block_number) << 9) + +/* + * Compatibility definitions for old type names. + */ + +typedef struct _quad_ { + unsigned int val[2]; /* 2 int values make... */ +} quad; /* an 8-byte item */ + +#if 0 +typedef unsigned char u_char; /* unsigned char */ +typedef unsigned short u_short; /* unsigned short */ +typedef unsigned int u_int; /* unsigned int */ + +typedef unsigned int time_t; /* an unsigned int */ +typedef unsigned int daddr_t; /* an unsigned int */ +typedef unsigned int off_t; /* another unsigned int */ + +typedef unsigned short uid_t; +typedef unsigned short gid_t; +typedef unsigned int ino_t; +#endif + +#define NBBY 8 + +/* + * The file system is made out of blocks of at most MAXBSIZE units, + * with smaller units (fragments) only in the last direct block. + * MAXBSIZE primarily determines the size of buffers in the buffer + * pool. It may be made larger without any effect on existing + * file systems; however, making it smaller may make some file + * systems unmountable. + * + * Note that the disk devices are assumed to have DEV_BSIZE "sectors" + * and that fragments must be some multiple of this size. + */ +#define MAXBSIZE 8192 +#define MAXFRAG 8 + +/* + * MAXPATHLEN defines the longest permissible path length + * after expanding symbolic links. + * + * MAXSYMLINKS defines the maximum number of symbolic links + * that may be expanded in a path name. It should be set + * high enough to allow all legitimate uses, but halt infinite + * loops reasonably quickly. 
+ */ + +#define MAXPATHLEN 1024 +#define MAXSYMLINKS 8 + diff --git a/serverboot/dir.h b/serverboot/dir.h new file mode 100644 index 00000000..208df5ce --- /dev/null +++ b/serverboot/dir.h @@ -0,0 +1,142 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * Copyright (c) 1982, 1986, 1989 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms are permitted + * provided that the above copyright notice and this paragraph are + * duplicated in all such forms and that any documentation, + * advertising materials, and other materials related to such + * distribution and use acknowledge that the software was developed + * by the University of California, Berkeley. The name of the + * University may not be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + * + * @(#)dir.h 7.6 (Berkeley) 5/9/89 + */ + +#ifndef _BOOT_UFS_DIR_H_ +#define _BOOT_UFS_DIR_H_ + +/* + * A directory consists of some number of blocks of DIRBLKSIZ + * bytes, where DIRBLKSIZ is chosen such that it can be transferred + * to disk in a single atomic operation (e.g. 512 bytes on most machines). + * + * Each DIRBLKSIZ byte block contains some number of directory entry + * structures, which are of variable length. Each directory entry has + * a struct direct at the front of it, containing its inode number, + * the length of the entry, and the length of the name contained in + * the entry. These are followed by the name padded to a 4 byte boundary + * with null bytes. All names are guaranteed null terminated. + * The maximum length of a name in a directory is MAXNAMLEN. + * + * The macro DIRSIZ(dp) gives the amount of space required to represent + * a directory entry. Free space in a directory is represented by + * entries which have dp->d_reclen > DIRSIZ(dp). All DIRBLKSIZ bytes + * in a directory block are claimed by the directory entries. This + * usually results in the last entry in a directory having a large + * dp->d_reclen. When entries are deleted from a directory, the + * space is returned to the previous entry in the same directory + * block by increasing its dp->d_reclen. If the first entry of + * a directory block is free, then its dp->d_ino is set to 0. + * Entries other than the first in a directory do not normally have + * dp->d_ino set to 0. 
+ */ +#define DIRBLKSIZ DEV_BSIZE +#define MAXNAMLEN 255 + +struct direct { + u_int d_ino; /* inode number of entry */ + u_short d_reclen; /* length of this record */ + u_short d_namlen; /* length of string in d_name */ + char d_name[MAXNAMLEN + 1]; /* name with length <= MAXNAMLEN */ +}; + +/* + * The DIRSIZ macro gives the minimum record length which will hold + * the directory entry. This requires the amount of space in struct direct + * without the d_name field, plus enough space for the name with a terminating + * null byte (dp->d_namlen+1), rounded up to a 4 byte boundary. + */ +#undef DIRSIZ +#define DIRSIZ(dp) \ + ((sizeof (struct direct) - (MAXNAMLEN+1)) + (((dp)->d_namlen+1 + 3) &~ 3)) + +#ifdef KERNEL +/* + * Template for manipulating directories. + * Should use struct direct's, but the name field + * is MAXNAMLEN - 1, and this just won't do. + */ +struct dirtemplate { + u_int dot_ino; + short dot_reclen; + short dot_namlen; + char dot_name[4]; /* must be multiple of 4 */ + u_int dotdot_ino; + short dotdot_reclen; + short dotdot_namlen; + char dotdot_name[4]; /* ditto */ +}; +#endif + +/* + * The following information should be obtained from <dirent.h> + * and is provided solely (and temporarily) for backward compatibility. + */ +#ifndef KERNEL +#define d_fileno d_ino /* compatibility with POSIX */ +#ifndef DEV_BSIZE +#define DEV_BSIZE 512 +#endif +/* + * Definitions for library routines operating on directories. 
+ */ +typedef struct _dirdesc { + int dd_fd; + int dd_loc; + int dd_size; + char dd_buf[DIRBLKSIZ]; +} DIR; + +#define dirfd(dirp) ((dirp)->dd_fd) + +#ifndef NULL +#define NULL 0 +#endif +extern DIR *opendir(); +extern struct direct *readdir(); +extern int telldir(); +extern void seekdir(); +#define rewinddir(dirp) seekdir((dirp), (long)0) +extern void closedir(); +#endif /* not KERNEL */ +#endif /* _BOOT_UFS_DIR_H_ */ diff --git a/serverboot/disk_inode.h b/serverboot/disk_inode.h new file mode 100644 index 00000000..6eed9104 --- /dev/null +++ b/serverboot/disk_inode.h @@ -0,0 +1,101 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * Copyright (c) 1982, 1989 The Regents of the University of California. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms are permitted + * provided that the above copyright notice and this paragraph are + * duplicated in all such forms and that any documentation, + * advertising materials, and other materials related to such + * distribution and use acknowledge that the software was developed + * by the University of California, Berkeley. The name of the + * University may not be used to endorse or promote products derived + * from this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + * + * @(#)inode.h 7.5 (Berkeley) 7/3/89 + */ + +#ifndef _BOOT_UFS_DISK_INODE_H_ +#define _BOOT_UFS_DISK_INODE_H_ + +/* + * The I node is the focus of all file activity in the BSD Fast File System. + * There is a unique inode allocated for each active file, + * each current directory, each mounted-on file, text file, and the root. + * An inode is 'named' by its dev/inumber pair. (iget/iget.c) + * Data in icommon is read in from permanent inode on volume. 
+ */ + +#define FFS_NDADDR 12 /* direct addresses in inode */ +#define FFS_NIADDR 3 /* indirect addresses in inode */ + +#define FFS_MAX_FASTLINK_SIZE ((FFS_NDADDR + FFS_NIADDR) * sizeof(daddr_t)) + +struct icommon { + u_short ic_mode; /* 0: mode and type of file */ + short ic_nlink; /* 2: number of links to file */ + short ic_uid; /* 4: owner's user id */ + short ic_gid; /* 6: owner's group id */ + quad ic_size; /* 8: number of bytes in file */ + time_t ic_atime; /* 16: time last accessed */ + int ic_atspare; + time_t ic_mtime; /* 24: time last modified */ + int ic_mtspare; + time_t ic_ctime; /* 32: last time inode changed */ + int ic_ctspare; + union { + struct { + daddr_t Mb_db[FFS_NDADDR]; /* 40: disk block addresses */ + daddr_t Mb_ib[FFS_NIADDR]; /* 88: indirect blocks */ + } ic_Mb; + char ic_Msymlink[FFS_MAX_FASTLINK_SIZE]; + /* 40: symbolic link name */ + } ic_Mun; +#define ic_db ic_Mun.ic_Mb.Mb_db +#define ic_ib ic_Mun.ic_Mb.Mb_ib +#define ic_symlink ic_Mun.ic_Msymlink + int ic_flags; /* 100: status, currently unused */ + int ic_blocks; /* 104: blocks actually held */ + int ic_gen; /* 108: generation number */ + int ic_spare[4]; /* 112: reserved, currently unused */ +} i_ic; + +/* + * Same structure, but on disk. + */ +struct dinode { + union { + struct icommon di_com; + char di_char[128]; + } di_un; +}; +#define di_ic di_un.di_com + +#endif /* _BOOT_UFS_DISK_INODE_H_ */ diff --git a/serverboot/disk_inode_ffs.h b/serverboot/disk_inode_ffs.h new file mode 100644 index 00000000..43690b2f --- /dev/null +++ b/serverboot/disk_inode_ffs.h @@ -0,0 +1,99 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990 Carnegie Mellon University + * All Rights Reserved. 
+ * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * Copyright (c) 1982, 1989 The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms are permitted + * provided that the above copyright notice and this paragraph are + * duplicated in all such forms and that any documentation, + * advertising materials, and other materials related to such + * distribution and use acknowledge that the software was developed + * by the University of California, Berkeley. The name of the + * University may not be used to endorse or promote products derived + * from this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
+ *
+ * @(#)inode.h 7.5 (Berkeley) 7/3/89
+ */
+
+#ifndef _BOOT_UFS_DISK_INODE_FFS_H_
+#define _BOOT_UFS_DISK_INODE_FFS_H_
+
+/* Generic names for the FFS-specific inode geometry constants. */
+#define NDADDR FFS_NDADDR
+#define NIADDR FFS_NIADDR
+
+#define MAX_FASTLINK_SIZE FFS_MAX_FASTLINK_SIZE
+
+#define IC_FASTLINK 0x0001 /* Symbolic link in inode */
+
+/* Shorthand for the members of the inode copy named by i_ic (below). */
+#define i_mode i_ic.ic_mode
+#define i_nlink i_ic.ic_nlink
+#define i_uid i_ic.ic_uid
+#define i_gid i_ic.ic_gid
+/* ic_size is a pair of ints; i_size selects one of them by byte order. */
+#if BYTE_MSF
+#define i_size i_ic.ic_size.val[1]
+#else /* BYTE_LSF */
+#define i_size i_ic.ic_size.val[0]
+#endif
+#define i_db i_ic.ic_db
+#define i_ib i_ic.ic_ib
+#define i_atime i_ic.ic_atime
+#define i_mtime i_ic.ic_mtime
+#define i_ctime i_ic.ic_ctime
+#define i_blocks i_ic.ic_blocks
+#define i_rdev i_ic.ic_db[0]
+#define i_symlink i_ic.ic_symlink
+#define i_flags i_ic.ic_flags
+#define i_gen i_ic.ic_gen
+
+/* modes */
+#define IFMT 0xf000 /* type of file */
+#define IFCHR 0x2000 /* character special */
+#define IFDIR 0x4000 /* directory */
+#define IFBLK 0x6000 /* block special */
+#define IFREG 0x8000 /* regular */
+#define IFLNK 0xa000 /* symbolic link */
+#define IFSOCK 0xc000 /* socket */
+
+
+#define ISUID 0x0800 /* set user id on execution */
+#define ISGID 0x0400 /* set group id on execution */
+#define ISVTX 0x0200 /* save swapped text even after use */
+#define IREAD 0x0100 /* read, write, execute permissions */
+#define IWRITE 0x0080
+#define IEXEC 0x0040
+
+/* Map the generic file-structure field names onto the FFS union arm. */
+#define f_fs u.ffs.ffs_fs
+#define i_ic u.ffs.ffs_ic
+#define f_nindir u.ffs.ffs_nindir
+#define f_blk u.ffs.ffs_blk
+#define f_blksize u.ffs.ffs_blksize
+#define f_blkno u.ffs.ffs_blkno
+
+#endif /* _BOOT_UFS_DISK_INODE_FFS_H_ */
diff --git a/serverboot/elf-load.c b/serverboot/elf-load.c
new file mode 100644
index 00000000..a30124a2
--- /dev/null
+++ b/serverboot/elf-load.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 1995, 1994, 1993, 1992, 1991, 1990
+ * Open Software Foundation, Inc.
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby granted,
+ * provided that the above copyright notice appears in all copies and
+ * that both the copyright notice and this permission notice appear in
+ * supporting documentation, and that the name of ("OSF") or Open Software
+ * Foundation not be used in advertising or publicity pertaining to
+ * distribution of the software without specific, written prior permission.
+ *
+ * OSF DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL OSF BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * ACTION OF CONTRACT, NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE
+ */
+/*
+ * OSF Research Institute MK6.1 (unencumbered) 1/31/1995
+ */
+
+#include <alloca.h>
+#include <mach/machine/vm_types.h>
+#include <elf.h>
+#include "mach-exec.h"
+
+/*
+ * Load the 32-bit ELF executable identified by HANDLE.
+ *
+ * READ is called as (*read)(handle, file_offset, buffer, size, &actual)
+ * to fetch the ELF header and the program header table.  READ_EXEC is
+ * called once for every PT_LOAD program header with the segment's file
+ * offset and file size, its virtual address and memory size, and the
+ * EXEC_SECTYPE_* flags derived from p_flags.  The entry point is stored
+ * in OUT_INFO->entry.
+ *
+ * Returns 0 on success; EX_NOT_EXECUTABLE if the header is short or
+ * lacks the ELF magic; EX_WRONG_ARCH if the image is not 32-bit
+ * little-endian i386; EX_CORRUPT if the program header table is
+ * truncated or its entries are too small; otherwise the first non-zero
+ * value returned by READ or READ_EXEC.
+ */
+int exec_load(exec_read_func_t *read, exec_read_exec_func_t *read_exec,
+              void *handle, exec_info_t *out_info)
+{
+    vm_size_t actual;
+    Elf32_Ehdr x;
+    Elf32_Phdr *phdr, *ph;
+    vm_size_t phsize;
+    int i;
+    int result;
+
+    /* Read the ELF header. */
+    if ((result = (*read)(handle, 0, &x, sizeof(x), &actual)) != 0)
+        return result;
+    if (actual < sizeof(x))
+        return EX_NOT_EXECUTABLE;
+
+    if ((x.e_ident[EI_MAG0] != ELFMAG0) ||
+        (x.e_ident[EI_MAG1] != ELFMAG1) ||
+        (x.e_ident[EI_MAG2] != ELFMAG2) ||
+        (x.e_ident[EI_MAG3] != ELFMAG3))
+        return EX_NOT_EXECUTABLE;
+
+    /* Make sure the file is of the right architecture. */
+#ifdef i386
+    if ((x.e_ident[EI_CLASS] != ELFCLASS32) ||
+        (x.e_ident[EI_DATA] != ELFDATA2LSB) ||
+        (x.e_machine != EM_386))
+        return EX_WRONG_ARCH;
+#else
+#error Not ported to this architecture!
+#endif
+
+    /* XXX others */
+    out_info->entry = (vm_offset_t) x.e_entry;
+
+    /*
+     * Each table entry is accessed as an Elf32_Phdr below, so entries
+     * smaller than that would be read past the end of the buffer.
+     */
+    if (x.e_phnum != 0 && x.e_phentsize < sizeof(Elf32_Phdr))
+        return EX_CORRUPT;
+
+    /*
+     * Multiply in vm_size_t so the product of the two header fields
+     * is not computed in (possibly overflowing) int arithmetic.
+     */
+    phsize = (vm_size_t) x.e_phnum * (vm_size_t) x.e_phentsize;
+    phdr = (Elf32_Phdr *)alloca(phsize);
+
+    result = (*read)(handle, x.e_phoff, phdr, phsize, &actual);
+    if (result)
+        return result;
+    if (actual < phsize)
+        return EX_CORRUPT;
+
+    for (i = 0; i < x.e_phnum; i++)
+    {
+        /* Step by e_phentsize: entries may be larger than Elf32_Phdr. */
+        ph = (Elf32_Phdr *)((vm_offset_t)phdr + i * x.e_phentsize);
+        if (ph->p_type == PT_LOAD)
+        {
+            exec_sectype_t type = EXEC_SECTYPE_ALLOC |
+                                  EXEC_SECTYPE_LOAD;
+            if (ph->p_flags & PF_R) type |= EXEC_SECTYPE_READ;
+            if (ph->p_flags & PF_W) type |= EXEC_SECTYPE_WRITE;
+            if (ph->p_flags & PF_X) type |= EXEC_SECTYPE_EXECUTE;
+            result = (*read_exec)(handle,
+                                  ph->p_offset, ph->p_filesz,
+                                  ph->p_vaddr, ph->p_memsz, type);
+            /* A segment that failed to load must not be reported as success. */
+            if (result)
+                return result;
+        }
+    }
+
+    return 0;
+}
+
diff --git a/serverboot/exec.c b/serverboot/exec.c
new file mode 100644
index 00000000..a0773f4c
--- /dev/null
+++ b/serverboot/exec.c
@@ -0,0 +1,88 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1993-1989 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * i386-specific routines for loading a.out files.
+ */
+
+#include <mach.h>
+#include <mach/machine/vm_param.h>
+#include <mach/machine/eflags.h>
+#include "mach-exec.h"
+
+#include <file_io.h>
+
+/*
+ * Machine-dependent portions of execve() for the i386.
+ */
+
+#define STACK_SIZE (64*1024)
+
+/*
+ * Give USER_THREAD a stack and an initial register state.
+ *
+ * A STACK_SIZE-byte region ending at VM_MAX_ADDRESS is allocated in
+ * USER_TASK.  The thread's i386 state is fetched, eip is set to
+ * INFO->entry and uesp to the int-aligned address ARG_SIZE bytes (plus
+ * room for five ints) below the top of the stack, and the state is
+ * written back.
+ *
+ * Returns the new user stack pointer.  The results of vm_allocate,
+ * thread_get_state and thread_set_state are deliberately discarded.
+ */
+char *set_regs(
+    mach_port_t user_task,
+    mach_port_t user_thread,
+    struct exec_info *info,
+    int arg_size)
+{
+    vm_offset_t stack_start;
+    vm_offset_t stack_end;
+    struct i386_thread_state regs;
+    unsigned int reg_size;
+
+    /*
+     * Add space for 5 ints to arguments, for
+     * PS program. XXX
+     */
+    arg_size += 5 * sizeof(int);
+
+    /*
+     * Allocate stack.
+     */
+    stack_end = VM_MAX_ADDRESS;
+    stack_start = VM_MAX_ADDRESS - STACK_SIZE;
+    (void)vm_allocate(user_task,
+                      &stack_start,
+                      (vm_size_t)(stack_end - stack_start),
+                      FALSE);
+
+    /* Start from the thread's current state and change only eip/uesp. */
+    reg_size = i386_THREAD_STATE_COUNT;
+    (void)thread_get_state(user_thread,
+                           i386_THREAD_STATE,
+                           (thread_state_t)&regs,
+                           &reg_size);
+
+    regs.eip = info->entry;
+    regs.uesp = (int)((stack_end - arg_size) & ~(sizeof(int)-1));
+
+    /* regs.efl |= EFL_TF; trace flag*/
+
+    (void)thread_set_state(user_thread,
+                           i386_THREAD_STATE,
+                           (thread_state_t)&regs,
+                           reg_size);
+
+    return (char *)regs.uesp;
+}
+
diff --git a/serverboot/ext2_file_io.c b/serverboot/ext2_file_io.c
new file mode 100644
index 00000000..28ee2cb3
--- /dev/null
+++ b/serverboot/ext2_file_io.c
@@ -0,0 +1,983 @@
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION.
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+/*
+ * Stand-alone file reading package.
+ */
+
+#include <device/device_types.h>
+#include <device/device.h>
+
+#include <mach/mach_traps.h>
+#include <mach/mach_interface.h>
+
+#include "file_io.h"
+#include "ffs_compat.h"
+
+void ext2_close_file(); /* forward */
+
+/*
+ * Release every block cached in FP -- the per-level indirect blocks
+ * and the data block -- and mark each cache slot empty.  The file
+ * itself is not closed.
+ */
+static void
+free_file_buffers(fp)
+    register struct file *fp;
+{
+    register int i;
+
+    /*
+     * Indirect-block cache: one slot per indirection level.  The
+     * block-number tag is reset whether or not a block was held.
+     */
+    i = 0;
+    while (i < NIADDR) {
+        if (fp->f_blk[i] != 0) {
+            (void) vm_deallocate(mach_task_self(),
+                                 fp->f_blk[i],
+                                 fp->f_blksize[i]);
+            fp->f_blk[i] = 0;
+        }
+        fp->f_blkno[i] = -1;
+        i++;
+    }
+
+    /*
+     * Data-block cache.
+     */
+    if (fp->f_buf != 0) {
+        (void) vm_deallocate(mach_task_self(),
+                             fp->f_buf,
+                             fp->f_buf_size);
+        fp->f_buf = 0;
+    }
+    fp->f_buf_blkno = -1;
+}
+
+/*
+ * Read a new inode into a file structure.
+ */
+/*
+ * Reads the file system block holding inode INUMBER from FP's device,
+ * copies that inode into FP (fp->i_ic, fp->f_size), and drops all
+ * blocks cached for the previous inode.  Returns 0, or the
+ * device_read error code.
+ */
+static int
+read_inode(inumber, fp)
+    ino_t inumber;
+    register struct file *fp;
+{
+    vm_offset_t buf;
+    mach_msg_type_number_t buf_size;
+    register
+    struct ext2_super_block *fs;
+    daddr_t disk_block;
+    kern_return_t rc;
+
+    fs = fp->f_fs;
+    disk_block = ino2blk(fs, fp->f_gd, inumber);
+
+    rc = device_read(fp->f_dev,
+                     0,
+                     (recnum_t) fsbtodb(fp->f_fs, disk_block),
+                     (int) EXT2_BLOCK_SIZE(fs),
+                     (char **)&buf,
+                     &buf_size);
+    if (rc != KERN_SUCCESS)
+        return (rc);
+
+    {
+        register struct ext2_inode *dp;
+
+        /* itoo() gives the inode's index within the block just read. */
+        dp = (struct ext2_inode *)buf;
+        dp += itoo(fs, inumber);
+        fp->i_ic = *dp;
+        fp->f_size = dp->i_size;
+    }
+
+    (void) vm_deallocate(mach_task_self(), buf, buf_size);
+
+    /*
+     * Clear out the old buffers
+     */
+    free_file_buffers(fp);
+
+    return (0);
+}
+
+/*
+ * Given an offset in a file, find the disk block number that
+ * contains that block.
+ *
+ * FILE_BLOCK is the logical block index; the disk block number is
+ * stored in *DISK_BLOCK_P (0 if an unallocated block was met on the
+ * way).  Returns 0, FS_NOT_IN_FILE if FILE_BLOCK is beyond the
+ * triple-indirect range, or a device_read error code.  Indirect
+ * blocks read on the way replace the per-level cache entries in FP;
+ * the entries they displace are freed before returning, on the error
+ * path as well.
+ */
+static int
+block_map(fp, file_block, disk_block_p)
+    struct file *fp;
+    daddr_t file_block;
+    daddr_t *disk_block_p; /* out */
+{
+    int level;
+    int idx;
+    daddr_t ind_block_num;
+    kern_return_t rc;
+
+    vm_offset_t olddata[NIADDR+1];
+    vm_size_t oldsize[NIADDR+1];
+
+    /*
+     * Index structure of an inode:
+     *
+     * i_db[0..NDADDR-1]    hold block numbers for blocks
+     *                      0..NDADDR-1
+     *
+     * i_ib[0]              index block 0 is the single indirect
+     *                      block
+     *                      holds block numbers for blocks
+     *                      NDADDR .. NDADDR + NINDIR(fs)-1
+     *
+     * i_ib[1]              index block 1 is the double indirect
+     *                      block
+     *                      holds block numbers for INDEX blocks
+     *                      for blocks
+     *                      NDADDR + NINDIR(fs) ..
+     *                      NDADDR + NINDIR(fs) + NINDIR(fs)**2 - 1
+     *
+     * i_ib[2]              index block 2 is the triple indirect
+     *                      block
+     *                      holds block numbers for double-indirect
+     *                      blocks for blocks
+     *                      NDADDR + NINDIR(fs) + NINDIR(fs)**2 ..
+     *                      NDADDR + NINDIR(fs) + NINDIR(fs)**2
+     *                              + NINDIR(fs)**3 - 1
+     */
+
+    mutex_lock(&fp->f_lock);
+
+    if (file_block < NDADDR) {
+        /* Direct block. */
+        *disk_block_p = fp->i_ic.i_block[file_block];
+        mutex_unlock(&fp->f_lock);
+        return (0);
+    }
+
+    file_block -= NDADDR;
+
+    /*
+     * nindir[0] = NINDIR
+     * nindir[1] = NINDIR**2
+     * nindir[2] = NINDIR**3
+     *      etc
+     */
+    for (level = 0; level < NIADDR; level++) {
+        if (file_block < fp->f_nindir[level])
+            break;
+        file_block -= fp->f_nindir[level];
+    }
+    if (level == NIADDR) {
+        /* Block number too high */
+        mutex_unlock(&fp->f_lock);
+        return (FS_NOT_IN_FILE);
+    }
+
+    ind_block_num = fp->i_ic.i_block[level + NDADDR];
+
+    /*
+     * Initialize array of blocks to free.
+     */
+    for (idx = 0; idx < NIADDR; idx++)
+        oldsize[idx] = 0;
+
+    for (; level >= 0; level--) {
+
+        vm_offset_t data;
+        mach_msg_type_number_t size;
+
+        if (ind_block_num == 0)
+            break;
+
+        if (fp->f_blkno[level] == ind_block_num) {
+            /*
+             * Cache hit. Just pick up the data.
+             */
+
+            data = fp->f_blk[level];
+        }
+        else {
+            /*
+             * Drop our lock while doing the read.
+             * (The f_dev and f_fs fields don`t change.)
+             */
+            mutex_unlock(&fp->f_lock);
+
+            rc = device_read(fp->f_dev,
+                             0,
+                             (recnum_t) fsbtodb(fp->f_fs, ind_block_num),
+                             EXT2_BLOCK_SIZE(fp->f_fs),
+                             (char **)&data,
+                             &size);
+            if (rc != KERN_SUCCESS) {
+                /*
+                 * The lock is not held here.  Blocks displaced
+                 * from the cache at higher levels must still be
+                 * released, or they would be leaked.
+                 */
+                goto release_old;
+            }
+
+            /*
+             * See if we can cache the data. Need a write lock to
+             * do this. While we hold the write lock, we can`t do
+             * *anything* which might block for memory. Otherwise
+             * a non-privileged thread might deadlock with the
+             * privileged threads. We can`t block while taking the
+             * write lock. Otherwise a non-privileged thread
+             * blocked in the vm_deallocate (while holding a read
+             * lock) will block a privileged thread. For the same
+             * reason, we can`t take a read lock and then use
+             * lock_read_to_write.
+             */
+
+            mutex_lock(&fp->f_lock);
+
+            olddata[level] = fp->f_blk[level];
+            oldsize[level] = fp->f_blksize[level];
+
+            fp->f_blkno[level] = ind_block_num;
+            fp->f_blk[level] = data;
+            fp->f_blksize[level] = size;
+
+            /*
+             * Return to holding a read lock, and
+             * dispose of old data.
+             */
+
+        }
+
+        if (level > 0) {
+            idx = file_block / fp->f_nindir[level-1];
+            file_block %= fp->f_nindir[level-1];
+        }
+        else
+            idx = file_block;
+
+        ind_block_num = ((daddr_t *)data)[idx];
+    }
+
+    mutex_unlock(&fp->f_lock);
+
+    *disk_block_p = ind_block_num;
+    rc = 0;
+
+release_old:
+    /*
+     * After unlocking the file, free any blocks that
+     * we need to free.
+     */
+    for (idx = 0; idx < NIADDR; idx++)
+        if (oldsize[idx] != 0)
+            (void) vm_deallocate(mach_task_self(),
+                                 olddata[idx],
+                                 oldsize[idx]);
+
+    return (rc);
+}
+
+/*
+ * Read a portion of a file into an internal buffer. Return
+ * the location in the buffer and the amount in the buffer.
+ *
+ * Returns 0, FS_NOT_IN_FILE if OFFSET is at or past end of file, or
+ * the error from block_map, vm_allocate or device_read.  After a
+ * failed refill FP holds no data buffer at all.
+ */
+static int
+buf_read_file(fp, offset, buf_p, size_p)
+    register struct file *fp;
+    vm_offset_t offset;
+    vm_offset_t *buf_p; /* out */
+    vm_size_t *size_p; /* out */
+{
+    register
+    struct ext2_super_block *fs;
+    vm_offset_t off;
+    register daddr_t file_block;
+    daddr_t disk_block;
+    int rc;
+    vm_offset_t block_size;
+
+    if (offset >= fp->i_ic.i_size)
+        return (FS_NOT_IN_FILE);
+
+    fs = fp->f_fs;
+
+    off = blkoff(fs, offset);
+    file_block = lblkno(fs, offset);
+    block_size = blksize(fs, fp, file_block);
+
+    if (file_block != fp->f_buf_blkno) {
+        rc = block_map(fp, file_block, &disk_block);
+        if (rc != 0)
+            return (rc);
+
+        if (fp->f_buf) {
+            (void)vm_deallocate(mach_task_self(),
+                                fp->f_buf,
+                                fp->f_buf_size);
+            /*
+             * Forget the buffer at once: if the refill below
+             * fails, neither a stale pointer nor a stale block
+             * tag may survive.
+             */
+            fp->f_buf = 0;
+            fp->f_buf_blkno = -1;
+        }
+
+        if (disk_block == 0) {
+            /*
+             * No disk block is mapped here; hand out freshly
+             * allocated memory instead of reading the device.
+             */
+            rc = vm_allocate(mach_task_self(),
+                             &fp->f_buf,
+                             block_size,
+                             TRUE);
+            fp->f_buf_size = block_size;
+        }
+        else {
+            rc = device_read(fp->f_dev,
+                             0,
+                             (recnum_t) fsbtodb(fs, disk_block),
+                             (int) block_size,
+                             (char **) &fp->f_buf,
+                             (mach_msg_type_number_t *)&fp->f_buf_size);
+        }
+        if (rc) {
+            fp->f_buf = 0;
+            return (rc);
+        }
+
+        fp->f_buf_blkno = file_block;
+    }
+
+    /*
+     * Return address of byte in buffer corresponding to
+     * offset, and size of remainder of buffer after that
+     * byte.
+     */
+    *buf_p = fp->f_buf + off;
+    *size_p = block_size - off;
+
+    /*
+     * But truncate buffer at end of file.
+     */
+    if (*size_p > fp->i_ic.i_size - offset)
+        *size_p = fp->i_ic.i_size - offset;
+
+    return (0);
+}
+
+/*
+ * Search a directory for a name and return its
+ * i_number.
+ *
+ * Returns 0 with *INUMBER_P set when NAME is found, FS_NO_ENTRY when
+ * it is not, FS_INVALID_FS when a directory record has a zero length
+ * (the scan could never advance past it), or the error from
+ * buf_read_file.
+ */
+static int
+search_directory(name, fp, inumber_p)
+    char * name;
+    register struct file *fp;
+    ino_t *inumber_p; /* out */
+{
+    vm_offset_t buf;
+    vm_size_t buf_size;
+    vm_offset_t offset;
+    struct ext2_dir_entry_2 *dp;
+    int length;
+    kern_return_t rc;
+    char tmp_name[256];
+
+    length = strlen(name);
+
+    offset = 0;
+    while (offset < fp->i_ic.i_size) {
+        rc = buf_read_file(fp, offset, &buf, &buf_size);
+        if (rc != KERN_SUCCESS)
+            return (rc);
+
+        dp = (struct ext2_dir_entry_2 *)buf;
+        if (dp->inode != 0) {
+            /* Entry names are length-counted; make a terminated copy. */
+            strncpy (tmp_name, dp->name, dp->name_len);
+            tmp_name[dp->name_len] = '\0';
+            if (dp->name_len == length &&
+                !strcmp(name, tmp_name))
+            {
+                /* found entry */
+                *inumber_p = dp->inode;
+                return (0);
+            }
+        }
+
+        /* A zero record length would keep us on this entry forever. */
+        if (dp->rec_len == 0)
+            return (FS_INVALID_FS);
+        offset += dp->rec_len;
+    }
+    return (FS_NO_ENTRY);
+}
+
+/*
+ * Read the superblock and the group descriptors from DEV.
+ *
+ * On success *FSP and *GDP point at the two buffers returned by
+ * device_read, *GD_SIZE_P is the byte size requested for the group
+ * descriptors, and 0 is returned.  On failure nothing is stored
+ * through the out parameters and no buffer stays allocated; the
+ * result is FS_INVALID_FS for a bad superblock, or the device_read
+ * error.
+ */
+static int
+read_fs(dev, fsp, gdp, gd_size_p)
+    mach_port_t dev;
+    struct ext2_super_block **fsp;
+    struct ext2_group_desc **gdp;
+    vm_size_t *gd_size_p;
+{
+    register
+    struct ext2_super_block *fs;
+    vm_offset_t buf;
+    vm_offset_t buf2;
+    mach_msg_type_number_t buf_size;
+    mach_msg_type_number_t buf2_size;
+    int error;
+    int gd_count;
+    int gd_blocks;
+    int gd_size;
+    int gd_location;
+    int gd_sector;
+
+    /*
+     * Read the super block
+     */
+    error = device_read(dev, 0, (recnum_t) SBLOCK, SBSIZE,
+                        (char **) &buf, &buf_size);
+    if (error)
+        return (error);
+
+    /*
+     * Check the superblock.  s_blocks_per_group is used as a divisor
+     * below, so a zero there is rejected as well.
+     */
+    fs = (struct ext2_super_block *)buf;
+    if (fs->s_magic != EXT2_SUPER_MAGIC ||
+        fs->s_blocks_per_group == 0) {
+        (void) vm_deallocate(mach_task_self(), buf, buf_size);
+        return (FS_INVALID_FS);
+    }
+
+    /*
+     * Compute the location and size of the group descriptors
+     */
+    gd_count = (fs->s_blocks_count - fs->s_first_data_block +
+                fs->s_blocks_per_group - 1) / fs->s_blocks_per_group;
+    gd_blocks = (gd_count + EXT2_DESC_PER_BLOCK(fs) - 1) /
+                EXT2_DESC_PER_BLOCK(fs);
+    gd_size = gd_blocks * EXT2_BLOCK_SIZE(fs);
+    gd_location = fs->s_first_data_block + 1;
+    gd_sector = (gd_location * EXT2_BLOCK_SIZE(fs)) / DEV_BSIZE;
+
+    /*
+     * Read the group descriptors
+     */
+    error = device_read(dev, 0, (recnum_t) gd_sector, gd_size,
+                        (char **) &buf2, &buf2_size);
+    if (error) {
+        (void) vm_deallocate(mach_task_self(), buf, buf_size);
+        return error;
+    }
+
+    /*
+     * Publish the results only now, so that a failure above never
+     * leaves the caller holding a pointer to the freed superblock.
+     */
+    *fsp = fs;
+    *gdp = (struct ext2_group_desc *) buf2;
+    *gd_size_p = gd_size;
+
+    return 0;
+}
+
+/*
+ * Read the file system structures for FP's device into FP and
+ * precompute f_nindir[], the number of blocks reachable through each
+ * level of indirection.  Returns 0 or the read_fs error.
+ */
+static int
+mount_fs(fp)
+    register struct file *fp;
+{
+    register struct ext2_super_block *fs;
+    int error;
+
+    error = read_fs(fp->f_dev, &fp->f_fs, &fp->f_gd, &fp->f_gd_size);
+    if (error)
+        return (error);
+
+    fs = fp->f_fs;
+
+    /*
+     * Calculate indirect block levels.
+     */
+    {
+        register int mult;
+        register int level;
+
+        mult = 1;
+        for (level = 0; level < NIADDR; level++) {
+            mult *= NINDIR(fs);
+            fp->f_nindir[level] = mult;
+        }
+    }
+
+    return (0);
+}
+
+/*
+ * Release the superblock and group descriptor buffers of a structured
+ * file and clear the pointers to them.  Does nothing when
+ * file_is_structured(fp) is false.
+ */
+static void
+unmount_fs(fp)
+    register struct file *fp;
+{
+    if (file_is_structured(fp)) {
+        (void) vm_deallocate(mach_task_self(),
+                             (vm_offset_t) fp->f_fs,
+                             SBSIZE);
+        (void) vm_deallocate(mach_task_self(),
+                             (vm_offset_t) fp->f_gd,
+                             fp->f_gd_size);
+        fp->f_fs = 0;
+        /* Do not keep a pointer to the freed descriptors either. */
+        fp->f_gd = 0;
+        fp->f_gd_size = 0;
+    }
+}
+
+/*
+ * Open a file.
+ */
+int
+ext2_open_file(master_device_port, path, fp)
+    mach_port_t master_device_port;
+    char * path;
+    struct file *fp;
+{
+#define RETURN(code) { rc = (code); goto exit; }
+
+    register char *cp, *component;
+    register int c; /* char */
+    register int rc;
+    ino_t inumber, parent_inumber;
+    int nlinks = 0;
+
+    char namebuf[MAXPATHLEN+1];
+
+    if (path == 0 || *path == '\0') {
+        return FS_NO_ENTRY;
+    }
+
+    /*
+     * Copy name into buffer to allow modifying it.
+     */
+    strcpy(namebuf, path);
+
+    /*
+     * Look for '/dev/xxx' at start of path, for
+     * root device.
+ */ + if (!strprefix(namebuf, "/dev/")) { + printf("no device name\n"); + return FS_NO_ENTRY; + } + + cp = namebuf + 5; /* device */ + component = cp; + while ((c = *cp) != '\0' && c != '/') { + cp++; + } + *cp = '\0'; + + bzero (fp, sizeof (struct file)); + + rc = device_open(master_device_port, + D_READ|D_WRITE, + component, + &fp->f_dev); + if (rc) + return rc; + + if (c == 0) { + fp->f_fs = 0; + goto out_ok; + } + + *cp = c; + + rc = mount_fs(fp); + if (rc) + return rc; + + inumber = (ino_t) ROOTINO; + if ((rc = read_inode(inumber, fp)) != 0) { + printf("can't read root inode\n"); + goto exit; + } + + while (*cp) { + + /* + * Check that current node is a directory. + */ + if ((fp->i_ic.i_mode & IFMT) != IFDIR) + RETURN (FS_NOT_DIRECTORY); + + /* + * Remove extra separators + */ + while (*cp == '/') + cp++; + + /* + * Get next component of path name. + */ + component = cp; + { + register int len = 0; + + while ((c = *cp) != '\0' && c != '/') { + if (len++ > MAXNAMLEN) + RETURN (FS_NAME_TOO_LONG); + if (c & 0200) + RETURN (FS_INVALID_PARAMETER); + cp++; + } + *cp = 0; + } + + /* + * Look up component in current directory. + * Save directory inumber in case we find a + * symbolic link. + */ + parent_inumber = inumber; + rc = search_directory(component, fp, &inumber); + if (rc) { + printf("%s: not found\n", path); + goto exit; + } + *cp = c; + + /* + * Open next component. + */ + if ((rc = read_inode(inumber, fp)) != 0) + goto exit; + + /* + * Check for symbolic link. 
+ */ + if ((fp->i_ic.i_mode & IFMT) == IFLNK) { + + int link_len = fp->i_ic.i_size; + int len; + + len = strlen(cp) + 1; + + if (link_len + len >= MAXPATHLEN - 1) + RETURN (FS_NAME_TOO_LONG); + + if (++nlinks > MAXSYMLINKS) + RETURN (FS_SYMLINK_LOOP); + + memmove(&namebuf[link_len], cp, len); + +#ifdef IC_FASTLINK + if (fp->i_ic.i_blocks == 0) { + bcopy(fp->i_ic.i_block, namebuf, (unsigned) link_len); + } + else +#endif IC_FASTLINK + { + /* + * Read file for symbolic link + */ + vm_offset_t buf; + mach_msg_type_number_t buf_size; + daddr_t disk_block; + register struct ext2_super_block *fs = fp->f_fs; + + (void) block_map(fp, (daddr_t)0, &disk_block); + rc = device_read(fp->f_dev, + 0, + (recnum_t) fsbtodb(fs, disk_block), + (int) blksize(fs, fp, 0), + (char **) &buf, + &buf_size); + if (rc) + goto exit; + + bcopy((char *)buf, namebuf, (unsigned)link_len); + (void) vm_deallocate(mach_task_self(), buf, buf_size); + } + + /* + * If relative pathname, restart at parent directory. + * If absolute pathname, restart at root. + * If pathname begins '/dev/<device>/', + * restart at root of that device. + */ + cp = namebuf; + if (*cp != '/') { + inumber = parent_inumber; + } + else if (!strprefix(cp, "/dev/")) { + inumber = (ino_t)ROOTINO; + } + else { + cp += 5; + component = cp; + while ((c = *cp) != '\0' && c != '/') { + cp++; + } + *cp = '\0'; + + /* + * Unmount current file system and free buffers. + */ + ext2_close_file(fp); + + /* + * Open new root device. + */ + rc = device_open(master_device_port, + D_READ, + component, + &fp->f_dev); + if (rc) + return (rc); + + if (c == 0) { + fp->f_fs = 0; + goto out_ok; + } + + *cp = c; + + rc = mount_fs(fp); + if (rc) + return (rc); + + inumber = (ino_t)ROOTINO; + } + if ((rc = read_inode(inumber, fp)) != 0) + goto exit; + } + } + + /* + * Found terminal component. + */ + out_ok: + mutex_init(&fp->f_lock); + return 0; + + /* + * At error exit, close file to free storage. 
+ */ + exit: + ext2_close_file(fp); + return rc; +} + +/* + * Close file - free all storage used. + */ +void +ext2_close_file(fp) + register struct file *fp; +{ + register int i; + + /* + * Free the disk super-block. + */ + unmount_fs(fp); + + /* + * Free the inode and data buffers. + */ + free_file_buffers(fp); +} + +int +ext2_file_is_directory(struct file *fp) +{ + return (fp->i_ic.i_mode & IFMT) == IFDIR; +} + +int +ext2_file_is_regular(struct file *fp) +{ + return (fp->i_ic.i_mode & IFMT) == IFREG; +} + +/* + * Copy a portion of a file into kernel memory. + * Cross block boundaries when necessary. + */ +int +ext2_read_file(fp, offset, start, size, resid) + register struct file *fp; + vm_offset_t offset; + vm_offset_t start; + vm_size_t size; + vm_size_t *resid; /* out */ +{ + int rc; + register vm_size_t csize; + vm_offset_t buf; + vm_size_t buf_size; + + while (size != 0) { + rc = buf_read_file(fp, offset, &buf, &buf_size); + if (rc) + return (rc); + + csize = size; + if (csize > buf_size) + csize = buf_size; + if (csize == 0) + break; + + bcopy((char *)buf, (char *)start, csize); + + offset += csize; + start += csize; + size -= csize; + } + if (resid) + *resid = size; + + return (0); +} + +/* simple utility: only works for 2^n */ +static int +log2(n) + register unsigned int n; +{ + register int i = 0; + + while ((n & 1) == 0) { + i++; + n >>= 1; + } + return i; +} + +/* + * Make an empty file_direct for a device. 
+ */ +int +ext2_open_file_direct(dev, fdp, is_structured) + mach_port_t dev; + register struct file_direct *fdp; + boolean_t is_structured; +{ + struct ext2_super_block *fs; + struct ext2_group_desc *gd; + vm_size_t gd_size; + int rc; + + if (!is_structured) { + fdp->fd_dev = dev; + fdp->fd_blocks = (daddr_t *) 0; + fdp->fd_bsize = vm_page_size; + fdp->fd_bshift = log2(vm_page_size); + fdp->fd_fsbtodb = 0; /* later */ + fdp->fd_size = 0; /* later */ + return 0; + } + + rc = read_fs(dev, &fs, &gd, &gd_size); + if (rc) + return rc; + + fdp->fd_dev = dev; + fdp->fd_blocks = (daddr_t *) 0; + fdp->fd_size = 0; + fdp->fd_bsize = EXT2_BLOCK_SIZE(fs); + fdp->fd_bshift = log2(fdp->fd_bsize); + fdp->fd_fsbtodb = log2(fdp->fd_bsize / DEV_BSIZE); + + (void) vm_deallocate(mach_task_self(), + (vm_offset_t) fs, + SBSIZE); + (void) vm_deallocate(mach_task_self(), + (vm_offset_t) gd, + gd_size); + + return 0; +} + +/* + * Add blocks from a file to a file_direct. + */ +int +ext2_add_file_direct(fdp, fp) + register struct file_direct *fdp; + register struct file *fp; +{ + register struct ext2_super_block *fs; + long num_blocks, i; + vm_offset_t buffer; + vm_size_t size; + int rc; + + /* the file must be on the same device */ + + if (fdp->fd_dev != fp->f_dev) + return FS_INVALID_FS; + + if (!file_is_structured(fp)) { + int result[DEV_GET_SIZE_COUNT]; + natural_t count; + + count = DEV_GET_SIZE_COUNT; + rc = device_get_status( fdp->fd_dev, DEV_GET_SIZE, + result, &count); + if (rc) + return rc; + fdp->fd_size = result[DEV_GET_SIZE_DEVICE_SIZE] >> fdp->fd_bshift; + fdp->fd_fsbtodb = log2(fdp->fd_bsize/result[DEV_GET_SIZE_RECORD_SIZE]); + return 0; + } + + /* it must hold a file system */ + + fs = fp->f_fs; +/* + if (fdp->fd_bsize != fs->fs_bsize || + fdp->fd_fsbtodb != fs->fs_fsbtodb) +*/ + if (fdp->fd_bsize != EXT2_BLOCK_SIZE(fs)) + return FS_INVALID_FS; + + /* calculate number of blocks in the file, ignoring fragments */ + + num_blocks = lblkno(fs, fp->i_ic.i_size); + + /* allocate 
memory for a bigger array */ + + size = (num_blocks + fdp->fd_size) * sizeof(daddr_t); + rc = vm_allocate(mach_task_self(), &buffer, size, TRUE); + if (rc != KERN_SUCCESS) + return rc; + + /* lookup new block addresses */ + + for (i = 0; i < num_blocks; i++) { + daddr_t disk_block; + + rc = block_map(fp, (daddr_t) i, &disk_block); + if (rc != 0) { + (void) vm_deallocate(mach_task_self(), buffer, size); + return rc; + } + + ((daddr_t *) buffer)[fdp->fd_size + i] = disk_block; + } + + /* copy old addresses and install the new array */ + + if (fdp->fd_blocks != 0) { + bcopy((char *) fdp->fd_blocks, (char *) buffer, + fdp->fd_size * sizeof(daddr_t)); + + (void) vm_deallocate(mach_task_self(), + (vm_offset_t) fdp->fd_blocks, + (vm_size_t) (fdp->fd_size * sizeof(daddr_t))); + } + fdp->fd_blocks = (daddr_t *) buffer; + fdp->fd_size += num_blocks; + + /* deallocate cached blocks */ + + free_file_buffers(fp); + + return 0; +} + +int +ext2_remove_file_direct(fdp) + struct file_direct *fdp; +{ + if (fdp->fd_blocks) + (void) vm_deallocate(mach_task_self(), + (vm_offset_t) fdp->fd_blocks, + (vm_size_t) (fdp->fd_size * sizeof(daddr_t))); + fdp->fd_blocks = 0; /* sanity */ + /* xxx should lose a ref to fdp->fd_dev here (and elsewhere) xxx */ +} diff --git a/serverboot/ffs_compat.c b/serverboot/ffs_compat.c new file mode 100644 index 00000000..6e322b63 --- /dev/null +++ b/serverboot/ffs_compat.c @@ -0,0 +1,65 @@ +/* + * BSD FFS like functions used to ease porting bootstrap to Linux ext2 fs + * Copyright (C) 1994 Remy Card + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <device/device_types.h> +#include <device/device.h> + +#include <mach/mach_traps.h> +#include <mach/mach_interface.h> + +#include <file_io.h> + +#define EXT2_INODES_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (struct ext2_inode)) + +int ino2blk (struct ext2_super_block *fs, struct ext2_group_desc *gd, int ino) +{ + int group; + int blk; + + group = (ino - 1) / EXT2_INODES_PER_GROUP(fs); + blk = gd[group].bg_inode_table + + (((ino - 1) % EXT2_INODES_PER_GROUP(fs)) / + EXT2_INODES_PER_BLOCK(fs)); + return blk; +} + +int fsbtodb (struct ext2_super_block *fs, int b) +{ + return (b * EXT2_BLOCK_SIZE(fs)) / DEV_BSIZE; +} + +int itoo (struct ext2_super_block *fs, int ino) +{ + return (ino - 1) % EXT2_INODES_PER_BLOCK(fs); +} + +int blkoff (struct ext2_super_block * fs, vm_offset_t offset) +{ + return offset % EXT2_BLOCK_SIZE(fs); +} + +int lblkno (struct ext2_super_block * fs, vm_offset_t offset) +{ + return offset / EXT2_BLOCK_SIZE(fs); +} + +int blksize (struct ext2_super_block *fs, struct file *fp, daddr_t file_block) +{ + return EXT2_BLOCK_SIZE(fs); /* XXX - fix for fragments */ +} diff --git a/serverboot/ffs_compat.h b/serverboot/ffs_compat.h new file mode 100644 index 00000000..d78840f5 --- /dev/null +++ b/serverboot/ffs_compat.h @@ -0,0 +1,54 @@ +/* + * BSD FFS like declarations used to ease porting bootstrap to Linux ext2 fs + * Copyright (C) 1994 Remy Card + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
/*
 * BSD FFS names mapped onto their ext2 counterparts, so that the
 * ext2 reader can share the shape of the FFS reader.
 */

#define SBSIZE		EXT2_MIN_BLOCK_SIZE	/* Size of superblock */
#define	SBLOCK		((daddr_t) 2)		/* Location of superblock
						   (record number handed to
						   device_read) */

#define	NDADDR		EXT2_NDIR_BLOCKS
/* every block slot of the inode that is not a direct block */
#define	NIADDR		(EXT2_N_BLOCKS - EXT2_NDIR_BLOCKS)

/* longest path component accepted by ext2_open_file */
#define	MAXNAMLEN	255

/* inode at which path lookup starts */
#define	ROOTINO		EXT2_ROOT_INO

#define	NINDIR(fs)	EXT2_ADDR_PER_BLOCK(fs)

/*
 * Defined (empty) so that `#ifdef IC_FASTLINK' in ext2_open_file
 * enables copying a symlink's text straight out of the inode.
 */
#define	IC_FASTLINK

/*
 * i_mode is masked with IFMT and the result compared against one of
 * the type values below (the code shown uses IFDIR, IFLNK and IFREG).
 */
#define	IFMT		00170000
#define	IFSOCK		0140000
#define	IFLNK		0120000
#define	IFREG		0100000
#define	IFBLK		0060000
#define	IFDIR		0040000
#define	IFCHR		0020000
#define	IFIFO		0010000
#define	ISUID		0004000
#define	ISGID		0002000
#define	ISVTX		0001000

/* short field names for the ext2 member of the union in struct file */
#define f_fs		u.ext2.ext2_fs
#define f_gd		u.ext2.ext2_gd
#define f_gd_size	u.ext2.ext2_gd_size
#define i_ic		u.ext2.ext2_ic
#define f_nindir	u.ext2.ext2_nindir
#define f_blk		u.ext2.ext2_blk
#define f_blksize	u.ext2.ext2_blksize
#define f_blkno		u.ext2.ext2_blkno
+ * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * Stand-alone file reading package. + */ + +#include <device/device_types.h> +#include <device/device.h> + +#include <mach/mach_traps.h> +#include <mach/mach_interface.h> + +#include "file_io.h" +#include "fs.h" +#include "dir.h" +#include "disk_inode_ffs.h" + +void close_file(); /* forward */ + +/* + * Free file buffers, but don't close file. + */ +static void +free_file_buffers(fp) + register struct file *fp; +{ + register int level; + + /* + * Free the indirect blocks + */ + for (level = 0; level < NIADDR; level++) { + if (fp->f_blk[level] != 0) { + (void) vm_deallocate(mach_task_self(), + fp->f_blk[level], + fp->f_blksize[level]); + fp->f_blk[level] = 0; + } + fp->f_blkno[level] = -1; + } + + /* + * Free the data block + */ + if (fp->f_buf != 0) { + (void) vm_deallocate(mach_task_self(), + fp->f_buf, + fp->f_buf_size); + fp->f_buf = 0; + } + fp->f_buf_blkno = -1; +} + +/* + * Read a new inode into a file structure. 
+ */ +static int +read_inode(inumber, fp) + ino_t inumber; + register struct file *fp; +{ + vm_offset_t buf; + mach_msg_type_number_t buf_size; + register struct fs *fs; + daddr_t disk_block; + kern_return_t rc; + + fs = fp->f_fs; + disk_block = itod(fs, inumber); + + rc = device_read(fp->f_dev, + 0, + (recnum_t) fsbtodb(fp->f_fs, disk_block), + (int) fs->fs_bsize, + (char **)&buf, + &buf_size); + if (rc != KERN_SUCCESS) + return (rc); + + { + register struct dinode *dp; + + dp = (struct dinode *)buf; + dp += itoo(fs, inumber); + fp->i_ic = dp->di_ic; + fp->f_size = fp->i_size; + } + + (void) vm_deallocate(mach_task_self(), buf, buf_size); + + /* + * Clear out the old buffers + */ + free_file_buffers(fp); + + return (0); +} + +/* + * Given an offset in a file, find the disk block number that + * contains that block. + */ +static int +block_map(fp, file_block, disk_block_p) + struct file *fp; + daddr_t file_block; + daddr_t *disk_block_p; /* out */ +{ + int level; + int idx; + daddr_t ind_block_num; + kern_return_t rc; + + vm_offset_t olddata[NIADDR+1]; + vm_size_t oldsize[NIADDR+1]; + + /* + * Index structure of an inode: + * + * i_db[0..NDADDR-1] hold block numbers for blocks + * 0..NDADDR-1 + * + * i_ib[0] index block 0 is the single indirect + * block + * holds block numbers for blocks + * NDADDR .. NDADDR + NINDIR(fs)-1 + * + * i_ib[1] index block 1 is the double indirect + * block + * holds block numbers for INDEX blocks + * for blocks + * NDADDR + NINDIR(fs) .. + * NDADDR + NINDIR(fs) + NINDIR(fs)**2 - 1 + * + * i_ib[2] index block 2 is the triple indirect + * block + * holds block numbers for double-indirect + * blocks for blocks + * NDADDR + NINDIR(fs) + NINDIR(fs)**2 .. + * NDADDR + NINDIR(fs) + NINDIR(fs)**2 + * + NINDIR(fs)**3 - 1 + */ + + mutex_lock(&fp->f_lock); + + if (file_block < NDADDR) { + /* Direct block. 
*/ + *disk_block_p = fp->i_db[file_block]; + mutex_unlock(&fp->f_lock); + return (0); + } + + file_block -= NDADDR; + + /* + * nindir[0] = NINDIR + * nindir[1] = NINDIR**2 + * nindir[2] = NINDIR**3 + * etc + */ + for (level = 0; level < NIADDR; level++) { + if (file_block < fp->f_nindir[level]) + break; + file_block -= fp->f_nindir[level]; + } + if (level == NIADDR) { + /* Block number too high */ + mutex_unlock(&fp->f_lock); + return (FS_NOT_IN_FILE); + } + + ind_block_num = fp->i_ib[level]; + + /* + * Initialize array of blocks to free. + */ + for (idx = 0; idx < NIADDR; idx++) + oldsize[idx] = 0; + + for (; level >= 0; level--) { + + vm_offset_t data; + mach_msg_type_number_t size; + + if (ind_block_num == 0) + break; + + if (fp->f_blkno[level] == ind_block_num) { + /* + * Cache hit. Just pick up the data. + */ + + data = fp->f_blk[level]; + } + else { + /* + * Drop our lock while doing the read. + * (The f_dev and f_fs fields don`t change.) + */ + mutex_unlock(&fp->f_lock); + + rc = device_read(fp->f_dev, + 0, + (recnum_t) fsbtodb(fp->f_fs, ind_block_num), + fp->f_fs->fs_bsize, + (char **)&data, + &size); + if (rc != KERN_SUCCESS) + return (rc); + + /* + * See if we can cache the data. Need a write lock to + * do this. While we hold the write lock, we can`t do + * *anything* which might block for memory. Otherwise + * a non-privileged thread might deadlock with the + * privileged threads. We can`t block while taking the + * write lock. Otherwise a non-privileged thread + * blocked in the vm_deallocate (while holding a read + * lock) will block a privileged thread. For the same + * reason, we can`t take a read lock and then use + * lock_read_to_write. + */ + + mutex_lock(&fp->f_lock); + + olddata[level] = fp->f_blk[level]; + oldsize[level] = fp->f_blksize[level]; + + fp->f_blkno[level] = ind_block_num; + fp->f_blk[level] = data; + fp->f_blksize[level] = size; + + /* + * Return to holding a read lock, and + * dispose of old data. 
+ */ + + } + + if (level > 0) { + idx = file_block / fp->f_nindir[level-1]; + file_block %= fp->f_nindir[level-1]; + } + else + idx = file_block; + + ind_block_num = ((daddr_t *)data)[idx]; + } + + mutex_unlock(&fp->f_lock); + + /* + * After unlocking the file, free any blocks that + * we need to free. + */ + for (idx = 0; idx < NIADDR; idx++) + if (oldsize[idx] != 0) + (void) vm_deallocate(mach_task_self(), + olddata[idx], + oldsize[idx]); + + *disk_block_p = ind_block_num; + return (0); +} + +/* + * Read a portion of a file into an internal buffer. Return + * the location in the buffer and the amount in the buffer. + */ +static int +buf_read_file(fp, offset, buf_p, size_p) + register struct file *fp; + vm_offset_t offset; + vm_offset_t *buf_p; /* out */ + vm_size_t *size_p; /* out */ +{ + register struct fs *fs; + vm_offset_t off; + register daddr_t file_block; + daddr_t disk_block; + int rc; + vm_offset_t block_size; + + if (offset >= fp->i_size) + return (FS_NOT_IN_FILE); + + fs = fp->f_fs; + + off = blkoff(fs, offset); + file_block = lblkno(fs, offset); + block_size = blksize(fs, fp, file_block); + + if (file_block != fp->f_buf_blkno) { + rc = block_map(fp, file_block, &disk_block); + if (rc != 0) + return (rc); + + if (fp->f_buf) + (void)vm_deallocate(mach_task_self(), + fp->f_buf, + fp->f_buf_size); + + if (disk_block == 0) { + (void)vm_allocate(mach_task_self(), + &fp->f_buf, + block_size, + TRUE); + fp->f_buf_size = block_size; + } + else { + rc = device_read(fp->f_dev, + 0, + (recnum_t) fsbtodb(fs, disk_block), + (int) block_size, + (char **) &fp->f_buf, + (mach_msg_type_number_t *)&fp->f_buf_size); + } + if (rc) + return (rc); + + fp->f_buf_blkno = file_block; + } + + /* + * Return address of byte in buffer corresponding to + * offset, and size of remainder of buffer after that + * byte. + */ + *buf_p = fp->f_buf + off; + *size_p = block_size - off; + + /* + * But truncate buffer at end of file. 
+ */ + if (*size_p > fp->i_size - offset) + *size_p = fp->i_size - offset; + + return (0); +} + +/* In 4.4 d_reclen is split into d_type and d_namlen */ +struct dirent_44 { + unsigned long d_fileno; + unsigned short d_reclen; + unsigned char d_type; + unsigned char d_namlen; + char d_name[255 + 1]; +}; + +/* + * Search a directory for a name and return its + * i_number. + */ +static int +search_directory(name, fp, inumber_p) + char * name; + register struct file *fp; + ino_t *inumber_p; /* out */ +{ + vm_offset_t buf; + vm_size_t buf_size; + vm_offset_t offset; + register struct dirent_44 *dp; + int length; + kern_return_t rc; + + length = strlen(name); + + offset = 0; + while (offset < fp->i_size) { + rc = buf_read_file(fp, offset, &buf, &buf_size); + if (rc != KERN_SUCCESS) + return (rc); + + dp = (struct dirent_44 *)buf; + if (dp->d_ino != 0) { + unsigned short namlen = dp->d_namlen; + /* + * If namlen is zero, then either this is a 4.3 file + * system or the namlen is really zero. In the latter + * case also the 4.3 d_namlen field is zero + * interpreted either way. 
+ */ + if (namlen == 0) + namlen = ((struct direct *)dp)->d_namlen; + + if (namlen == length && + !strcmp(name, dp->d_name)) + { + /* found entry */ + *inumber_p = dp->d_ino; + return (0); + } + } + offset += dp->d_reclen; + } + return (FS_NO_ENTRY); +} + +static int +read_fs(dev, fsp) + mach_port_t dev; + struct fs **fsp; +{ + register struct fs *fs; + vm_offset_t buf; + mach_msg_type_number_t buf_size; + int error; + + error = device_read(dev, 0, (recnum_t) SBLOCK, SBSIZE, + (char **) &buf, &buf_size); + if (error) + return (error); + + fs = (struct fs *)buf; + if (fs->fs_magic != FS_MAGIC || + fs->fs_bsize > MAXBSIZE || + fs->fs_bsize < sizeof(struct fs)) { + (void) vm_deallocate(mach_task_self(), buf, buf_size); + return (FS_INVALID_FS); + } + /* don't read cylinder groups - we aren't modifying anything */ + + *fsp = fs; + return 0; +} + +static int +mount_fs(fp) + register struct file *fp; +{ + register struct fs *fs; + int error; + + error = read_fs(fp->f_dev, &fp->f_fs); + if (error) + return (error); + fs = fp->f_fs; + + /* + * Calculate indirect block levels. + */ + { + register int mult; + register int level; + + mult = 1; + for (level = 0; level < NIADDR; level++) { + mult *= NINDIR(fs); + fp->f_nindir[level] = mult; + } + } + + return (0); +} + +static void +unmount_fs(fp) + register struct file *fp; +{ + if (file_is_structured(fp)) { + (void) vm_deallocate(mach_task_self(), + (vm_offset_t) fp->f_fs, + SBSIZE); + fp->f_fs = 0; + } +} + +/* + * Open a file. + */ +int +ffs_open_file(master_device_port, path, fp) + mach_port_t master_device_port; + char * path; + struct file *fp; +{ +#define RETURN(code) { rc = (code); goto exit; } + + register char *cp, *component; + register int c; /* char */ + register int rc; + ino_t inumber, parent_inumber; + int nlinks = 0; + + char namebuf[MAXPATHLEN+1]; + + if (path == 0 || *path == '\0') { + return FS_NO_ENTRY; + } + + /* + * Copy name into buffer to allow modifying it. 
+ */ + strcpy(namebuf, path); + + /* + * Look for '/dev/xxx' at start of path, for + * root device. + */ + if (!strprefix(namebuf, "/dev/")) { + printf("no device name\n"); + return FS_NO_ENTRY; + } + + cp = namebuf + 5; /* device */ + component = cp; + while ((c = *cp) != '\0' && c != '/') { + cp++; + } + *cp = '\0'; + + bzero (fp, sizeof (struct file)); + + rc = device_open(master_device_port, + D_READ|D_WRITE, + component, + &fp->f_dev); + if (rc) + return rc; + + if (c == 0) { + fp->f_fs = 0; + goto out_ok; + } + + *cp = c; + + rc = mount_fs(fp); + if (rc) + return rc; + + inumber = (ino_t) ROOTINO; + if ((rc = read_inode(inumber, fp)) != 0) { + printf("can't read root inode\n"); + goto exit; + } + + while (*cp) { + + /* + * Check that current node is a directory. + */ + if ((fp->i_mode & IFMT) != IFDIR) + RETURN (FS_NOT_DIRECTORY); + + /* + * Remove extra separators + */ + while (*cp == '/') + cp++; + + /* + * Get next component of path name. + */ + component = cp; + { + register int len = 0; + + while ((c = *cp) != '\0' && c != '/') { + if (len++ > MAXNAMLEN) + RETURN (FS_NAME_TOO_LONG); + if (c & 0200) + RETURN (FS_INVALID_PARAMETER); + cp++; + } + *cp = 0; + } + + /* + * Look up component in current directory. + * Save directory inumber in case we find a + * symbolic link. + */ + parent_inumber = inumber; + rc = search_directory(component, fp, &inumber); + if (rc) { + printf("%s: not found\n", path); + goto exit; + } + *cp = c; + + /* + * Open next component. + */ + if ((rc = read_inode(inumber, fp)) != 0) + goto exit; + + /* + * Check for symbolic link. 
+ */ + if ((fp->i_mode & IFMT) == IFLNK) { + + int link_len = fp->i_size; + int len; + + len = strlen(cp) + 1; + + if (link_len + len >= MAXPATHLEN - 1) + RETURN (FS_NAME_TOO_LONG); + + if (++nlinks > MAXSYMLINKS) + RETURN (FS_SYMLINK_LOOP); + + memmove (&namebuf[link_len], cp, len); + +#ifdef IC_FASTLINK + if ((fp->i_flags & IC_FASTLINK) != 0) { + bcopy(fp->i_symlink, namebuf, (unsigned) link_len); + } + else +#endif IC_FASTLINK +#if !defined(DISABLE_BSD44_FASTLINKS) + /* + * There is no bit for fastlinks in 4.4 but instead + * all symlinks that fit into the inode are fastlinks. + * If the second block (ic_db[1]) is zero the symlink + * can't be a fastlink if its length is at least five. + * For symlinks of length one to four there is no easy + * way of knowing whether we are looking at a 4.4 + * fastlink or a 4.3 slowlink. This code always + * guesses the 4.4 way when in doubt. THIS BREAKS 4.3 + * SLOWLINKS OF LENGTH FOUR OR LESS. + */ + if ((link_len <= MAX_FASTLINK_SIZE && fp->i_ic.ic_db[1] != 0) + || (link_len <= 4)) + { + bcopy(fp->i_symlink, namebuf, (unsigned) link_len); + } + else +#endif /* !DISABLE_BSD44_FASTLINKS */ + + { + /* + * Read file for symbolic link + */ + vm_offset_t buf; + mach_msg_type_number_t buf_size; + daddr_t disk_block; + register struct fs *fs = fp->f_fs; + + (void) block_map(fp, (daddr_t)0, &disk_block); + rc = device_read(fp->f_dev, + 0, + (recnum_t) fsbtodb(fs, disk_block), + (int) blksize(fs, fp, 0), + (char **) &buf, + &buf_size); + if (rc) + goto exit; + + bcopy((char *)buf, namebuf, (unsigned)link_len); + (void) vm_deallocate(mach_task_self(), buf, buf_size); + } + + /* + * If relative pathname, restart at parent directory. + * If absolute pathname, restart at root. + * If pathname begins '/dev/<device>/', + * restart at root of that device. 
+ */ + cp = namebuf; + if (*cp != '/') { + inumber = parent_inumber; + } + else if (!strprefix(cp, "/dev/")) { + inumber = (ino_t)ROOTINO; + } + else { + cp += 5; + component = cp; + while ((c = *cp) != '\0' && c != '/') { + cp++; + } + *cp = '\0'; + + /* + * Unmount current file system and free buffers. + */ + close_file(fp); + + /* + * Open new root device. + */ + rc = device_open(master_device_port, + D_READ, + component, + &fp->f_dev); + if (rc) + return (rc); + + if (c == 0) { + fp->f_fs = 0; + goto out_ok; + } + + *cp = c; + + rc = mount_fs(fp); + if (rc) + return (rc); + + inumber = (ino_t)ROOTINO; + } + if ((rc = read_inode(inumber, fp)) != 0) + goto exit; + } + } + + /* + * Found terminal component. + */ + out_ok: + mutex_init(&fp->f_lock); + return 0; + + /* + * At error exit, close file to free storage. + */ + exit: + close_file(fp); + return rc; +} + +/* + * Close file - free all storage used. + */ +void +ffs_close_file(fp) + register struct file *fp; +{ + register int i; + + /* + * Free the disk super-block. + */ + unmount_fs(fp); + + /* + * Free the inode and data buffers. + */ + free_file_buffers(fp); +} + +int +ffs_file_is_directory(struct file *fp) +{ + return (fp->i_mode & IFMT) == IFDIR; +} + +int +ffs_file_is_regular(struct file *fp) +{ + return (fp->i_mode & IFMT) == IFREG; +} + +/* + * Copy a portion of a file into kernel memory. + * Cross block boundaries when necessary. 
+ */ +int +ffs_read_file(fp, offset, start, size, resid) + register struct file *fp; + vm_offset_t offset; + vm_offset_t start; + vm_size_t size; + vm_size_t *resid; /* out */ +{ + int rc; + register vm_size_t csize; + vm_offset_t buf; + vm_size_t buf_size; + + while (size != 0) { + rc = buf_read_file(fp, offset, &buf, &buf_size); + if (rc) + return (rc); + + csize = size; + if (csize > buf_size) + csize = buf_size; + if (csize == 0) + break; + + bcopy((char *)buf, (char *)start, csize); + + offset += csize; + start += csize; + size -= csize; + } + if (resid) + *resid = size; + + return (0); +} + +/* simple utility: only works for 2^n */ +static int +log2(n) + register unsigned int n; +{ + register int i = 0; + + while ((n & 1) == 0) { + i++; + n >>= 1; + } + return i; +} + +/* + * Make an empty file_direct for a device. + */ +int +ffs_open_file_direct(dev, fdp, is_structured) + mach_port_t dev; + register struct file_direct *fdp; + boolean_t is_structured; +{ + struct fs *fs; + int rc; + + if (!is_structured) { + fdp->fd_dev = dev; + fdp->fd_blocks = (daddr_t *) 0; + fdp->fd_bsize = vm_page_size; + fdp->fd_bshift = log2(vm_page_size); + fdp->fd_fsbtodb = 0; /* later */ + fdp->fd_size = 0; /* later */ + return 0; + } + + rc = read_fs(dev, &fs); + if (rc) + return rc; + + fdp->fd_dev = dev; + fdp->fd_blocks = (daddr_t *) 0; + fdp->fd_size = 0; + fdp->fd_bsize = fs->fs_bsize; + fdp->fd_bshift = fs->fs_bshift; + fdp->fd_fsbtodb = fs->fs_fsbtodb; + + (void) vm_deallocate(mach_task_self(), + (vm_offset_t) fs, + SBSIZE); + + return 0; +} + +/* + * Add blocks from a file to a file_direct. 
+ */ +int +ffs_add_file_direct(fdp, fp) + register struct file_direct *fdp; + register struct file *fp; +{ + register struct fs *fs; + long num_blocks, i; + vm_offset_t buffer; + vm_size_t size; + int rc; + + /* the file must be on the same device */ + + if (fdp->fd_dev != fp->f_dev) + return FS_INVALID_FS; + + if (!file_is_structured(fp)) { + int result[DEV_GET_SIZE_COUNT]; + natural_t count; + + count = DEV_GET_SIZE_COUNT; + rc = device_get_status( fdp->fd_dev, DEV_GET_SIZE, + result, &count); + if (rc) + return rc; + fdp->fd_size = result[DEV_GET_SIZE_DEVICE_SIZE] >> fdp->fd_bshift; + fdp->fd_fsbtodb = log2(fdp->fd_bsize/result[DEV_GET_SIZE_RECORD_SIZE]); + return 0; + } + + /* it must hold a file system */ + + fs = fp->f_fs; + if (fdp->fd_bsize != fs->fs_bsize || + fdp->fd_fsbtodb != fs->fs_fsbtodb) + return FS_INVALID_FS; + + /* calculate number of blocks in the file, ignoring fragments */ + + num_blocks = lblkno(fs, fp->i_size); + + /* allocate memory for a bigger array */ + + size = (num_blocks + fdp->fd_size) * sizeof(daddr_t); + rc = vm_allocate(mach_task_self(), &buffer, size, TRUE); + if (rc != KERN_SUCCESS) + return rc; + + /* lookup new block addresses */ + + for (i = 0; i < num_blocks; i++) { + daddr_t disk_block; + + rc = block_map(fp, (daddr_t) i, &disk_block); + if (rc != 0) { + (void) vm_deallocate(mach_task_self(), buffer, size); + return rc; + } + + ((daddr_t *) buffer)[fdp->fd_size + i] = disk_block; + } + + /* copy old addresses and install the new array */ + + if (fdp->fd_blocks != 0) { + bcopy((char *) fdp->fd_blocks, (char *) buffer, + fdp->fd_size * sizeof(daddr_t)); + + (void) vm_deallocate(mach_task_self(), + (vm_offset_t) fdp->fd_blocks, + (vm_size_t) (fdp->fd_size * sizeof(daddr_t))); + } + fdp->fd_blocks = (daddr_t *) buffer; + fdp->fd_size += num_blocks; + + /* deallocate cached blocks */ + + free_file_buffers(fp); + + return 0; +} + +int +ffs_remove_file_direct(fdp) + struct file_direct *fdp; +{ + if (fdp->fd_blocks) + (void) 
vm_deallocate(mach_task_self(), + (vm_offset_t) fdp->fd_blocks, + (vm_size_t) (fdp->fd_size * sizeof(daddr_t))); + fdp->fd_blocks = 0; /* sanity */ + /* xxx should lose a ref to fdp->fd_dev here (and elsewhere) xxx */ +} diff --git a/serverboot/file_io.c b/serverboot/file_io.c new file mode 100644 index 00000000..99966f95 --- /dev/null +++ b/serverboot/file_io.c @@ -0,0 +1,317 @@ +/* + * Copyright (c) 1994 The University of Utah and + * the Computer Systems Laboratory at the University of Utah (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software is hereby + * granted provided that (1) source code retains these copyright, permission, + * and disclaimer notices, and (2) redistributions including binaries + * reproduce the notices in supporting documentation, and (3) all advertising + * materials mentioning features or use of this software display the following + * acknowledgement: ``This product includes software developed by the + * Computer Systems Laboratory at the University of Utah.'' + * + * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS + * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF + * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * CSL requests users of this software to return to csl-dist@cs.utah.edu any + * improvements that they make and grant CSL redistribution rights. + * + * Author: Bryan Ford, University of Utah CSL + * MINIX FS patches: Csizmazia Balazs, University ELTE, Hungary + */ +/* This is just an icky kludgy "VFS layer" (harhar) for ffs and ext2 and minix. 
*/ + +#include "file_io.h" + +int +open_file(master_device_port, path, fp) + mach_port_t master_device_port; + char * path; + struct file *fp; +{ + int rc; + + if ((rc = ext2_open_file(master_device_port, path, fp)) + != FS_INVALID_FS) + { + if (rc == 0) + fp->f_fstype = EXT2_FS; + return rc; + } + if ( (rc = minix_open_file(master_device_port, path, fp)) + != FS_INVALID_FS ) + { + if (rc == 0) + fp->f_fstype = MINIX_FS; + return rc; + } + fp->f_fstype = BSD_FFS; + return ffs_open_file(master_device_port, path, fp); +} + +void +close_file(fp) + register struct file *fp; +{ + switch (fp->f_fstype) { + case EXT2_FS: + ext2_close_file(fp); + return; + case MINIX_FS: + minix_close_file(fp); + return; + default: + ffs_close_file(fp); + return; + } +} + +int +read_file(fp, offset, start, size, resid) + register struct file *fp; + vm_offset_t offset; + vm_offset_t start; + vm_size_t size; + vm_size_t *resid; /* out */ +{ + switch (fp->f_fstype) { + case EXT2_FS: + return ext2_read_file(fp, offset, start, size, resid); + case MINIX_FS: + return minix_read_file(fp, offset, start, size, resid); + default: + return ffs_read_file(fp, offset, start, size, resid); + } + +} + +int +file_is_directory(struct file *f) +{ + switch (f->f_fstype) { + case EXT2_FS: + return ext2_file_is_directory(f); + case MINIX_FS: + return minix_file_is_directory(f); + default: + return ffs_file_is_directory(f); + } +} + +int +file_is_regular(struct file *f) +{ + switch (f->f_fstype) { + case EXT2_FS: + return ext2_file_is_regular(f); + case MINIX_FS: + return minix_file_is_regular(f); + default: + return ffs_file_is_regular(f); + } + +} + +int +open_file_direct(dev, fdp, is_structured) + mach_port_t dev; + register struct file_direct *fdp; + boolean_t is_structured; +{ + int rc; + + + if ((rc = ext2_open_file_direct(dev, fdp, is_structured)) + != FS_INVALID_FS) + { + if (rc == 0) + fdp->f_fstype = EXT2_FS; + return rc; + } + if ( (rc = minix_open_file_direct(dev, fdp, is_structured) ) + != 
FS_INVALID_FS ) + { + if (rc == 0) + fdp->f_fstype = MINIX_FS; + return rc; + } + fdp->f_fstype = BSD_FFS; + return ffs_open_file_direct(dev, fdp, is_structured); +} + +int +add_file_direct(fdp, fp) + register struct file_direct *fdp; + register struct file *fp; +{ + switch (fp->f_fstype) { + case EXT2_FS: + return ext2_add_file_direct(fdp, fp); + case MINIX_FS: + return minix_add_file_direct(fdp, fp); + default: + return ffs_add_file_direct(fdp, fp); + } +} + + +int +remove_file_direct(fdp) + struct file_direct *fdp; +{ + switch (fdp->f_fstype) { + case EXT2_FS: + return ext2_remove_file_direct(fdp); + case MINIX_FS: + return minix_remove_file_direct(fdp); + default: + return ffs_remove_file_direct(fdp); + } +} + +/* + * some other stuff, that was previously defined as macro + */ + +int +file_is_structured(fp) + register struct file *fp; +{ + switch (fp->f_fstype) { + case EXT2_FS: + return (fp)->u.ext2.ext2_fs != 0; + case MINIX_FS: + return (fp)->u.minix.minix_fs != 0; + default: + return (fp)->u.ffs.ffs_fs != 0; + } +} + +/* + * Special read and write routines for default pager. + * Assume that all offsets and sizes are multiples + * of DEV_BSIZE. + */ + +#define fdir_blkoff(fdp, offset) /* offset % fd_bsize */ \ + ((offset) & ((fdp)->fd_bsize - 1)) +#define fdir_lblkno(fdp, offset) /* offset / fd_bsize */ \ + ((offset) >> (fdp)->fd_bshift) + +#define fdir_fsbtodb(fdp, block) /* offset * fd_bsize / DEV_BSIZE */ \ + ((block) << (fdp)->fd_fsbtodb) + +/* + * Read all or part of a data block, and + * return a pointer to the appropriate part. + * Caller must deallocate the block when done. 
+ */ +int +page_read_file_direct(fdp, offset, size, addr, size_read) + register struct file_direct *fdp; + vm_offset_t offset; + vm_size_t size; + vm_offset_t *addr; /* out */ + mach_msg_type_number_t *size_read; /* out */ +{ + vm_offset_t off; + register daddr_t file_block; + daddr_t disk_block; + + if (offset % DEV_BSIZE != 0 || + size % DEV_BSIZE != 0) + panic("page_read_file_direct"); + + if (offset >= (fdp->fd_size << fdp->fd_bshift)) + return (FS_NOT_IN_FILE); + + off = fdir_blkoff(fdp, offset); + file_block = fdir_lblkno(fdp, offset); + + if (file_is_device(fdp)) { + disk_block = file_block; + } else { + disk_block = fdp->fd_blocks[file_block]; + if (disk_block == 0) + return (FS_NOT_IN_FILE); + + if (size > fdp->fd_bsize) { + /* Read only as much as is contiguous on disk. */ + daddr_t b = file_block + 1; + while (b < file_block + fdp->fd_size && + fdp->fd_blocks[b] == disk_block + fdir_fsbtodb(fdp, 1)) + ++b; + size = (b - file_block) * fdp->fd_bsize; + } + } + + return (device_read(fdp->fd_dev, + 0, + (recnum_t) (fdir_fsbtodb(fdp, disk_block) + btodb(off)), + (int) size, + (char **) addr, + size_read)); +} + +/* + * Write all or part of a data block, and + * return the amount written. 
+ */ +int +page_write_file_direct(fdp, offset, addr, size, size_written) + register struct file_direct *fdp; + vm_offset_t offset; + vm_offset_t addr; + vm_size_t size; + vm_offset_t *size_written; /* out */ +{ + vm_offset_t off; + register daddr_t file_block; + daddr_t disk_block; + int rc, num_written; + vm_offset_t block_size; + + if (offset % DEV_BSIZE != 0 || + size % DEV_BSIZE != 0) + panic("page_write_file"); + + if (offset >= (fdp->fd_size << fdp->fd_bshift)) + return (FS_NOT_IN_FILE); + + off = fdir_blkoff(fdp, offset); + file_block = fdir_lblkno(fdp, offset); + + if (file_is_device(fdp)) { + disk_block = file_block; + } else { + disk_block = fdp->fd_blocks[file_block]; + if (disk_block == 0) + return (FS_NOT_IN_FILE); + + if (size > fdp->fd_bsize) { + /* Write only as much as is contiguous on disk. */ + daddr_t b = file_block + 1; + while (b < file_block + fdp->fd_size && + fdp->fd_blocks[b] == disk_block + fdir_fsbtodb(fdp, 1)) + ++b; + size = (b - file_block) * fdp->fd_bsize; + } + } + + /* + * Write the data. Wait for completion to keep + * reads from getting ahead of writes and reading + * stale data. + */ + rc = device_write( + fdp->fd_dev, + 0, + (recnum_t) (fdir_fsbtodb(fdp, disk_block) + btodb(off)), + (char *) addr, + size, + &num_written); + *size_written = num_written; + return rc; +} diff --git a/serverboot/file_io.h b/serverboot/file_io.h new file mode 100644 index 00000000..323e4e9a --- /dev/null +++ b/serverboot/file_io.h @@ -0,0 +1,200 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. 
+ * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#ifndef _FILE_IO_H_ +#define _FILE_IO_H_ + +/* + * Read-only file IO. + */ + +#include <mach.h> +#include <cthreads.h> + +#include <stdint.h> +#include <device/device_types.h> + +/* Types used by the ext2 header files. */ +typedef u_int32_t __u32; +typedef int32_t __s32; +typedef u_int16_t __u16; +typedef int16_t __s16; +typedef u_int8_t __u8; +typedef int8_t __s8; + +#include <defs.h> +#include "minix_fs.h" +#include "../ext2fs/ext2_fs.h" /* snarf stolen linux header from ext2fs */ +#include "disk_inode.h" + +#define BSD_FFS 0 +#define EXT2_FS 1 +#define MINIX_FS 2 + +#define EXT2_NIADDR (EXT2_N_BLOCKS - EXT2_NDIR_BLOCKS) + +/* + * In-core open file. 
+ */ +struct file { + struct mutex f_lock; /* lock */ + mach_port_t f_dev; /* port to device */ + vm_offset_t f_buf; /* buffer for data block */ + vm_size_t f_buf_size; /* size of data block */ + daddr_t f_buf_blkno; /* block number of data block */ + vm_size_t f_size; /* size in bytes of the file */ + + int f_fstype; /* contains fs-id */ + + union { + struct { + struct fs * ffs_fs; /* pointer to super-block */ + struct icommon ffs_ic; /* copy of on-disk inode */ + + /* number of blocks mapped by + indirect block at level i */ + int ffs_nindir[FFS_NIADDR+1]; + + /* buffer for indirect block at level i */ + vm_offset_t ffs_blk[FFS_NIADDR]; + + /* size of buffer */ + vm_size_t ffs_blksize[FFS_NIADDR]; + + /* disk address of block in buffer */ + daddr_t ffs_blkno[FFS_NIADDR]; + } ffs; + struct { + /* pointer to super-block */ + struct ext2_super_block*ext2_fs; + + /* pointer to group descriptors */ + struct ext2_group_desc* ext2_gd; + + /* size of group descriptors */ + vm_size_t ext2_gd_size; + + /* copy of on-disk inode */ + struct ext2_inode ext2_ic; + + /* number of blocks mapped by + indirect block at level i */ + int ext2_nindir[EXT2_NIADDR+1]; + + /* buffer for indirect block at level i */ + vm_offset_t ext2_blk[EXT2_NIADDR]; + + /* size of buffer */ + vm_size_t ext2_blksize[EXT2_NIADDR]; + + /* disk address of block in buffer */ + daddr_t ext2_blkno[EXT2_NIADDR]; + } ext2; + struct { + /* pointer to super-block */ + struct minix_super_block* minix_fs; + + /* copy of on-disk inode */ + struct minix_inode minix_ic; + + /* number of blocks mapped by + indirect block at level i */ + int minix_nindir[MINIX_NIADDR+1]; + + /* buffer for indirect block at level i */ + vm_offset_t minix_blk[MINIX_NIADDR]; + + /* size of buffer */ + vm_size_t minix_blksize[MINIX_NIADDR]; + + /* disk address of block in buffer */ + minix_daddr_t minix_blkno[MINIX_NIADDR]; + } minix; + } u; +}; + +/* + * In-core open file, with in-core block map. 
+ */ +struct file_direct { + int f_fstype; /* XXX was: true if ext2, false if ffs */ + + mach_port_t fd_dev; /* port to device */ + daddr_t * fd_blocks; /* array of disk block addresses */ + long fd_size; /* number of blocks in the array */ + long fd_bsize; /* disk block size */ + long fd_bshift; /* log2(fd_bsize) */ + long fd_fsbtodb; /* log2(fd_bsize / disk sector size) */ +}; + +#define file_is_device(_fd_) ((_fd_)->fd_blocks == 0) + +/* + * Exported routines. + */ + +extern int open_file(); +extern void close_file(); +extern int read_file(); + +extern int open_file_direct(); +extern int add_file_direct(); +extern int remove_file_direct(); +extern int file_wire_direct(); +extern int page_read_file_direct(); +extern int page_write_file_direct(); + +/* + * Error codes for file system errors. + */ + +#include <errno.h> + +/* Just use the damn Hurd error numbers. This is old CMU/Utah code from + the days of personality-independent Mach where it made sense for this to + be a standalone universe. In the Hurd, we compile serverboot against + the regular C library anyway. */ + +#define FS_NOT_DIRECTORY ENOTDIR +#define FS_NO_ENTRY ENOENT +#define FS_NAME_TOO_LONG ENAMETOOLONG +#define FS_SYMLINK_LOOP ELOOP +#define FS_INVALID_FS EFTYPE /* ? 
*/ +#define FS_NOT_IN_FILE EINVAL +#define FS_INVALID_PARAMETER EINVAL + +#if 0 +#define FS_NOT_DIRECTORY 5000 /* not a directory */ +#define FS_NO_ENTRY 5001 /* name not found */ +#define FS_NAME_TOO_LONG 5002 /* name too long */ +#define FS_SYMLINK_LOOP 5003 /* symbolic link loop */ +#define FS_INVALID_FS 5004 /* bad file system */ +#define FS_NOT_IN_FILE 5005 /* offset not in file */ +#define FS_INVALID_PARAMETER 5006 /* bad parameter to routine */ +#endif + + +#endif /* _FILE_IO_H_ */ diff --git a/serverboot/fs.h b/serverboot/fs.h new file mode 100644 index 00000000..5809ed93 --- /dev/null +++ b/serverboot/fs.h @@ -0,0 +1,455 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * Copyright (c) 1982, 1986 Regents of the University of California. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms are permitted + * provided that the above copyright notice and this paragraph are + * duplicated in all such forms and that any documentation, + * advertising materials, and other materials related to such + * distribution and use acknowledge that the software was developed + * by the University of California, Berkeley. The name of the + * University may not be used to endorse or promote products derived + * from this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + * + * @(#)fs.h 7.7 (Berkeley) 5/9/89 + */ + +/* + * Each disk drive contains some number of file systems. + * A file system consists of a number of cylinder groups. + * Each cylinder group has inodes and data. + * + * A file system is described by its super-block, which in turn + * describes the cylinder groups. The super-block is critical + * data and is replicated in each cylinder group to protect against + * catastrophic loss. This is done at `newfs' time and the critical + * super-block data does not change, so the copies need not be + * referenced further unless disaster strikes. + * + * For file system fs, the offsets of the various blocks of interest + * are given in the super block as: + * [fs->fs_sblkno] Super-block + * [fs->fs_cblkno] Cylinder group block + * [fs->fs_iblkno] Inode blocks + * [fs->fs_dblkno] Data blocks + * The beginning of cylinder group cg in fs, is given by + * the ``cgbase(fs, cg)'' macro. + * + * The first boot and super blocks are given in absolute disk addresses. + * The byte-offset forms are preferred, as they don't imply a sector size. 
+ */ +#define BBSIZE 8192 +#define SBSIZE 8192 +#define BBOFF ((off_t)(0)) +#define SBOFF ((off_t)(BBOFF + BBSIZE)) +#define BBLOCK ((daddr_t)(0)) +#define SBLOCK ((daddr_t)(BBLOCK + BBSIZE / DEV_BSIZE)) + +/* + * Addresses stored in inodes are capable of addressing fragments + * of `blocks'. File system blocks of at most size MAXBSIZE can + * be optionally broken into 2, 4, or 8 pieces, each of which is + * addressible; these pieces may be DEV_BSIZE, or some multiple of + * a DEV_BSIZE unit. + * + * Large files consist of exclusively large data blocks. To avoid + * undue wasted disk space, the last data block of a small file may be + * allocated as only as many fragments of a large block as are + * necessary. The file system format retains only a single pointer + * to such a fragment, which is a piece of a single large block that + * has been divided. The size of such a fragment is determinable from + * information in the inode, using the ``blksize(fs, ip, lbn)'' macro. + * + * The file system records space availability at the fragment level; + * to determine block availability, aligned fragments are examined. + * + * The root inode is the root of the file system. + * Inode 0 can't be used for normal purposes and + * historically bad blocks were linked to inode 1, + * thus the root inode is 2. (inode 1 is no longer used for + * this purpose, however numerous dump tapes make this + * assumption, so we are stuck with it) + */ +#define ROOTINO ((ino_t)2) /* i number of all roots */ + +/* + * MINBSIZE is the smallest allowable block size. + * In order to insure that it is possible to create files of size + * 2^32 with only two levels of indirection, MINBSIZE is set to 4096. + * MINBSIZE must be big enough to hold a cylinder group block, + * thus changes to (struct cg) must keep its size within MINBSIZE. + * Note that super blocks are always of size SBSIZE, + * and that both SBSIZE and MAXBSIZE must be >= MINBSIZE. 
+ */ +#define MINBSIZE 4096 + +/* + * The path name on which the file system is mounted is maintained + * in fs_fsmnt. MAXMNTLEN defines the amount of space allocated in + * the super block for this name. + * The limit on the amount of summary information per file system + * is defined by MAXCSBUFS. It is currently parameterized for a + * maximum of two million cylinders. + */ +#define MAXMNTLEN 512 +#define MAXCSBUFS 32 + +/* + * Per cylinder group information; summarized in blocks allocated + * from first cylinder group data blocks. These blocks have to be + * read in from fs_csaddr (size fs_cssize) in addition to the + * super block. + * + * N.B. sizeof(struct csum) must be a power of two in order for + * the ``fs_cs'' macro to work (see below). + */ +struct csum { + int cs_ndir; /* number of directories */ + int cs_nbfree; /* number of free blocks */ + int cs_nifree; /* number of free inodes */ + int cs_nffree; /* number of free frags */ +}; + +/* + * Super block for a file system. + */ +#define FS_MAGIC 0x011954 +struct fs +{ + int xxx1; /* struct fs *fs_link;*/ + int xxx2; /* struct fs *fs_rlink;*/ + daddr_t fs_sblkno; /* addr of super-block in filesys */ + daddr_t fs_cblkno; /* offset of cyl-block in filesys */ + daddr_t fs_iblkno; /* offset of inode-blocks in filesys */ + daddr_t fs_dblkno; /* offset of first data after cg */ + int fs_cgoffset; /* cylinder group offset in cylinder */ + int fs_cgmask; /* used to calc mod fs_ntrak */ + time_t fs_time; /* last time written */ + int fs_size; /* number of blocks in fs */ + int fs_dsize; /* number of data blocks in fs */ + int fs_ncg; /* number of cylinder groups */ + int fs_bsize; /* size of basic blocks in fs */ + int fs_fsize; /* size of frag blocks in fs */ + int fs_frag; /* number of frags in a block in fs */ +/* these are configuration parameters */ + int fs_minfree; /* minimum percentage of free blocks */ + int fs_rotdelay; /* num of ms for optimal next block */ + int fs_rps; /* disk revolutions per second 
*/ +/* these fields can be computed from the others */ + int fs_bmask; /* ``blkoff'' calc of blk offsets */ + int fs_fmask; /* ``fragoff'' calc of frag offsets */ + int fs_bshift; /* ``lblkno'' calc of logical blkno */ + int fs_fshift; /* ``numfrags'' calc number of frags */ +/* these are configuration parameters */ + int fs_maxcontig; /* max number of contiguous blks */ + int fs_maxbpg; /* max number of blks per cyl group */ +/* these fields can be computed from the others */ + int fs_fragshift; /* block to frag shift */ + int fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */ + int fs_sbsize; /* actual size of super block */ + int fs_csmask; /* csum block offset */ + int fs_csshift; /* csum block number */ + int fs_nindir; /* value of NINDIR */ + int fs_inopb; /* value of INOPB */ + int fs_nspf; /* value of NSPF */ +/* yet another configuration parameter */ + int fs_optim; /* optimization preference, see below */ +/* these fields are derived from the hardware */ + int fs_npsect; /* # sectors/track including spares */ + int fs_interleave; /* hardware sector interleave */ + int fs_trackskew; /* sector 0 skew, per track */ + int fs_headswitch; /* head switch time, usec */ + int fs_trkseek; /* track-to-track seek, usec */ +/* sizes determined by number of cylinder groups and their sizes */ + daddr_t fs_csaddr; /* blk addr of cyl grp summary area */ + int fs_cssize; /* size of cyl grp summary area */ + int fs_cgsize; /* cylinder group size */ +/* these fields are derived from the hardware */ + int fs_ntrak; /* tracks per cylinder */ + int fs_nsect; /* sectors per track */ + int fs_spc; /* sectors per cylinder */ +/* this comes from the disk driver partitioning */ + int fs_ncyl; /* cylinders in file system */ +/* these fields can be computed from the others */ + int fs_cpg; /* cylinders per group */ + int fs_ipg; /* inodes per group */ + int fs_fpg; /* blocks per group * fs_frag */ +/* this data must be re-computed after crashes */ + struct csum fs_cstotal; /* 
cylinder summary information */ +/* these fields are cleared at mount time */ + char fs_fmod; /* super block modified flag */ + char fs_clean; /* file system is clean flag */ + char fs_ronly; /* mounted read-only flag */ + char fs_flags; /* currently unused flag */ + char fs_fsmnt[MAXMNTLEN]; /* name mounted on */ +/* these fields retain the current block allocation info */ + int fs_cgrotor; /* last cg searched */ +#if 1 + int was_fs_csp[MAXCSBUFS]; +#else + struct csum *fs_csp[MAXCSBUFS];/* list of fs_cs info buffers */ +#endif + int fs_cpc; /* cyl per cycle in postbl */ + short fs_opostbl[16][8]; /* old rotation block list head */ + long fs_sparecon[50]; /* reserved for future constants */ + long fs_contigsumsize; /* size of cluster summary array */ + long fs_maxsymlinklen; /* max length of an internal symlink */ + long fs_inodefmt; /* format of on-disk inodes */ + quad fs_maxfilesize; /* maximum representable file size */ + quad fs_qbmask; /* ~fs_bmask - for use with quad size */ + quad fs_qfmask; /* ~fs_fmask - for use with quad size */ + long fs_state; /* validate fs_clean field */ + int fs_postblformat; /* format of positional layout tables */ + int fs_nrpos; /* number of rotaional positions */ + int fs_postbloff; /* (short) rotation block list head */ + int fs_rotbloff; /* (u_char) blocks for each rotation */ + int fs_magic; /* magic number */ + u_char fs_space[1]; /* list of blocks for each rotation */ +/* actually longer */ +}; +/* + * Preference for optimization. + */ +#define FS_OPTTIME 0 /* minimize allocation time */ +#define FS_OPTSPACE 1 /* minimize disk fragmentation */ + +/* + * Rotational layout table format types + */ +#define FS_42POSTBLFMT -1 /* 4.2BSD rotational table format */ +#define FS_DYNAMICPOSTBLFMT 1 /* dynamic rotational table format */ +/* + * Macros for access to superblock array structures + */ +#define fs_postbl(fs, cylno) \ + (((fs)->fs_postblformat == FS_42POSTBLFMT) \ + ? 
((fs)->fs_opostbl[cylno]) \ + : ((short *)((char *)(fs) + (fs)->fs_postbloff) + (cylno) * (fs)->fs_nrpos)) +#define fs_rotbl(fs) \ + (((fs)->fs_postblformat == FS_42POSTBLFMT) \ + ? ((fs)->fs_space) \ + : ((u_char *)((char *)(fs) + (fs)->fs_rotbloff))) + +/* + * Convert cylinder group to base address of its global summary info. + * + * N.B. This macro assumes that sizeof(struct csum) is a power of two. + */ +#define fs_cs(fs, indx) \ + fs_csp[(indx) >> (fs)->fs_csshift][(indx) & ~(fs)->fs_csmask] + +/* + * Cylinder group block for a file system. + */ +#define CG_MAGIC 0x090255 +struct cg { + int xxx1; /* struct cg *cg_link;*/ + int cg_magic; /* magic number */ + time_t cg_time; /* time last written */ + int cg_cgx; /* we are the cgx'th cylinder group */ + short cg_ncyl; /* number of cyl's this cg */ + short cg_niblk; /* number of inode blocks this cg */ + int cg_ndblk; /* number of data blocks this cg */ + struct csum cg_cs; /* cylinder summary information */ + int cg_rotor; /* position of last used block */ + int cg_frotor; /* position of last used frag */ + int cg_irotor; /* position of last used inode */ + int cg_frsum[MAXFRAG]; /* counts of available frags */ + int cg_btotoff; /* (long) block totals per cylinder */ + int cg_boff; /* (short) free block positions */ + int cg_iusedoff; /* (char) used inode map */ + int cg_freeoff; /* (u_char) free block map */ + int cg_nextfreeoff; /* (u_char) next available space */ + int cg_sparecon[16]; /* reserved for future use */ + u_char cg_space[1]; /* space for cylinder group maps */ +/* actually longer */ +}; +/* + * Macros for access to cylinder group array structures + */ +#define cg_blktot(cgp) \ + (((cgp)->cg_magic != CG_MAGIC) \ + ? (((struct ocg *)(cgp))->cg_btot) \ + : ((int *)((char *)(cgp) + (cgp)->cg_btotoff))) +#define cg_blks(fs, cgp, cylno) \ + (((cgp)->cg_magic != CG_MAGIC) \ + ? 
(((struct ocg *)(cgp))->cg_b[cylno]) \ + : ((short *)((char *)(cgp) + (cgp)->cg_boff) + (cylno) * (fs)->fs_nrpos)) +#define cg_inosused(cgp) \ + (((cgp)->cg_magic != CG_MAGIC) \ + ? (((struct ocg *)(cgp))->cg_iused) \ + : ((char *)((char *)(cgp) + (cgp)->cg_iusedoff))) +#define cg_blksfree(cgp) \ + (((cgp)->cg_magic != CG_MAGIC) \ + ? (((struct ocg *)(cgp))->cg_free) \ + : ((u_char *)((char *)(cgp) + (cgp)->cg_freeoff))) +#define cg_chkmagic(cgp) \ + ((cgp)->cg_magic == CG_MAGIC || ((struct ocg *)(cgp))->cg_magic == CG_MAGIC) + +/* + * The following structure is defined + * for compatibility with old file systems. + */ +struct ocg { + int xxx1; /* struct ocg *cg_link;*/ + int xxx2; /* struct ocg *cg_rlink;*/ + time_t cg_time; /* time last written */ + int cg_cgx; /* we are the cgx'th cylinder group */ + short cg_ncyl; /* number of cyl's this cg */ + short cg_niblk; /* number of inode blocks this cg */ + int cg_ndblk; /* number of data blocks this cg */ + struct csum cg_cs; /* cylinder summary information */ + int cg_rotor; /* position of last used block */ + int cg_frotor; /* position of last used frag */ + int cg_irotor; /* position of last used inode */ + int cg_frsum[8]; /* counts of available frags */ + int cg_btot[32]; /* block totals per cylinder */ + short cg_b[32][8]; /* positions of free blocks */ + char cg_iused[256]; /* used inode map */ + int cg_magic; /* magic number */ + u_char cg_free[1]; /* free block map */ +/* actually longer */ +}; + +/* + * Turn file system block numbers into disk block addresses. + * This maps file system blocks to device size blocks. + */ +#define fsbtodb(fs, b) ((b) << (fs)->fs_fsbtodb) +#define dbtofsb(fs, b) ((b) >> (fs)->fs_fsbtodb) + +/* + * Cylinder group macros to locate things in cylinder groups. + * They calc file system addresses of cylinder group data structures. 
+ */ +#define cgbase(fs, c) ((daddr_t)((fs)->fs_fpg * (c))) +#define cgstart(fs, c) \ + (cgbase(fs, c) + (fs)->fs_cgoffset * ((c) & ~((fs)->fs_cgmask))) +#define cgsblock(fs, c) (cgstart(fs, c) + (fs)->fs_sblkno) /* super blk */ +#define cgtod(fs, c) (cgstart(fs, c) + (fs)->fs_cblkno) /* cg block */ +#define cgimin(fs, c) (cgstart(fs, c) + (fs)->fs_iblkno) /* inode blk */ +#define cgdmin(fs, c) (cgstart(fs, c) + (fs)->fs_dblkno) /* 1st data */ + +/* + * Macros for handling inode numbers: + * inode number to file system block offset. + * inode number to cylinder group number. + * inode number to file system block address. + */ +#define itoo(fs, x) ((x) % INOPB(fs)) +#define itog(fs, x) ((x) / (fs)->fs_ipg) +#define itod(fs, x) \ + ((daddr_t)(cgimin(fs, itog(fs, x)) + \ + (blkstofrags((fs), (((x) % (fs)->fs_ipg) / INOPB(fs)))))) + +/* + * Give cylinder group number for a file system block. + * Give cylinder group block number for a file system block. + */ +#define dtog(fs, d) ((d) / (fs)->fs_fpg) +#define dtogd(fs, d) ((d) % (fs)->fs_fpg) + +/* + * Extract the bits for a block from a map. + * Compute the cylinder and rotational position of a cyl block addr. + */ +#define blkmap(fs, map, loc) \ + (((map)[(loc) / NBBY] >> ((loc) % NBBY)) & (0xff >> (NBBY - (fs)->fs_frag))) +#define cbtocylno(fs, bno) \ + ((bno) * NSPF(fs) / (fs)->fs_spc) +#define cbtorpos(fs, bno) \ + (((bno) * NSPF(fs) % (fs)->fs_spc / (fs)->fs_nsect * (fs)->fs_trackskew + \ + (bno) * NSPF(fs) % (fs)->fs_spc % (fs)->fs_nsect * (fs)->fs_interleave) % \ + (fs)->fs_nsect * (fs)->fs_nrpos / (fs)->fs_npsect) + +/* + * The following macros optimize certain frequently calculated + * quantities by using shifts and masks in place of divisions + * modulos and multiplications. 
+ */ +#define blkoff(fs, loc) /* calculates (loc % fs->fs_bsize) */ \ + ((loc) & ~(fs)->fs_bmask) +#define fragoff(fs, loc) /* calculates (loc % fs->fs_fsize) */ \ + ((loc) & ~(fs)->fs_fmask) +#define lblkno(fs, loc) /* calculates (loc / fs->fs_bsize) */ \ + ((loc) >> (fs)->fs_bshift) +#define numfrags(fs, loc) /* calculates (loc / fs->fs_fsize) */ \ + ((loc) >> (fs)->fs_fshift) +#define blkroundup(fs, size) /* calculates roundup(size, fs->fs_bsize) */ \ + (((size) + (fs)->fs_bsize - 1) & (fs)->fs_bmask) +#define fragroundup(fs, size) /* calculates roundup(size, fs->fs_fsize) */ \ + (((size) + (fs)->fs_fsize - 1) & (fs)->fs_fmask) +#define fragstoblks(fs, frags) /* calculates (frags / fs->fs_frag) */ \ + ((frags) >> (fs)->fs_fragshift) +#define blkstofrags(fs, blks) /* calculates (blks * fs->fs_frag) */ \ + ((blks) << (fs)->fs_fragshift) +#define fragnum(fs, fsb) /* calculates (fsb % fs->fs_frag) */ \ + ((fsb) & ((fs)->fs_frag - 1)) +#define blknum(fs, fsb) /* calculates rounddown(fsb, fs->fs_frag) */ \ + ((fsb) &~ ((fs)->fs_frag - 1)) + +/* + * Determine the number of available frags given a + * percentage to hold in reserve + */ +#define freespace(fs, percentreserved) \ + (blkstofrags((fs), (fs)->fs_cstotal.cs_nbfree) + \ + (fs)->fs_cstotal.cs_nffree - ((fs)->fs_dsize * (percentreserved) / 100)) + +/* + * Determining the size of a file block in the file system. + */ +#define blksize(fs, ip, lbn) \ + (((lbn) >= NDADDR || (ip)->i_size >= ((lbn) + 1) << (fs)->fs_bshift) \ + ? (fs)->fs_bsize \ + : (fragroundup(fs, blkoff(fs, (ip)->i_size)))) +#define dblksize(fs, dip, lbn) \ + (((lbn) >= NDADDR || (dip)->di_size >= ((lbn) + 1) << (fs)->fs_bshift) \ + ? (fs)->fs_bsize \ + : (fragroundup(fs, blkoff(fs, (dip)->di_size)))) + +/* + * Number of disk sectors per block; assumes DEV_BSIZE byte sector size. + */ +#define NSPB(fs) ((fs)->fs_nspf << (fs)->fs_fragshift) +#define NSPF(fs) ((fs)->fs_nspf) + +/* + * INOPB is the number of inodes in a secondary storage block. 
+ */ +#define INOPB(fs) ((fs)->fs_inopb) +#define INOPF(fs) ((fs)->fs_inopb >> (fs)->fs_fragshift) + +/* + * NINDIR is the number of indirects in a file system block. + */ +#define NINDIR(fs) ((fs)->fs_nindir) + diff --git a/serverboot/gets.c b/serverboot/gets.c new file mode 100644 index 00000000..61d14460 --- /dev/null +++ b/serverboot/gets.c @@ -0,0 +1,90 @@ +/* + * Mach Operating System + * Copyright (c) 1993-1989 Carnegie Mellon University. + * Copyright (c) 1994 The University of Utah and + * the Computer Systems Laboratory (CSL). + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON, THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF + * THIS SOFTWARE IN ITS "AS IS" CONDITION, AND DISCLAIM ANY LIABILITY + * OF ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF + * THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. 
+ */ + +#include <mach.h> +#include <device/device.h> +#include <varargs.h> + +extern mach_port_t __libmach_console_port; + +safe_gets(str, maxlen) + char *str; + int maxlen; +{ + register char *lp; + register int c; + + char inbuf[IO_INBAND_MAX]; + mach_msg_type_number_t count; + register char *ip; + char *strmax = str + maxlen - 1; /* allow space for trailing 0 */ + + lp = str; + for (;;) { + count = IO_INBAND_MAX; + (void) device_read_inband(__libmach_console_port, + (dev_mode_t)0, (recnum_t)0, + sizeof(inbuf), inbuf, &count); + for (ip = inbuf; ip < &inbuf[count]; ip++) { + c = *ip; + switch (c) { + case '\n': + case '\r': + printf("\n"); + *lp++ = 0; + return; + + case '\b': + case '#': + case '\177': + if (lp > str) { + printf("\b \b"); + lp--; + } + continue; + case '@': + case 'u'&037: + lp = str; + printf("\n\r"); + continue; + default: + if (c >= ' ' && c < '\177') { + if (lp < strmax) { + *lp++ = c; + printf("%c", c); + } + else { + printf("%c", '\007'); /* beep */ + } + } + } + } + } +} + diff --git a/serverboot/gunzip.c b/serverboot/gunzip.c new file mode 100644 index 00000000..f74da111 --- /dev/null +++ b/serverboot/gunzip.c @@ -0,0 +1,188 @@ +/* Modified by okuji@kuicr.kyoto-u.ac.jp for use in serverboot. */ +/* Decompressing store backend + + Copyright (C) 1997 Free Software Foundation, Inc. + Written by Miles Bader <miles@gnu.ai.mit.edu> + This file is part of the GNU Hurd. + + The GNU Hurd is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2, or (at + your option) any later version. + + The GNU Hurd is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. */

#include <stdio.h>
#include <string.h>
#include <setjmp.h>
#include <cthreads.h>
#include <errno.h>

#include <file_io.h>

/* gzip.h makes several annoying defines & decls, which we have to work
   around. */
#define file_t gzip_file_t
#include "gzip.h"
#undef file_t
#undef head

#define IN_BUFFERING (256*1024)
#define OUT_BUFFERING (512*1024)

/* Held for the whole decompression: the unzip engine is driven through
   the global unzip_* hook pointers assigned below, so only one caller
   can use it at a time. */
static struct mutex unzip_lock = MUTEX_INITIALIZER;

/* Uncompress the contents of FROM, which should contain a valid gzip file,
   into memory, returning the result buffer in BUF & BUF_LEN.

   Returns 0 on success, otherwise an error code: the vm_allocate or
   read_file failure, EIO for a read error reported by the engine, or
   EINVAL when FROM is not a gzip file or the engine reports an error.
   On failure the output buffer is deallocated again.

   The I/O hooks are nested functions (a GNU C extension) so that they can
   share this invocation's local state; they report failure by storing a
   code in ZERR and longjmp'ing back to the setjmp below. */
int
serverboot_gunzip (struct file *from, void **buf, size_t *buf_len)
{
  /* Entry points to unzip engine. */
  int get_method (int);
  extern long int bytes_out;
  /* Callbacks from unzip for I/O and error interface. */
  extern int (*unzip_read) (char *buf, size_t maxread);
  extern void (*unzip_write) (const char *buf, size_t nwrite);
  extern void (*unzip_read_error) (void);
  extern void (*unzip_error) (const char *msg);

  /* How we return errors from our hook functions. */
  jmp_buf zerr_jmp_buf;
  int zerr;

  size_t offset = 0; /* Offset of read point in FROM. */

  /* Read at most MAXREAD (or 0 if eof) bytes into BUF from our current
     position in FROM. */
  int zread (char *buf, size_t maxread)
  {
    vm_size_t resid;
    size_t did_read;

    /* Never ask for more than is left in the file. */
    if (from->f_size - offset < maxread)
      did_read = from->f_size - offset;
    else
      did_read = maxread;

    zerr = read_file (from, offset, buf, did_read, &resid);
    if (zerr)
      longjmp (zerr_jmp_buf, 1);

    /* RESID is what read_file could not deliver. */
    did_read -= resid;
    offset += did_read;

    return did_read;
  }

  size_t out_buf_offs = 0; /* Position in the output buffer. */

  /* Write uncompress data to our output buffer. */
  void zwrite (const char *wbuf, size_t nwrite)
  {
    size_t old_buf_len = *buf_len;

    if (out_buf_offs + nwrite > old_buf_len)
      /* Have to grow the output buffer. */
      {
        void *old_buf = *buf;
        void *new_buf = old_buf + old_buf_len; /* First try. */
        size_t new_buf_len = round_page (old_buf_len + old_buf_len + nwrite);

        /* Try to grow the buffer.  The final 0 asks for exactly the
           address in NEW_BUF, i.e. the pages directly after the current
           buffer, so success extends it in place. */
        zerr =
          vm_allocate (mach_task_self (),
                       (vm_address_t *)&new_buf, new_buf_len - old_buf_len,
                       0);
        if (zerr)
          /* Can't do that, try to make a bigger buffer elsewhere. */
          {
            new_buf = old_buf;
            zerr =
              vm_allocate (mach_task_self (),
                           (vm_address_t *)&new_buf, new_buf_len, 1);
            if (zerr)
              longjmp (zerr_jmp_buf, 1);

            if (out_buf_offs > 0)
              /* Copy the old buffer into the start of the new & free it. */
              bcopy (old_buf, new_buf, out_buf_offs);

            vm_deallocate (mach_task_self (),
                           (vm_address_t)old_buf, old_buf_len);

            *buf = new_buf;
          }

        *buf_len = new_buf_len;
      }

    bcopy (wbuf, *buf + out_buf_offs, nwrite);
    out_buf_offs += nwrite;
  }

  /* Engine hook: reading the input failed. */
  void zreaderr (void)
  {
    zerr = EIO;
    longjmp (zerr_jmp_buf, 1);
  }
  /* Engine hook: the compressed data is bad.  MSG is not used.  The
     longjmp value (2 rather than 1) is not examined by the setjmp test
     below, which only distinguishes zero from non-zero. */
  void zerror (const char *msg)
  {
    zerr = EINVAL;
    longjmp (zerr_jmp_buf, 2);
  }

  /* Try to guess a reasonable output buffer size. */
  *buf_len = round_page (from->f_size * 2);
  zerr = vm_allocate (mach_task_self (), (vm_address_t *)buf, *buf_len, 1);
  if (zerr)
    return zerr;

  mutex_lock (&unzip_lock);

  unzip_read = zread;
  unzip_write = zwrite;
  unzip_read_error = zreaderr;
  unzip_error = zerror;

  /* A hook that fails lands back here with setjmp returning non-zero and
     ZERR already set, which skips the block below. */
  if (! setjmp (zerr_jmp_buf))
    {
      if (get_method (0) != 0)
        /* Not a happy gzip file. */
        zerr = EINVAL;
      else
        /* Matched gzip magic number. Ready to unzip.
           Set up the output stream and let 'er rip. */
        {
          /* Call the gunzip engine. */
          bytes_out = 0;
          unzip (17, 23); /* Arguments ignored. */
          zerr = 0;
        }
    }

  mutex_unlock (&unzip_lock);

  if (zerr)
    {
      /* Failure: give the whole output buffer back. */
      if (*buf_len > 0)
        vm_deallocate (mach_task_self (), (vm_address_t)*buf, *buf_len);
    }
  else if (out_buf_offs < *buf_len)
    /* Trim the output buffer to be the right length. */
    {
      /* Only whole pages past the data can be released. */
      size_t end = round_page (out_buf_offs);
      if (end < *buf_len)
        vm_deallocate (mach_task_self (),
                       (vm_address_t)(*buf + end), *buf_len - end);
      *buf_len = out_buf_offs;
    }

  return zerr;
}
diff --git a/serverboot/kalloc.c b/serverboot/kalloc.c new file mode 100644 index 00000000..80438738 --- /dev/null +++ b/serverboot/kalloc.c @@ -0,0 +1,274 @@
/*
 * Mach Operating System
 * Copyright (c) 1993-1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
 * School of Computer Science
 * Carnegie Mellon University
 * Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 * File: kern/kalloc.c
 * Author: Avadis Tevanian, Jr.
 * Date: 1985
 *
 * General kernel memory allocator. This allocator is designed
 * to be used by the kernel to manage dynamic memory fast.
 */

#include <mach.h>
#include <cthreads.h> /* for spin locks */

#define DEBUG

/*
 * All allocations of size less than kalloc_max are rounded to the
 * next highest power of 2.
 */
vm_size_t kalloc_max; /* max before we use vm_allocate */
#define MINSIZE 4 /* minimum allocation size */

/*
 * One bucket per power-of-two size.  Free blocks are chained through
 * their own first word (see kalloc/kfree).
 */
struct free_list {
	spin_lock_t lock;
	vm_offset_t head; /* head of free list */
#ifdef DEBUG
	int count;	/* number of blocks currently on this list */
#endif /*DEBUG*/
};

#define KLIST_MAX 13
		/* sizes: 4, 8, 16, 32, 64,
		   128, 256, 512, 1024,
		   2048, 4096, 8192, 16384 */
struct free_list kfree_list[KLIST_MAX];

/*
 * State of the contiguous arena that kget_space carves small blocks
 * from; all three variables are protected by kget_space_lock.
 */
spin_lock_t kget_space_lock;
vm_offset_t kalloc_next_space = 0;	/* first unused byte of the arena */
vm_offset_t kalloc_end_of_space = 0;	/* first byte past the arena */

vm_size_t kalloc_wasted_space = 0;	/* bytes abandoned when the arena moved */

boolean_t kalloc_initialized = FALSE;	/* set by kalloc on first use */

/*
 * Initialize the memory allocator. This should be called only
 * once on a system wide basis (i.e. first processor to get here
 * does the initialization).
 *
 * This initializes all of the zones.
 */

void kalloc_init(void)
{
	vm_offset_t min, max;
	vm_size_t size;
	register int i;

	/*
	 * Support free lists for items up to vm_page_size or
	 * 16Kbytes, whichever is less.
	 */

	if (vm_page_size > 16*1024)
		kalloc_max = 16*1024;
	else
		kalloc_max = vm_page_size;

	for (i = 0; i < KLIST_MAX; i++) {
	    spin_lock_init(&kfree_list[i].lock);
	    kfree_list[i].head = 0;
	}
	spin_lock_init(&kget_space_lock);

	/*
	 * Do not allocate memory at address 0.
	 */
	kalloc_next_space = vm_page_size;
	kalloc_end_of_space = vm_page_size;
}

/*
 * Contiguous space allocator for items of less than a page size.
 *
 * Returns the address of SIZE fresh bytes, or 0 if more memory could
 * not be mapped.  New memory is obtained with the lock dropped, so the
 * loop re-checks the arena afterwards; memory that turned out not to be
 * needed is released again at the end.
 */
vm_offset_t kget_space(vm_offset_t size)
{
	vm_size_t space_to_add;
	vm_offset_t new_space = 0;
	vm_offset_t addr;

	spin_lock(&kget_space_lock);
	while (kalloc_next_space + size > kalloc_end_of_space) {
	    /*
	     * Add at least one page to allocation area.
	     */
	    space_to_add = round_page(size);

	    if (new_space == 0) {
		/*
		 * Unlock and allocate memory.
		 * Try to make it contiguous with the last
		 * allocation area.
		 */
		spin_unlock(&kget_space_lock);

		new_space = kalloc_end_of_space;
		if (vm_map(mach_task_self(),
			   &new_space, space_to_add, (vm_offset_t) 0, TRUE,
			   MEMORY_OBJECT_NULL, (vm_offset_t) 0, FALSE,
			   VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT)
			!= KERN_SUCCESS)
		    return 0;	/* lock is not held here */
		wire_memory(new_space, space_to_add,
			    VM_PROT_READ|VM_PROT_WRITE);
		spin_lock(&kget_space_lock);
		/* Re-test the loop condition now that the lock is held again. */
		continue;
	    }

	    /*
	     * Memory was allocated in a previous iteration.
	     * Check whether the new region is contiguous with the
	     * old one.
	     */
	    if (new_space != kalloc_end_of_space) {
		/*
		 * Throw away the remainder of the old space,
		 * and start a new one.
		 */
		kalloc_wasted_space +=
			kalloc_end_of_space - kalloc_next_space;
		kalloc_next_space = new_space;
	    }
	    kalloc_end_of_space = new_space + space_to_add;

	    new_space = 0;
	}

	addr = kalloc_next_space;
	kalloc_next_space += size;
	spin_unlock(&kget_space_lock);

	/*
	 * new_space is still set only if we mapped memory but the arena
	 * already had room once the lock was retaken; give it back.
	 */
	if (new_space != 0)
	    (void) vm_deallocate(mach_task_self(), new_space, space_to_add);

	return addr;
}

/*
 * Allocate SIZE bytes; returns 0 on failure.
 *
 * Requests below kalloc_max are rounded up to a power of two (at least
 * MINSIZE) and served from the matching free list, or from kget_space
 * when that list is empty.  Everything else, including a request that
 * rounds up to exactly kalloc_max, gets its own vm_allocate region.
 * The allocator initializes itself on the first call.
 */
void *kalloc(vm_size_t size)
{
	register vm_size_t allocsize;
	vm_offset_t addr;
	register struct free_list *fl;

	if (!kalloc_initialized) {
	    kalloc_init();
	    kalloc_initialized = TRUE;
	}

	/* compute the size of the block that we will actually allocate */

	allocsize = size;
	if (size < kalloc_max) {
	    allocsize = MINSIZE;
	    fl = kfree_list;
	    while (allocsize < size) {
		allocsize <<= 1;
		fl++;
	    }
	}

	/*
	 * If our size is still small enough, check the queue for that size
	 * and allocate.
	 */

	if (allocsize < kalloc_max) {
	    spin_lock(&fl->lock);
	    if ((addr = fl->head) != 0) {
		/* Pop: the first word of a free block links to the next one. */
		fl->head = *(vm_offset_t *)addr;
#ifdef DEBUG
		fl->count--;
#endif
		spin_unlock(&fl->lock);
	    }
	    else {
		spin_unlock(&fl->lock);
		addr = kget_space(allocsize);
	    }
	}
	else {
	    if (vm_allocate(mach_task_self(), &addr, allocsize, TRUE)
			!= KERN_SUCCESS)
		addr = 0;
	}
	return (void *) addr;
}

/*
 * Release a block obtained from kalloc.
 *
 * SIZE is used to recompute the bucket exactly as kalloc does, so it
 * must be the size that was originally requested.  Small blocks are
 * pushed onto their free list; large ones are handed to vm_deallocate.
 */
void
kfree( void *data,
	vm_size_t size)
{
	register vm_size_t freesize;
	register struct free_list *fl;

	freesize = size;
	if (size < kalloc_max) {
	    freesize = MINSIZE;
	    fl = kfree_list;
	    while (freesize < size) {
		freesize <<= 1;
		fl++;
	    }
	}

	if (freesize < kalloc_max) {
	    spin_lock(&fl->lock);
	    /* Push: reuse the block's first word as the list link. */
	    *(vm_offset_t *)data = fl->head;
	    fl->head = (vm_offset_t) data;
#ifdef DEBUG
	    fl->count++;
#endif
	    spin_unlock(&fl->lock);
	}
	else {
	    (void) vm_deallocate(mach_task_self(), (vm_offset_t)data, freesize);
	}
}

/* malloc is simply kalloc. */
void *malloc(vm_size_t size)
{
	return (void *)kalloc(size);
}

/*
 * free does nothing: the block's size is not known here, so it cannot
 * be passed to kfree.  Memory obtained through malloc is never reused.
 */
void free(void *addr)
{
	/* Just ignore harmless attempts at cleanliness. */
	/* panic("free not implemented"); */
}

/* Empty malloc fork hooks: this allocator has nothing to do in them. */
void malloc_fork_prepare()
{
}

void malloc_fork_parent()
{
}

void malloc_fork_child()
{
}
diff --git a/serverboot/load.c b/serverboot/load.c new file mode 100644 index 00000000..fc16baf1 --- /dev/null +++ b/serverboot/load.c @@ -0,0 +1,554 @@
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#include <assert.h> +#include <mach/mach_interface.h> +#include <varargs.h> +#include "mach-exec.h" +#include "../boot/boot_script.h" + +#include <file_io.h> + + +boolean_t load_protect_text = TRUE; + + +struct stuff +{ + struct file *fp; + task_t user_task; + + /* uncompressed image */ + vm_offset_t image_addr; + vm_size_t image_size; + + vm_offset_t aout_symtab_ofs; + vm_size_t aout_symtab_size; + vm_offset_t aout_strtab_ofs; + vm_size_t aout_strtab_size; +}; + +char *set_regs( + mach_port_t user_task, + mach_port_t user_thread, + struct exec_info *info, + int arg_size); + +static void read_symtab_from_file( + struct file *fp, + mach_port_t host_port, + task_t task, + char * symtab_name, + struct stuff *st); + +/* Callback functions for reading the executable file. 
*/ +static int prog_read(void *handle, vm_offset_t file_ofs, void *buf, vm_size_t size, + vm_size_t *out_actual) +{ + struct stuff *st = handle; + vm_size_t resid; + int result; + + result = read_file(st->fp, file_ofs, buf, size, &resid); + if (result) + return result; + *out_actual = size - resid; + return 0; +} + +static int prog_read_exec(void *handle, vm_offset_t file_ofs, vm_size_t file_size, + vm_offset_t mem_addr, vm_size_t mem_size, + exec_sectype_t sec_type) +{ + struct stuff *st = handle; + vm_offset_t page_start = trunc_page(mem_addr); + vm_offset_t page_end = round_page(mem_addr + mem_size); + vm_prot_t mem_prot = sec_type & EXEC_SECTYPE_PROT_MASK; + vm_offset_t area_start; + int result; + + if (sec_type & EXEC_SECTYPE_AOUT_SYMTAB) + { + st->aout_symtab_ofs = file_ofs; + st->aout_symtab_size = file_size; + } + if (sec_type & EXEC_SECTYPE_AOUT_STRTAB) + { + st->aout_strtab_ofs = file_ofs; + st->aout_strtab_size = file_size; + } + + if (!(sec_type & EXEC_SECTYPE_ALLOC)) + return 0; + + assert(mem_size > 0); + assert(mem_size > file_size); + + /* + printf("section %08x-%08x-%08x prot %08x (%08x-%08x)\n", + mem_addr, mem_addr+file_size, mem_addr+mem_size, mem_prot, page_start, page_end); + */ + + result = vm_allocate(mach_task_self(), &area_start, page_end - page_start, TRUE); + if (result) return (result); + + if (file_size > 0) + { + vm_size_t resid; + + result = read_file(st->fp, file_ofs, area_start + (mem_addr - page_start), + file_size, &resid); + if (result) return result; + if (resid) return EX_CORRUPT; + } + + if (mem_size > file_size) + { + bzero((void*)area_start + (mem_addr + file_size - page_start), + mem_size - file_size); + } + + result = vm_allocate(st->user_task, &page_start, page_end - page_start, FALSE); + if (result) return (result); + assert(page_start == trunc_page(mem_addr)); + + result = vm_write(st->user_task, page_start, area_start, page_end - page_start); + if (result) return (result); + + result = vm_deallocate(mach_task_self(), 
area_start, page_end - page_start); + if (result) return (result); + + /* + * Protect the segment. + */ + if (load_protect_text && (mem_prot != VM_PROT_ALL)) { + result = vm_protect(st->user_task, page_start, page_end - page_start, + FALSE, mem_prot); + if (result) return (result); + } + + return 0; +} + +/* Callback functions for reading the uncompressed image. */ +static int image_read(void *handle, vm_offset_t file_ofs, void *buf, + vm_size_t size, vm_size_t *out_actual) +{ + struct stuff *st = handle; + bcopy(st->image_addr + file_ofs, buf, size); + *out_actual = size; + return 0; +} + +static int image_read_exec(void *handle, vm_offset_t file_ofs, + vm_size_t file_size, vm_offset_t mem_addr, + vm_size_t mem_size, exec_sectype_t sec_type) +{ + struct stuff *st = handle; + vm_offset_t page_start = trunc_page(mem_addr); + vm_offset_t page_end = round_page(mem_addr + mem_size); + vm_prot_t mem_prot = sec_type & EXEC_SECTYPE_PROT_MASK; + vm_offset_t area_start; + int result; + + if (sec_type & EXEC_SECTYPE_AOUT_SYMTAB) + { + st->aout_symtab_ofs = file_ofs; + st->aout_symtab_size = file_size; + } + if (sec_type & EXEC_SECTYPE_AOUT_STRTAB) + { + st->aout_strtab_ofs = file_ofs; + st->aout_strtab_size = file_size; + } + + if (!(sec_type & EXEC_SECTYPE_ALLOC)) + return 0; + + assert(mem_size > 0); + assert(mem_size > file_size); + + /* + printf("section %08x-%08x-%08x prot %08x (%08x-%08x)\n", + mem_addr, mem_addr+file_size, mem_addr+mem_size, mem_prot, page_start, page_end); + */ + + result = vm_allocate(mach_task_self(), &area_start, page_end - page_start, TRUE); + if (result) return (result); + + if (file_size > 0) + { + bcopy(st->image_addr + file_ofs, area_start + (mem_addr - page_start), + file_size); + } + + if (mem_size > file_size) + { + bzero((void*)area_start + (mem_addr + file_size - page_start), + mem_size - file_size); + } + + result = vm_allocate(st->user_task, &page_start, page_end - page_start, FALSE); + if (result) return (result); + assert(page_start 
== trunc_page(mem_addr)); + + result = vm_write(st->user_task, page_start, area_start, page_end - page_start); + if (result) return (result); + + result = vm_deallocate(mach_task_self(), area_start, page_end - page_start); + if (result) return (result); + + /* + * Protect the segment. + */ + if (load_protect_text && (mem_prot != VM_PROT_ALL)) { + result = vm_protect(st->user_task, page_start, page_end - page_start, + FALSE, mem_prot); + if (result) return (result); + } + + return 0; +} + +mach_port_t boot_script_read_file (const char *file) +{ return MACH_PORT_NULL; } /* XXX */ + +int +boot_script_exec_cmd (task_t user_task, + char *file_name, + int arg_count, char **argv, + char *argstrings, int argslen) +{ + extern mach_port_t bootstrap_master_device_port, bootstrap_master_host_port; + extern char *root_name; + extern char **environ; + int envc, env_len; + + int arg_len = argslen; + char *arg_pos; + + kern_return_t result; + thread_t user_thread; + struct file file; + char namebuf[MAXPATHLEN+1]; + + struct stuff st; + struct exec_info info; + + extern char * strbuild(); + + if (strcmp (file_name, "/dev/")) + (void) strbuild(namebuf, "/dev/", root_name, "/", file_name, + (char *)0); + else + strcpy (namebuf, file_name); + + /* + * Open the file + */ + bzero((char *)&file, sizeof(file)); + + result = open_file(bootstrap_master_device_port, namebuf, &file); + if (result != 0) { + panic ("%s: %s", namebuf, strerror (result)); + } + + env_len = 0; + for (envc = 0; environ[envc]; ++envc) + env_len += strlen (environ[envc]) + 1; + + /* + * Add space for: + * arg_count + * pointers to arguments + * trailing 0 pointer + * environment variables + * trailing 0 pointer + * and align to integer boundary + */ + arg_len += sizeof(integer_t) + (envc + 2 + arg_count) * sizeof(char *); + arg_len += env_len; + arg_len = (arg_len + (sizeof(integer_t) - 1)) & ~(sizeof(integer_t)-1); + + /* + * We refrain from checking IEXEC bits to make + * things a little easier when things went 
bad. + * Say you have ftp(1) but chmod(1) is gone. + */ + if (!file_is_regular(&file)) + panic("boot_load_program: %s is not a regular file", namebuf); + + /* + * Load the executable file. + */ + st.fp = &file; + st.user_task = user_task; + st.aout_symtab_size = 0; + st.aout_strtab_size = 0; + result = exec_load(prog_read, prog_read_exec, &st, &info); +#ifdef GZIP + if (result) + { + /* + * It might be gzip file. + */ + int err; + extern int + serverboot_gunzip(struct file *, void **, size_t *); + + err = serverboot_gunzip(st.fp, + &(st.image_addr), + &(st.image_size)); + if (!err) + { + result = exec_load(image_read, + image_read_exec, + &st, + &info); + vm_deallocate(mach_task_self(), + st.image_addr, + st.image_size); + } + } +#endif GZIP +#ifdef BZIP2 + if (result) + { + /* + * It might be bzip2 file. + */ + int err; + extern int + serverboot_bunzip2(struct file *, void **, size_t *); + + err = serverboot_bunzip2(st.fp, + &(st.image_addr), + &(st.image_size)); + if (!err) + { + result = exec_load(image_read, + image_read_exec, + &st, + &info); + vm_deallocate(mach_task_self(), + st.image_addr, + st.image_size); + } + } +#endif BZIP2 + if (result) + panic ("cannot load %s: %s", namebuf, strerror (result)); +#if 0 + printf("(serverboot): loaded %s; entrypoint %08x\n", namebuf, info.entry); +#endif + + /* + * Set up the stack and user registers. + */ + result = thread_create (user_task, &user_thread); + if (result) + panic ("can't create user thread for %s: %s", namebuf, + strerror (result)); + arg_pos = set_regs(user_task, user_thread, &info, arg_len); + + /* + * Read symbols from the executable file. + */ +#if 0 + printf("(serverboot): loading symbols from %s\n", namebuf); + read_symtab_from_file(&file, bootstrap_master_host_port, user_task, namebuf, &st); +#endif + + /* + * Copy out the arguments. 
+ */ + { + vm_offset_t u_arg_start; + /* user start of argument list block */ + vm_offset_t k_arg_start; + /* kernel start of argument list block */ + vm_offset_t u_arg_page_start; + /* user start of args, page-aligned */ + vm_size_t arg_page_size; + /* page_aligned size of args */ + vm_offset_t k_arg_page_start; + /* kernel start of args, page-aligned */ + + register + char ** k_ap; /* kernel arglist address */ + char * u_cp; /* user argument string address */ + register + char * k_cp; /* kernel argument string address */ + register + int i; + + /* + * Get address of argument list in user space + */ + u_arg_start = (vm_offset_t)arg_pos; + + /* + * Round to page boundaries, and allocate kernel copy + */ + u_arg_page_start = trunc_page(u_arg_start); + arg_page_size = (vm_size_t)(round_page(u_arg_start + arg_len) + - u_arg_page_start); + + result = vm_allocate(mach_task_self(), + &k_arg_page_start, + (vm_size_t)arg_page_size, + TRUE); + if (result) + panic("boot_load_program: arg size"); + + /* + * Set up addresses corresponding to user pointers + * in the kernel block + */ + k_arg_start = k_arg_page_start + (u_arg_start - u_arg_page_start); + + k_ap = (char **)k_arg_start; + + /* + * Start the strings after the arg-count and pointers + */ + u_cp = (char *)u_arg_start + arg_count * sizeof(char *) + + envc * sizeof(char *) + + 2 * sizeof(char *) + + sizeof(integer_t); + k_cp = (char *)k_arg_start + arg_count * sizeof(char *) + + envc * sizeof(char *) + + 2 * sizeof(char *) + + sizeof(integer_t); + + /* + * first the argument count + */ + *k_ap++ = (char *)arg_count; + + /* + * Then the strings and string pointers for each argument + */ + for (i = 0; i < arg_count; i++) + *k_ap++ = argv[i] - argstrings + u_cp; + *k_ap++ = (char *)0; + bcopy (argstrings, k_cp, argslen); + k_cp += argslen; + u_cp += argslen; + + for (i = 0; i < envc; i++) + *k_ap++ = environ[i] - environ[0] + u_cp; + *k_ap = (char *)0; + bcopy (environ[0], k_cp, env_len); + + /* + * Now write all of this 
to user space. + */ + (void) vm_write(user_task, + u_arg_page_start, + k_arg_page_start, + arg_page_size); + + (void) vm_deallocate(mach_task_self(), + k_arg_page_start, + arg_page_size); + } + + /* + * Close the file. + */ + close_file(&file); + + /* Resume the thread. */ + thread_resume (user_thread); + mach_port_deallocate (mach_task_self (), user_thread); + + return (0); +} + +/* + * Load symbols from file into kernel debugger. + */ +static void read_symtab_from_file( + struct file *fp, + mach_port_t host_port, + task_t task, + char * symtab_name, + struct stuff *st) +{ + vm_size_t resid; + kern_return_t result; + vm_size_t table_size; + vm_offset_t symtab; + +#if 0 + + if (!st->aout_symtab_size || !st->aout_strtab_size) + return; + + /* + * Allocate space for the symbol table. + */ + table_size = sizeof(vm_size_t) + + st->aout_symtab_size + + st->aout_strtab_size; + result= vm_allocate(mach_task_self(), + &symtab, + table_size, + TRUE); + if (result) { + printf("[ error %d allocating space for %s symbol table ]\n", + result, symtab_name); + return; + } + + /* + * Set the symbol table length word, + * then read in the symbol table and string table. + */ + *(vm_size_t*)symtab = st->aout_symtab_size; + result = read_file(fp, st->aout_symtab_ofs, + symtab + sizeof(vm_size_t), + st->aout_symtab_size + st->aout_strtab_size, + &resid); + if (result || resid) { + printf("[ no valid symbol table present for %s ]\n", + symtab_name); + } + else { + /* + * Load the symbols into the kernel. + */ + result = host_load_symbol_table( + host_port, + task, + symtab_name, + symtab, + table_size); + } + (void) vm_deallocate(mach_task_self(), symtab, table_size); +#endif +} diff --git a/serverboot/mach-exec.h b/serverboot/mach-exec.h new file mode 100644 index 00000000..94b234b0 --- /dev/null +++ b/serverboot/mach-exec.h @@ -0,0 +1,130 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University + * All Rights Reserved. 
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
 * School of Computer Science
 * Carnegie Mellon University
 * Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#ifndef _MACH_EXEC_H_
#define _MACH_EXEC_H_

#include <mach/machine/vm_types.h>
#include <mach/vm_prot.h>

/* XXX */
/* Executable formats reported in exec_info.format. */
typedef enum
{
	EXEC_ELF = 1,
	EXEC_AOUT = 2,
} exec_format_t;

/* Filled in by exec_load to describe the executable it loaded. */
typedef struct exec_info
{
	/* Format of executable loaded - see above. */
	exec_format_t format;

	/* Program entrypoint. */
	vm_offset_t entry;

	/* Initial data pointer - only some architectures use this. */
	vm_offset_t init_dp;

	/* (ELF) Address of interpreter string for loading shared libraries, null if none. */
	vm_offset_t interp;

} exec_info_t;

/*
 * Section type passed to the read_exec() callback: the low bits hold
 * the VM protection wanted for the section, the higher bits are flags.
 */
typedef int exec_sectype_t;
/* Protection bits; EXEC_SECTYPE_PROT_MASK extracts them. */
#define EXEC_SECTYPE_READ VM_PROT_READ
#define EXEC_SECTYPE_WRITE VM_PROT_WRITE
#define EXEC_SECTYPE_EXECUTE VM_PROT_EXECUTE
#define EXEC_SECTYPE_PROT_MASK VM_PROT_ALL
/* Set for code, data and bss: the section occupies memory in the image. */
#define EXEC_SECTYPE_ALLOC ((exec_sectype_t)0x000100)
/* Also set for code and data: the contents come from the file. */
#define EXEC_SECTYPE_LOAD ((exec_sectype_t)0x000200)
/* Kinds of section that are not normally part of the process image. */
#define EXEC_SECTYPE_DEBUG ((exec_sectype_t)0x010000)
#define EXEC_SECTYPE_AOUT_SYMTAB ((exec_sectype_t)0x020000)
#define EXEC_SECTYPE_AOUT_STRTAB ((exec_sectype_t)0x040000)

/*
 * read() callback: fetch `size' bytes at `file_ofs' into `buf' and
 * store the number of bytes obtained in `*out_actual'.
 */
typedef int exec_read_func_t(void *handle, vm_offset_t file_ofs,
			     void *buf, vm_size_t size,
			     vm_size_t *out_actual);

/* read_exec() callback: process one section; see exec_load below. */
typedef int exec_read_exec_func_t(void *handle,
				  vm_offset_t file_ofs, vm_size_t file_size,
				  vm_offset_t mem_addr, vm_size_t mem_size,
				  exec_sectype_t section_type);

/*
 * Routines exported from libmach_exec.a
 */

/* Generic function to interpret an executable "file"
   and "load" it into "memory".
   Doesn't really know about files, loading, or memory;
   all file I/O and destination memory accesses
   go through provided functions.
   Thus, this is a very generic loading mechanism.

   The read() function is used to read metadata from the file
   into the local address space.

   The read_exec() function is used to load the actual sections.
   It is used for all kinds of sections - code, data, bss, debugging data.
   The 'section_type' parameter specifies what type of section is being loaded.

   For code, data, and bss, the EXEC_SECTYPE_ALLOC flag will be set.
   For code and data (i.e. stuff that's actually loaded from the file),
   EXEC_SECTYPE_LOAD will also be set.
   The EXEC_SECTYPE_PROT_MASK contains the intended access permissions
   for the section.
   'file_size' may be less than 'mem_size';
   the remaining data must be zero-filled.
   'mem_size' is always greater than zero, but 'file_size' may be zero
   (e.g. in the case of a bss section).
   No two read_exec() calls for one executable
   will load data into the same virtual memory page,
   although they may load from arbitrary (possibly overlapping) file positions.

   For sections that aren't normally loaded into the process image
   (e.g. debug sections), EXEC_SECTYPE_ALLOC isn't set,
   but some other appropriate flag is set to indicate the type of section.

   The 'handle' is an opaque pointer which is simply passed on
   to the read() and read_exec() functions.

   On return, the specified info structure is filled in
   with information about the loaded executable.
*/
int exec_load(exec_read_func_t *read, exec_read_exec_func_t *read_exec,
	      void *handle, exec_info_t *out_info);

/*
 * Error codes
 */

#define EX_NOT_EXECUTABLE 6000 /* not a recognized executable format */
#define EX_WRONG_ARCH 6001 /* valid executable, but wrong arch. */
#define EX_CORRUPT 6002 /* recognized executable, but mangled */
#define EX_BAD_LAYOUT 6003 /* something wrong with the memory or file image layout */


#endif /* _MACH_EXEC_H_ */
diff --git a/serverboot/minix_ffs_compat.c b/serverboot/minix_ffs_compat.c new file mode 100644 index 00000000..7d493520 --- /dev/null +++ b/serverboot/minix_ffs_compat.c @@ -0,0 +1,62 @@
/*
 * BSD FFS like functions used to ease porting bootstrap to MINIX fs
 * Copyright (C) 1994 Csizmazia Balazs, University ELTE, Hungary
 *
 * This file is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <device/device_types.h> +#include <device/device.h> + +#include <mach/mach_traps.h> +#include <mach/mach_interface.h> + +#include <file_io.h> + +#define MINIX_BLOCK_SIZE 1024 + +int minix_ino2blk (struct minix_super_block *fs, int ino) +{ + int blk; + + blk=0 /* it's Mach */+2 /* boot+superblock */ + fs->s_imap_blocks + + fs->s_zmap_blocks + (ino-1)/MINIX_INODES_PER_BLOCK; + return blk; +} + +int minix_fsbtodb (struct minix_super_block *fs, int b) +{ + return (b * MINIX_BLOCK_SIZE) / DEV_BSIZE; +} + +int minix_itoo (struct minix_super_block *fs, int ino) +{ + return (ino - 1) % MINIX_INODES_PER_BLOCK; +} + +int minix_blkoff (struct minix_super_block * fs, vm_offset_t offset) +{ + return offset % MINIX_BLOCK_SIZE; +} + +int minix_lblkno (struct minix_super_block * fs, vm_offset_t offset) +{ + return offset / MINIX_BLOCK_SIZE; +} + +int minix_blksize (struct minix_super_block *fs, struct file *fp, minix_daddr_t file_block) +{ + return MINIX_BLOCK_SIZE; +} diff --git a/serverboot/minix_ffs_compat.h b/serverboot/minix_ffs_compat.h new file mode 100644 index 00000000..cc038032 --- /dev/null +++ b/serverboot/minix_ffs_compat.h @@ -0,0 +1,43 @@ +/* + * BSD FFS like declarations used to ease porting bootstrap to MINIX fs + * Copyright (C) 1994 Csizmazia Balazs, University ELTE, Hungary + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * MINIX_BLOCK_SIZE is not defined in this header; whoever uses
 * MINIX_SBSIZE must have it defined at the point of use.
 */
#define MINIX_SBSIZE MINIX_BLOCK_SIZE /* Size of superblock */
#define MINIX_SBLOCK ((minix_daddr_t) 2) /* Location of superblock */

/* MINIX counterparts of the FFS NDADDR/NIADDR constants. */
#define MINIX_NDADDR 7
#define MINIX_NIADDR 2

#define MINIX_MAXNAMLEN 14

#define MINIX_ROOTINO 1 /* MINIX ROOT INODE */

/* Constant: the fs argument is ignored. */
#define MINIX_NINDIR(fs) 512 /* DISK_ADDRESSES_PER_BLOCKS */

/* Octal mode constants; presumably the usual file-type mask and bits. */
#define IFMT 00170000
#define IFREG 0100000
#define IFDIR 0040000
#define ISVTX 0001000

/* Short names for the MINIX members of the union `u' in struct file. */
#define f_fs u.minix.minix_fs
#define i_ic u.minix.minix_ic
#define f_nindir u.minix.minix_nindir
#define f_blk u.minix.minix_blk
#define f_blksize u.minix.minix_blksize
#define f_blkno u.minix.minix_blkno

diff --git a/serverboot/minix_file_io.c b/serverboot/minix_file_io.c new file mode 100644 index 00000000..17beb18c --- /dev/null +++ b/serverboot/minix_file_io.c @@ -0,0 +1,851 @@
/*
 * Mach Operating System
 * Copyright (c) 1991,1990 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * Stand-alone file reading package. + */ + +#include <device/device_types.h> +#include <device/device.h> + +#include <mach/mach_traps.h> +#include <mach/mach_interface.h> + +#include "file_io.h" +#include "minix_ffs_compat.h" +#include "minix_fs.h" + +void minix_close_file(); /* forward */ + +#define MINIX_NAME_LEN 14 +#define MINIX_BLOCK_SIZE 1024 + +/* + * Free file buffers, but don't close file. + */ +static void +free_file_buffers(fp) + register struct file *fp; +{ + register int level; + + /* + * Free the indirect blocks + */ + for (level = 0; level < MINIX_NIADDR; level++) { + if (fp->f_blk[level] != 0) { + (void) vm_deallocate(mach_task_self(), + fp->f_blk[level], + fp->f_blksize[level]); + fp->f_blk[level] = 0; + } + fp->f_blkno[level] = -1; + } + + /* + * Free the data block + */ + if (fp->f_buf != 0) { + (void) vm_deallocate(mach_task_self(), + fp->f_buf, + fp->f_buf_size); + fp->f_buf = 0; + } + fp->f_buf_blkno = -1; +} + +/* + * Read a new inode into a file structure. 
+ */ +static int +read_inode(inumber, fp) + ino_t inumber; + register struct file *fp; +{ + vm_offset_t buf; + mach_msg_type_number_t buf_size; + register + struct minix_super_block *fs; + minix_daddr_t disk_block; + kern_return_t rc; + + fs = fp->f_fs; + disk_block = minix_ino2blk(fs, inumber); + + rc = device_read(fp->f_dev, + 0, + (recnum_t) minix_fsbtodb(fp->f_fs, disk_block), + (int) MINIX_BLOCK_SIZE, + (char **)&buf, + &buf_size); + if (rc != KERN_SUCCESS) + return (rc); + + { + register struct minix_inode *dp; + + dp = (struct minix_inode *)buf; + dp += minix_itoo(fs, inumber); + fp->i_ic = *dp; + fp->f_size = dp->i_size; + } + + (void) vm_deallocate(mach_task_self(), buf, buf_size); + + /* + * Clear out the old buffers + */ + free_file_buffers(fp); + + return (0); +} + +/* + * Given an offset in a file, find the disk block number that + * contains that block. + */ +static int +block_map(fp, file_block, disk_block_p) + struct file *fp; + minix_daddr_t file_block; + minix_daddr_t *disk_block_p; /* out */ +{ + int level; + int idx; + minix_daddr_t ind_block_num; + kern_return_t rc; + + vm_offset_t olddata[MINIX_NIADDR+1]; + vm_size_t oldsize[MINIX_NIADDR+1]; + + /* + * Index structure of an inode: + * + * i_db[0..NDADDR-1] hold block numbers for blocks + * 0..NDADDR-1 + * + * i_ib[0] index block 0 is the single indirect + * block + * holds block numbers for blocks + * NDADDR .. NDADDR + NINDIR(fs)-1 + * + * i_ib[1] index block 1 is the double indirect + * block + * holds block numbers for INDEX blocks + * for blocks + * NDADDR + NINDIR(fs) .. + * NDADDR + NINDIR(fs) + NINDIR(fs)**2 - 1 + * + * i_ib[2] index block 2 is the triple indirect + * block + * holds block numbers for double-indirect + * blocks for blocks + * NDADDR + NINDIR(fs) + NINDIR(fs)**2 .. + * NDADDR + NINDIR(fs) + NINDIR(fs)**2 + * + NINDIR(fs)**3 - 1 + */ + + mutex_lock(&fp->f_lock); + + if (file_block < MINIX_NDADDR) { + /* Direct block. 
*/ + *disk_block_p = fp->i_ic.i_zone[file_block]; + mutex_unlock(&fp->f_lock); + return (0); + } + + file_block -= MINIX_NDADDR; + + /* + * nindir[0] = NINDIR + * nindir[1] = NINDIR**2 + * nindir[2] = NINDIR**3 + * etc + */ + for (level = 0; level < MINIX_NIADDR; level++) { + if (file_block < fp->f_nindir[level]) + break; + file_block -= fp->f_nindir[level]; + } + if (level == MINIX_NIADDR) { + /* Block number too high */ + mutex_unlock(&fp->f_lock); + return (FS_NOT_IN_FILE); + } + + ind_block_num = fp->i_ic.i_zone[level + MINIX_NDADDR]; + + /* + * Initialize array of blocks to free. + */ + for (idx = 0; idx < MINIX_NIADDR; idx++) + oldsize[idx] = 0; + + for (; level >= 0; level--) { + + vm_offset_t data; + mach_msg_type_number_t size; + + if (ind_block_num == 0) + break; + + if (fp->f_blkno[level] == ind_block_num) { + /* + * Cache hit. Just pick up the data. + */ + + data = fp->f_blk[level]; + } + else { + /* + * Drop our lock while doing the read. + * (The f_dev and f_fs fields don`t change.) + */ + mutex_unlock(&fp->f_lock); + + rc = device_read(fp->f_dev, + 0, + (recnum_t) minix_fsbtodb(fp->f_fs, ind_block_num), + MINIX_BLOCK_SIZE, + (char **)&data, + &size); + if (rc != KERN_SUCCESS) + return (rc); + + /* + * See if we can cache the data. Need a write lock to + * do this. While we hold the write lock, we can`t do + * *anything* which might block for memory. Otherwise + * a non-privileged thread might deadlock with the + * privileged threads. We can`t block while taking the + * write lock. Otherwise a non-privileged thread + * blocked in the vm_deallocate (while holding a read + * lock) will block a privileged thread. For the same + * reason, we can`t take a read lock and then use + * lock_read_to_write. 
+ */ + + mutex_lock(&fp->f_lock); + + olddata[level] = fp->f_blk[level]; + oldsize[level] = fp->f_blksize[level]; + + fp->f_blkno[level] = ind_block_num; + fp->f_blk[level] = data; + fp->f_blksize[level] = size; + + /* + * Return to holding a read lock, and + * dispose of old data. + */ + + } + + if (level > 0) { + idx = file_block / fp->f_nindir[level-1]; + file_block %= fp->f_nindir[level-1]; + } + else + idx = file_block; + + ind_block_num = ((minix_daddr_t *)data)[idx]; + } + + mutex_unlock(&fp->f_lock); + + /* + * After unlocking the file, free any blocks that + * we need to free. + */ + for (idx = 0; idx < MINIX_NIADDR; idx++) + if (oldsize[idx] != 0) + (void) vm_deallocate(mach_task_self(), + olddata[idx], + oldsize[idx]); + + *disk_block_p = ind_block_num; + return (0); +} + +/* + * Read a portion of a file into an internal buffer. Return + * the location in the buffer and the amount in the buffer. + */ +static int +buf_read_file(fp, offset, buf_p, size_p) + register struct file *fp; + vm_offset_t offset; + vm_offset_t *buf_p; /* out */ + vm_size_t *size_p; /* out */ +{ + register + struct minix_super_block *fs; + vm_offset_t off; + register minix_daddr_t file_block; + minix_daddr_t disk_block; + int rc; + vm_offset_t block_size; + + if (offset >= fp->i_ic.i_size) + return (FS_NOT_IN_FILE); + + fs = fp->f_fs; + + off = minix_blkoff(fs, offset); + file_block = minix_lblkno(fs, offset); + block_size = minix_blksize(fs, fp, file_block); + + if (((daddr_t) file_block) != fp->f_buf_blkno) { + rc = block_map(fp, file_block, &disk_block); + if (rc != 0) + return (rc); + + if (fp->f_buf) + (void)vm_deallocate(mach_task_self(), + fp->f_buf, + fp->f_buf_size); + + if (disk_block == 0) { + (void)vm_allocate(mach_task_self(), + &fp->f_buf, + block_size, + TRUE); + fp->f_buf_size = block_size; + } + else { + rc = device_read(fp->f_dev, + 0, + (recnum_t) minix_fsbtodb(fs, disk_block), + (int) block_size, + (char **) &fp->f_buf, + (mach_msg_type_number_t 
*)&fp->f_buf_size); + } + if (rc) + return (rc); + + fp->f_buf_blkno = (daddr_t) file_block; + } + + /* + * Return address of byte in buffer corresponding to + * offset, and size of remainder of buffer after that + * byte. + */ + *buf_p = fp->f_buf + off; + *size_p = block_size - off; + + /* + * But truncate buffer at end of file. + */ + if (*size_p > fp->i_ic.i_size - offset) + *size_p = fp->i_ic.i_size - offset; + + return (0); +} + +/* + * Search a directory for a name and return its + * i_number. + */ +static int +search_directory(name, fp, inumber_p) + char * name; + register struct file *fp; + ino_t *inumber_p; /* out */ +{ + vm_offset_t buf; + vm_size_t buf_size; + vm_offset_t offset; + register struct minix_directory_entry *dp; + int length; + kern_return_t rc; + char tmp_name[15]; + + length = strlen(name); + + offset = 0; + while (offset < fp->i_ic.i_size) { + rc = buf_read_file(fp, offset, &buf, &buf_size); + if (rc != KERN_SUCCESS) + return (rc); + + dp = (struct minix_directory_entry *)buf; + if (dp->inode != 0) { + strncpy (tmp_name, dp->name, MINIX_NAME_LEN /* XXX it's 14 */); + tmp_name[MINIX_NAME_LEN] = '\0'; + if (strlen(tmp_name) == length && + !strcmp(name, tmp_name)) + { + /* found entry */ + *inumber_p = dp->inode; + return (0); + } + } + offset += 16 /* MINIX dir. entry length - MINIX FS Ver. 1. 
*/; + } + return (FS_NO_ENTRY); +} + +static int +read_fs(dev, fsp) + mach_port_t dev; + struct minix_super_block **fsp; +{ + register + struct minix_super_block *fs; + vm_offset_t buf; + mach_msg_type_number_t buf_size; + int error; + + /* + * Read the super block + */ + error = device_read(dev, 0, (recnum_t) MINIX_SBLOCK, MINIX_SBSIZE, + (char **) &buf, &buf_size); + if (error) + return (error); + + /* + * Check the superblock + */ + fs = (struct minix_super_block *)buf; + if (fs->s_magic != MINIX_SUPER_MAGIC) { + (void) vm_deallocate(mach_task_self(), buf, buf_size); + return (FS_INVALID_FS); + } + + + *fsp = fs; + + return 0; +} + +static int +mount_fs(fp) + register struct file *fp; +{ + register struct minix_super_block *fs; + int error; + + error = read_fs(fp->f_dev, &fp->f_fs); + if (error) + return (error); + + fs = fp->f_fs; + + /* + * Calculate indirect block levels. + */ + { + register int mult; + register int level; + + mult = 1; + for (level = 0; level < MINIX_NIADDR; level++) { + mult *= MINIX_NINDIR(fs); + fp->f_nindir[level] = mult; + } + } + + return (0); +} + +static void +unmount_fs(fp) + register struct file *fp; +{ + if (file_is_structured(fp)) { + (void) vm_deallocate(mach_task_self(), + (vm_offset_t) fp->f_fs, + MINIX_SBSIZE); + fp->f_fs = 0; + } +} + +/* + * Open a file. + */ +int +minix_open_file(master_device_port, path, fp) + mach_port_t master_device_port; + char * path; + struct file *fp; +{ +#define RETURN(code) { rc = (code); goto exit; } + + register char *cp, *component; + register int c; /* char */ + register int rc; + ino_t inumber, parent_inumber; + int nlinks = 0; + + char namebuf[MAXPATHLEN+1]; + + if (path == 0 || *path == '\0') { + return FS_NO_ENTRY; + } + + /* + * Copy name into buffer to allow modifying it. + */ + strcpy(namebuf, path); + + /* + * Look for '/dev/xxx' at start of path, for + * root device. 
+ */ + if (!strprefix(namebuf, "/dev/")) { + printf("no device name\n"); + return FS_NO_ENTRY; + } + + cp = namebuf + 5; /* device */ + component = cp; + while ((c = *cp) != '\0' && c != '/') { + cp++; + } + *cp = '\0'; + + bzero (fp, sizeof (struct file)); + + rc = device_open(master_device_port, + D_READ|D_WRITE, + component, + &fp->f_dev); + if (rc) + return rc; + + if (c == 0) { + fp->f_fs = 0; + goto out_ok; + } + + *cp = c; + + rc = mount_fs(fp); + if (rc) + return rc; + + inumber = (ino_t) MINIX_ROOTINO; + if ((rc = read_inode(inumber, fp)) != 0) { + printf("can't read root inode\n"); + goto exit; + } + + while (*cp) { + + /* + * Check that current node is a directory. + */ + if ((fp->i_ic.i_mode & IFMT) != IFDIR) + RETURN (FS_NOT_DIRECTORY); + + /* + * Remove extra separators + */ + while (*cp == '/') + cp++; + + /* + * Get next component of path name. + */ + component = cp; + { + register int len = 0; + + while ((c = *cp) != '\0' && c != '/') { + if (len++ > MINIX_MAXNAMLEN) + RETURN (FS_NAME_TOO_LONG); + if (c & 0200) + RETURN (FS_INVALID_PARAMETER); + cp++; + } + *cp = 0; + } + + /* + * Look up component in current directory. + * Save directory inumber in case we find a + * symbolic link. + */ + parent_inumber = inumber; + rc = search_directory(component, fp, &inumber); + if (rc) { + printf("%s: not found\n", path); + goto exit; + } + *cp = c; + + /* + * Open next component. + */ + if ((rc = read_inode(inumber, fp)) != 0) + goto exit; + + /* + * Check for symbolic link. + */ + } + + /* + * Found terminal component. + */ + out_ok: + mutex_init(&fp->f_lock); + return 0; + + /* + * At error exit, close file to free storage. + */ + exit: + minix_close_file(fp); + return rc; +} + +/* + * Close file - free all storage used. + */ +void +minix_close_file(fp) + register struct file *fp; +{ + register int i; + + /* + * Free the disk super-block. + */ + unmount_fs(fp); + + /* + * Free the inode and data buffers. 
+ */ + free_file_buffers(fp); +} + +int +minix_file_is_directory(struct file *fp) +{ + return (fp->i_ic.i_mode & IFMT) == IFDIR; +} + +int +minix_file_is_regular(struct file *fp) +{ + return (fp->i_ic.i_mode & IFMT) == IFREG; +} + +/* + * Copy a portion of a file into kernel memory. + * Cross block boundaries when necessary. + */ +int +minix_read_file(fp, offset, start, size, resid) + register struct file *fp; + vm_offset_t offset; + vm_offset_t start; + vm_size_t size; + vm_size_t *resid; /* out */ +{ + int rc; + register vm_size_t csize; + vm_offset_t buf; + vm_size_t buf_size; + + while (size != 0) { + rc = buf_read_file(fp, offset, &buf, &buf_size); + if (rc) + return (rc); + + csize = size; + if (csize > buf_size) + csize = buf_size; + if (csize == 0) + break; + + bcopy((char *)buf, (char *)start, csize); + + offset += csize; + start += csize; + size -= csize; + } + if (resid) + *resid = size; + + return (0); +} + +/* simple utility: only works for 2^n */ +static int +log2(n) + register unsigned int n; +{ + register int i = 0; + + while ((n & 1) == 0) { + i++; + n >>= 1; + } + return i; +} + +/* + * Make an empty file_direct for a device. + */ +int +minix_open_file_direct(dev, fdp, is_structured) + mach_port_t dev; + register struct file_direct *fdp; + boolean_t is_structured; +{ + struct minix_super_block *fs; + int rc; + + if (!is_structured) { + fdp->fd_dev = dev; + fdp->fd_blocks = (daddr_t *) 0; + fdp->fd_bsize = vm_page_size; + fdp->fd_bshift = log2(vm_page_size); + fdp->fd_fsbtodb = 0; /* later */ + fdp->fd_size = 0; /* later */ + return 0; + } + + rc = read_fs(dev, &fs); + if (rc) + return rc; + + fdp->fd_dev = dev; + fdp->fd_blocks = (daddr_t *) 0; + fdp->fd_size = 0; + fdp->fd_bsize = MINIX_BLOCK_SIZE; + fdp->fd_bshift = log2(fdp->fd_bsize); + fdp->fd_fsbtodb = log2(fdp->fd_bsize / DEV_BSIZE); + + (void) vm_deallocate(mach_task_self(), + (vm_offset_t) fs, + MINIX_SBSIZE); + + return 0; +} + +/* + * Add blocks from a file to a file_direct. 
+ */ +int +minix_add_file_direct(fdp, fp) + register struct file_direct *fdp; + register struct file *fp; +{ + register struct minix_super_block *fs; + long num_blocks, i; + vm_offset_t buffer; + vm_size_t size; + int rc; + + /* the file must be on the same device */ + + if (fdp->fd_dev != fp->f_dev) + return FS_INVALID_FS; + + if (!file_is_structured(fp)) { + int result[DEV_GET_SIZE_COUNT]; + natural_t count; + + count = DEV_GET_SIZE_COUNT; + rc = device_get_status( fdp->fd_dev, DEV_GET_SIZE, + result, &count); + if (rc) + return rc; + fdp->fd_size = result[DEV_GET_SIZE_DEVICE_SIZE] >> fdp->fd_bshift; + fdp->fd_fsbtodb = log2(fdp->fd_bsize/result[DEV_GET_SIZE_RECORD_SIZE]); + return 0; + } + + /* it must hold a file system */ + + fs = fp->f_fs; +/* + if (fdp->fd_bsize != fs->fs_bsize || + fdp->fd_fsbtodb != fs->fs_fsbtodb) +*/ + if (fdp->fd_bsize != MINIX_BLOCK_SIZE) + return FS_INVALID_FS; + + /* calculate number of blocks in the file, ignoring fragments */ + + num_blocks = minix_lblkno(fs, fp->i_ic.i_size); + + /* allocate memory for a bigger array */ + + size = (num_blocks + fdp->fd_size) * sizeof(minix_daddr_t); + rc = vm_allocate(mach_task_self(), &buffer, size, TRUE); + if (rc != KERN_SUCCESS) + return rc; + + /* lookup new block addresses */ + + for (i = 0; i < num_blocks; i++) { + minix_daddr_t disk_block; + + rc = block_map(fp, (minix_daddr_t) i, &disk_block); + if (rc != 0) { + (void) vm_deallocate(mach_task_self(), buffer, size); + return rc; + } + + ((minix_daddr_t *) buffer)[fdp->fd_size + i] = disk_block; + } + + /* copy old addresses and install the new array */ + + if (fdp->fd_blocks != 0) { + bcopy((char *) fdp->fd_blocks, (char *) buffer, + fdp->fd_size * sizeof(minix_daddr_t)); + + (void) vm_deallocate(mach_task_self(), + (vm_offset_t) fdp->fd_blocks, + (vm_size_t) (fdp->fd_size * sizeof(minix_daddr_t))); + } + fdp->fd_blocks = (daddr_t *) buffer; + fdp->fd_size += num_blocks; + + /* deallocate cached blocks */ + + free_file_buffers(fp); + + 
return 0; +} + +int +minix_remove_file_direct(fdp) + struct file_direct *fdp; +{ + if (fdp->fd_blocks) + (void) vm_deallocate(mach_task_self(), + (vm_offset_t) fdp->fd_blocks, + (vm_size_t) (fdp->fd_size * sizeof(minix_daddr_t))); + fdp->fd_blocks = 0; /* sanity */ + /* xxx should lose a ref to fdp->fd_dev here (and elsewhere) xxx */ +} diff --git a/serverboot/minix_fs.h b/serverboot/minix_fs.h new file mode 100644 index 00000000..678f3a0d --- /dev/null +++ b/serverboot/minix_fs.h @@ -0,0 +1,107 @@ +/* + * minix_fs.h + * stolen (and slightly extended by csb) from the Linux distribution + * Copyright (C) 1994 Linus Torvalds + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _LINUX_MINIX_FS_H +#define _LINUX_MINIX_FS_H + +/* + * The minix filesystem constants/structures + */ + +/* + * Thanks to Kees J Bot for sending me the definitions of the new + * minix filesystem (aka V2) with bigger inodes and 32-bit block + * pointers. It's not actually implemented yet, but I'll look into + * it. + */ + +#define MINIX_ROOT_INO 1 + +/* Not the same as the bogus LINK_MAX in <linux/limits.h>. Oh well. 
*/ +#define MINIX_LINK_MAX 250 + +#define MINIX_I_MAP_SLOTS 8 +#define MINIX_Z_MAP_SLOTS 8 +#define MINIX_SUPER_MAGIC 0x137F /* original minix fs */ +#define MINIX_SUPER_MAGIC2 0x138F /* minix fs, 30 char names */ +#define NEW_MINIX_SUPER_MAGIC 0x2468 /* minix V2 - not implemented */ +#define MINIX_VALID_FS 0x0001 /* Clean fs. */ +#define MINIX_ERROR_FS 0x0002 /* fs has errors. */ + +#define MINIX_INODES_PER_BLOCK ((MINIX_BLOCK_SIZE)/(sizeof (struct minix_inode))) + +struct minix_inode { + unsigned short i_mode; + unsigned short i_uid; + unsigned long i_size; + unsigned long i_time; + unsigned char i_gid; + unsigned char i_nlinks; + unsigned short i_zone[9]; +}; + +/* + * The new minix inode has all the time entries, as well as + * long block numbers and a third indirect block (7+1+1+1 + * instead of 7+1+1). Also, some previously 8-bit values are + * now 16-bit. The inode is now 64 bytes instead of 32. + */ +struct new_minix_inode { + unsigned short i_mode; + unsigned short i_nlinks; + unsigned short i_uid; + unsigned short i_gid; + unsigned long i_size; + unsigned long i_atime; + unsigned long i_mtime; + unsigned long i_ctime; + unsigned long i_zone[10]; +}; + +/* + * minix super-block data on disk + */ +struct minix_super_block { + unsigned short s_ninodes; + unsigned short s_nzones; + unsigned short s_imap_blocks; + unsigned short s_zmap_blocks; + unsigned short s_firstdatazone; + unsigned short s_log_zone_size; + unsigned long s_max_size; + unsigned short s_magic; + unsigned short s_state; +}; + +struct minix_dir_entry { + unsigned short inode; + char name[0]; +}; + +struct minix_directory_entry { + unsigned short inode; + char name[14]; +}; + +#define MINIX_NIADDR 2 + +typedef unsigned short minix_daddr_t; + +#endif diff --git a/serverboot/minix_super.h b/serverboot/minix_super.h new file mode 100644 index 00000000..144cf064 --- /dev/null +++ b/serverboot/minix_super.h @@ -0,0 +1,49 @@ +/* + * minix_super.h + * stolen from the Linux distribution + * Copyright 
(C) 1994 Linus Torvalds + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _LINUX_MINIX_FS_H +#define _LINUX_MINIX_FS_H + +struct minix_super_block { + unsigned short s_ninodes; + unsigned short s_nzones; + unsigned short s_imap_blocks; + unsigned short s_zmap_blocks; + unsigned short s_firstdatazone; + unsigned short s_log_zone_size; + unsigned long s_max_size; + unsigned short s_magic; + unsigned short s_state; +}; + + +struct minix_inode { + unsigned short i_mode; + unsigned short i_uid; + unsigned long i_size; + unsigned long i_time; + unsigned char i_gid; + unsigned char i_nlinks; + unsigned short i_zone[9]; +}; + +#define MINIX_NIADDR 2 + +#endif diff --git a/serverboot/panic.c b/serverboot/panic.c new file mode 100644 index 00000000..87428429 --- /dev/null +++ b/serverboot/panic.c @@ -0,0 +1,60 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. 
+ * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +#include <mach/port.h> +#include <varargs.h> +#include <stdio.h> +#include <errno.h> + +static mach_port_t master_host_port; + +panic_init(port) + mach_port_t port; +{ + master_host_port = port; +} + +/*VARARGS1*/ +panic(s, va_alist) + char *s; + va_dcl +{ + va_list listp; + + clearerr (stdout); + printf("%s: panic: ", program_invocation_name); + va_start(listp); + vprintf(s, listp); + va_end(listp); + printf("\n"); + +#ifdef PC532 + { int l; for (l=0;l < 1000000;l++) ; } +#endif PC532 +#define RB_DEBUGGER 0x1000 /* enter debugger NOW */ + (void) host_reboot(master_host_port, RB_DEBUGGER); + for (;;); +} diff --git a/serverboot/queue.h b/serverboot/queue.h new file mode 100644 index 00000000..3e93476f --- /dev/null +++ b/serverboot/queue.h @@ -0,0 +1,316 @@ +/* + * Mach Operating System + * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon rights + * to redistribute these changes. + */ +/* + * File: queue.h + * Author: Avadis Tevanian, Jr. + * Date: 1985 + * + * Type definitions for generic queues. + * + */ + +#ifndef _QUEUE_H_ +#define _QUEUE_H_ + +/* + * Queue of abstract objects. Queue is maintained + * within that object. + * + * Supports fast removal from within the queue. + * + * How to declare a queue of elements of type "foo_t": + * In the "*foo_t" type, you must have a field of + * type "queue_chain_t" to hold together this queue. + * There may be more than one chain through a + * "foo_t", for use by different queues. + * + * Declare the queue as a "queue_t" type. + * + * Elements of the queue (of type "foo_t", that is) + * are referred to by reference, and cast to type + * "queue_entry_t" within this module. + */ + +/* + * A generic doubly-linked list (queue). + */ + +struct queue_entry { + struct queue_entry *next; /* next element */ + struct queue_entry *prev; /* previous element */ +}; + +typedef struct queue_entry *queue_t; +typedef struct queue_entry queue_head_t; +typedef struct queue_entry queue_chain_t; +typedef struct queue_entry *queue_entry_t; + +/* + * Macro: queue_init + * Function: + * Initialize the given queue. 
+ * Header: + * void queue_init(q) + * queue_t q; / * MODIFIED * / + */ +#define queue_init(q) ((q)->next = (q)->prev = q) + +/* + * Macro: queue_first + * Function: + * Returns the first entry in the queue, + * Header: + * queue_entry_t queue_first(q) + * queue_t q; / * IN * / + */ +#define queue_first(q) ((q)->next) + +/* + * Macro: queue_next + * Function: + * Returns the entry after an item in the queue. + * Header: + * queue_entry_t queue_next(qc) + * queue_t qc; + */ +#define queue_next(qc) ((qc)->next) + +/* + * Macro: queue_last + * Function: + * Returns the last entry in the queue. + * Header: + * queue_entry_t queue_last(q) + * queue_t q; / * IN * / + */ +#define queue_last(q) ((q)->prev) + +/* + * Macro: queue_prev + * Function: + * Returns the entry before an item in the queue. + * Header: + * queue_entry_t queue_prev(qc) + * queue_t qc; + */ +#define queue_prev(qc) ((qc)->prev) + +/* + * Macro: queue_end + * Function: + * Tests whether a new entry is really the end of + * the queue. + * Header: + * boolean_t queue_end(q, qe) + * queue_t q; + * queue_entry_t qe; + */ +#define queue_end(q, qe) ((q) == (qe)) + +/* + * Macro: queue_empty + * Function: + * Tests whether a queue is empty. + * Header: + * boolean_t queue_empty(q) + * queue_t q; + */ +#define queue_empty(q) queue_end((q), queue_first(q)) + + +/*----------------------------------------------------------------*/ +/* + * Macros that operate on generic structures. The queue + * chain may be at any location within the structure, and there + * may be more than one chain. + */ + +/* + * Macro: queue_enter + * Function: + * Insert a new element at the tail of the queue. 
+ * Header: + * void queue_enter(q, elt, type, field) + * queue_t q; + * <type> elt; + * <type> is what's in our queue + * <field> is the chain field in (*<type>) + */ +#define queue_enter(head, elt, type, field) \ +{ \ + register queue_entry_t prev; \ + \ + prev = (head)->prev; \ + if ((head) == prev) { \ + (head)->next = (queue_entry_t) (elt); \ + } \ + else { \ + ((type)prev)->field.next = (queue_entry_t)(elt);\ + } \ + (elt)->field.prev = prev; \ + (elt)->field.next = head; \ + (head)->prev = (queue_entry_t) elt; \ +} + +/* + * Macro: queue_enter_first + * Function: + * Insert a new element at the head of the queue. + * Header: + * void queue_enter_first(q, elt, type, field) + * queue_t q; + * <type> elt; + * <type> is what's in our queue + * <field> is the chain field in (*<type>) + */ +#define queue_enter_first(head, elt, type, field) \ +{ \ + register queue_entry_t next; \ + \ + next = (head)->next; \ + if ((head) == next) { \ + (head)->prev = (queue_entry_t) (elt); \ + } \ + else { \ + ((type)next)->field.prev = (queue_entry_t)(elt);\ + } \ + (elt)->field.next = next; \ + (elt)->field.prev = head; \ + (head)->next = (queue_entry_t) elt; \ +} + +/* + * Macro: queue_field [internal use only] + * Function: + * Find the queue_chain_t (or queue_t) for the + * given element (thing) in the given queue (head) + */ +#define queue_field(head, thing, type, field) \ + (((head) == (thing)) ? (head) : &((type)(thing))->field) + +/* + * Macro: queue_remove + * Function: + * Remove an arbitrary item from the queue. 
+ * Header: + * void queue_remove(q, qe, type, field) + * arguments as in queue_enter + */ +#define queue_remove(head, elt, type, field) \ +{ \ + register queue_entry_t next, prev; \ + \ + next = (elt)->field.next; \ + prev = (elt)->field.prev; \ + \ + if ((head) == next) \ + (head)->prev = prev; \ + else \ + ((type)next)->field.prev = prev; \ + \ + if ((head) == prev) \ + (head)->next = next; \ + else \ + ((type)prev)->field.next = next; \ +} + +/* + * Macro: queue_remove_first + * Function: + * Remove and return the entry at the head of + * the queue. + * Header: + * queue_remove_first(head, entry, type, field) + * entry is returned by reference + */ +#define queue_remove_first(head, entry, type, field) \ +{ \ + register queue_entry_t next; \ + \ + (entry) = (type) ((head)->next); \ + next = (entry)->field.next; \ + \ + if ((head) == next) \ + (head)->prev = (head); \ + else \ + ((type)(next))->field.prev = (head); \ + (head)->next = next; \ +} + +/* + * Macro: queue_remove_last + * Function: + * Remove and return the entry at the tail of + * the queue. + * Header: + * queue_remove_last(head, entry, type, field) + * entry is returned by reference + */ +#define queue_remove_last(head, entry, type, field) \ +{ \ + register queue_entry_t prev; \ + \ + (entry) = (type) ((head)->prev); \ + prev = (entry)->field.prev; \ + \ + if ((head) == prev) \ + (head)->next = (head); \ + else \ + ((type)(prev))->field.next = (head); \ + (head)->prev = prev; \ +} + +/* + * Macro: queue_assign + */ +#define queue_assign(to, from, type, field) \ +{ \ + ((type)((from)->prev))->field.next = (to); \ + ((type)((from)->next))->field.prev = (to); \ + *to = *from; \ +} + +/* + * Macro: queue_iterate + * Function: + * iterate over each item in the queue. + * Generates a 'for' loop, setting elt to + * each item in turn (by reference). 
+ * Header: + * queue_iterate(q, elt, type, field) + * queue_t q; + * <type> elt; + * <type> is what's in our queue + * <field> is the chain field in (*<type>) + */ +#define queue_iterate(head, elt, type, field) \ + for ((elt) = (type) queue_first(head); \ + !queue_end((head), (queue_entry_t)(elt)); \ + (elt) = (type) queue_next(&(elt)->field)) + + + +#endif _QUEUE_H_ diff --git a/serverboot/strfcns.c b/serverboot/strfcns.c new file mode 100644 index 00000000..82a76728 --- /dev/null +++ b/serverboot/strfcns.c @@ -0,0 +1,75 @@ +/* + * Mach Operating System + * Copyright (c) 1991 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * Character subroutines + */ + +#include <varargs.h> + +#define EXPORT_BOOLEAN +#include <mach/boolean.h> + +/* + * Concatenate a group of strings together into a buffer. + * Return a pointer to the trailing '\0' character in + * the result string. + * The list of strings ends with a '(char *)0'. 
+ */ +/*VARARGS1*/ +char * +strbuild(dest, va_alist) + register char * dest; + va_dcl +{ + va_list argptr; + register char * src; + register int c; + + va_start(argptr); + while ((src = va_arg(argptr, char *)) != (char *)0) { + + while ((c = *src++) != '\0') + *dest++ = c; + } + *dest = '\0'; + return (dest); +} + +/* + * Return TRUE if string 2 is a prefix of string 1. + */ +boolean_t +strprefix(s1, s2) + register char *s1, *s2; +{ + register int c; + + while ((c = *s2++) != '\0') { + if (c != *s1++) + return (FALSE); + } + return (TRUE); +} diff --git a/serverboot/wiring.c b/serverboot/wiring.c new file mode 100644 index 00000000..585a3075 --- /dev/null +++ b/serverboot/wiring.c @@ -0,0 +1,175 @@ +/* + * Mach Operating System + * Copyright (c) 1991 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ +/* + * Package to wire current task's memory. 
+ */ +#include <mach.h> +#include <mach_init.h> +#include <mach/machine/vm_param.h> + +mach_port_t this_task; /* our task */ +mach_port_t priv_host_port = MACH_PORT_NULL; + /* the privileged host port */ + +void +wire_setup(host_priv) + mach_port_t host_priv; +{ + priv_host_port = host_priv; + this_task = mach_task_self(); +} + +void +wire_memory(start, size, prot) + vm_address_t start; + vm_size_t size; + vm_prot_t prot; +{ + kern_return_t kr; + + if (priv_host_port == MACH_PORT_NULL) + return; + + kr = vm_wire(priv_host_port, + this_task, + start, size, prot); + if (kr != KERN_SUCCESS) + panic("mem_wire: %d", kr); +} + +void +wire_thread() +{ + kern_return_t kr; + + if (priv_host_port == MACH_PORT_NULL) + return; + + kr = thread_wire(priv_host_port, + mach_thread_self(), + TRUE); + if (kr != KERN_SUCCESS) + panic("wire_thread: %d", kr); +} + +void +wire_all_memory() +{ + register kern_return_t kr; + vm_offset_t address; + vm_size_t size; + vm_prot_t protection; + vm_prot_t max_protection; + vm_inherit_t inheritance; + boolean_t is_shared; + memory_object_name_t object; + vm_offset_t offset; + + if (priv_host_port == MACH_PORT_NULL) + return; + + /* iterate thru all regions, wiring */ + address = 0; + while ( + (kr = vm_region(this_task, &address, + &size, + &protection, + &max_protection, + &inheritance, + &is_shared, + &object, + &offset)) + == KERN_SUCCESS) + { + if (MACH_PORT_VALID(object)) + (void) mach_port_deallocate(this_task, object); + if (protection != VM_PROT_NONE) + { + /* The VM system cannot cope with a COW fault on another + unrelated virtual copy happening later when we have + wired down the original page. So we must touch all our + pages before wiring to make sure that only we will ever + use them. 
*/ + void *page; + if (!(protection & VM_PROT_WRITE)) + { + kr = vm_protect(this_task, address, size, + 0, max_protection); + } + for (page = (void *) address; + page < (void *) (address + size); + page += vm_page_size) + *(volatile int *) page = *(int *) page; + + wire_memory(address, size, protection); + + if (!(protection & VM_PROT_WRITE)) + { + kr = vm_protect(this_task, address, size, + 0, protection); + } + } + address += size; + } +} + +/* + * Alias for vm_allocate to return wired memory. + */ +kern_return_t +vm_allocate(task, address, size, anywhere) + task_t task; + vm_address_t *address; + vm_size_t size; + boolean_t anywhere; +{ + kern_return_t kr; + + if (anywhere) + *address = VM_MIN_ADDRESS; + kr = vm_map(task, + address, size, (vm_offset_t) 0, anywhere, + MEMORY_OBJECT_NULL, (vm_offset_t)0, FALSE, + VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); + if (kr != KERN_SUCCESS) + return kr; + + if (task == this_task) + (void) vm_wire(priv_host_port, task, *address, size, + VM_PROT_DEFAULT); + return KERN_SUCCESS; +} + +/* Other versions of this function in libc... */ +kern_return_t +__vm_allocate (task, address, size, anywhere) + task_t task; + vm_address_t *address; + vm_size_t size; + boolean_t anywhere; +{ + return vm_allocate (task, address, size, anywhere); +} diff --git a/serverboot/wiring.h b/serverboot/wiring.h new file mode 100644 index 00000000..b5f8e53f --- /dev/null +++ b/serverboot/wiring.h @@ -0,0 +1,35 @@ +/* + * Mach Operating System + * Copyright (c) 1991 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS + * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon the
+ * rights to redistribute these changes.
+ */
+/*
+ * Package to wire current task's memory.
+ *
+ * The declarations below are old-style (no prototypes); the
+ * argument lists are spelled out in comments only.
+ *
+ * wire_setup	   records the privileged host port and this task's
+ *		   own port.  The other three routines return without
+ *		   doing anything while the recorded host port is
+ *		   MACH_PORT_NULL.
+ * wire_memory	   wires one address range of this task with vm_wire
+ *		   and panics if that fails.
+ * wire_thread	   wires the calling thread with thread_wire and
+ *		   panics if that fails.
+ * wire_all_memory walks this task's regions with vm_region and wires
+ *		   every region whose protection is not VM_PROT_NONE.
+ */
+#include <mach.h>
+#include <mach_init.h>
+
+extern void wire_setup(/* mach_port_t host_priv */);
+extern void wire_memory(/* vm_address_t start, vm_size_t size, vm_prot_t prot */);
+extern void wire_thread(/* no arguments */);
+extern void wire_all_memory(/* no arguments */); |